# ray/rllib/tuned_examples/marwil/cartpole-marwil.yaml
#
# 21 lines
# 750 B
# YAML

# MARWIL on CartPole-v0, trained from offline data recorded by a PPO run.
#
# To generate the training data, first run:
# $ ./train.py --run=PPO --env=CartPole-v0 \
#     --stop='{"timesteps_total": 50000}' \
#     --config='{"output": "/tmp/out", "batch_mode": "complete_episodes"}'
cartpole-marwil:
  env: CartPole-v0
  run: MARWIL
  stop:
    timesteps_total: 500000
  config:
    # Works for both torch and tf.
    framework: tf
    # In order to evaluate on an actual environment, use the following
    # settings:
    evaluation_num_workers: 1
    evaluation_interval: 1
    evaluation_config:
      # Applies to evaluation only; training still reads the offline
      # `input` set at the end of this `config` mapping.
      input: sampler
    beta: 1.0  # Compare to behavior cloning (beta=0.0).
    # The historic (offline) data file from the PPO run (at the top).
    input: /tmp/out