# To generate training data, first run:
# $ ./train.py --run=PPO --env=CartPole-v0 \
#     --stop='{"timesteps_total": 50000}' \
#     --config='{"output": "/tmp/out", "batch_mode": "complete_episodes"}'
cartpole-marwil:
  env: CartPole-v0
  run: MARWIL
  stop:
    timesteps_total: 500000
  config:
    # Works for both torch and tf.
    framework: tf
    # In order to evaluate on an actual environment, use these following
    # settings:
    evaluation_num_workers: 1
    evaluation_interval: 1
    # Evaluate on live rollouts, not the offline data.
    evaluation_config:
      input: sampler
    beta: 1.0  # Compare to behavior cloning (beta=0.0).
    # The historic (offline) data file from the PPO run (at the top).
    input: /tmp/out