# ray/rllib/tuned_examples/cartpole-marwil.yaml

# To generate the training data that this experiment reads from /tmp/out,
# first run:
# $ ./train.py --run=PPO --env=CartPole-v0 \
#     --stop='{"timesteps_total": 50000}' \
#     --config='{"output": "/tmp/out", "batch_mode": "complete_episodes"}'
# Experiment "cartpole-marwil": runs MARWIL on CartPole-v0 using the
# previously recorded data in /tmp/out as its input.
cartpole-marwil:
  env: CartPole-v0
  run: MARWIL
  # Stopping criterion for each run of this experiment.
  stop:
    timesteps_total: 500000
  # Algorithm configuration passed to MARWIL.
  config:
    beta:
      grid_search: [0, 1]  # compare IL (beta=0) vs MARWIL
    # Must match the "output" path used when generating the training data.
    input: /tmp/out