# Offline-data experiment: MARWIL on CartPole-v0, trained from previously
# recorded episodes.
#
# To generate training data, first run:
# $ ./train.py --run=PPO --env=CartPole-v0 \
#   --stop='{"timesteps_total": 50000}' \
#   --config='{"use_pytorch": true, "output": "/tmp/out", "batch_mode": "complete_episodes"}'
#
# NOTE(review): the experiment name says "torch" and the data-generation
# command above sets "use_pytorch", but this config does not set a framework
# key itself -- confirm whether `use_pytorch: true` is intended here.
cartpole-marwil-torch:
  env: CartPole-v0
  run: MARWIL
  stop:
    # Stop condition for each trial.
    timesteps_total: 500000
  config:
    beta:
      grid_search: [0, 1]  # compare IL (beta=0) vs MARWIL
    # Must match the "output" path used by the data-generation command above.
    input: /tmp/out