---
# Behavior-cloning (BC) experiment on CartPole-v0, trained from offline data.
#
# The offline dataset must exist before this experiment is started.
# To generate it, first run:
# $ ./train.py --run=PPO --env=CartPole-v0 \
#   --stop='{"timesteps_total": 50000}' \
#   --config='{"output": "dataset", "output_config": {"format": "json", "path": "/tmp/out", "max_num_samples_per_file": 1}, "batch_mode": "complete_episodes"}'
cartpole-bc:
  env: CartPole-v0
  run: BC

  # Stopping criterion for the experiment.
  stop:
    timesteps_total: 500000

  config:
    # Both `tf` and `torch` work here.
    framework: tf

    # Evaluation on an actual environment: the settings below switch on
    # evaluation and override `input` with `sampler` for evaluation only.
    evaluation_num_workers: 1
    evaluation_interval: 1
    evaluation_config:
      input: sampler

    # Training input: the historic (offline) data written by the PPO run
    # shown in the header comment. `format` and `path` match that run's
    # `output_config`.
    input: dataset
    input_config:
      format: json
      path: /tmp/out