# Experiment spec "cartpole-ppo": runs PPO on CartPole-v0 and grid-searches
# two SGD hyperparameters (keys follow the Ray Tune / RLlib layout).
# NOTE(review): nesting was reconstructed from a flattened copy of this file
# (blame timestamps and column separators removed) — confirm against the
# upstream version before relying on it.
cartpole-ppo:
  env: CartPole-v0
  run: PPO
  # NOTE(review): presumably the grid below is repeated this many times —
  # confirm against the Tune `num_samples` semantics.
  num_samples: 3
  # Stopping thresholds. NOTE(review): presumably a trial ends as soon as
  # either one is reached — confirm.
  stop:
    episode_reward_mean: 200
    time_total_s: 180
  config:
    # Works for both torch and tf.
    framework: tf
    num_workers: 1
    # The two grid_search lists below give 2 x 3 = 6 combinations.
    num_sgd_iter:
      grid_search: [1, 4]
    sgd_minibatch_size:
      grid_search: [128, 256, 512]
    observation_filter: MeanStdFilter