# Experiment spec: trains APPO on CartPole-v0 with V-trace enabled
# (see `run`, `env` and `config.vtrace` below).
#
# NOTE(review): the nesting below was restored from a flattened copy of this
# file; confirm it matches the upstream layout before relying on it.
cartpole-appo-vtrace:
    env: CartPole-v0
    run: APPO
    # NOTE(review): presumably the run ends as soon as either criterion is
    # reached -- confirm against the experiment runner.
    stop:
        episode_reward_mean: 180
        timesteps_total: 200000
    config:
        # Works for both torch and tf; tf is the one selected here.
        framework: tf
        num_envs_per_worker: 5
        num_workers: 4
        # No GPUs are requested for this run.
        num_gpus: 0
        observation_filter: MeanStdFilter
        num_sgd_iter: 1
        vf_loss_coeff: 0.01
        vtrace: true
        vtrace_drop_last_ts: false
        # NOTE(review): presumably describes a single 32-unit fully connected
        # layer with a linear activation, shared with the value function --
        # confirm against the model catalog documentation.
        model:
            fcnet_hiddens: [32]
            fcnet_activation: linear
            vf_share_layers: true