---
# Experiment: APEX_DDPG on the Pendulum-v0 environment.
# Expected to reach a mean episode reward of -160 in ~250 seconds on a K40 GPU.
# NOTE(review): the original comment read "within 2.5 timesteps"; a fractional
# timestep count looks like a dropped unit - confirm the intended number.
pendulum-apex-ddpg:
  env: Pendulum-v0
  run: APEX_DDPG
  # Stopping criterion; -160 is the reward target named in the header comment.
  stop:
    episode_reward_mean: -160
  config:
    # Works for both torch and tf.
    framework: tf
    use_huber: true
    clip_rewards: false
    num_workers: 16
    n_step: 1
    # NOTE(review): tau: 1.0 combined with a fixed update frequency presumably
    # means the target network is fully overwritten at each update - confirm
    # against the algorithm's config documentation.
    target_network_update_freq: 50000
    tau: 1.0
    # NOTE(review): units of the interval (training iterations?) are not
    # visible here - confirm before changing.
    evaluation_interval: 5
    evaluation_num_episodes: 10