# This configuration can expect to reach -160 reward in 10k-20k timesteps
# NOTE(review): the stop threshold below (-130) is stricter than the -160
# quoted above -- confirm which figure is the intended target.
#
# NOTE(review): the experiment key says "ddpg" but `run` selects TD3; the key
# is kept as-is because other tooling may look the experiment up by this name.
pendulum-ddpg:
  env: Pendulum-v0
  run: TD3

  # Stopping criteria for the run.
  stop:
    episode_reward_mean: -130
    time_total_s: 900  # 10 minutes

  config:
    # === Model ===
    actor_hiddens: [64, 64]
    critic_hiddens: [64, 64]

    # === Exploration ===
    # learning_starts and random_timesteps are set to the same value (5000).
    learning_starts: 5000
    exploration_config:
      random_timesteps: 5000

    # === Evaluation ===
    evaluation_interval: 1
    evaluation_num_episodes: 5