# TD3 experiment with stopping conditions and network size tuned specifically
# for InvertedPendulum. Should be able to reach 1,000 reward (the maximum
# achievable) in 10,000 to 20,000 steps.
invertedpendulum-td3:
  env: InvertedPendulum-v2
  run: TD3
  stop:
    # NOTE(review): 9999.9 is above the 1,000 maximum stated in the header
    # comment, so this criterion presumably never triggers and the run ends
    # on one of the two limits below instead -- confirm this is intentional.
    episode_reward_mean: 9999.9
    time_total_s: 900  # 15 minutes
    timesteps_total: 1000000
  config:
    # Works for both torch and tf.
    framework: tf

    # === Model ===
    actor_hiddens: [32, 32]
    critic_hiddens: [32, 32]

    # === Exploration ===
    replay_buffer_config:
      learning_starts: 1000
    exploration_config:
      random_timesteps: 1000

    # === Evaluation ===
    evaluation_interval: 10
    evaluation_duration: 5