# Source: ray/rllib/tuned_examples/td3/invertedpendulum-td3.yaml
# (file-viewer listing header: 26 lines, 797 B, YAML)

invertedpendulum-td3:
  # TD3 with stopping conditions and network sizes tuned specifically for
  # InvertedPendulum. Should be able to reach 1,000 reward (the maximum
  # achievable) in 10,000 to 20,000 steps.
  env: InvertedPendulum-v2
  run: TD3
  # The three stopping criteria for the run.
  stop:
    # NOTE(review): the comment above gives 1,000 as the maximum achievable
    # reward, so a threshold of 9999.9 looks unreachable and the run would
    # then end on one of the two limits below. Confirm whether this is
    # deliberate or whether 999.9 was intended.
    episode_reward_mean: 9999.9
    time_total_s: 900  # 15 minutes
    timesteps_total: 1000000
  config:
    # Works for both torch and tf.
    framework: tf

    # === Model ===
    # Actor and critic use the same hidden-layer sizes.
    actor_hiddens: [32, 32]
    critic_hiddens: [32, 32]

    # === Exploration ===
    # learning_starts and random_timesteps are both set to 1000.
    replay_buffer_config:
      learning_starts: 1000
    exploration_config:
      random_timesteps: 1000

    # === Evaluation ===
    evaluation_interval: 10
    evaluation_duration: 5