# ray/rllib/tuned_examples/invertedpendulum-td3.yaml

invertedpendulum-td3:
  # TD3 with stopping conditions and network sizes tuned specifically for
  # InvertedPendulum. Should be able to reach 1,000 reward (the maximum
  # achievable) in 10,000 to 20,000 steps.
  env: InvertedPendulum-v2
  run: TD3
  # Stopping criteria; the trial ends as soon as any one of them is met.
  stop:
    # Kept just below the 1,000 maximum stated above so that the reward
    # criterion is actually reachable. A threshold above the maximum can
    # never fire, which would leave only the time/step budgets below to end
    # the run.
    episode_reward_mean: 999.9
    time_total_s: 900  # 15 minutes
    timesteps_total: 1000000
  config:
    # === Model ===
    # Two hidden layers of 32 units each for both the actor and the critic.
    actor_hiddens: [32, 32]
    critic_hiddens: [32, 32]

    # === Exploration ===
    # NOTE(review): presumably both values are counted in environment
    # timesteps -- confirm against the TD3 trainer's config documentation.
    learning_starts: 1000
    pure_exploration_steps: 1000

    # === Evaluation ===
    # NOTE(review): presumably evaluates after every training iteration,
    # using 5 episodes per evaluation -- confirm against the RLlib docs.
    evaluation_interval: 1
    evaluation_num_episodes: 5