mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
23 lines
681 B
YAML
23 lines
681 B
YAML
![]() |
invertedpendulum-td3:
  # This is a TD3 with stopping conditions and network size tuned specifically
  # for InvertedPendulum. Should be able to reach 1,000 reward (the maximum
  # achievable) in 10,000 to 20,000 steps.
  env: InvertedPendulum-v2
  run: TD3
  stop:
    # NOTE(review): 9999.9 is above the 1,000 maximum reward stated above, so
    # this criterion presumably never triggers and the run ends on one of the
    # time/timestep limits below -- confirm this is intended.
    episode_reward_mean: 9999.9
    time_total_s: 900  # 15 minutes
    timesteps_total: 1000000
  config:
    # === Model ===
    actor_hiddens: [32, 32]
    critic_hiddens: [32, 32]

    # === Exploration ===
    learning_starts: 1000
    pure_exploration_steps: 1000

    # === Evaluation ===
    evaluation_interval: 1
    evaluation_num_episodes: 5