mirror of
https://github.com/vale981/ray
synced 2025-03-05 10:01:43 -05:00
28 lines
970 B
YAML
28 lines
970 B
YAML
mujoco-td3:
  # Solve latest versions of the four hardest Mujoco tasks benchmarked in the
  # original TD3 paper. Average returns over 10 trials at the end of 1,000,000
  # timesteps (taken from Table 2 of the paper) are given in parens at the end
  # of each environment name.
  #
  # Paper is at https://arxiv.org/pdf/1802.09477.pdf
  env:
    # Environments to sweep over; the number in parens after each name is the
    # paper's reported return for that task (see header comment).
    grid_search:
      - HalfCheetah-v2  # (9,532.99)
      - Hopper-v2  # (3,304.75)
      - Walker2d-v2  # (4,565.24)
      - Ant-v2  # (4,185.06)
  run: TD3
  # Stop at the same 1,000,000-timestep horizon the paper's numbers refer to.
  stop:
    timesteps_total: 1000000
  config:
    # Works for both torch and tf.
    framework: tf
    # === Exploration ===
    exploration_config:
      random_timesteps: 10000
    replay_buffer_config:
      type: MultiAgentReplayBuffer
    # NOTE(review): placed at the `config` level (sibling of
    # `replay_buffer_config`), not nested inside it -- confirm against the
    # RLlib version this file targets.
    num_steps_sampled_before_learning_starts: 10000
    # === Evaluation ===
    evaluation_interval: 10
    evaluation_duration: 10