# Tune experiment definition: TD3 on four MuJoCo continuous-control tasks.
mujoco-td3:
  # Solve the latest versions of the four hardest MuJoCo tasks benchmarked in
  # the original TD3 paper. Average returns over 10 trials at the end of
  # 1,000,000 timesteps (taken from Table 2 of the paper) are given in parens
  # at the end of each environment name.
  #
  # Paper is at https://arxiv.org/pdf/1802.09477.pdf
  env:
    # One entry per benchmarked environment.
    grid_search:
      - HalfCheetah-v2  # (9,532.99)
      - Hopper-v2  # (3,304.75)
      - Walker2d-v2  # (4,565.24)
      - Ant-v2  # (4,185.06)
  run: TD3
  stop:
    # Same 1,000,000-timestep budget as the paper results quoted above.
    timesteps_total: 1000000
  config:
    # Works for both torch and tf.
    use_pytorch: false

    # === Exploration ===
    learning_starts: 10000
    exploration_config:
      # Same value as learning_starts above.
      random_timesteps: 10000

    # === Evaluation ===
    evaluation_interval: 5
    evaluation_num_episodes: 10