# Tune experiment: TD3 on the four hardest MuJoCo tasks from the TD3 paper.
mujoco-td3:
  # Solve latest versions of the four hardest Mujoco tasks benchmarked in the
  # original TD3 paper. Average return over 10 trials at the end of 1,000,000
  # timesteps (taken from Table 2 of the paper) is given in parens at the end
  # of each environment name.
  #
  # Paper is at https://arxiv.org/pdf/1802.09477.pdf
  env:
    # One trial is launched per environment listed here.
    grid_search:
      - HalfCheetah-v2  # (9,532.99)
      - Hopper-v2  # (3,304.75)
      - Walker2d-v2  # (4,565.24)
      - Ant-v2  # (4,185.06)
  run: TD3
  stop:
    # Same sample budget as the paper's reported results (see header above).
    timesteps_total: 1000000
  config:
    # Works for both torch and tf.
    framework: tf
    # === Exploration ===
    exploration_config:
      random_timesteps: 10000
    replay_buffer_config:
      type: MultiAgentReplayBuffer
      # NOTE(review): nesting of `learning_starts` under `replay_buffer_config`
      # was reconstructed from key order in a flattened file; confirm against
      # the RLlib version in use (older releases expect it directly under
      # `config`).
      learning_starts: 10000
    # === Evaluation ===
    evaluation_interval: 10
    evaluation_duration: 10