# ray/rllib/tuned_examples/td3/mujoco-td3.yaml

mujoco-td3:
    # Solve the -v2 versions of the four hardest MuJoCo tasks benchmarked in
    # the original TD3 paper. The average return over 10 trials at the end of
    # 1,000,000 timesteps (taken from Table 2 of the paper) is given in
    # parentheses after each environment name.
    #
    # Paper is at https://arxiv.org/pdf/1802.09477.pdf
    env:
        grid_search:
            - HalfCheetah-v2  # (9,532.99)
            - Hopper-v2  # (3,304.75)
            - Walker2d-v2  # (4,565.24)
            - Ant-v2  # (4,185.06)
    run: TD3
    stop:
        # Same 1,000,000-timestep budget as the paper results quoted above.
        timesteps_total: 1000000
    config:
        # Works for both torch and tf.
        framework: tf
        # === Exploration ===
        exploration_config:
            # NOTE(review): presumably the number of initial timesteps that use
            # random actions -- confirm against the RLlib exploration docs.
            random_timesteps: 10000
        # === Replay buffer ===
        replay_buffer_config:
            type: MultiAgentReplayBuffer
        # Set to the same value as random_timesteps above.
        num_steps_sampled_before_learning_starts: 10000
        # === Evaluation ===
        evaluation_interval: 10
        evaluation_duration: 10