# ray/rllib/tuned_examples/mujoco-td3.yaml

mujoco-td3:
    # Run TD3 on the latest versions of the four hardest MuJoCo tasks
    # benchmarked in the original TD3 paper. The average return over 10 trials
    # after 1,000,000 timesteps (taken from Table 2 of the paper) is given in
    # parentheses after each environment name.
    #
    # Paper: https://arxiv.org/pdf/1802.09477.pdf
    env:
        # NOTE(review): presumably expanded by Tune into one trial per listed
        # environment — confirm against the Tune grid_search documentation.
        grid_search:
            - HalfCheetah-v2  # (9,532.99)
            - Hopper-v2  # (3,304.75)
            - Walker2d-v2  # (4,565.24)
            - Ant-v2  # (4,185.06)
    run: TD3
    stop:
        # Same budget as the paper figures quoted above (1,000,000 timesteps).
        timesteps_total: 1000000
    config:
        # === Exploration ===
        # NOTE(review): learning_starts and random_timesteps are set to the
        # same value, presumably so that learning begins when the initial
        # random-action phase ends — confirm against the TD3 config docs.
        learning_starts: 10000
        exploration_config:
            random_timesteps: 10000

        # === Evaluation ===
        # NOTE(review): the interval is presumably counted in training
        # iterations rather than timesteps — confirm.
        evaluation_interval: 5
        evaluation_num_episodes: 10