# Tuned DDPG experiment on the HopperBulletEnv-v0 environment.
# NOTE(review): the env/run/stop/config layout looks like a Ray Tune / RLlib
# experiment spec - confirm against the loader that consumes this file.
ddpg-hopperbulletenv-v0:
    env: HopperBulletEnv-v0
    run: DDPG
    # Minimum mean episode reward and total timesteps that must be reached
    # (within the time_total_s budget given under `stop`) to pass this test.
    pass_criteria:
        episode_reward_mean: 110.0
        timesteps_total: 50000
    # Time budget in seconds (1800 s = 30 minutes).
    stop:
        time_total_s: 1800
    config:
        # Hidden-layer sizes for the actor and critic networks.
        actor_hiddens: [256, 256]
        critic_hiddens: [256, 256]
        n_step: 3
        # Empty mappings: no model or environment overrides are set here.
        model: {}
        gamma: 0.99
        env_config: {}
        # Exploration noise settings.
        # NOTE(review): the ou_* keys are presumably Ornstein-Uhlenbeck noise
        # parameters, with the noise scale going from initial_scale to
        # final_scale over scale_timesteps - verify against the exploration
        # class actually in use.
        exploration_config:
            initial_scale: 1.0
            final_scale: 0.02
            scale_timesteps: 10000
            ou_base_scale: 0.1
            ou_theta: 0.15
            ou_sigma: 0.2
        min_sample_timesteps_per_iteration: 1000
        # NOTE(review): a frequency of 0 combined with a small tau presumably
        # selects a soft target-network update on every training step -
        # confirm against the algorithm's documentation.
        target_network_update_freq: 0
        tau: 0.001
        # Prioritized replay buffer settings.
        replay_buffer_config:
            capacity: 10000
            type: MultiAgentPrioritizedReplayBuffer
            prioritized_replay_alpha: 0.6
            prioritized_replay_beta: 0.4
            prioritized_replay_eps: 0.000001
            learning_starts: 500
            worker_side_prioritization: false
        clip_rewards: false
        # Optimizer and loss settings.
        actor_lr: 0.001
        critic_lr: 0.001
        use_huber: true
        huber_threshold: 1.0
        l2_reg: 0.000001
        rollout_fragment_length: 1
        train_batch_size: 48
        # Resources: one GPU overall, zero workers, no per-worker GPUs.
        # NOTE(review): num_workers: 0 presumably means sampling runs in the
        # driver process - confirm.
        num_gpus: 1
        num_workers: 0
        num_gpus_per_worker: 0