# Note: HopperBulletEnv-v0 is not the same as MuJoCo's Hopper-v0.
ddpg-hopperbulletenv-v0:
  env: HopperBulletEnv-v0
  run: DDPG
  # Minimum reward and total ts (in given time_total_s) to pass this test.
  pass_criteria:
    episode_reward_mean: 120.0
    timesteps_total: 50000
  stop:
    time_total_s: 2000
  config:
    actor_hiddens: [256, 256]
    critic_hiddens: [256, 256]
    n_step: 3
    model: {}
    gamma: 0.99
    env_config: {}
    exploration_config:
      initial_scale: 1.0
      final_scale: 0.02
      scale_timesteps: 10000
      ou_base_scale: 0.1
      ou_theta: 0.15
      ou_sigma: 0.2
    min_sample_timesteps_per_iteration: 1000
    target_network_update_freq: 0
    tau: 0.001
    # NOTE(review): the nesting below was reconstructed from key order;
    # `worker_side_prioritization` and `learning_starts` are assumed to
    # belong to the replay buffer settings -- confirm against the RLlib
    # version this file targets.
    replay_buffer_config:
      type: MultiAgentPrioritizedReplayBuffer
      capacity: 10000
      prioritized_replay_alpha: 0.6
      prioritized_replay_beta: 0.4
      prioritized_replay_eps: 0.000001
      worker_side_prioritization: false
      learning_starts: 500
    clip_rewards: false
    actor_lr: 0.001
    critic_lr: 0.001
    use_huber: false
    huber_threshold: 1.0
    l2_reg: 0.000001
    rollout_fragment_length: 1
    train_batch_size: 48
    num_workers: 0
    num_gpus_per_worker: 0