ray/rllib/tuned_examples/ddpg/hopper-pybullet-ddpg.yaml

# Note: HopperBulletEnv-v0 is not the same as MuJoCo's Hopper-v0.
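# Requires pybullet (`pip install pybullet`), which provides the Bullet gym environments.
# Like the other RLlib tuned examples, this file can be run through RLlib's YAML runner,
# e.g. `rllib train -f hopper-pybullet-ddpg.yaml` (exact CLI syntax varies by Ray version).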
ddpg-hopperbulletenv-v0:
    env: HopperBulletEnv-v0
    run: DDPG
    # Minimum reward and total ts (in given time_total_s) to pass this test.
    pass_criteria:
        episode_reward_mean: 120.0
        timesteps_total: 50000
    stop:
        time_total_s: 2000
    config:
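        # Two 256-unit hidden layers for both the actor and the critic; 3-step returns for the Q-targets.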
        actor_hiddens: [256, 256]
        critic_hiddens: [256, 256]
        n_step: 3
        model: {}
        gamma: 0.99
        env_config: {}
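        # Ornstein-Uhlenbeck exploration noise, annealed from full scale down to 0.02
        # over the first 10k sampled timesteps.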
        exploration_config:
            initial_scale: 1.0
            final_scale: 0.02
            scale_timesteps: 10000
            ou_base_scale: 0.1
            ou_theta: 0.15
            ou_sigma: 0.2
        min_sample_timesteps_per_iteration: 1000
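        # With target_network_update_freq set to 0, the target networks are soft-updated
        # (mixed in with factor tau) on every training update.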
        target_network_update_freq: 0
        tau: 0.001
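        # Prioritized experience replay with standard alpha/beta settings and a small 10k-transition buffer.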
        replay_buffer_config:
            type: MultiAgentPrioritizedReplayBuffer
            capacity: 10000
            prioritized_replay_alpha: 0.6
            prioritized_replay_beta: 0.4
            prioritized_replay_eps: 0.000001
            worker_side_prioritization: false
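        # Sample 500 timesteps before running the first learning update.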
        num_steps_sampled_before_learning_starts: 500
        clip_rewards: False
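        # Learning rates for the actor and critic optimizers; squared-error critic loss
        # (Huber disabled) plus light L2 regularization.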
        actor_lr: 0.001
        critic_lr: 0.001
        use_huber: False
        huber_threshold: 1.0
        l2_reg: 0.000001
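        # Sample one env step at a time on the local worker only (num_workers: 0);
        # train on mini-batches of 48 transitions, with no GPUs assigned to workers.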
        rollout_fragment_length: 1
        train_batch_size: 48
        num_workers: 0
        num_gpus_per_worker: 0