# ray/rllib/tuned_examples/ppo/halfcheetah-ppo.yaml

halfcheetah-ppo:
    env: HalfCheetah-v2
    run: PPO
    stop:
        episode_reward_mean: 9800
        time_total_s: 10800
    config:
        # Works for both torch and tf.
        framework: tf
        gamma: 0.99
        lambda: 0.95
        kl_coeff: 1.0
        num_sgd_iter: 32
        lr: .0003
        vf_loss_coeff: 0.5
        clip_param: 0.2
        sgd_minibatch_size: 4096
        train_batch_size: 65536
        num_workers: 16
        num_gpus: 1
        grad_clip: 0.5
        num_envs_per_worker:
            grid_search: [16, 32]
        batch_mode: truncate_episodes
        observation_filter: MeanStdFilter
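
# The same experiment can also be launched from Python. The sketch below assumes
# the older Tune/RLlib API (ray.tune.run with a registered trainer name string),
# which is what tuned-example YAMLs like this one map onto; it simply mirrors the
# stop criteria and config above, including the grid search over
# num_envs_per_worker.
#
#     import ray
#     from ray import tune
#
#     ray.init()
#
#     tune.run(
#         "PPO",
#         stop={"episode_reward_mean": 9800, "time_total_s": 10800},
#         config={
#             "env": "HalfCheetah-v2",
#             "framework": "tf",
#             "gamma": 0.99,
#             "lambda": 0.95,
#             "kl_coeff": 1.0,
#             "num_sgd_iter": 32,
#             "lr": 0.0003,
#             "vf_loss_coeff": 0.5,
#             "clip_param": 0.2,
#             "sgd_minibatch_size": 4096,
#             "train_batch_size": 65536,
#             "num_workers": 16,
#             "num_gpus": 1,
#             "grad_clip": 0.5,
#             # Grid search mirrors the YAML's num_envs_per_worker sweep.
#             "num_envs_per_worker": tune.grid_search([16, 32]),
#             "batch_mode": "truncate_episodes",
#             "observation_filter": "MeanStdFilter",
#         },
#     )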