# Source path: ray/rllib/tuned_examples/ppo/humanoid-ppo-gae.yaml
#
# Viewer metadata from the original listing: 23 lines, 559 B, YAML.

# Tuned-example experiment: runs PPO on the Humanoid-v1 environment and stops
# once the mean episode reward reaches 6000.
humanoid-ppo-gae:
  env: Humanoid-v1
  run: PPO
  # Stopping criterion for the run.
  stop:
    episode_reward_mean: 6000
  # Settings passed to the algorithm selected by `run`.
  config:
    # Works for both torch and tf.
    framework: tf
    gamma: 0.995
    lambda: 0.95
    clip_param: 0.2
    kl_coeff: 1.0
    num_sgd_iter: 20
    lr: 0.0001
    sgd_minibatch_size: 32768
    horizon: 5000
    train_batch_size: 320000
    # Model sub-config; only `free_log_std` belongs here, the keys that
    # follow are siblings of `model` under `config`.
    model:
      free_log_std: true
    num_workers: 64
    num_gpus: 4
    batch_mode: complete_episodes
    observation_filter: MeanStdFilter