ray/rllib/tuned_examples/ppo/walker2d-ppo.yaml
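# Tuned PPO config for the MuJoCo Walker2d-v1 environment.
# Usage note (assumption, not part of the original file): tuned-example YAMLs
# like this are typically launched with the RLlib CLI, e.g.
# `rllib train -f walker2d-ppo.yaml`; the exact command may differ across
# Ray/RLlib versions.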

walker2d-v1-ppo:
    env: Walker2d-v1
    run: PPO
    config:
        # Works for both torch and tf.
        framework: tf
        # Initial coefficient for the KL-divergence penalty.
        kl_coeff: 1.0
        # Number of SGD epochs (passes over each train batch).
        num_sgd_iter: 20
        lr: .0001
        # Minibatch size used within each SGD epoch.
        sgd_minibatch_size: 32768
        # Total timesteps collected per training iteration.
        train_batch_size: 320000
        # Parallel rollout workers and GPUs for the trainer.
        num_workers: 64
        num_gpus: 4
        # Rollout fragments contain only complete episodes.
        batch_mode: complete_episodes
        # Normalize observations with a running mean/std filter.
        observation_filter: MeanStdFilter