# ray/rllib/tuned_examples/walker2d-ppo.yaml
#
# Listing metadata as captured from the file viewer: 13 lines, 319 B, YAML.

# Experiment entry "walker2d-v1-ppo": runs PPO on the Walker2d-v1 environment.
# NOTE(review): the nesting was reconstructed from a flattened listing. `env`,
# `run`, and `config` are assumed to be children of the experiment entry, and
# every setting after `config:` is assumed to be a child of `config` — confirm
# against the consumer's schema.
walker2d-v1-ppo:
  env: Walker2d-v1
  run: PPO
  config:
    kl_coeff: 1.0
    num_sgd_iter: 20
    # Same value as the original `.0001`; leading zero added for readability.
    lr: 0.0001
    sgd_minibatch_size: 32768
    train_batch_size: 320000
    num_workers: 64
    num_gpus: 4
    batch_mode: complete_episodes
    observation_filter: MeanStdFilter