# Source: ray/rllib/tuned_examples/hopper-ppo.yaml (14 lines, 333 B, YAML)

# PPO experiment on the Hopper-v1 environment.
# Layout: <experiment name> -> env / run / config, with the algorithm
# settings nested under `config`. The indentation is significant: if these
# keys are flush-left, `hopper-ppo` and `config` parse as null and every
# setting becomes an unrelated top-level key.
hopper-ppo:
  env: Hopper-v1
  run: PPO
  config:
    gamma: 0.995
    kl_coeff: 1.0
    num_sgd_iter: 20
    lr: 0.0001
    # Minibatch size is smaller than the train batch, so each SGD pass
    # covers the batch in several minibatches.
    sgd_minibatch_size: 32768
    train_batch_size: 160000
    # NOTE(review): 64 workers / 4 GPUs presumably target a multi-node
    # cluster; confirm available resources before running as-is.
    num_workers: 64
    num_gpus: 4
    batch_mode: complete_episodes
    observation_filter: MeanStdFilter