---
# Tuned hyperparameters for PPO on the Walker2d-v1 MuJoCo environment.
# NOTE(review): keys match the Ray RLlib tuned-example schema (env / run /
# config) — confirm against the RLlib version actually consuming this file.
walker2d-v1-ppo:
  env: Walker2d-v1
  run: PPO
  config:
    # Initial coefficient for the KL-divergence penalty term.
    kl_coeff: 1.0
    # SGD passes over each collected train batch.
    num_sgd_iter: 20
    # Learning rate; written canonically as 0.0001 (was `.0001` — same float).
    lr: 0.0001
    # Minibatch size within each SGD epoch; train_batch_size is the total
    # sample count gathered per training iteration.
    sgd_minibatch_size: 32768
    train_batch_size: 320000
    # Parallel rollout workers / GPUs for the learner.
    num_workers: 64
    num_gpus: 4
    # Collect full episodes rather than truncated fragments.
    batch_mode: complete_episodes
    # Running mean/std normalization of observations.
    observation_filter: MeanStdFilter