# ray/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml
#
# 11 lines
# 587 B
# YAML

# Experiment spec for running PPO on the Humanoid-v1 environment.
# Everything below is nested under the experiment name so that env/run/stop/
# resources/config belong to this experiment rather than being top-level keys.
humanoid-ppo-gae:
  env: Humanoid-v1
  run: PPO
  # Stopping criterion (presumably the trial ends once the mean episode
  # reward reaches this value — confirm against the Tune documentation).
  stop:
    episode_reward_mean: 6000
  resources:
    cpu: 64
    gpu: 4
    driver_cpu_limit: 4
  # Algorithm configuration passed to the PPO runner.
  config:
    # NOTE(review): given the "gae" in the experiment name, this is presumably
    # the GAE lambda parameter — confirm.
    lambda: 0.95
    clip_param: 0.2
    kl_coeff: 1.0
    num_sgd_iter: 20
    sgd_stepsize: 0.0001
    sgd_batchsize: 32768
    horizon: 5000
    # Four device entries, matching `resources.gpu: 4` and
    # `tf_session_args.device_count.GPU: 4`.
    devices:
      - "/gpu:0"
      - "/gpu:1"
      - "/gpu:2"
      - "/gpu:3"
    tf_session_args:
      device_count:
        GPU: 4
      log_device_placement: false
      allow_soft_placement: true
    timesteps_per_batch: 320000
    # Same value as `resources.cpu` above (presumably one CPU per worker —
    # verify before changing either independently).
    num_workers: 64
    model:
      free_log_std: true
    write_logs: false