# ray/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml

# Experiment spec: runs PPO on the Humanoid-v1 environment.
# NOTE(review): the "-gae" suffix together with `lambda` below suggests
# generalized advantage estimation - confirm against the PPO agent.
humanoid-ppo-gae:
    env: Humanoid-v1
    run: PPO
    # Stopping criterion, keyed on the mean episode reward.
    stop:
        episode_reward_mean: 6000
    # Requested resources. `cpu` equals config.num_workers (64) and `gpu`
    # equals the number of entries in config.devices (4); presumably these
    # should be changed together - verify before editing one alone.
    resources:
        cpu: 64
        gpu: 4
        driver_cpu_limit: 4
    # Agent configuration, expanded from a single-line flow mapping into
    # block style; keys and values are unchanged.
    config:
        lambda: 0.95
        clip_param: 0.2
        kl_coeff: 1.0
        num_sgd_iter: 20
        sgd_stepsize: 0.0001
        sgd_batchsize: 32768
        horizon: 5000
        # One device string per GPU requested above.
        devices:
            - "/gpu:0"
            - "/gpu:1"
            - "/gpu:2"
            - "/gpu:3"
        tf_session_args:
            # GPU count here matches resources.gpu and len(devices).
            device_count:
                GPU: 4
            log_device_placement: false
            allow_soft_placement: true
        timesteps_per_batch: 320000
        num_workers: 64
        model:
            free_log_std: true
        write_logs: false