# Basically the same as atari-ppo, but adapted for DDPPO. Note that DDPPO
# isn't actually any more efficient on Atari, since the network size is
# relatively small and the env doesn't require a GPU.
atari-ddppo:
    env:
        grid_search:
            - BreakoutNoFrameskip-v4
    run: DDPPO
    config:
        # Worker config: 10 workers, each of which requires a GPU.
        num_workers: 10
        num_gpus_per_worker: 1
        # Each worker samples 100 steps from each of its 5 envs, i.e. 500
        # steps per optimization round, or 5000 steps summed across workers.
        rollout_fragment_length: 100
        num_envs_per_worker: 5
        # Each worker takes a minibatch of 50. There are 10 workers total,
        # so the effective minibatch size will be 500.
        sgd_minibatch_size: 50
        num_sgd_iter: 10
        # Params from the standard PPO Atari config:
        lambda: 0.95
        kl_coeff: 0.5
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 10.0
        entropy_coeff: 0.01
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        vf_share_layers: true
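
# A minimal sketch of how to launch this tuned example, assuming a Ray/RLlib
# installation that provides the `rllib` CLI (the file name below is whatever
# this file is saved as):
#
#   rllib train -f atari-ddppo.yaml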