# Basically the same as atari-ppo, but adapted for DDPPO. Note that DDPPO
# isn't actually any more efficient on Atari, since the network size is
# relatively small and the env doesn't require a GPU.
atari-ddppo:
    env:
        grid_search:
            - BreakoutNoFrameskip-v4
    run: DDPPO
    config:
        # DDPPO only supports PyTorch so far.
        framework: torch
        # Worker config: 10 workers, each of which requires a GPU.
        num_workers: 10
        num_gpus_per_worker: 1
        # Each worker samples rollout_fragment_length (100) * num_envs_per_worker (5)
        # = 500 steps per optimization round, i.e. 5000 steps summed across all
        # 10 workers.
        rollout_fragment_length: 100
        num_envs_per_worker: 5
        # Each worker takes minibatches of 50. There are 10 workers total,
        # so the effective minibatch size is 500.
        sgd_minibatch_size: 50
        num_sgd_iter: 10
        # Params from the standard PPO Atari config:
        lambda: 0.95
        kl_coeff: 0.5
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 10.0
        entropy_coeff: 0.01
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        model:
            vf_share_layers: true
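
# Usage note (comments only, not part of the config): since DDPPO runs the SGD
# updates directly on the workers rather than on a central learner, the driver
# itself needs no GPU; this config therefore requests 10 GPUs total (1 per
# worker). As with other RLlib tuned examples, the file can typically be
# launched via the rllib CLI, e.g.:
#   rllib train -f atari-ddppo.yaml
# The filename above is an assumption; substitute whatever path this file is
# saved under.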