---
# Runs on a single g3.16xl node
# See https://github.com/ray-project/rl-experiments for results

# Experiment definition: PPO on four Atari environments.
atari-ppo:
  env:
    # Presumably expanded by the tuning framework into one trial per
    # environment listed here -- confirm against the consumer of this file.
    grid_search:
      - BreakoutNoFrameskip-v4
      - BeamRiderNoFrameskip-v4
      - QbertNoFrameskip-v4
      - SpaceInvadersNoFrameskip-v4
  run: PPO
  config:
    # Loss / advantage-estimation coefficients.
    lambda: 0.95
    kl_coeff: 0.5
    clip_rewards: true
    clip_param: 0.1
    vf_clip_param: 10.0
    entropy_coeff: 0.01

    # Batch sizing. Note that
    #   num_workers (10) * num_envs_per_worker (5)
    #     * rollout_fragment_length (100) = 5000 = train_batch_size,
    # and train_batch_size / sgd_minibatch_size = 10 minibatches.
    # NOTE(review): assumes these values are meant to stay in that
    # relationship -- re-check all of them when changing any one.
    train_batch_size: 5000
    rollout_fragment_length: 100
    sgd_minibatch_size: 500
    num_sgd_iter: 10

    # Sampling parallelism.
    num_workers: 10
    num_envs_per_worker: 5
    batch_mode: truncate_episodes
    observation_filter: NoFilter

    # NOTE(review): some consumers may expect this key nested under a
    # `model:` mapping instead of at this level -- verify against the
    # version of the library that loads this file.
    vf_share_layers: true
    num_gpus: 1