# PPO on Atari Breakout (BreakoutNoFrameskip-v4).
# A single experiment definition: which algorithm to run on which environment,
# the thresholds that decide whether the run passes, when to stop, and the
# algorithm hyperparameters.
ppo-breakoutnoframeskip-v4:
  env: BreakoutNoFrameskip-v4
  run: PPO

  # Minimum reward and total ts (in given time_total_s) to pass this test.
  pass_criteria:
    episode_reward_mean: 50.0
    timesteps_total: 7000000

  # Time budget for the run: 7200 s (2 hours). The pass criteria above must
  # be reached within this budget.
  stop:
    time_total_s: 7200

  config:
    # Objective / loss coefficients.
    lambda: 0.95
    kl_coeff: 0.5
    clip_rewards: true
    clip_param: 0.1
    vf_clip_param: 10.0
    entropy_coeff: 0.01

    # Batch sizing.
    # NOTE(review): 10 workers x 5 envs per worker x 100-step fragments
    # = 5000 steps, which equals train_batch_size; 5000 / 500 gives 10
    # minibatches per SGD pass. Presumably intentional -- keep these values
    # in sync when changing any one of them.
    train_batch_size: 5000
    rollout_fragment_length: 100
    sgd_minibatch_size: 500
    num_sgd_iter: 10

    # Sampling setup.
    num_workers: 10
    num_envs_per_worker: 5
    batch_mode: truncate_episodes
    observation_filter: NoFilter

    model:
      vf_share_layers: true

    # Trainer-level resource setting (sibling of `model`, not a model option).
    num_gpus: 1