# Experiment definition: runs APPO on the PongNoFrameskip-v4 environment.
appo-pongnoframeskip-v4:
  env: PongNoFrameskip-v4
  run: APPO

  # Minimum mean episode reward and total timesteps that must be reached
  # (within the `time_total_s` budget given under `stop`) to pass this test.
  pass_criteria:
    episode_reward_mean: 18.0
    timesteps_total: 5000000

  # Stopping condition: wall-clock budget in seconds (3600 s = 1 hour).
  stop:
    time_total_s: 3600

  # Settings for the APPO run.
  # NOTE(review): presumably forwarded unchanged to the algorithm by the test
  # runner -- confirm against the consumer of this file.
  config:
    vtrace: true
    use_kl_loss: false
    rollout_fragment_length: 50
    train_batch_size: 750
    num_workers: 31
    broadcast_interval: 1
    max_sample_requests_in_flight_per_worker: 1
    num_multi_gpu_tower_stacks: 1
    num_envs_per_worker: 8
    num_sgd_iter: 2
    vf_loss_coeff: 1.0
    clip_param: 0.3
    num_gpus: 1
    grad_clip: 10
    model:
      dim: 42