# Can reach a reward of 18-19 in ~3 minutes on a p3.16xl head node with m4.16xl workers:
#   128 workers -> 3 minutes (best case)
#    64 workers -> 4 minutes
#    32 workers -> 7 minutes
# See also: pong-impala.yaml, pong-impala-vectorized.yaml
# Experiment definition: the IMPALA algorithm on the PongNoFrameskip-v4
# environment. Everything under `config` is passed through as algorithm
# settings.
pong-impala-fast:
  run: IMPALA
  env: PongNoFrameskip-v4
  config:
    # Worker-side settings: 128 workers x 5 envs per worker = 640
    # environments in total. 128 is the "best case" worker count from the
    # timing notes at the top of this file.
    num_workers: 128
    num_envs_per_worker: 5
    rollout_fragment_length: 50
    max_sample_requests_in_flight_per_worker: 1

    # Training-side settings.
    # NOTE(review): train_batch_size / rollout_fragment_length = 20; presumably
    # 20 fragments make up one train batch - confirm against the IMPALA docs.
    train_batch_size: 1000
    broadcast_interval: 5
    num_gpus: 2
    num_multi_gpu_tower_stacks: 4

    # NOTE(review): `dim` is presumably the side length observations are
    # resized to before reaching the model - confirm against the model
    # config docs.
    model:
      dim: 42