# This can reach 18-19 reward in ~3 minutes on a p3.16xl head with m4.16xl
# workers. Observed time to reach that reward by worker count:
#   128 workers -> 3 minutes (best case)
#    64 workers -> 4 minutes
#    32 workers -> 7 minutes
# See also: pong-impala.yaml, pong-impala-vectorized.yaml
pong-impala-fast:
  env: PongNoFrameskip-v4
  run: IMPALA
  config:
    sample_batch_size: 50
    train_batch_size: 1000
    # Matches the 128-worker "best case" timing quoted in the header.
    num_workers: 128
    num_envs_per_worker: 5
    broadcast_interval: 5
    max_sample_requests_in_flight_per_worker: 1
    num_data_loader_buffers: 4
    num_gpus: 2
    model:
      # NOTE(review): presumably the side length of the resized observation
      # frames - confirm against the model config documentation.
      dim: 42