# This can be expected to reach 20.8 reward within an hour when using a V100
# GPU (e.g. p3.2xl instance on AWS, and m4.4xl workers). It can also reach
# ~21 reward within an hour with fewer workers (e.g. 4-8), but less reliably.
pong-apex:
  env: PongNoFrameskip-v4
  run: APEX
  config:
    target_network_update_freq: 50000
    num_workers: 32
    # Vectorization within processes can also be enabled:
    # num_envs_per_worker: 4
    lr: 0.0001
    gamma: 0.99