# Expected result: ~20 mean episode reward within ~1.1M timesteps / 2.1 hours
# on a K80 GPU.
pong-deterministic-dqn:
  env: PongDeterministic-v4
  run: DQN

  # Stopping criteria for the experiment.
  # NOTE(review): 7200 s is 2.0 h, slightly under the 2.1 h quoted in the
  # header comment; confirm the time budget is intended to be this tight.
  stop:
    episode_reward_mean: 20
    time_total_s: 7200

  # Settings handed to the algorithm named in `run`.
  config:
    # One GPU, matching the single-K80 setup the header comment refers to.
    num_gpus: 1

    # Optimisation hyperparameters.
    gamma: 0.99
    lr: 0.0001

    # Replay and batching.
    learning_starts: 10000
    buffer_size: 50000
    sample_batch_size: 4
    train_batch_size: 32

    # Exploration schedule.
    # NOTE(review): presumably epsilon is annealed down to
    # exploration_final_eps over exploration_fraction of
    # schedule_max_timesteps; verify against the DQN documentation for the
    # library version in use.
    schedule_max_timesteps: 2000000
    exploration_final_eps: 0.01
    exploration_fraction: 0.1

    # Observation preprocessing / model options.
    # NOTE(review): `dim: 42` presumably selects 42x42 input frames; confirm.
    model:
      grayscale: true
      zero_mean: false
      dim: 42