# You can expect ~20 reward within 1.1M timesteps / 2.1 hours on a K80 GPU.
pong-deterministic-dqn:
  env: PongDeterministic-v4
  run: DQN
  # Stopping criteria for the experiment.
  # NOTE(review): if time_total_s is in seconds, 7200 is 2.0 h, slightly under
  # the ~2.1 h quoted above -- confirm the time budget is intentional.
  stop:
    episode_reward_mean: 20
    time_total_s: 7200
  # Settings handed to the algorithm named in `run`.
  config:
    num_gpus: 1
    gamma: 0.99
    lr: 0.0001
    learning_starts: 10000
    buffer_size: 50000
    rollout_fragment_length: 4
    train_batch_size: 32
    exploration_config:
      epsilon_timesteps: 200000
      final_epsilon: 0.01
    model:
      grayscale: true
      zero_mean: false
      dim: 42