pong-deterministic-rainbow:
    env: PongDeterministic-v4
    run: DQN
    stop:
        episode_reward_mean: 20
    config:
        num_atoms: 51
        noisy: True
        gamma: 0.99
        lr: .0001
        hiddens: [512]
        learning_starts: 10000
        buffer_size: 50000
        sample_batch_size: 4
        train_batch_size: 32
        exploration_config:
            epsilon_timesteps: 2
            final_epsilon: 0.0
        target_network_update_freq: 500
        prioritized_replay: True
        prioritized_replay_alpha: 0.5
        final_prioritized_replay_beta: 1.0
        prioritized_replay_beta_annealing_timesteps: 400000
        n_step: 3
        gpu: True
        model:
            grayscale: True
            zero_mean: False
            dim: 42
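
# Usage note: a tuned-example file of this shape can typically be launched with
# the RLlib CLI (the filename below is assumed from the experiment key above):
#   rllib train -f pong-deterministic-rainbow.yaml
# This runs DQN with the Rainbow extensions configured here (51-atom
# distributional head, noisy networks, prioritized replay, 3-step returns)
# until episode_reward_mean reaches 20, as set in the stop block.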