# ray/rllib/tuned_examples/pong-rainbow.yaml

# Experiment definition for the "pong-deterministic-rainbow" run: DQN on
# PongDeterministic-v4, stopping once episode_reward_mean reaches 20.
#
# NOTE(review): the nesting below (env/run/stop/config under the experiment
# name, model under config) was reconstructed from the key names and their
# order; confirm against the consumer's expected schema.
pong-deterministic-rainbow:
  env: PongDeterministic-v4
  run: DQN
  # Stopping criterion for the run.
  stop:
    episode_reward_mean: 20
  # Settings handed to the algorithm named in `run`.
  config:
    num_atoms: 51
    noisy: true
    gamma: 0.99
    lr: 0.0001
    hiddens: [512]
    learning_starts: 10000
    buffer_size: 50000
    sample_batch_size: 4
    train_batch_size: 32
    schedule_max_timesteps: 2000000
    # NOTE(review): final eps 0.0 with a near-zero fraction presumably turns
    # epsilon exploration off in favour of `noisy: true` -- confirm.
    exploration_final_eps: 0.0
    exploration_fraction: 0.000001
    target_network_update_freq: 500
    prioritized_replay: true
    prioritized_replay_alpha: 0.5
    beta_annealing_fraction: 0.2
    final_prioritized_replay_beta: 1.0
    n_step: 3
    gpu: true
    # Model / observation preprocessing options.
    model:
      grayscale: true
      zero_mean: false
      dim: 42