ray/rllib/tuned_examples/apex_dqn/pong-apex-dqn.yaml

# This reaches ~19 reward in < 40 minutes (3M env steps) on a p3.8xlarge AWS instance.
# See https://app.wandb.ai/zplizzi/test/runs/ayuuhixr?workspace=user-zplizzi
# for training curves.
pong-apex:
  env: PongNoFrameskip-v4
  run: APEX
  stop:
    episode_reward_mean: 19.0
    timesteps_total: 4000000
  config:
    # Works for both torch and tf.
    framework: tf
    target_network_update_freq: 20000
    num_workers: 4
    num_envs_per_worker: 8
    lr: .00005
    train_batch_size: 64
    replay_buffer_config:
      type: MultiAgentPrioritizedReplayBuffer
      capacity: 1000000
    # Compress observations: at this capacity, an uncompressed buffer of
    # 84x84x4 uint8 Atari frames would need roughly 28 GB (1M x ~28 KB),
    # more memory than most machines can hold.
    compress_observations: True
    gamma: 0.99
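    # Ratio of timesteps trained on to timesteps sampled by the rollout
    # workers; higher values replay buffered experience more aggressively.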
    training_intensity: 16
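
# Usage sketch (not part of the tuned example itself): on Ray versions of
# the same era as this config, a tuned-example YAML like this one can be
# launched with the RLlib CLI, assuming Ray/RLlib is installed and this
# file is available locally:
#   rllib train -f pong-apex-dqn.yaml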