# ray/rllib/tuned_examples/pong-apex.yaml
# 2019-12-12 10:57:55 -08:00
#
# 14 lines
# 432 B
# YAML

# This reaches ~20 reward in 50 minutes (6M train steps, 2M env steps) on a
# p3.2xlarge AWS instance.
# See https://app.wandb.ai/zplizzi/test/runs/ayuuhixr?workspace=user-zplizzi
# for training curves.
# Experiment definition. Everything below must be nested under the experiment
# name; at column 0 these keys would parse as unrelated top-level entries and
# `pong-apex` itself would be null.
pong-apex:
  # Environment id and the algorithm to run on it.
  env: PongNoFrameskip-v4
  run: APEX
  # Algorithm hyperparameters; keys not listed here keep the algorithm's
  # defaults (presumably RLlib's APEX defaults -- confirm against the
  # installed version).
  config:
    target_network_update_freq: 20000
    num_workers: 4
    num_envs_per_worker: 8
    # Written with a leading zero for readability; same value as `.00005`.
    lr: 0.00005
    train_batch_size: 64
    gamma: 0.99