# ray/rllib/tuned_examples/pong-apex.yaml

# This can be expected to reach 20.8 reward within an hour when using a V100 GPU
# (e.g. a p3.2xl instance on AWS, with m4.4xl workers). It can also reach ~21
# reward within an hour with fewer workers (e.g. 4-8), but less reliably.
# Entry `pong-apex`: runs APEX on the PongNoFrameskip-v4 environment.
# `env`, `run` and `config` belong to this entry, so they must be nested
# under it; the hyperparameters below in turn belong under `config`.
pong-apex:
  env: PongNoFrameskip-v4
  run: APEX
  config:
    target_network_update_freq: 50000
    num_workers: 32
    # Vectorization within each worker process can also be enabled:
    # num_envs_per_worker: 4
    lr: 0.0001  # same value as `.0001`, written with an explicit leading zero
    gamma: 0.99