mirror of
https://github.com/vale981/ray
synced 2025-03-09 12:56:46 -04:00
13 lines
489 B
YAML
13 lines
489 B
YAML
# Experiment `pong-apex`: runs APEX on the PongNoFrameskip-v4 environment.
#
# This can be expected to reach 20.8 reward within an hour when using a V100 GPU
# (e.g. p3.2xl instance on AWS, and m4.4xl workers). It can also reach ~21 reward
# within an hour with fewer workers (e.g. 4-8), but less reliably.
pong-apex:
  env: PongNoFrameskip-v4
  run: APEX
  config:
    target_network_update_freq: 50000
    num_workers: 32
    # Vectorization within worker processes can also be enabled:
    # num_envs_per_worker: 4
    lr: 0.0001
    gamma: 0.99