ray/rllib/tuned_examples/pong-impala-fast.yaml

# This can reach 18-19 reward in ~3 minutes on a p3.16xl head node with m4.16xl workers
# 128 workers -> 3 minutes (best case)
# 64 workers -> 4 minutes
# 32 workers -> 7 minutes
# See also: pong-impala.yaml, pong-impala-vectorized.yaml
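#
# As a usage sketch (assuming RLlib's standard CLI entry point for its tuned
# examples), this config can be launched from the rllib/ directory with:
#   rllib train -f tuned_examples/pong-impala-fast.yaml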
pong-impala-fast:
    env: PongNoFrameskip-v4
    run: IMPALA
    config:
        # Rollout fragment length collected per sample request from each worker.
        sample_batch_size: 50
        # Size of each SGD batch on the learner.
        train_batch_size: 1000
        num_workers: 128
        # Each worker steps 5 vectorized environments in parallel.
        num_envs_per_worker: 5
        # Max number of workers to broadcast one set of weights to.
        broadcast_interval: 5
        # Level of queuing for sample requests from each worker.
        max_sample_requests_in_flight_per_worker: 1
        # Load data into the learner GPUs through multiple parallel buffers.
        num_data_loader_buffers: 4
        num_gpus: 2
        model:
            # Downscale Atari frames to 42x42 (default is 84x84).
            dim: 42