# Basically the same as atari-ppo, but adapted for DDPPO. Note that DDPPO
# isn't actually any more efficient on Atari, since the network size is
# relatively small and the env doesn't require a GPU.
atari-ddppo:
    env:
        grid_search:
            - BreakoutNoFrameskip-v4
    run: DDPPO
    config:
        # Worker config: 10 workers, each of which requires a GPU.
        num_workers: 10
        num_gpus_per_worker: 1
        # Each worker samples 100 steps from each of its 5 envs, i.e. 500
        # steps per optimization round, or 5000 steps summed across workers.
        rollout_fragment_length: 100
        num_envs_per_worker: 5
        # Each worker takes a minibatch of 50. There are 10 workers total,
        # so the effective minibatch size will be 500.
        sgd_minibatch_size: 50
        num_sgd_iter: 10
        # Params from the standard PPO Atari config:
        lambda: 0.95
        kl_coeff: 0.5
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 10.0
        entropy_coeff: 0.01
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        vf_share_layers: true
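
# A minimal sketch of how to launch this tuned example, assuming a Ray/RLlib
# installation that provides the `rllib` CLI (the file name below is whatever
# this file is saved as):
#
#   rllib train -f atari-ddppo.yaml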