ray/rllib/tuned_examples/pong-a3c.yaml
Eric Liang dd70720578
[rllib] Rename sample_batch_size => rollout_fragment_length (#7503)
* bulk rename

* deprecation warn

* update doc

* update fig

* line length

* rename

* make pytest comptaible

* fix test

* fi sys

* rename

* wip

* fix more

* lint

* update svg

* comments

* lint

* fix use of batch steps
2020-03-14 12:05:04 -07:00

30 lines
877 B
YAML

# This gets to ~19-20 reward in ~30 minutes / 4m steps on a m4.10xl instance
# TODO(rliaw): this has regressed in performance
pong-a3c:
env: PongDeterministic-v4
run: A3C
config:
num_workers: 16
rollout_fragment_length: 20
use_pytorch: false
vf_loss_coeff: 0.5
entropy_coeff: 0.01
gamma: 0.99
grad_clip: 40.0
lambda: 1.0
lr: 0.0001
observation_filter: NoFilter
preprocessor_pref: rllib
model:
use_lstm: true
conv_activation: elu
dim: 42
grayscale: true
zero_mean: false
# Reduced channel depth and kernel size from default
conv_filters: [
[32, [3, 3], 2],
[32, [3, 3], 2],
[32, [3, 3], 2],
[32, [3, 3], 2],
]