# Source: ray/rllib/tuned_examples/a3c/pong-a3c.yaml
# (31 lines, 911 B, YAML)

# This gets to ~19-20 reward in ~30 minutes / 4M steps on an m4.10xl instance
# TODO(rliaw): this has regressed in performance
pong-a3c:
  # Experiment spec: environment, algorithm, and the algorithm's config.
  env: PongDeterministic-v4
  run: A3C
  config:
    # Works for both torch and tf.
    framework: tf
    num_workers: 16
    rollout_fragment_length: 20
    vf_loss_coeff: 0.5
    entropy_coeff: 0.01
    gamma: 0.99
    grad_clip: 40.0
    lambda: 1.0
    lr: 0.0001
    observation_filter: NoFilter
    preprocessor_pref: rllib
    model:
      use_lstm: true
      conv_activation: elu
      # NOTE(review): presumably the side length the frame is resized to
      # by the preprocessor -- confirm against the RLlib model config docs.
      dim: 42
      grayscale: true
      zero_mean: false
      # Reduced channel depth and kernel size from default
      # NOTE(review): each entry looks like
      # [out_channels, [kernel_h, kernel_w], stride] -- confirm.
      conv_filters:
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]