# Mirror of https://github.com/vale981/ray
# Synced 2025-03-06 02:21:39 -05:00
# Original file: 30 lines, 871 B, YAML
# Experiment definition: runs A3C on the PongDeterministic-v4 environment.
#
# This gets to ~19-20 reward in ~30 minutes / 4m steps on a m4.10xl instance
# TODO(rliaw): this has regressed in performance
#
# NOTE(review): the nesting below was reconstructed from a copy that had lost
# its indentation; the placement of the keys under `model` is assumed from
# their names -- confirm against the RLlib config schema.
pong-a3c:
  env: PongDeterministic-v4
  run: A3C
  config:
    num_workers: 16
    sample_batch_size: 20
    use_pytorch: false
    vf_loss_coeff: 0.5
    entropy_coeff: 0.01
    gamma: 0.99
    grad_clip: 40.0
    lambda: 1.0
    lr: 0.0001
    observation_filter: NoFilter
    preprocessor_pref: rllib
    model:
      use_lstm: true
      conv_activation: elu
      dim: 42
      grayscale: true
      zero_mean: false
      # Reduced channel depth and kernel size from default
      # Four identical entries; each is presumably
      # [out_channels, [kernel_h, kernel_w], stride] -- TODO confirm.
      conv_filters:
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]