2018-06-27 22:51:04 -07:00
|
|
|
# This gets to ~19-20 reward in ~30 minutes / 4M steps on an m4.10xl instance
|
|
|
|
# TODO(rliaw): this has regressed in performance
|
2017-10-13 16:18:16 -07:00
|
|
|
pong-a3c:
|
|
|
|
env: PongDeterministic-v4
|
2017-11-20 17:52:43 -08:00
|
|
|
run: A3C
|
2017-10-13 16:18:16 -07:00
|
|
|
config:
|
2018-06-27 22:51:04 -07:00
|
|
|
num_workers: 16
|
2020-03-14 12:05:04 -07:00
|
|
|
rollout_fragment_length: 20
|
2017-12-24 12:25:13 -08:00
|
|
|
use_pytorch: false
|
2017-12-30 00:24:54 -08:00
|
|
|
vf_loss_coeff: 0.5
|
2019-03-17 18:07:37 -07:00
|
|
|
entropy_coeff: 0.01
|
2017-12-30 00:24:54 -08:00
|
|
|
gamma: 0.99
|
|
|
|
grad_clip: 40.0
|
|
|
|
lambda: 1.0
|
|
|
|
lr: 0.0001
|
|
|
|
observation_filter: NoFilter
|
2018-08-23 17:49:10 -07:00
|
|
|
preprocessor_pref: rllib
|
2017-12-30 00:24:54 -08:00
|
|
|
model:
|
2018-06-26 13:17:15 -07:00
|
|
|
use_lstm: true
|
2018-06-27 22:51:04 -07:00
|
|
|
conv_activation: elu
|
2017-12-30 00:24:54 -08:00
|
|
|
dim: 42
|
|
|
|
grayscale: true
|
|
|
|
zero_mean: false
|
2018-06-27 22:51:04 -07:00
|
|
|
# Channel depth and kernel size are reduced relative to the default conv_filters
|
|
|
|
conv_filters: [
|
|
|
|
[32, [3, 3], 2],
|
|
|
|
[32, [3, 3], 2],
|
|
|
|
[32, [3, 3], 2],
|
|
|
|
[32, [3, 3], 2],
|
|
|
|
]
|