# Mirror of https://github.com/vale981/ray
# Synced 2025-03-12 22:26:39 -04:00
# 37 lines, 1 KiB, YAML
# Test definition: A3C on the PongDeterministic-v4 environment.
# NOTE(review): the nesting below was reconstructed from a flattened listing
# whose indentation had been lost — confirm it against the upstream file.
a3c-pongdeterministic-v4:
  env: PongDeterministic-v4
  run: A3C
  # Minimum reward and total ts (in given time_total_s) to pass this test.
  pass_criteria:
    episode_reward_mean: 18.0
    timesteps_total: 5000000
  # Stopping condition; time_total_s is the budget the pass criteria refer to.
  stop:
    time_total_s: 3600
  # TODO(sven, jungong): fix A3C on torch and re-enable.
  frameworks: ["tf", "tf2"]
  config:
    num_gpus: 0
    num_workers: 16
    rollout_fragment_length: 20
    vf_loss_coeff: 0.5
    entropy_coeff: 0.01
    gamma: 0.99
    grad_clip: 40.0
    lambda: 1.0
    lr: 0.0001
    observation_filter: NoFilter
    preprocessor_pref: rllib
    model:
      use_lstm: true
      conv_activation: elu
      dim: 42
      grayscale: true
      zero_mean: false
      # Reduced channel depth and kernel size from default.
      # Each row is presumably [out_channels, [kernel_h, kernel_w], stride]
      # — TODO confirm against the model config documentation.
      conv_filters:
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]