mirror of
https://github.com/vale981/ray
synced 2025-03-12 14:16:39 -04:00
36 lines
1 KiB
YAML
36 lines
1 KiB
YAML
# Learning test definition: runs A3C on the PongDeterministic-v4 environment.
# NOTE(review): nesting below was reconstructed from a whitespace-stripped
# copy of this file -- confirm key placement against the upstream original.
a3c-pongdeterministic-v4:
  env: PongDeterministic-v4
  run: A3C
  # Minimum reward and total ts (in given time_total_s) to pass this test.
  pass_criteria:
    episode_reward_mean: 18.0
    timesteps_total: 5000000
  # Time budget referenced by the pass criteria above (3600 s = 1 hour).
  stop:
    time_total_s: 3600
  # TODO(sven, jungong): fix A3C on torch and re-enable.
  frameworks: ["tf", "tf2"]
  config:
    num_gpus: 0
    num_workers: 16
    rollout_fragment_length: 20
    vf_loss_coeff: 0.5
    entropy_coeff: 0.01
    gamma: 0.99
    grad_clip: 40.0
    lambda: 1.0
    lr: 0.0001
    observation_filter: NoFilter
    preprocessor_pref: rllib
    model:
      use_lstm: true
      conv_activation: elu
      dim: 42
      grayscale: true
      zero_mean: false
      # Reduced channel depth and kernel size from default.
      # Four identical entries; each is presumably
      # [out_channels, [kernel_h, kernel_w], stride] -- TODO confirm against
      # the RLlib model config documentation.
      conv_filters:
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]