# Source: ray/release/rllib_tests/learning_tests/yaml_files/a3c-pongdeterministic-v4.yaml
# Viewer metadata: 36 lines, 1 KiB, YAML

# Learning-test experiment: runs A3C on PongDeterministic-v4.
# NOTE(review): nesting below was reconstructed after indentation was lost;
# confirm the key hierarchy against the consuming test runner's schema.
a3c-pongdeterministic-v4:
  env: PongDeterministic-v4
  run: A3C
  # Minimum reward and total ts (in given time_total_s) to pass this test.
  pass_criteria:
    episode_reward_mean: 18.0
    timesteps_total: 5000000
  # Stopping criterion; presumably wall-clock seconds -- TODO confirm units.
  stop:
    time_total_s: 3600
  # TODO(sven, jungong): fix A3C on torch and re-enable.
  frameworks: ["tf", "tf2"]
  # Algorithm settings for this run.
  config:
    num_gpus: 0
    num_workers: 16
    rollout_fragment_length: 20
    vf_loss_coeff: 0.5
    entropy_coeff: 0.01
    gamma: 0.99
    grad_clip: 40.0
    lambda: 1.0
    lr: 0.0001
    observation_filter: NoFilter
    preprocessor_pref: rllib
    # Model settings nested under the algorithm config.
    model:
      use_lstm: true
      conv_activation: elu
      dim: 42
      grayscale: true
      zero_mean: false
      # Reduced channel depth and kernel size from default.
      # Each entry is presumably [out_channels, [kernel_h, kernel_w], stride]
      # -- verify against the model catalog documentation.
      conv_filters: [
        [32, [3, 3], 2],
        [32, [3, 3], 2],
        [32, [3, 3], 2],
        [32, [3, 3], 2],
      ]