# Experiment definition: runs A3C on the PongDeterministic-v4 environment.
# The top-level key is the experiment name; everything below it is nested
# under that experiment.
a3c-pongdeterministic-v4:
  env: PongDeterministic-v4
  run: A3C
  # Minimum reward and total ts (in given time_total_s) to pass this test.
  pass_criteria:
    episode_reward_mean: 18.0
    timesteps_total: 5000000
  # Stop condition: wall-clock budget in seconds (3600 s = 1 hour).
  stop:
    time_total_s: 3600
  # TODO(sven, jungong): fix A3C on torch and re-enable.
  frameworks: ["tf", "tf2"]
  # Algorithm settings for this run.
  config:
    # No GPUs requested; 16 workers are configured.
    num_gpus: 0
    num_workers: 16
    rollout_fragment_length: 20
    vf_loss_coeff: 0.5
    entropy_coeff: 0.01
    gamma: 0.99
    grad_clip: 40.0
    lambda: 1.0
    lr: 0.0001
    observation_filter: NoFilter
    preprocessor_pref: rllib
    model:
      use_lstm: true
      conv_activation: elu
      dim: 42
      grayscale: true
      zero_mean: false
      # Reduced channel depth and kernel size from default.
      # NOTE(review): each row is presumably
      # [num_filters, [kernel_h, kernel_w], stride] - confirm against the
      # RLlib model config documentation.
      conv_filters:
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]
        - [32, [3, 3], 2]