# This reaches ~19 reward in < 40 minutes (3M env steps) on a p3.8xlarge AWS instance.
# See https://app.wandb.ai/zplizzi/test/runs/ayuuhixr?workspace=user-zplizzi
# for training curves.
pong-apex:
  env: PongNoFrameskip-v4
  run: APEX
  # Stop conditions for the run.
  # NOTE(review): presumably the run ends as soon as either threshold is
  # reached - confirm against the experiment runner's documentation.
  stop:
    episode_reward_mean: 19.0
    timesteps_total: 4000000
  config:
    # Works for both torch and tf.
    framework: tf
    target_network_update_freq: 20000
    num_workers: 4
    num_envs_per_worker: 8
    lr: 0.00005
    train_batch_size: 64
    replay_buffer_config:
      type: MultiAgentPrioritizedReplayBuffer
      capacity: 1000000
    # NOTE(review): gamma and training_intensity are placed directly under
    # `config` (not under `replay_buffer_config`) - confirm this nesting.
    gamma: 0.99
    training_intensity: 16