# Note here that with < 3 workers, APEX can behave a little unstably
# due to the (static) per-worker-epsilon distribution, which also makes
# evaluation w/o evaluation worker set harder.
# For an epsilon-free/greedy evaluation, use:
#   evaluation_interval: 1
#   evaluation_config:
#     explore: False
cartpole-apex-dqn-training-itr:
  env: CartPole-v0
  run: APEX
  stop:
    episode_reward_mean: 150.0
    timesteps_total: 250000
  config:
    # Works for both torch and tf.
    framework: tf
    # Make this work with only 5 CPUs and 0 GPUs:
    num_workers: 3
    optimizer:
      num_replay_buffer_shards: 2
    num_gpus: 0
    min_time_s_per_reporting: 5
    target_network_update_freq: 500
    learning_starts: 1000
    min_sample_timesteps_per_reporting: 1000
    buffer_size: 20000
    training_intensity: 4