# ray/rllib/tuned_examples/apex_dqn/cartpole-apex-dqn.yaml

# Note: with fewer than 3 workers, APEX can behave somewhat unstably
# due to the (static) per-worker epsilon distribution, which also makes
# evaluation without a dedicated evaluation worker set harder.
# For an epsilon-free/greedy evaluation, use:
# evaluation_interval: 1
# evaluation_config:
#     explore: False
# Tuned-example experiment: the APEX algorithm on the CartPole-v0 environment.
cartpole-apex-dqn-training-itr:
  env: CartPole-v0
  run: APEX

  # Stopping criteria for the experiment.
  stop:
    episode_reward_mean: 150.0
    timesteps_total: 250000

  config:
    # Either torch or tf can be used here.
    framework: tf

    # Resource settings, chosen so the run fits on 5 CPUs without any GPU.
    num_gpus: 0
    num_workers: 3
    optimizer:
      num_replay_buffer_shards: 2

    # Replay buffer settings.
    replay_buffer_config:
      type: MultiAgentPrioritizedReplayBuffer
      capacity: 20000
      learning_starts: 1000

    # Per-iteration reporting thresholds and update-schedule settings.
    # NOTE(review): exact semantics of these keys depend on the installed
    # RLlib version; confirm against its APEX-DQN config documentation.
    min_time_s_per_iteration: 5
    min_sample_timesteps_per_iteration: 1000
    target_network_update_freq: 500
    training_intensity: 4