# ray/rllib/tuned_examples/dqn/cartpole-apex.yaml
#
# (File-viewer header kept as a comment: 34 lines, 968 B, YAML.)

# Should reach 100+ reward in 100k-200k ts.
# Note here that with 2 workers, APEX can behave a little unstably
# due to the (static) per-worker-epsilon distribution, which also makes
# evaluation w/o evaluation worker set harder.
# For an epsilon-free/greedy evaluation, use:
#   evaluation_interval: 1
#   evaluation_config:
#     explore: False
# Experiment definition: runs APEX on CartPole-v0.
# NOTE(review): the nesting below was reconstructed from RLlib's config
# schema because the flattened text carried no indentation -- confirm
# against the upstream file.
cartpole-apex-dqn:
  env: CartPole-v0
  run: APEX
  # Stopping criteria for the run.
  stop:
    episode_reward_mean: 100
    timesteps_total: 200000
  config:
    # Works for both torch and tf.
    framework: tf
    # Make this work with only 5 CPUs and 0 GPUs:
    num_workers: 2
    optimizer:
      num_replay_buffer_shards: 2
    num_gpus: 0

    min_iter_time_s: 5
    target_network_update_freq: 500
    learning_starts: 1000
    timesteps_per_iteration: 1000
    buffer_size: 20000

    model:
      fcnet_hiddens: [64]
      fcnet_activation: linear
    # n_step is an algorithm-level setting, so it sits on `config`
    # rather than inside `model`.
    n_step: 3