# Source: ray/rllib/tuned_examples/apex_dqn/cartpole-apex-dqn-fake-gpus.yaml (upstream listing: 23 lines, 656 B, YAML)

# Note that with fewer than 3 workers, APEX can behave somewhat unstably
# due to the (static) per-worker epsilon distribution, which also makes
# evaluation without an evaluation worker set harder.
# For an epsilon-free/greedy evaluation, use:
# evaluation_interval: 1
# evaluation_config:
#   explore: False
# Experiment: APEX on CartPole-v0, configured with two fake GPUs.
cartpole-apex-dqn:
  env: CartPole-v0
  run: APEX
  # Stopping criteria for the run.
  stop:
    episode_reward_mean: 150.0
    timesteps_total: 250000
  # Settings for the algorithm named in `run`.
  config:
    # Works for both torch and tf.
    framework: tf
    num_workers: 3
    optimizer:
      num_replay_buffer_shards: 1
    # Fake 2 GPUs.
    num_gpus: 2
    _fake_gpus: true