# Mirror of https://github.com/vale981/ray
# (synced 2025-03-10 13:26:39 -04:00).
# Should reach 100+ reward in 100k-200k ts.
# Note here that with 2 workers, APEX can behave a little unstably
# due to the (static) per-worker-epsilon distribution, which also makes
# evaluation w/o evaluation worker set harder.
# For an epsilon-free/greedy evaluation, use:
# evaluation_interval: 1
# evaluation_config:
#   explore: False
cartpole-apex-dqn:
  env: CartPole-v0
  run: APEX
  # Thresholds matching the expectation stated at the top of this file
  # (mean episode reward of 100, at most 200k timesteps).
  stop:
    episode_reward_mean: 100
    timesteps_total: 200000
  config:
    # Works for both torch and tf.
    framework: tf

    # Make this work with only 5 CPUs and 0 GPUs:
    num_workers: 2
    optimizer:
      num_replay_buffer_shards: 2
    num_gpus: 0

    min_iter_time_s: 5
    target_network_update_freq: 500
    learning_starts: 1000
    timesteps_per_iteration: 1000
    buffer_size: 20000

    # Single 64-unit hidden layer with a linear activation.
    model:
      fcnet_hiddens: [64]
      fcnet_activation: linear
    n_step: 3