# Should reach 100+ reward in 100k-200k ts.
#
# Note that with 2 workers, APEX can behave a little unstably due to the
# (static) per-worker-epsilon distribution, which also makes evaluation
# without an evaluation worker set harder.
#
# For an epsilon-free/greedy evaluation, use:
#   evaluation_interval: 1
#   evaluation_config:
#     explore: false
cartpole-apex-dqn:
  env: CartPole-v0
  run: APEX
  # Stop criteria: a mean episode reward and a total-timestep budget.
  stop:
    episode_reward_mean: 100
    timesteps_total: 200000
  config:
    # Works for both torch and tf.
    framework: tf

    # Make this work with only 5 CPUs and 0 GPUs:
    num_workers: 2
    optimizer:
      num_replay_buffer_shards: 2
    num_gpus: 0

    min_iter_time_s: 5
    target_network_update_freq: 500
    learning_starts: 1000
    timesteps_per_iteration: 1000
    buffer_size: 20000

    model:
      fcnet_hiddens: [64]
      fcnet_activation: linear
    n_step: 3