# APEX (Ape-X DQN) on CartPole-v0, run with two *fake* GPUs.
#
# Note that with < 3 workers, APEX can behave a little unstably due to the
# (static) per-worker-epsilon distribution, which also makes evaluation
# without a dedicated evaluation worker set harder.
# For an epsilon-free/greedy evaluation, add the following under `config`:
#   evaluation_interval: 1
#   evaluation_config:
#     explore: false
cartpole-apex-dqn:
  env: CartPole-v0
  run: APEX
  # Stopping criteria: the run ends as soon as either one is reached.
  stop:
    episode_reward_mean: 150.0
    timesteps_total: 250000
  config:
    # Works for both torch and tf.
    framework: tf
    # Keep at >= 3 workers; see the stability note at the top of this file.
    num_workers: 3
    optimizer:
      num_replay_buffer_shards: 1

    # Fake 2 GPUs: with `_fake_gpus` enabled, the GPUs are simulated, so
    # no physical GPU is needed to run this example.
    num_gpus: 2
    _fake_gpus: true