# Experiment definition: runs the A3C algorithm on the CartPole-v0 environment.
cartpole-a3c:
  env: CartPole-v0
  run: A3C
  # Stopping criteria for the run.
  # NOTE(review): presumably the run ends as soon as either threshold is
  # reached; confirm against the consuming tool's `stop` semantics.
  stop:
    episode_reward_mean: 150
    timesteps_total: 200000
  # Algorithm settings handed to the `run` algorithm.
  config:
    # Works for both torch and tf.
    framework: tf
    num_workers: 1
    gamma: 0.95