# Experiment: A2C on CartPole-v0 with a microbatch size smaller than the
# train batch size.
cartpole-a2c-microbatch:
  env: CartPole-v0
  run: A2C
  # Stopping criteria for the run.
  # NOTE(review): presumably the run stops as soon as either threshold is
  # reached -- confirm against the Ray Tune `stop` documentation.
  stop:
    episode_reward_mean: 150
    timesteps_total: 1000000
  config:
    # Works for both torch and tf.
    framework: tf
    num_workers: 2
    gamma: 0.95
    rollout_fragment_length: 20
    # train_batch_size is exactly 3x microbatch_size (120 / 40).
    microbatch_size: 40
    train_batch_size: 120