# Experiment: A2C on CartPole-v0 with microbatching, non-PyTorch backend
# (the "-tf" suffix and `use_pytorch: false` suggest TensorFlow).
cartpole-a2c-microbatch-tf:
  env: CartPole-v0
  run: A2C

  # Stopping criteria.
  # NOTE(review): presumably the run ends as soon as either threshold is
  # reached -- confirm against the experiment runner's `stop` semantics.
  stop:
    episode_reward_mean: 100
    timesteps_total: 100000

  # Algorithm settings passed to the trainer.
  config:
    use_pytorch: false
    num_workers: 1
    gamma: 0.95
    # microbatch_size is half of train_batch_size, i.e. two microbatches
    # per train batch.
    # NOTE(review): presumably gradients are accumulated across the
    # microbatches before each update -- confirm in the A2C docs.
    microbatch_size: 50
    train_batch_size: 100