# Experiment "cartpole-appo": the APPO algorithm on the CartPole-v0 environment.
# NOTE(review): key layout looks like a Ray RLlib/Tune tuned-example file;
# confirm against the loader that consumes it.
cartpole-appo:
  env: CartPole-v0
  run: APPO

  # Stopping criteria.
  # NOTE(review): presumably the run ends once either threshold is reached;
  # verify against the consumer's stop semantics.
  stop:
    episode_reward_mean: 150
    timesteps_total: 200000

  # Algorithm settings for this experiment.
  config:
    # Works for both torch and tf.
    framework: tf

    # Sampling / batching sizes.
    rollout_fragment_length: 10
    train_batch_size: 10

    # Parallelism and hardware: one worker with five envs, no GPUs.
    num_envs_per_worker: 5
    num_workers: 1
    num_gpus: 0

    # V-trace is switched off for this variant.
    vtrace: false