---
# Experiment "cartpole-ppo": runs PPO on the CartPole-v0 environment.
# NOTE(review): the layout matches a Ray Tune / RLlib experiment spec --
# confirm against the tool that loads this file.
# NOTE(review): nesting was restored from a single collapsed line; verify
# that each key sits under the intended parent.
cartpole-ppo:
  env: CartPole-v0
  run: PPO

  # Stopping criteria.
  # NOTE(review): presumably the run ends as soon as either threshold is
  # reached -- confirm with the consumer's documentation.
  stop:
    episode_reward_mean: 150
    timesteps_total: 100000

  # Settings handed to the algorithm selected by `run`.
  config:
    num_workers: 1
    batch_mode: complete_episodes
    observation_filter: MeanStdFilter