# Experiment definition. The top-level key is the experiment name and every
# setting below is nested under it.
# NOTE(review): layout matches a Ray Tune / RLlib experiment spec; confirm
# against whatever loader consumes this file.
cartpole-ppo:
  # Environment identifier and the algorithm to run on it.
  env: CartPole-v0
  run: PPO
  # Stopping criteria: a mean-episode-reward target and a wall-clock budget in
  # seconds. Presumably the run ends as soon as either is reached; confirm
  # with the consumer's documentation.
  stop:
    episode_reward_mean: 200
    time_total_s: 300
  # Algorithm-level settings (presumably forwarded to the `run` algorithm).
  config:
    num_workers: 1
    batch_mode: complete_episodes