# Experiment "pendulum-impala-tf": runs IMPALA on the Pendulum-v1 environment.
pendulum-impala-tf:
  env: Pendulum-v1
  run: IMPALA
  # Stopping thresholds for the run.
  # NOTE(review): presumably the run ends as soon as either threshold is
  # reached -- confirm against the tool that consumes this file.
  stop:
    episode_reward_mean: -700
    timesteps_total: 500000