# Experiment entry: runs the IMPALA algorithm on the Pendulum-v1 environment.
# NOTE(review): the "-tf" suffix suggests a TensorFlow variant, but no
# framework setting is visible in this section - confirm where it is set.
pendulum-impala-tf:
  # Environment identifier handed to the runner.
  env: Pendulum-v1
  # Algorithm to run.
  run: IMPALA
  # Stopping criteria for the experiment.
  # NOTE(review): presumably the run ends as soon as either criterion is
  # met (Ray Tune `stop` dict semantics) - confirm against the runner.
  stop:
    # Target mean episode reward (a plain negative integer).
    episode_reward_mean: -700
    # Upper bound on the total number of timesteps.
    timesteps_total: 500000