# Experiment entry keyed by name. The keys below look like a Ray Tune / RLlib
# experiment spec -- TODO confirm against the tool that loads this file.
# NOTE(review): nesting was reconstructed from a single collapsed line; verify
# that `timesteps_total` belongs under `stop` rather than beside it.
pendulum-td3:
  # Environment identifier passed to the trainer.
  env: Pendulum-v0
  # Algorithm / trainable to run.
  run: TD3
  # Stopping criteria. Presumably the run ends as soon as any one of these
  # thresholds is reached -- confirm with the consumer's documentation.
  stop:
    episode_reward_mean: -900
    timesteps_total: 100000