# Experiment "pendulum-ddpg": runs the DDPG algorithm on the Pendulum-v0
# environment.
# NOTE(review): layout looks like a Ray Tune / RLlib experiment spec — confirm
# against the tool that consumes this file.
pendulum-ddpg:
  env: Pendulum-v0
  run: DDPG
  # Stopping criteria. Presumably the run ends as soon as any one of these
  # thresholds is reached — verify against the consumer's semantics.
  stop:
    episode_reward_mean: -900
    timesteps_total: 100000
  # Algorithm-specific options; anything not listed here is left at the
  # consumer's defaults.
  config:
    use_huber: true
    clip_rewards: false