---
# Experiment `pendulum-sac`: runs the SAC algorithm on the Pendulum-v0 env.
# NOTE(review): key layout looks like an RLlib/Tune tuned-example file —
# confirm against the consuming loader.
pendulum-sac:
  env: Pendulum-v0
  run: SAC
  # Stopping criteria for the run.
  # NOTE(review): presumably the run ends when either threshold is reached —
  # confirm against the consumer's stop semantics.
  stop:
    episode_reward_mean: -300  # note that evaluation perf is higher
    timesteps_total: 10000
  # Algorithm settings for this experiment.
  config:
    evaluation_interval: 1  # logged under evaluation/* metric keys
    soft_horizon: true
    clip_actions: false
    normalize_actions: true
    metrics_smoothing_episodes: 5