# Experiment definition: the top-level key is the experiment name.
cartpole-sac:
  # Environment id and algorithm to run.
  env: CartPoleContinuousBulletEnv-v0
  run: SAC
  # Stopping criteria for the run.
  # NOTE(review): presumably the run stops as soon as either threshold is
  # reached (Ray Tune `stop` semantics) - confirm against the consumer.
  stop:
    episode_reward_mean: 100
    timesteps_total: 100000
  # Algorithm-specific settings.
  config:
    # Works for both torch and tf.
    framework: tf