# ray/rllib/tuned_examples/regression_tests/pendulum-sac.yaml
#
# 10 lines
# 312 B
# YAML

# Regression-test experiment: SAC on Pendulum-v0.
# The top-level key is the experiment name; `stop` and `config` are nested
# mappings beneath it.
pendulum-sac:
  env: Pendulum-v0
  run: SAC
  # NOTE(review): presumably the run ends as soon as either criterion below
  # is reached -- confirm against the Tune stopping-criteria docs.
  stop:
    episode_reward_mean: -300  # note that evaluation perf is higher
    timesteps_total: 15000
  config:
    evaluation_interval: 1  # logged under evaluation/* metric keys
    soft_horizon: true
    metrics_smoothing_episodes: 10