# Source: ray/rllib/tuned_examples/regression_tests/pendulum-ddpg.yaml
# (file-viewer metadata for the original file: 10 lines, 224 B, YAML)

# Regression-test experiment: DDPG on the Pendulum-v0 environment.
# The top-level key is the experiment name; everything below is nested under it.
pendulum-ddpg:
  env: Pendulum-v0
  run: DDPG
  # Stopping criteria. Per Ray Tune's `stop` semantics the trial ends as soon
  # as any one of these is reached (see the Tune docs to confirm for your
  # Ray version).
  stop:
    episode_reward_mean: -900
    timesteps_total: 100000
  # Algorithm settings passed to the DDPG trainer.
  # NOTE(review): keys not listed here presumably keep the trainer defaults.
  config:
    use_huber: true
    clip_rewards: false
    exploration_fraction: 0.1