# ray/rllib/tuned_examples/ddpg/pendulum-apex-ddpg.yaml
#
# 17 lines
# 490 B
# YAML

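# This can be expected to reach -160 reward within ~250 seconds on a K40 GPU.
# NOTE(review): the timestep budget was originally written as "2.5 timesteps";
# a magnitude suffix (e.g. "M") is presumably missing - confirm before relying on it.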
# Experiment definition: the top-level key is the experiment name; env, run,
# stop and config are nested beneath it.
pendulum-apex-ddpg:
  env: Pendulum-v0
  run: APEX_DDPG
  # Stopping criterion, keyed on mean episode reward.
  stop:
    episode_reward_mean: -160
  # Settings passed to the APEX_DDPG run.
  config:
    # Works for both torch and tf.
    framework: tf
    use_huber: true
    clip_rewards: false
    num_workers: 16
    n_step: 1
    # NOTE(review): tau of 1.0 together with a 50000 update frequency looks like
    # a periodic full copy of the target network rather than a soft update -
    # confirm against the algorithm's config documentation.
    target_network_update_freq: 50000
    tau: 1.0
    # NOTE(review): presumably evaluates every 5 training iterations for
    # 10 episodes each - confirm the units.
    evaluation_interval: 5
    evaluation_num_episodes: 10