# Mirror of https://github.com/vale981/ray
# Synced 2025-03-06 02:21:39 -05:00
# Mirror listing: 17 lines, 486 B, YAML
---
# Experiment definition: runs APEX_DDPG on the Pendulum-v1 environment and
# stops once episode_reward_mean reaches -160.
#
# This can be expected to reach -160 reward within 2.5 timesteps / ~250 seconds
# on a K40 GPU.
# NOTE(review): "2.5 timesteps" looks like it is missing a magnitude
# (e.g. k or M) -- confirm against a reference run before relying on it.
pendulum-apex-ddpg:
  env: Pendulum-v1
  run: APEX_DDPG

  # Stopping criterion for the run.
  stop:
    episode_reward_mean: -160

  # Algorithm settings handed to the APEX_DDPG run.
  config:
    # Works for both torch and tf.
    framework: tf
    use_huber: true
    clip_rewards: false
    num_workers: 16
    n_step: 1
    # NOTE(review): tau of 1.0 together with a large update frequency
    # presumably means periodic full copies into the target network rather
    # than soft updates -- verify against the algorithm's config docs.
    target_network_update_freq: 50000
    tau: 1.0
    evaluation_interval: 5
    evaluation_duration: 10