---
# APEX_DDPG experiment on the Pendulum-v1 environment.
#
# This can be expected to reach -160 reward within 2.5 timesteps / ~250 seconds
# on a K40 GPU.
# NOTE(review): "2.5 timesteps" looks like it is missing a magnitude
# (thousand? million?) -- confirm against the original benchmark run.
pendulum-apex-ddpg:
  env: Pendulum-v1
  run: APEX_DDPG

  # Stopping criterion: the same -160 reward target quoted in the header.
  stop:
    episode_reward_mean: -160

  config:
    # Works for both torch and tf.
    framework: tf
    use_huber: true
    clip_rewards: false
    num_workers: 16
    n_step: 1
    # NOTE(review): tau 1.0 combined with an update every 50000 steps
    # presumably means hard (full-copy) target-network updates -- confirm.
    target_network_update_freq: 50000
    tau: 1.0
    # Evaluation settings; units of the two values below are defined by the
    # consuming framework (see its evaluation config docs).
    evaluation_interval: 5
    evaluation_duration: 10