# Source: ray/rllib/tuned_examples/ddpg/pendulum-ddpg-fake-gpus.yaml
# (Upstream file listing: 31 lines, 764 B, YAML.)

# Experiment: DDPG on Pendulum-v1, run with two fake GPUs.
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file; confirm against the upstream layout (in particular that
# `final_scale` is the only key under `exploration_config`).
pendulum-ddpg-fake-gpus:
  env: Pendulum-v1
  run: DDPG

  # Stopping criteria for the run.
  stop:
    episode_reward_mean: -1000
    timesteps_total: 40000

  # Algorithm configuration.
  config:
    # Works for both torch and tf.
    seed: 42
    framework: tf
    actor_hiddens: [64, 64]
    critic_hiddens: [64, 64]
    n_step: 1
    model: {}
    gamma: 0.99
    exploration_config:
      final_scale: 0.02
    timesteps_per_iteration: 600
    buffer_size: 10000
    clip_rewards: false
    use_huber: true
    learning_starts: 500
    train_batch_size: 64
    num_workers: 0
    worker_side_prioritization: false
    actor_lr: 0.0001
    critic_lr: 0.0001

    # Fake 2 GPUs.
    num_gpus: 2
    _fake_gpus: true