# Source path: ray/rllib/tuned_examples/sac/pendulum-sac-fake-gpus.yaml
#
# File-viewer metadata for the original file: 42 lines, 1.2 KiB, YAML.

# Experiment definition: SAC on Pendulum-v1, configured with two fake GPUs.
# Nesting below is restored with 2-space block indentation; every key and
# value is the same as in the flattened original.
pendulum-sac-fake-gpus:
  env: Pendulum-v1
  run: SAC

  # Stopping criteria for the run.
  stop:
    episode_reward_mean: -270
    timesteps_total: 10000

  config:
    # Works for both torch and tf.
    seed: 42
    framework: tf
    horizon: 200
    soft_horizon: false

    # Q-network and policy network use the same layer sizes and activation.
    q_model_config:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    policy_model_config:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]

    tau: 0.005
    target_entropy: auto
    no_done_at_end: true
    n_step: 1
    rollout_fragment_length: 1
    train_batch_size: 256
    target_network_update_freq: 1
    min_sample_timesteps_per_iteration: 1000

    replay_buffer_config:
      type: MultiAgentPrioritizedReplayBuffer
      # NOTE(review): `learning_starts` is nested here because it directly
      # follows `type` in the original key order; confirm this placement
      # against the RLlib version in use (it may belong at `config` level).
      learning_starts: 256

    num_workers: 0
    metrics_smoothing_episodes: 5

    # 1x batch size (despite 2 GPUs).
    # train_batch_size: 256
    optimization:
      actor_learning_rate: 0.001
      critic_learning_rate: 0.001
      entropy_learning_rate: 0.001

    # Fake 2 GPUs.
    num_gpus: 2
    _fake_gpus: true