# Source: ray/rllib/tuned_examples/sac/halfcheetah-pybullet-sac.yaml
# (37 lines, 1 KiB, YAML)

# Experiment entry, keyed by experiment name: runs SAC on
# HalfCheetahBulletEnv-v0.
halfcheetah-pybullet-sac:
  env: HalfCheetahBulletEnv-v0
  run: SAC
  # Stopping criteria for the run.
  stop:
    episode_reward_mean: 9000
  # Algorithm settings.
  config:
    # Works for both torch and tf.
    framework: tf
    horizon: 1000
    soft_horizon: false
    # Q_model and policy_model use identical fcnet settings
    # (relu activation, hiddens [256, 256]).
    Q_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    policy_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    tau: 0.005
    target_entropy: auto
    no_done_at_end: true
    n_step: 1
    rollout_fragment_length: 1
    prioritized_replay: true
    train_batch_size: 256
    target_network_update_freq: 1
    timesteps_per_iteration: 1000
    learning_starts: 10000
    # Actor, critic and entropy learning rates are all set to the same value.
    optimization:
      actor_learning_rate: 0.0003
      critic_learning_rate: 0.0003
      entropy_learning_rate: 0.0003
    num_workers: 0
    num_gpus: 1
    clip_actions: false
    normalize_actions: true
    evaluation_interval: 1
    metrics_smoothing_episodes: 5