ray/rllib/tuned_examples/pendulum-sac.yaml
# Pendulum SAC can attain -150+ reward in 6-7k
# Configurations are similar to the original softlearning/sac codebase
pendulum_sac:
    env: Pendulum-v0
    run: SAC
    stop:
        episode_reward_mean: -150
    config:
        horizon: 200
        soft_horizon: False
        # Q- and policy networks: two hidden layers of 256 units each,
        # matching the reference softlearning implementation.
        Q_model:
            hidden_activation: relu
            hidden_layer_sizes: [256, 256]
        policy_model:
            hidden_activation: relu
            hidden_layer_sizes: [256, 256]
        # Polyak averaging coefficient for the target-network update.
        tau: 0.005
        # "auto" sets the entropy target from the action-space dimensionality.
        target_entropy: auto
        no_done_at_end: True
        n_step: 1
        sample_batch_size: 1
        prioritized_replay: False
        train_batch_size: 256
        target_network_update_freq: 1
        timesteps_per_iteration: 1000
        # Begin optimizing once 256 samples have been collected.
        learning_starts: 256
        exploration_enabled: True
        optimization:
            actor_learning_rate: 0.0003
            critic_learning_rate: 0.0003
            entropy_learning_rate: 0.0003
        num_workers: 0
        num_gpus: 0
        clip_actions: False
        normalize_actions: True
        evaluation_interval: 1
        metrics_smoothing_episodes: 5
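
A tuned example like this is normally launched from the command line with
"rllib train -f pendulum-sac.yaml". The sketch below is a rough Python
equivalent, assuming a Ray release contemporary with this file (~0.8), where
the CLI hands the parsed experiment mapping to ray.tune; the relative file
path is an assumption about the working directory.

# Sketch: run this tuned example through ray.tune directly, mirroring what
# the `rllib train -f pendulum-sac.yaml` CLI does (assumed Ray ~0.8 API).
import yaml

import ray
from ray import tune

# Path assumes the working directory contains the YAML file above.
with open("pendulum-sac.yaml") as f:
    # Parses to {"pendulum_sac": {"env": ..., "run": "SAC", "stop": ..., "config": ...}}
    experiments = yaml.safe_load(f)

ray.init()

# run_experiments() accepts the same name -> experiment-spec mapping that the
# `rllib train` CLI consumes; "run", "stop", and "config" pass through to tune.
tune.run_experiments(experiments)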