ray/rllib/tuned_examples/halfcheetah-sac.yaml

# Our implementation of SAC can reach 9k reward in 400k timesteps
halfcheetah_sac:
    env: HalfCheetah-v3
    run: SAC
    stop:
        episode_reward_mean: 9000
    config:
        horizon: 1000
        soft_horizon: False
        Q_model:
            hidden_activation: relu
            hidden_layer_sizes: [256, 256]
        policy_model:
            hidden_activation: relu
            hidden_layer_sizes: [256, 256]
        tau: 0.005
        target_entropy: auto
        no_done_at_end: True
        n_step: 1
        rollout_fragment_length: 1
        prioritized_replay: False
        train_batch_size: 256
        target_network_update_freq: 1
        timesteps_per_iteration: 1000
        learning_starts: 10000
        optimization:
            actor_learning_rate: 0.0003
            critic_learning_rate: 0.0003
            entropy_learning_rate: 0.0003
        num_workers: 0
        num_gpus: 0
        clip_actions: False
        normalize_actions: True
        evaluation_interval: 1
        metrics_smoothing_episodes: 5
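
# Usage note (a sketch, not part of the original file): tuned-example YAML files
# like this one are normally launched with the `rllib` CLI that ships with Ray,
# for example:
#
#   rllib train -f halfcheetah-sac.yaml
#
# The path is illustrative; point it at wherever this file sits in your checkout.
# Equivalently, the parsed dict can be handed to Tune directly from Python
# (illustrative sketch; file path and top-level "env" handling are assumptions):
#
#   import yaml
#   from ray import tune
#
#   with open("halfcheetah-sac.yaml") as f:
#       experiments = yaml.safe_load(f)  # {"halfcheetah_sac": {...}}
#   tune.run_experiments(experiments)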