ray/rllib/tuned_examples/cql/pendulum-cql.yaml

# Pendulum SAC can attain -150+ reward in 6-7k timesteps.
# The configuration is similar to the original softlearning/sac codebase.
pendulum-cql:
    env: Pendulum-v0
    run: CQL
    stop:
        episode_reward_mean: -300
        #timesteps_total: 10000
    config:
        # Works for both torch and tf.
        framework: torch
        #input: sampler
        horizon: 200
        # Don't set done=True (and don't reset) when the 200-step horizon is hit.
        soft_horizon: true
        Q_model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
        policy_model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
        # Soft-update coefficient for the target networks.
        tau: 0.005
        # "auto": target entropy is set to -|A| for continuous action spaces.
        target_entropy: auto
        no_done_at_end: true
        # Use 3-step returns for the Q-targets.
        n_step: 3
        rollout_fragment_length: 1
        prioritized_replay: false
        train_batch_size: 256
        target_network_update_freq: 1
        timesteps_per_iteration: 1000
        # Start learning once one full train batch has been collected.
        learning_starts: 256
        optimization:
            actor_learning_rate: 0.0003
            critic_learning_rate: 0.0003
            entropy_learning_rate: 0.0003
        num_workers: 0
        num_gpus: 1
        clip_actions: false
        normalize_actions: true
        evaluation_num_workers: 1
        evaluation_interval: 1
        metrics_smoothing_episodes: 5
        # Number of initial iterations to train with a pure behavior-cloning
        # loss before switching over to the CQL loss.
        bc_iters: 0
        # Evaluate with live environment rollouts, not the offline input.
        evaluation_config:
            input: sampler
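
To run this tuned example, you can pass the file to the RLlib CLI (rllib train -f pendulum-cql.yaml) or load it yourself with Ray Tune. Below is a minimal sketch of the latter, assuming a Ray 1.x install contemporary with this file; the file path is a placeholder for wherever ray/rllib/tuned_examples/cql/pendulum-cql.yaml sits in your checkout.

import yaml

import ray
from ray import tune

# Placeholder path -- point it at your local copy of
# ray/rllib/tuned_examples/cql/pendulum-cql.yaml.
with open("pendulum-cql.yaml") as f:
    experiments = yaml.safe_load(f)

ray.init()
# The YAML maps experiment name -> Tune experiment spec
# (env, run, stop, config), which is exactly the structure
# tune.run_experiments() consumes.
tune.run_experiments(experiments)
ray.shutdown()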