# Given an offline file generated by SAC via:
#   rllib train -f tuned_examples/sac/pendulum-sac.yaml --no-ray-ui
# Pendulum CQL can attain ~-300 mean episode reward in about 10k timesteps
# from that file.
pendulum-cql:
    env: Pendulum-v0
    run: CQL
    stop:
        episode_reward_mean: -300
        # timesteps_total: 10000
    config:
        # Works for both torch and tf.
        framework: tf
        # Use one or more offline files, or "input: sampler" for online learning.
        input: ["/your/json/file/here"]
        horizon: 200
        soft_horizon: true
        Q_model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
        policy_model:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
        # Soft-update coefficient for the target networks.
        tau: 0.005
        target_entropy: auto
        no_done_at_end: true
        n_step: 3
        rollout_fragment_length: 1
        prioritized_replay: false
        train_batch_size: 256
        target_network_update_freq: 1
        timesteps_per_iteration: 1000
        learning_starts: 256
        optimization:
            actor_learning_rate: 0.0003
            critic_learning_rate: 0.0003
            entropy_learning_rate: 0.0003
        num_workers: 0
        num_gpus: 1
        clip_actions: false
        normalize_actions: true
        evaluation_num_workers: 1
        evaluation_interval: 1
        metrics_smoothing_episodes: 5
        # Number of initial iterations trained with a behavior-cloning loss
        # before switching to the CQL objective.
        bc_iters: 0
        # Evaluate in an actual environment.
        evaluation_config:
            input: sampler
            explore: false
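
# How to use this config (a minimal sketch; assumes this file is saved as
# pendulum-cql.yaml and that Ray/RLlib is installed):
#
#   rllib train -f pendulum-cql.yaml
#
# To produce the offline JSON data expected by `input` above, run the SAC
# example with offline output enabled (e.g. add an "output: /some/dir" entry
# to its config; the directory name here is just a placeholder), then point
# "/your/json/file/here" at the resulting file path(s).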