# TransformedActionPendulum: SAC can attain -150+ reward in 6-7k timesteps.
# Configurations are similar to the original softlearning/sac codebase.
transformed-actions-pendulum-sac-dummy-torch:
    env: ray.rllib.examples.env.transformed_action_space_env.TransformedActionPendulum
    run: SAC
    stop:
        episode_reward_mean: -200
        timesteps_total: 10000
    config:
        # Works for both torch and tf.
        seed: 42
        framework: torch
        # Test whether SAC is able to learn in "distorted" (shifted/scaled)
        # action spaces; here, the env expects actions in [300.0, 500.0].
        env_config:
            config:
                low: 300.0
                high: 500.0
        horizon: 200
        soft_horizon: false
        q_model_config:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
        policy_model_config:
            fcnet_activation: relu
            fcnet_hiddens: [256, 256]
        # Soft update coefficient for the target networks.
        tau: 0.005
        target_entropy: auto
        no_done_at_end: true
        # 1-step TD targets.
        n_step: 1
        rollout_fragment_length: 1
        train_batch_size: 256
        target_network_update_freq: 1
        min_sample_timesteps_per_iteration: 1000
        replay_buffer_config:
            type: MultiAgentPrioritizedReplayBuffer
            # Number of timesteps to sample before learning starts.
            learning_starts: 256
        optimization:
            actor_learning_rate: 0.0003
            critic_learning_rate: 0.0003
            entropy_learning_rate: 0.0003
        # Sample on the local worker only; train on CPU.
        num_workers: 0
        num_gpus: 0
        metrics_smoothing_episodes: 5
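
# Usage sketch (hedged): a tuned-example YAML like this one is typically run
# through the rllib CLI. The exact invocation depends on the installed Ray
# version, and the file name below is an assumption:
#
#   rllib train -f transformed-actions-pendulum-sac-dummy-torch.yaml
#
# Newer Ray versions may use the `file` subcommand instead:
#
#   rllib train file transformed-actions-pendulum-sac-dummy-torch.yaml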