# Experiment spec: SAC on HalfCheetahBulletEnv-v0.
# NOTE(review): nesting below was restored from a single-line source; confirm
# the placement of each key against the consumer's config schema.
sac-halfcheetahbulletenv-v0:
  env: HalfCheetahBulletEnv-v0
  run: SAC
  # Minimum mean episode reward and total timesteps that must be reached
  # within the `stop.time_total_s` budget for this test to pass.
  pass_criteria:
    episode_reward_mean: 400.0
    timesteps_total: 200000
  stop:
    time_total_s: 1800
  config:
    horizon: 1000
    soft_horizon: false
    # Q-function and policy networks use the same layer layout.
    q_model_config:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    policy_model_config:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    tau: 0.005
    target_entropy: auto
    no_done_at_end: false
    n_step: 3
    rollout_fragment_length: 1
    train_batch_size: 256
    target_network_update_freq: 1
    min_sample_timesteps_per_iteration: 1000
    replay_buffer_config:
      learning_starts: 10000
      type: MultiAgentPrioritizedReplayBuffer
    # All three optimizers share one learning rate.
    optimization:
      actor_learning_rate: 0.0003
      critic_learning_rate: 0.0003
      entropy_learning_rate: 0.0003
    num_workers: 0
    num_gpus: 1
    metrics_smoothing_episodes: 5