# SAC on Pendulum-v1, run with two fake GPUs.
pendulum-sac-fake-gpus:
  env: Pendulum-v1
  run: SAC

  # Stopping criteria for the run.
  stop:
    episode_reward_mean: -270
    timesteps_total: 10000

  config:
    # Works for both torch and tf.
    framework: tf
    seed: 42

    # Fake 2 GPUs.
    num_gpus: 2
    _fake_gpus: true
    num_workers: 0

    # Episode handling.
    horizon: 200
    soft_horizon: false
    no_done_at_end: true

    # Q-network and policy network share the same architecture.
    q_model_config:
      fcnet_hiddens: [256, 256]
      fcnet_activation: relu
    policy_model_config:
      fcnet_hiddens: [256, 256]
      fcnet_activation: relu

    # SAC-specific settings.
    tau: 0.005
    target_entropy: auto
    n_step: 1
    target_network_update_freq: 1
    optimization:
      actor_learning_rate: 0.001
      critic_learning_rate: 0.001
      entropy_learning_rate: 0.001

    # Sampling and training schedule.
    rollout_fragment_length: 1
    # 1x batch size (despite 2 GPUs).
    train_batch_size: 256
    min_sample_timesteps_per_iteration: 1000
    metrics_smoothing_episodes: 5

    # Replay buffer.
    replay_buffer_config:
      type: MultiAgentPrioritizedReplayBuffer
      learning_starts: 256