# SAC on Pendulum-v1 with two *fake* GPUs (single physical device).
# Verifies that multi-GPU (tower-splitting) code paths learn correctly:
# the run must reach episode_reward_mean >= -270 within 10k timesteps.
pendulum-sac-fake-gpus:
  env: Pendulum-v1
  run: SAC
  stop:
    episode_reward_mean: -270
    timesteps_total: 10000
  config:
    # Works for both torch and tf.
    seed: 42
    framework: tf
    horizon: 200
    soft_horizon: false
    Q_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    policy_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    tau: 0.005
    target_entropy: auto
    no_done_at_end: true
    n_step: 1
    rollout_fragment_length: 1
    prioritized_replay: true
    train_batch_size: 256
    target_network_update_freq: 1
    timesteps_per_iteration: 1000
    learning_starts: 256
    num_workers: 0
    metrics_smoothing_episodes: 5

    # 1x batch size (despite 2 GPUs).
    # train_batch_size: 256
    optimization:
      actor_learning_rate: 0.001
      critic_learning_rate: 0.001
      entropy_learning_rate: 0.001

    # Fake 2 GPUs.
    num_gpus: 2
    _fake_gpus: true