# Experiment: SAC on Pendulum-v1, configured with 2 fake GPUs.
pendulum-sac-fake-gpus:
  env: Pendulum-v1
  run: SAC

  # Stopping criteria for the run.
  # NOTE(review): presumably the run ends as soon as either criterion is
  # met - confirm against the Ray Tune `stop` documentation.
  stop:
    episode_reward_mean: -700
    training_iteration: 200

  config:
    # Works for both torch and tf.
    framework: tf

    horizon: 200
    soft_horizon: true

    # Q-network and policy network use the same architecture here:
    # two hidden layers of 256 units with ReLU activations.
    Q_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    policy_model:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]

    tau: 0.005
    target_entropy: auto
    n_step: 1
    rollout_fragment_length: 1
    prioritized_replay: true
    target_network_update_freq: 1
    timesteps_per_iteration: 1000
    learning_starts: 256
    num_workers: 0
    metrics_smoothing_episodes: 5

    # 1x batch size (despite 2 GPUs).
    # Intentionally left commented out, so this file does not set
    # train_batch_size itself.
    # train_batch_size: 256

    # All three learning rates are set to the same value.
    optimization:
      actor_learning_rate: 0.001
      critic_learning_rate: 0.001
      entropy_learning_rate: 0.001

    # Fake 2 GPUs.
    # NOTE(review): `_fake_gpus` presumably lets the multi-GPU code path
    # run without real GPU hardware - confirm against the RLlib docs.
    num_gpus: 2
    _fake_gpus: true