[rllib] Don't use get_gpu_ids() in ppo

This lets the num_gpus config work properly even when not using Tune, since Ray won't have set the GPU ids for the process in that case.
Eric Liang, 2018-08-01 16:25:11 -07:00 (committed by GitHub)
parent d9a36c4e39
commit a630e332f3
2 changed files with 5 additions and 4 deletions
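For context, here is a minimal sketch of the use case this commit fixes: constructing the agent directly rather than through Tune. The import path and constructor call are assumptions about the 2018-era API; only PPOAgent and the num_gpus config key appear in this diff.

# Sketch: running PPO standalone (no Tune) with GPUs requested via the
# agent config. Import path and constructor signature are assumptions.
import ray
from ray.rllib.ppo import PPOAgent

ray.init()
# Without Tune, Ray does not assign GPU ids to this process, so before
# this commit the optimizer chose devices from ray.get_gpu_ids() (empty
# here) and silently fell back to the CPU. With the patch, the device
# count comes straight from the config.
agent = PPOAgent(env="CartPole-v0", config={"num_gpus": 1})
print(agent.train())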


@@ -91,8 +91,9 @@ class PPOAgent(Agent):
                 "sgd_batch_size": self.config["sgd_batchsize"],
                 "sgd_stepsize": self.config["sgd_stepsize"],
                 "num_sgd_iter": self.config["num_sgd_iter"],
+                "num_gpus": self.config["num_gpus"],
                 "timesteps_per_batch": self.config["timesteps_per_batch"],
-                "standardize_fields": ["advantages"]
+                "standardize_fields": ["advantages"],
             })
 
     def _train(self):


@@ -35,16 +35,16 @@ class LocalMultiGPUOptimizer(PolicyOptimizer):
               sgd_stepsize=5e-5,
               num_sgd_iter=10,
               timesteps_per_batch=1024,
+              num_gpus=0,
               standardize_fields=[]):
         self.batch_size = sgd_batch_size
         self.sgd_stepsize = sgd_stepsize
         self.num_sgd_iter = num_sgd_iter
         self.timesteps_per_batch = timesteps_per_batch
-        gpu_ids = ray.get_gpu_ids()
-        if not gpu_ids:
+        if not num_gpus:
             self.devices = ["/cpu:0"]
         else:
-            self.devices = ["/gpu:{}".format(i) for i in range(len(gpu_ids))]
+            self.devices = ["/gpu:{}".format(i) for i in range(num_gpus)]
         self.batch_size = int(sgd_batch_size / len(self.devices)) * len(
             self.devices)
         assert self.batch_size % len(self.devices) == 0
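
A standalone sketch of the two quantities derived in the hunk above: the device list and the rounded-down SGD batch size. The helper names are illustrative only, not RLlib APIs.

# Sketch of the device selection and batch rounding shown in the diff.
def select_devices(num_gpus):
    # With the patch, devices come from the num_gpus argument rather
    # than from ray.get_gpu_ids(), so this works outside Tune.
    if not num_gpus:
        return ["/cpu:0"]
    return ["/gpu:{}".format(i) for i in range(num_gpus)]

def effective_batch(sgd_batch_size, num_devices):
    # The SGD batch is rounded down to a multiple of the device count
    # so every device receives an equal share of each minibatch.
    return int(sgd_batch_size / num_devices) * num_devices

assert select_devices(0) == ["/cpu:0"]
assert select_devices(2) == ["/gpu:0", "/gpu:1"]
assert effective_batch(128, 2) == 128
assert effective_batch(100, 3) == 99  # 33 samples per device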