[rllib] Don't use get_gpu_ids() in ppo

This lets the num_gpus config work properly even when not using Tune, since Ray won't have set the GPU ids for the process in that case.
Eric Liang, 2018-08-01 16:25:11 -07:00 (committed by GitHub)
parent d9a36c4e39
commit a630e332f3
2 changed files with 5 additions and 4 deletions
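For context, here is a minimal sketch of the use case this commit fixes: constructing the agent directly rather than through Tune. The import path and constructor call are assumptions about the 2018-era API; only PPOAgent and the num_gpus config key appear in this diff.

# Sketch: running PPO standalone (no Tune) with GPUs requested via the
# agent config. Import path and constructor signature are assumptions.
import ray
from ray.rllib.ppo import PPOAgent

ray.init()
# Without Tune, Ray does not assign GPU ids to this process, so before
# this commit the optimizer chose devices from ray.get_gpu_ids() (empty
# here) and silently fell back to the CPU. With the patch, the device
# count comes straight from the config.
agent = PPOAgent(env="CartPole-v0", config={"num_gpus": 1})
print(agent.train())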


@@ -91,8 +91,9 @@ class PPOAgent(Agent):
                 "sgd_batch_size": self.config["sgd_batchsize"],
                 "sgd_stepsize": self.config["sgd_stepsize"],
                 "num_sgd_iter": self.config["num_sgd_iter"],
+                "num_gpus": self.config["num_gpus"],
                 "timesteps_per_batch": self.config["timesteps_per_batch"],
-                "standardize_fields": ["advantages"]
+                "standardize_fields": ["advantages"],
             })
 
     def _train(self):


@@ -35,16 +35,16 @@ class LocalMultiGPUOptimizer(PolicyOptimizer):
               sgd_stepsize=5e-5,
               num_sgd_iter=10,
               timesteps_per_batch=1024,
+              num_gpus=0,
               standardize_fields=[]):
         self.batch_size = sgd_batch_size
         self.sgd_stepsize = sgd_stepsize
         self.num_sgd_iter = num_sgd_iter
         self.timesteps_per_batch = timesteps_per_batch
-        gpu_ids = ray.get_gpu_ids()
-        if not gpu_ids:
+        if not num_gpus:
             self.devices = ["/cpu:0"]
         else:
-            self.devices = ["/gpu:{}".format(i) for i in range(len(gpu_ids))]
+            self.devices = ["/gpu:{}".format(i) for i in range(num_gpus)]
         self.batch_size = int(sgd_batch_size / len(self.devices)) * len(
             self.devices)
         assert self.batch_size % len(self.devices) == 0
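
A standalone sketch of the two quantities derived in the hunk above: the device list and the rounded-down SGD batch size. The helper names are illustrative only, not RLlib APIs.

# Sketch of the device selection and batch rounding shown in the diff.
def select_devices(num_gpus):
    # With the patch, devices come from the num_gpus argument rather
    # than from ray.get_gpu_ids(), so this works outside Tune.
    if not num_gpus:
        return ["/cpu:0"]
    return ["/gpu:{}".format(i) for i in range(num_gpus)]

def effective_batch(sgd_batch_size, num_devices):
    # The SGD batch is rounded down to a multiple of the device count
    # so every device receives an equal share of each minibatch.
    return int(sgd_batch_size / num_devices) * num_devices

assert select_devices(0) == ["/cpu:0"]
assert select_devices(2) == ["/gpu:0", "/gpu:1"]
assert effective_batch(128, 2) == 128
assert effective_batch(100, 3) == 99  # 33 samples per device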