mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
Autodetect the number of GPUs when starting Ray. (#1293)
* autodetect * Wed Dec 6 12:46:52 PST 2017 * Wed Dec 6 12:47:54 PST 2017 * Move GPU autodetection into services.py. * Fix capitalization of Nvidia. * Update documentation.
This commit is contained in:
parent
6aae9a12fb
commit
7009538321
4 changed files with 23 additions and 4 deletions
|
@ -32,8 +32,8 @@ If there are GPUs available on the machine, you should specify this with the
|
||||||
|
|
||||||
ray.init(num_cpus=20, num_gpus=2)
|
ray.init(num_cpus=20, num_gpus=2)
|
||||||
|
|
||||||
By default, Ray will use ``psutil.cpu_count()`` to determine the number of CPUs,
|
By default, Ray will use ``psutil.cpu_count()`` to determine the number of CPUs.
|
||||||
and by default the number of GPUs will be zero.
|
Ray will also attempt to automatically determine the number of GPUs.
|
||||||
|
|
||||||
Instead of thinking about the number of "worker" processes on each node, we
|
Instead of thinking about the number of "worker" processes on each node, we
|
||||||
prefer to think in terms of the quantities of CPU and GPU resources on each
|
prefer to think in terms of the quantities of CPU and GPU resources on each
|
||||||
|
|
|
@ -33,7 +33,7 @@ through ``ray.init``, do the following.
|
||||||
|
|
||||||
If the number of CPUs is unspecified, Ray will automatically determine the
|
If the number of CPUs is unspecified, Ray will automatically determine the
|
||||||
number by running ``psutil.cpu_count()``. If the number of GPUs is unspecified,
|
number by running ``psutil.cpu_count()``. If the number of GPUs is unspecified,
|
||||||
Ray will default to 0 GPUs.
|
Ray will attempt to automatically detect the number of GPUs.
|
||||||
|
|
||||||
Specifying a task's CPU and GPU requirements
|
Specifying a task's CPU and GPU requirements
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
|
@ -279,6 +279,20 @@ def wait_for_redis_to_start(redis_ip_address, redis_port, num_retries=5):
|
||||||
"configured properly.")
|
"configured properly.")
|
||||||
|
|
||||||
|
|
||||||
|
def _autodetect_num_gpus(proc_gpus_path="/proc/driver/nvidia/gpus"):
    """Attempt to detect the number of GPUs on this machine.

    TODO(rkn): This currently assumes Nvidia GPUs and Linux.

    Every entry found in ``proc_gpus_path`` is counted as one GPU.

    Args:
        proc_gpus_path: The directory whose entries are counted. Defaults to
            the location used by the original implementation; it is a
            parameter so that callers (and tests) can point the detection
            at a different directory.

    Returns:
        The number of GPUs if any were detected, otherwise 0.
    """
    # List the directory directly instead of checking os.path.isdir first:
    # a missing path, a path that is not a directory, or a directory that
    # cannot be read all surface as OSError, and each of them means that no
    # GPUs could be detected. This also avoids the window between the
    # existence check and the listing.
    try:
        gpu_entries = os.listdir(proc_gpus_path)
    except OSError:
        return 0
    return len(gpu_entries)
|
||||||
|
|
||||||
|
|
||||||
def _compute_version_info():
|
def _compute_version_info():
|
||||||
"""Compute the versions of Python, cloudpickle, pyarrow, and Ray.
|
"""Compute the versions of Python, cloudpickle, pyarrow, and Ray.
|
||||||
|
|
||||||
|
@ -679,6 +693,9 @@ def start_local_scheduler(redis_address,
|
||||||
# By default, use the number of hardware execution threads for the
|
# By default, use the number of hardware execution threads for the
|
||||||
# number of cores.
|
# number of cores.
|
||||||
resources["CPU"] = psutil.cpu_count()
|
resources["CPU"] = psutil.cpu_count()
|
||||||
|
if "GPU" not in resources:
|
||||||
|
# Try to automatically detect the number of GPUs.
|
||||||
|
resources["GPU"] = _autodetect_num_gpus()
|
||||||
print("Starting local scheduler with the following resources: {}."
|
print("Starting local scheduler with the following resources: {}."
|
||||||
.format(resources))
|
.format(resources))
|
||||||
local_scheduler_name, p = ray.local_scheduler.start_local_scheduler(
|
local_scheduler_name, p = ray.local_scheduler.start_local_scheduler(
|
||||||
|
|
|
@ -1224,7 +1224,9 @@ def _init(address_info=None,
|
||||||
num_cpus (int): Number of cpus the user wishes all local schedulers to
|
num_cpus (int): Number of cpus the user wishes all local schedulers to
|
||||||
be configured with.
|
be configured with.
|
||||||
num_gpus (int): Number of gpus the user wishes all local schedulers to
|
num_gpus (int): Number of gpus the user wishes all local schedulers to
|
||||||
be configured with.
|
be configured with. If unspecified, Ray will attempt to autodetect
|
||||||
|
the number of GPUs available on the node (note that autodetection
|
||||||
|
currently only works for Nvidia GPUs on Linux).
|
||||||
resources: A dictionary mapping resource names to the quantity of that
|
resources: A dictionary mapping resource names to the quantity of that
|
||||||
resource available.
|
resource available.
|
||||||
num_redis_shards: The number of Redis shards to start in addition to
|
num_redis_shards: The number of Redis shards to start in addition to
|
||||||
|
|
Loading…
Add table
Reference in a new issue