mirror of
https://github.com/vale981/ray
synced 2025-03-07 02:51:39 -05:00
[Doc] Document options method (#10830)
Co-authored-by: Richard Liaw <rliaw@berkeley.edu>
This commit is contained in:
parent
50784e2496
commit
d7d4e1c87b
4 changed files with 139 additions and 61 deletions
|
@ -25,6 +25,10 @@ ray.remote
|
||||||
|
|
||||||
.. autofunction:: ray.remote
|
.. autofunction:: ray.remote
|
||||||
|
|
||||||
|
.. autofunction:: ray.remote_function.RemoteFunction.options
|
||||||
|
|
||||||
|
.. autofunction:: ray.actor.ActorClass.options
|
||||||
|
|
||||||
.. _ray-get-ref:
|
.. _ray-get-ref:
|
||||||
|
|
||||||
ray.get
|
ray.get
|
||||||
|
|
|
@ -402,23 +402,60 @@ class ActorClass:
|
||||||
"""
|
"""
|
||||||
return self._remote(args=args, kwargs=kwargs)
|
return self._remote(args=args, kwargs=kwargs)
|
||||||
|
|
||||||
def options(self, **options):
|
def options(self,
|
||||||
"""Convenience method for creating an actor with options.
|
args=None,
|
||||||
|
kwargs=None,
|
||||||
|
num_cpus=None,
|
||||||
|
num_gpus=None,
|
||||||
|
memory=None,
|
||||||
|
object_store_memory=None,
|
||||||
|
resources=None,
|
||||||
|
accelerator_type=None,
|
||||||
|
max_concurrency=None,
|
||||||
|
max_restarts=None,
|
||||||
|
max_task_retries=None,
|
||||||
|
name=None,
|
||||||
|
lifetime=None,
|
||||||
|
placement_group=None,
|
||||||
|
placement_group_bundle_index=-1):
|
||||||
|
"""Configures and overrides the actor instantiation parameters.
|
||||||
|
|
||||||
Same arguments as Actor._remote(), but returns a wrapped actor class
|
The arguments are the same as those that can be passed
|
||||||
that a non-underscore .remote() can be called on.
|
to :obj:`ray.remote`.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
# The following two calls are equivalent.
|
|
||||||
>>> Actor._remote(num_cpus=4, max_concurrency=8, args=[x, y])
|
.. code-block:: python
|
||||||
>>> Actor.options(num_cpus=4, max_concurrency=8).remote(x, y)
|
|
||||||
|
@ray.remote(num_cpus=2, resources={"CustomResource": 1})
|
||||||
|
class Foo:
|
||||||
|
def method(self):
|
||||||
|
return 1
|
||||||
|
# Class Foo will require 1 cpu instead of 2.
|
||||||
|
# It will also require no custom resources.
|
||||||
|
Bar = Foo.options(num_cpus=1, resources=None)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
actor_cls = self
|
actor_cls = self
|
||||||
|
|
||||||
class ActorOptionWrapper:
|
class ActorOptionWrapper:
|
||||||
def remote(self, *args, **kwargs):
|
def remote(self, *args, **kwargs):
|
||||||
return actor_cls._remote(args=args, kwargs=kwargs, **options)
|
return actor_cls._remote(
|
||||||
|
args=args,
|
||||||
|
kwargs=kwargs,
|
||||||
|
num_cpus=num_cpus,
|
||||||
|
num_gpus=num_gpus,
|
||||||
|
memory=memory,
|
||||||
|
object_store_memory=object_store_memory,
|
||||||
|
resources=resources,
|
||||||
|
accelerator_type=accelerator_type,
|
||||||
|
max_concurrency=max_concurrency,
|
||||||
|
max_restarts=max_restarts,
|
||||||
|
max_task_retries=max_task_retries,
|
||||||
|
name=name,
|
||||||
|
lifetime=lifetime,
|
||||||
|
placement_group=placement_group,
|
||||||
|
placement_group_bundle_index=placement_group_bundle_index)
|
||||||
|
|
||||||
return ActorOptionWrapper()
|
return ActorOptionWrapper()
|
||||||
|
|
||||||
|
|
|
@ -122,23 +122,53 @@ class RemoteFunction:
|
||||||
num_gpus=num_gpus,
|
num_gpus=num_gpus,
|
||||||
resources=resources)
|
resources=resources)
|
||||||
|
|
||||||
def options(self, **options):
|
def options(self,
|
||||||
"""Convenience method for executing a task with options.
|
args=None,
|
||||||
|
kwargs=None,
|
||||||
|
num_returns=None,
|
||||||
|
num_cpus=None,
|
||||||
|
num_gpus=None,
|
||||||
|
memory=None,
|
||||||
|
object_store_memory=None,
|
||||||
|
accelerator_type=None,
|
||||||
|
resources=None,
|
||||||
|
max_retries=None,
|
||||||
|
placement_group=None,
|
||||||
|
placement_group_bundle_index=-1,
|
||||||
|
name=""):
|
||||||
|
"""Configures and overrides the task invocation parameters.
|
||||||
|
|
||||||
Same arguments as func._remote(), but returns a wrapped function
|
Options are overlapping values provided by :obj:`ray.remote`.
|
||||||
that a non-underscore .remote() can be called on.
|
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
# The following two calls are equivalent.
|
|
||||||
>>> func._remote(num_cpus=4, args=[x, y])
|
.. code-block:: python
|
||||||
>>> func.options(num_cpus=4).remote(x, y)
|
|
||||||
|
@ray.remote(num_gpus=1, max_calls=1, num_returns=2)
|
||||||
|
def f():
|
||||||
|
return 1, 2
|
||||||
|
# Task f will require 2 gpus instead of 1.
|
||||||
|
g = f.options(num_gpus=2, max_calls=None)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
func_cls = self
|
func_cls = self
|
||||||
|
|
||||||
class FuncWrapper:
|
class FuncWrapper:
|
||||||
def remote(self, *args, **kwargs):
|
def remote(self, *args, **kwargs):
|
||||||
return func_cls._remote(args=args, kwargs=kwargs, **options)
|
return func_cls._remote(
|
||||||
|
args=args,
|
||||||
|
kwargs=kwargs,
|
||||||
|
num_returns=num_returns,
|
||||||
|
num_cpus=num_cpus,
|
||||||
|
num_gpus=num_gpus,
|
||||||
|
memory=memory,
|
||||||
|
object_store_memory=object_store_memory,
|
||||||
|
accelerator_type=accelerator_type,
|
||||||
|
resources=resources,
|
||||||
|
max_retries=max_retries,
|
||||||
|
placement_group=placement_group,
|
||||||
|
placement_group_bundle_index=placement_group_bundle_index,
|
||||||
|
name=name)
|
||||||
|
|
||||||
return FuncWrapper()
|
return FuncWrapper()
|
||||||
|
|
||||||
|
|
|
@ -1723,7 +1723,7 @@ def make_decorator(num_returns=None,
|
||||||
|
|
||||||
|
|
||||||
def remote(*args, **kwargs):
|
def remote(*args, **kwargs):
|
||||||
"""Define a remote function or an actor class.
|
"""Defines a remote function or an actor class.
|
||||||
|
|
||||||
This can be used with no arguments to define a remote function or actor as
|
This can be used with no arguments to define a remote function or actor as
|
||||||
follows:
|
follows:
|
||||||
|
@ -1739,50 +1739,7 @@ def remote(*args, **kwargs):
|
||||||
def method(self):
|
def method(self):
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
It can also be used with specific keyword arguments:
|
It can also be used with specific keyword arguments as follows:
|
||||||
|
|
||||||
* **num_returns:** This is only for *remote functions*. It specifies
|
|
||||||
the number of object refs returned by the remote function invocation.
|
|
||||||
* **num_cpus:** The quantity of CPU cores to reserve for this task or for
|
|
||||||
the lifetime of the actor.
|
|
||||||
* **num_gpus:** The quantity of GPUs to reserve for this task or for the
|
|
||||||
lifetime of the actor.
|
|
||||||
* **resources:** The quantity of various custom resources to reserve for
|
|
||||||
this task or for the lifetime of the actor. This is a dictionary mapping
|
|
||||||
strings (resource names) to numbers.
|
|
||||||
* **accelerator_type:** If specified, requires that the task or actor run
|
|
||||||
on a node with the specified type of accelerator. See `ray.accelerators`
|
|
||||||
for accelerator types.
|
|
||||||
* **max_calls:** Only for *remote functions*. This specifies the maximum
|
|
||||||
number of times that a given worker can execute the given remote function
|
|
||||||
before it must exit (this can be used to address memory leaks in
|
|
||||||
third-party libraries or to reclaim resources that cannot easily be
|
|
||||||
released, e.g., GPU memory that was acquired by TensorFlow). By
|
|
||||||
default this is infinite.
|
|
||||||
* **max_restarts**: Only for *actors*. This specifies the maximum
|
|
||||||
number of times that the actor should be restarted when it dies
|
|
||||||
unexpectedly. The minimum valid value is 0 (default), which indicates
|
|
||||||
that the actor doesn't need to be restarted. A value of -1
|
|
||||||
indicates that an actor should be restarted indefinitely.
|
|
||||||
* **max_task_retries**: Only for *actors*. How many times to retry an actor
|
|
||||||
task if the task fails due to a system error, e.g., the actor has died.
|
|
||||||
If set to -1, the system will retry the failed task until the task
|
|
||||||
succeeds, or the actor has reached its max_restarts limit. If set to n >
|
|
||||||
0, the system will retry the failed task up to n times, after which the
|
|
||||||
task will throw a `RayActorError` exception upon `ray.get`. Note that
|
|
||||||
Python exceptions are not considered system errors and will not trigger
|
|
||||||
retries.
|
|
||||||
* **max_retries**: Only for *remote functions*. This specifies the maximum
|
|
||||||
number of times that the remote function should be rerun when the worker
|
|
||||||
process executing it crashes unexpectedly. The minimum valid value is 0,
|
|
||||||
the default is 4 (default), and a value of -1 indicates infinite retries.
|
|
||||||
* **placement_group**: the placement group this task belongs to,
|
|
||||||
or None if it doesn't belong to any group.
|
|
||||||
* **placement_group_bundle_index**: the index of the bundle
|
|
||||||
if the task belongs to a placement group, which may be -1 to indicate
|
|
||||||
any available bundle.
|
|
||||||
|
|
||||||
This can be done as follows:
|
|
||||||
|
|
||||||
.. code-block:: python
|
.. code-block:: python
|
||||||
|
|
||||||
|
@ -1816,6 +1773,56 @@ def remote(*args, **kwargs):
|
||||||
in Python is deleted, which will cause them to complete any outstanding
|
in Python is deleted, which will cause them to complete any outstanding
|
||||||
work and then shut down. If you want to kill them immediately, you can
|
work and then shut down. If you want to kill them immediately, you can
|
||||||
also call ``ray.kill(actor)``.
|
also call ``ray.kill(actor)``.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
num_returns (int): This is only for *remote functions*. It specifies
|
||||||
|
the number of object refs returned by
|
||||||
|
the remote function invocation.
|
||||||
|
num_cpus (float): The quantity of CPU cores to reserve
|
||||||
|
for this task or for the lifetime of the actor.
|
||||||
|
num_gpus (int): The quantity of GPUs to reserve
|
||||||
|
for this task or for the lifetime of the actor.
|
||||||
|
resources (Dict[str, float]): The quantity of various custom resources
|
||||||
|
to reserve for this task or for the lifetime of the actor.
|
||||||
|
This is a dictionary mapping strings (resource names) to floats.
|
||||||
|
accelerator_type: If specified, requires that the task or actor run
|
||||||
|
on a node with the specified type of accelerator.
|
||||||
|
See `ray.accelerators` for accelerator types.
|
||||||
|
max_calls (int): Only for *remote functions*. This specifies the
|
||||||
|
maximum number of times that a given worker can execute
|
||||||
|
the given remote function before it must exit
|
||||||
|
(this can be used to address memory leaks in third-party
|
||||||
|
libraries or to reclaim resources that cannot easily be
|
||||||
|
released, e.g., GPU memory that was acquired by TensorFlow).
|
||||||
|
By default this is infinite.
|
||||||
|
max_restarts (int): Only for *actors*. This specifies the maximum
|
||||||
|
number of times that the actor should be restarted when it dies
|
||||||
|
unexpectedly. The minimum valid value is 0 (default),
|
||||||
|
which indicates that the actor doesn't need to be restarted.
|
||||||
|
A value of -1 indicates that an actor should be restarted
|
||||||
|
indefinitely.
|
||||||
|
max_task_retries (int): Only for *actors*. How many times to
|
||||||
|
retry an actor task if the task fails due to a system error,
|
||||||
|
e.g., the actor has died. If set to -1, the system will
|
||||||
|
retry the failed task until the task succeeds, or the actor
|
||||||
|
has reached its max_restarts limit. If set to `n > 0`, the
|
||||||
|
system will retry the failed task up to n times, after which the
|
||||||
|
task will throw a `RayActorError` exception upon :obj:`ray.get`.
|
||||||
|
Note that Python exceptions are not considered system errors
|
||||||
|
and will not trigger retries.
|
||||||
|
max_retries (int): Only for *remote functions*. This specifies
|
||||||
|
the maximum number of times that the remote function
|
||||||
|
should be rerun when the worker process executing it
|
||||||
|
crashes unexpectedly. The minimum valid value is 0,
|
||||||
|
the default is 4 (default), and a value of -1 indicates
|
||||||
|
infinite retries.
|
||||||
|
placement_group (:obj:`PlacementGroup`): The placement group
|
||||||
|
this task belongs to, or ``None`` if it doesn't belong
|
||||||
|
to any group.
|
||||||
|
placement_group_bundle_index (int): The index of the bundle
|
||||||
|
if the task belongs to a placement group, which may be
|
||||||
|
-1 to indicate any available bundle.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
worker = global_worker
|
worker = global_worker
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue