mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
[Doc] Document options method (#10830)
Co-authored-by: Richard Liaw <rliaw@berkeley.edu>
This commit is contained in:
parent
50784e2496
commit
d7d4e1c87b
4 changed files with 139 additions and 61 deletions
|
@ -25,6 +25,10 @@ ray.remote
|
|||
|
||||
.. autofunction:: ray.remote
|
||||
|
||||
.. autofunction:: ray.remote_function.RemoteFunction.options
|
||||
|
||||
.. autofunction:: ray.actor.ActorClass.options
|
||||
|
||||
.. _ray-get-ref:
|
||||
|
||||
ray.get
|
||||
|
|
|
@ -402,23 +402,60 @@ class ActorClass:
|
|||
"""
|
||||
return self._remote(args=args, kwargs=kwargs)
|
||||
|
||||
def options(self, **options):
|
||||
"""Convenience method for creating an actor with options.
|
||||
def options(self,
|
||||
args=None,
|
||||
kwargs=None,
|
||||
num_cpus=None,
|
||||
num_gpus=None,
|
||||
memory=None,
|
||||
object_store_memory=None,
|
||||
resources=None,
|
||||
accelerator_type=None,
|
||||
max_concurrency=None,
|
||||
max_restarts=None,
|
||||
max_task_retries=None,
|
||||
name=None,
|
||||
lifetime=None,
|
||||
placement_group=None,
|
||||
placement_group_bundle_index=-1):
|
||||
"""Configures and overrides the actor instantiation parameters.
|
||||
|
||||
Same arguments as Actor._remote(), but returns a wrapped actor class
|
||||
that a non-underscore .remote() can be called on.
|
||||
The arguments are the same as those that can be passed
|
||||
to :obj:`ray.remote`.
|
||||
|
||||
Examples:
|
||||
# The following two calls are equivalent.
|
||||
>>> Actor._remote(num_cpus=4, max_concurrency=8, args=[x, y])
|
||||
>>> Actor.options(num_cpus=4, max_concurrency=8).remote(x, y)
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@ray.remote(num_cpus=2, resources={"CustomResource": 1})
|
||||
class Foo:
|
||||
def method(self):
|
||||
return 1
|
||||
# Class Foo will require 1 cpu instead of 2.
|
||||
# It will also require no custom resources.
|
||||
Bar = Foo.options(num_cpus=1, resources=None)
|
||||
"""
|
||||
|
||||
actor_cls = self
|
||||
|
||||
class ActorOptionWrapper:
|
||||
def remote(self, *args, **kwargs):
|
||||
return actor_cls._remote(args=args, kwargs=kwargs, **options)
|
||||
return actor_cls._remote(
|
||||
args=args,
|
||||
kwargs=kwargs,
|
||||
num_cpus=num_cpus,
|
||||
num_gpus=num_gpus,
|
||||
memory=memory,
|
||||
object_store_memory=object_store_memory,
|
||||
resources=resources,
|
||||
accelerator_type=accelerator_type,
|
||||
max_concurrency=max_concurrency,
|
||||
max_restarts=max_restarts,
|
||||
max_task_retries=max_task_retries,
|
||||
name=name,
|
||||
lifetime=lifetime,
|
||||
placement_group=placement_group,
|
||||
placement_group_bundle_index=placement_group_bundle_index)
|
||||
|
||||
return ActorOptionWrapper()
|
||||
|
||||
|
|
|
@ -122,23 +122,53 @@ class RemoteFunction:
|
|||
num_gpus=num_gpus,
|
||||
resources=resources)
|
||||
|
||||
def options(self, **options):
|
||||
"""Convenience method for executing a task with options.
|
||||
def options(self,
|
||||
args=None,
|
||||
kwargs=None,
|
||||
num_returns=None,
|
||||
num_cpus=None,
|
||||
num_gpus=None,
|
||||
memory=None,
|
||||
object_store_memory=None,
|
||||
accelerator_type=None,
|
||||
resources=None,
|
||||
max_retries=None,
|
||||
placement_group=None,
|
||||
placement_group_bundle_index=-1,
|
||||
name=""):
|
||||
"""Configures and overrides the task invocation parameters.
|
||||
|
||||
Same arguments as func._remote(), but returns a wrapped function
|
||||
that a non-underscore .remote() can be called on.
|
||||
Options are overlapping values provided by :obj:`ray.remote`.
|
||||
|
||||
Examples:
|
||||
# The following two calls are equivalent.
|
||||
>>> func._remote(num_cpus=4, args=[x, y])
|
||||
>>> func.options(num_cpus=4).remote(x, y)
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
@ray.remote(num_gpus=1, max_calls=1, num_returns=2)
|
||||
def f():
|
||||
return 1, 2
|
||||
# Task f will require 2 gpus instead of 1.
|
||||
g = f.options(num_gpus=2, max_calls=None)
|
||||
"""
|
||||
|
||||
func_cls = self
|
||||
|
||||
class FuncWrapper:
|
||||
def remote(self, *args, **kwargs):
|
||||
return func_cls._remote(args=args, kwargs=kwargs, **options)
|
||||
return func_cls._remote(
|
||||
args=args,
|
||||
kwargs=kwargs,
|
||||
num_returns=num_returns,
|
||||
num_cpus=num_cpus,
|
||||
num_gpus=num_gpus,
|
||||
memory=memory,
|
||||
object_store_memory=object_store_memory,
|
||||
accelerator_type=accelerator_type,
|
||||
resources=resources,
|
||||
max_retries=max_retries,
|
||||
placement_group=placement_group,
|
||||
placement_group_bundle_index=placement_group_bundle_index,
|
||||
name=name)
|
||||
|
||||
return FuncWrapper()
|
||||
|
||||
|
|
|
@ -1723,7 +1723,7 @@ def make_decorator(num_returns=None,
|
|||
|
||||
|
||||
def remote(*args, **kwargs):
|
||||
"""Define a remote function or an actor class.
|
||||
"""Defines a remote function or an actor class.
|
||||
|
||||
This can be used with no arguments to define a remote function or actor as
|
||||
follows:
|
||||
|
@ -1739,50 +1739,7 @@ def remote(*args, **kwargs):
|
|||
def method(self):
|
||||
return 1
|
||||
|
||||
It can also be used with specific keyword arguments:
|
||||
|
||||
* **num_returns:** This is only for *remote functions*. It specifies
|
||||
the number of object refs returned by the remote function invocation.
|
||||
* **num_cpus:** The quantity of CPU cores to reserve for this task or for
|
||||
the lifetime of the actor.
|
||||
* **num_gpus:** The quantity of GPUs to reserve for this task or for the
|
||||
lifetime of the actor.
|
||||
* **resources:** The quantity of various custom resources to reserve for
|
||||
this task or for the lifetime of the actor. This is a dictionary mapping
|
||||
strings (resource names) to numbers.
|
||||
* **accelerator_type:** If specified, requires that the task or actor run
|
||||
on a node with the specified type of accelerator. See `ray.accelerators`
|
||||
for accelerator types.
|
||||
* **max_calls:** Only for *remote functions*. This specifies the maximum
|
||||
number of times that a given worker can execute the given remote function
|
||||
before it must exit (this can be used to address memory leaks in
|
||||
third-party libraries or to reclaim resources that cannot easily be
|
||||
released, e.g., GPU memory that was acquired by TensorFlow). By
|
||||
default this is infinite.
|
||||
* **max_restarts**: Only for *actors*. This specifies the maximum
|
||||
number of times that the actor should be restarted when it dies
|
||||
unexpectedly. The minimum valid value is 0 (default), which indicates
|
||||
that the actor doesn't need to be restarted. A value of -1
|
||||
indicates that an actor should be restarted indefinitely.
|
||||
* **max_task_retries**: Only for *actors*. How many times to retry an actor
|
||||
task if the task fails due to a system error, e.g., the actor has died.
|
||||
If set to -1, the system will retry the failed task until the task
|
||||
succeeds, or the actor has reached its max_restarts limit. If set to n >
|
||||
0, the system will retry the failed task up to n times, after which the
|
||||
task will throw a `RayActorError` exception upon `ray.get`. Note that
|
||||
Python exceptions are not considered system errors and will not trigger
|
||||
retries.
|
||||
* **max_retries**: Only for *remote functions*. This specifies the maximum
|
||||
number of times that the remote function should be rerun when the worker
|
||||
process executing it crashes unexpectedly. The minimum valid value is 0,
|
||||
the default is 4 (default), and a value of -1 indicates infinite retries.
|
||||
* **placement_group**: the placement group this task belongs to,
|
||||
or None if it doesn't belong to any group.
|
||||
* **placement_group_bundle_index**: the index of the bundle
|
||||
if the task belongs to a placement group, which may be -1 to indicate
|
||||
any available bundle.
|
||||
|
||||
This can be done as follows:
|
||||
It can also be used with specific keyword arguments as follows:
|
||||
|
||||
.. code-block:: python
|
||||
|
||||
|
@ -1816,6 +1773,56 @@ def remote(*args, **kwargs):
|
|||
in Python is deleted, which will cause them to complete any outstanding
|
||||
work and then shut down. If you want to kill them immediately, you can
|
||||
also call ``ray.kill(actor)``.
|
||||
|
||||
Args:
|
||||
num_returns (int): This is only for *remote functions*. It specifies
|
||||
the number of object refs returned by
|
||||
the remote function invocation.
|
||||
num_cpus (float): The quantity of CPU cores to reserve
|
||||
for this task or for the lifetime of the actor.
|
||||
num_gpus (int): The quantity of GPUs to reserve
|
||||
for this task or for the lifetime of the actor.
|
||||
resources (Dict[str, float]): The quantity of various custom resources
|
||||
to reserve for this task or for the lifetime of the actor.
|
||||
This is a dictionary mapping strings (resource names) to floats.
|
||||
accelerator_type: If specified, requires that the task or actor run
|
||||
on a node with the specified type of accelerator.
|
||||
See `ray.accelerators` for accelerator types.
|
||||
max_calls (int): Only for *remote functions*. This specifies the
|
||||
maximum number of times that a given worker can execute
|
||||
the given remote function before it must exit
|
||||
(this can be used to address memory leaks in third-party
|
||||
libraries or to reclaim resources that cannot easily be
|
||||
released, e.g., GPU memory that was acquired by TensorFlow).
|
||||
By default this is infinite.
|
||||
max_restarts (int): Only for *actors*. This specifies the maximum
|
||||
number of times that the actor should be restarted when it dies
|
||||
unexpectedly. The minimum valid value is 0 (default),
|
||||
which indicates that the actor doesn't need to be restarted.
|
||||
A value of -1 indicates that an actor should be restarted
|
||||
indefinitely.
|
||||
max_task_retries (int): Only for *actors*. How many times to
|
||||
retry an actor task if the task fails due to a system error,
|
||||
e.g., the actor has died. If set to -1, the system will
|
||||
retry the failed task until the task succeeds, or the actor
|
||||
has reached its max_restarts limit. If set to `n > 0`, the
|
||||
system will retry the failed task up to n times, after which the
|
||||
task will throw a `RayActorError` exception upon :obj:`ray.get`.
|
||||
Note that Python exceptions are not considered system errors
|
||||
and will not trigger retries.
|
||||
max_retries (int): Only for *remote functions*. This specifies
|
||||
the maximum number of times that the remote function
|
||||
should be rerun when the worker process executing it
|
||||
crashes unexpectedly. The minimum valid value is 0,
|
||||
the default is 4 (default), and a value of -1 indicates
|
||||
infinite retries.
|
||||
placement_group (:obj:`PlacementGroup`): The placement group
|
||||
this task belongs to, or ``None`` if it doesn't belong
|
||||
to any group.
|
||||
placement_group_bundle_index (int): The index of the bundle
|
||||
if the task belongs to a placement group, which may be
|
||||
-1 to indicate any available bundle.
|
||||
|
||||
"""
|
||||
worker = global_worker
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue