From d7d4e1c87b8e09137127dcf5c663402d98e866f3 Mon Sep 17 00:00:00 2001 From: SangBin Cho Date: Wed, 16 Sep 2020 14:23:24 -0700 Subject: [PATCH] [Doc] Document options method (#10830) Co-authored-by: Richard Liaw --- doc/source/package-ref.rst | 4 ++ python/ray/actor.py | 53 ++++++++++++++++--- python/ray/remote_function.py | 46 ++++++++++++++--- python/ray/worker.py | 97 +++++++++++++++++++---------------- 4 files changed, 139 insertions(+), 61 deletions(-) diff --git a/doc/source/package-ref.rst b/doc/source/package-ref.rst index b05daec0b..07016ea63 100644 --- a/doc/source/package-ref.rst +++ b/doc/source/package-ref.rst @@ -25,6 +25,10 @@ ray.remote .. autofunction:: ray.remote +.. autofunction:: ray.remote_function.RemoteFunction.options + +.. autofunction:: ray.actor.ActorClass.options + .. _ray-get-ref: ray.get diff --git a/python/ray/actor.py b/python/ray/actor.py index caeb71275..a2a90d873 100644 --- a/python/ray/actor.py +++ b/python/ray/actor.py @@ -402,23 +402,60 @@ class ActorClass: """ return self._remote(args=args, kwargs=kwargs) - def options(self, **options): - """Convenience method for creating an actor with options. + def options(self, + args=None, + kwargs=None, + num_cpus=None, + num_gpus=None, + memory=None, + object_store_memory=None, + resources=None, + accelerator_type=None, + max_concurrency=None, + max_restarts=None, + max_task_retries=None, + name=None, + lifetime=None, + placement_group=None, + placement_group_bundle_index=-1): + """Configures and overrides the actor instantiation parameters. - Same arguments as Actor._remote(), but returns a wrapped actor class - that a non-underscore .remote() can be called on. + The arguments are the same as those that can be passed + to :obj:`ray.remote`. Examples: - # The following two calls are equivalent. - >>> Actor._remote(num_cpus=4, max_concurrency=8, args=[x, y]) - >>> Actor.options(num_cpus=4, max_concurrency=8).remote(x, y) + + .. code-block:: python + + @ray.remote(num_cpus=2, resources={"CustomResource": 1}) + class Foo: + def method(self): + return 1 + # Class Foo will require 1 cpu instead of 2. + # It will also require no custom resources. + Bar = Foo.options(num_cpus=1, resources=None) """ actor_cls = self class ActorOptionWrapper: def remote(self, *args, **kwargs): - return actor_cls._remote(args=args, kwargs=kwargs, **options) + return actor_cls._remote( + args=args, + kwargs=kwargs, + num_cpus=num_cpus, + num_gpus=num_gpus, + memory=memory, + object_store_memory=object_store_memory, + resources=resources, + accelerator_type=accelerator_type, + max_concurrency=max_concurrency, + max_restarts=max_restarts, + max_task_retries=max_task_retries, + name=name, + lifetime=lifetime, + placement_group=placement_group, + placement_group_bundle_index=placement_group_bundle_index) return ActorOptionWrapper() diff --git a/python/ray/remote_function.py b/python/ray/remote_function.py index 579d2828b..06bfa20b7 100644 --- a/python/ray/remote_function.py +++ b/python/ray/remote_function.py @@ -122,23 +122,53 @@ class RemoteFunction: num_gpus=num_gpus, resources=resources) - def options(self, **options): - """Convenience method for executing a task with options. + def options(self, + args=None, + kwargs=None, + num_returns=None, + num_cpus=None, + num_gpus=None, + memory=None, + object_store_memory=None, + accelerator_type=None, + resources=None, + max_retries=None, + placement_group=None, + placement_group_bundle_index=-1, + name=""): + """Configures and overrides the task invocation parameters. - Same arguments as func._remote(), but returns a wrapped function - that a non-underscore .remote() can be called on. + Options are overlapping values provided by :obj:`ray.remote`. Examples: - # The following two calls are equivalent. - >>> func._remote(num_cpus=4, args=[x, y]) - >>> func.options(num_cpus=4).remote(x, y) + + .. code-block:: python + + @ray.remote(num_gpus=1, max_calls=1, num_returns=2) + def f(): + return 1, 2 + # Task f will require 2 gpus instead of 1. + g = f.options(num_gpus=2, max_calls=None) """ func_cls = self class FuncWrapper: def remote(self, *args, **kwargs): - return func_cls._remote(args=args, kwargs=kwargs, **options) + return func_cls._remote( + args=args, + kwargs=kwargs, + num_returns=num_returns, + num_cpus=num_cpus, + num_gpus=num_gpus, + memory=memory, + object_store_memory=object_store_memory, + accelerator_type=accelerator_type, + resources=resources, + max_retries=max_retries, + placement_group=placement_group, + placement_group_bundle_index=placement_group_bundle_index, + name=name) return FuncWrapper() diff --git a/python/ray/worker.py b/python/ray/worker.py index 5a90073c6..55c6d60a7 100644 --- a/python/ray/worker.py +++ b/python/ray/worker.py @@ -1723,7 +1723,7 @@ def make_decorator(num_returns=None, def remote(*args, **kwargs): - """Define a remote function or an actor class. + """Defines a remote function or an actor class. This can be used with no arguments to define a remote function or actor as follows: @@ -1739,50 +1739,7 @@ def remote(*args, **kwargs): def method(self): return 1 - It can also be used with specific keyword arguments: - - * **num_returns:** This is only for *remote functions*. It specifies - the number of object refs returned by the remote function invocation. - * **num_cpus:** The quantity of CPU cores to reserve for this task or for - the lifetime of the actor. - * **num_gpus:** The quantity of GPUs to reserve for this task or for the - lifetime of the actor. - * **resources:** The quantity of various custom resources to reserve for - this task or for the lifetime of the actor. This is a dictionary mapping - strings (resource names) to numbers. - * **accelerator_type:** If specified, requires that the task or actor run - on a node with the specified type of accelerator. See `ray.accelerators` - for accelerator types. - * **max_calls:** Only for *remote functions*. This specifies the maximum - number of times that a given worker can execute the given remote function - before it must exit (this can be used to address memory leaks in - third-party libraries or to reclaim resources that cannot easily be - released, e.g., GPU memory that was acquired by TensorFlow). By - default this is infinite. - * **max_restarts**: Only for *actors*. This specifies the maximum - number of times that the actor should be restarted when it dies - unexpectedly. The minimum valid value is 0 (default), which indicates - that the actor doesn't need to be restarted. A value of -1 - indicates that an actor should be restarted indefinitely. - * **max_task_retries**: Only for *actors*. How many times to retry an actor - task if the task fails due to a system error, e.g., the actor has died. - If set to -1, the system will retry the failed task until the task - succeeds, or the actor has reached its max_restarts limit. If set to n > - 0, the system will retry the failed task up to n times, after which the - task will throw a `RayActorError` exception upon `ray.get`. Note that - Python exceptions are not considered system errors and will not trigger - retries. - * **max_retries**: Only for *remote functions*. This specifies the maximum - number of times that the remote function should be rerun when the worker - process executing it crashes unexpectedly. The minimum valid value is 0, - the default is 4 (default), and a value of -1 indicates infinite retries. - * **placement_group**: the placement group this task belongs to, - or None if it doesn't belong to any group. - * **placement_group_bundle_index**: the index of the bundle - if the task belongs to a placement group, which may be -1 to indicate - any available bundle. - - This can be done as follows: + It can also be used with specific keyword arguments as follows: .. code-block:: python @@ -1816,6 +1773,56 @@ def remote(*args, **kwargs): in Python is deleted, which will cause them to complete any outstanding work and then shut down. If you want to kill them immediately, you can also call ``ray.kill(actor)``. + + Args: + num_returns (int): This is only for *remote functions*. It specifies + the number of object refs returned by + the remote function invocation. + num_cpus (float): The quantity of CPU cores to reserve + for this task or for the lifetime of the actor. + num_gpus (int): The quantity of GPUs to reserve + for this task or for the lifetime of the actor. + resources (Dict[str, float]): The quantity of various custom resources + to reserve for this task or for the lifetime of the actor. + This is a dictionary mapping strings (resource names) to floats. + accelerator_type: If specified, requires that the task or actor run + on a node with the specified type of accelerator. + See `ray.accelerators` for accelerator types. + max_calls (int): Only for *remote functions*. This specifies the + maximum number of times that a given worker can execute + the given remote function before it must exit + (this can be used to address memory leaks in third-party + libraries or to reclaim resources that cannot easily be + released, e.g., GPU memory that was acquired by TensorFlow). + By default this is infinite. + max_restarts (int): Only for *actors*. This specifies the maximum + number of times that the actor should be restarted when it dies + unexpectedly. The minimum valid value is 0 (default), + which indicates that the actor doesn't need to be restarted. + A value of -1 indicates that an actor should be restarted + indefinitely. + max_task_retries (int): Only for *actors*. How many times to + retry an actor task if the task fails due to a system error, + e.g., the actor has died. If set to -1, the system will + retry the failed task until the task succeeds, or the actor + has reached its max_restarts limit. If set to `n > 0`, the + system will retry the failed task up to n times, after which the + task will throw a `RayActorError` exception upon :obj:`ray.get`. + Note that Python exceptions are not considered system errors + and will not trigger retries. + max_retries (int): Only for *remote functions*. This specifies + the maximum number of times that the remote function + should be rerun when the worker process executing it + crashes unexpectedly. The minimum valid value is 0, + the default is 4 (default), and a value of -1 indicates + infinite retries. + placement_group (:obj:`PlacementGroup`): The placement group + this task belongs to, or ``None`` if it doesn't belong + to any group. + placement_group_bundle_index (int): The index of the bundle + if the task belongs to a placement group, which may be + -1 to indicate any available bundle. + """ worker = global_worker