Create RemoteFunction class, remove FunctionProperties, simplify worker Python code. (#2052)

* Cleaning up worker and actor code. Create remote function class. Remove FunctionProperties object.

* Remove register_actor_signatures function.

* Small cleanups.

* Fix linting.

* Support @ray.method syntax for actor methods.

* Fix pickling bug.

* Fix linting.

* Shorten testBlockingTasks.

* Small fixes.

* Call get_global_worker().
This commit is contained in:
Robert Nishihara 2018-05-14 14:35:23 -07:00 committed by Philipp Moritz
parent ad48e47120
commit 8fbb88485b
9 changed files with 623 additions and 657 deletions

View file

@ -46,7 +46,7 @@ except ImportError as e:
e.args += (helpful_message, )
raise
from ray.local_scheduler import _config # noqa: E402
from ray.local_scheduler import ObjectID, _config # noqa: E402
from ray.worker import (error_info, init, connect, disconnect, get, put, wait,
remote, log_event, log_span, flush_log, get_gpu_ids,
get_webui_url,
@ -68,7 +68,7 @@ __all__ = [
"remote", "log_event", "log_span", "flush_log", "actor", "method",
"get_gpu_ids", "get_webui_url", "register_custom_serializer",
"SCRIPT_MODE", "WORKER_MODE", "PYTHON_MODE", "SILENT_MODE", "global_state",
"_config", "__version__"
"ObjectID", "_config", "__version__"
]
import ctypes # noqa: E402

View file

@ -12,8 +12,9 @@ import ray.cloudpickle as pickle
import ray.local_scheduler
import ray.signature as signature
import ray.worker
from ray.utils import (FunctionProperties, _random_string, is_cython,
push_error_to_driver)
from ray.utils import _random_string, is_cython, push_error_to_driver
DEFAULT_ACTOR_METHOD_NUM_RETURN_VALS = 1
def compute_actor_handle_id(actor_handle_id, num_forks):
@ -35,7 +36,7 @@ def compute_actor_handle_id(actor_handle_id, num_forks):
handle_id_hash.update(str(num_forks).encode("ascii"))
handle_id = handle_id_hash.digest()
assert len(handle_id) == 20
return ray.local_scheduler.ObjectID(handle_id)
return ray.ObjectID(handle_id)
def compute_actor_handle_id_non_forked(actor_id, actor_handle_id,
@ -67,7 +68,7 @@ def compute_actor_handle_id_non_forked(actor_id, actor_handle_id,
handle_id_hash.update(current_task_id.id())
handle_id = handle_id_hash.digest()
assert len(handle_id) == 20
return ray.local_scheduler.ObjectID(handle_id)
return ray.ObjectID(handle_id)
def compute_actor_creation_function_id(class_id):
@ -79,7 +80,7 @@ def compute_actor_creation_function_id(class_id):
Returns:
The function ID of the actor creation event.
"""
return ray.local_scheduler.ObjectID(class_id)
return ray.ObjectID(class_id)
def compute_actor_method_function_id(class_name, attr):
@ -93,11 +94,11 @@ def compute_actor_method_function_id(class_name, attr):
Function ID corresponding to the method.
"""
function_id_hash = hashlib.sha1()
function_id_hash.update(class_name)
function_id_hash.update(class_name.encode("ascii"))
function_id_hash.update(attr.encode("ascii"))
function_id = function_id_hash.digest()
assert len(function_id) == 20
return ray.local_scheduler.ObjectID(function_id)
return ray.ObjectID(function_id)
def set_actor_checkpoint(worker, actor_id, checkpoint_index, checkpoint,
@ -257,7 +258,7 @@ def make_actor_method_executor(worker, method_name, method, actor_imported):
return actor_method_executor
def fetch_and_register_actor(actor_class_key, resources, worker):
def fetch_and_register_actor(actor_class_key, worker):
"""Import an actor.
This will be called by the worker's import thread when the worker receives
@ -266,25 +267,20 @@ def fetch_and_register_actor(actor_class_key, resources, worker):
Args:
actor_class_key: The key in Redis to use to fetch the actor.
resources: The resources required for this actor's lifetime.
worker: The worker to use.
"""
actor_id_str = worker.actor_id
(driver_id, class_id, class_name, module, pickled_class,
checkpoint_interval, actor_method_names,
actor_method_num_return_vals) = worker.redis_client.hmget(
checkpoint_interval, actor_method_names) = worker.redis_client.hmget(
actor_class_key, [
"driver_id", "class_id", "class_name", "module", "class",
"checkpoint_interval", "actor_method_names",
"actor_method_num_return_vals"
"checkpoint_interval", "actor_method_names"
])
actor_name = class_name.decode("ascii")
class_name = class_name.decode("ascii")
module = module.decode("ascii")
checkpoint_interval = int(checkpoint_interval)
actor_method_names = json.loads(actor_method_names.decode("ascii"))
actor_method_num_return_vals = json.loads(
actor_method_num_return_vals.decode("ascii"))
# Create a temporary actor with some temporary methods so that if the actor
# fails to be unpickled, the temporary actor can be used (just to produce
@ -297,11 +293,8 @@ def fetch_and_register_actor(actor_class_key, resources, worker):
def temporary_actor_method(*xs):
raise Exception("The actor with name {} failed to be imported, and so "
"cannot execute this method".format(actor_name))
"cannot execute this method".format(class_name))
# Register the actor method signatures.
register_actor_signatures(worker, driver_id, class_id, class_name,
actor_method_names, actor_method_num_return_vals)
# Register the actor method executors.
for actor_method_name in actor_method_names:
function_id = compute_actor_method_function_id(class_name,
@ -311,8 +304,11 @@ def fetch_and_register_actor(actor_class_key, resources, worker):
actor_method_name,
temporary_actor_method,
actor_imported=False)
worker.functions[driver_id][function_id] = (actor_method_name,
temporary_executor)
worker.function_execution_info[driver_id][function_id] = (
ray.worker.FunctionExecutionInfo(
function=temporary_executor,
function_name=actor_method_name,
max_calls=0))
worker.num_task_executions[driver_id][function_id] = 0
try:
@ -347,63 +343,16 @@ def fetch_and_register_actor(actor_class_key, resources, worker):
class_name, actor_method_name).id()
executor = make_actor_method_executor(
worker, actor_method_name, actor_method, actor_imported=True)
worker.functions[driver_id][function_id] = (actor_method_name,
executor)
worker.function_execution_info[driver_id][function_id] = (
ray.worker.FunctionExecutionInfo(
function=executor,
function_name=actor_method_name,
max_calls=0))
# We do not set worker.function_properties[driver_id][function_id]
# because we currently do need the actor worker to submit new tasks
# for the actor.
def register_actor_signatures(worker,
driver_id,
class_id,
class_name,
actor_method_names,
actor_method_num_return_vals,
actor_creation_resources=None,
actor_method_cpus=None):
"""Register an actor's method signatures in the worker.
Args:
worker: The worker to register the signatures on.
driver_id: The ID of the driver that this actor is associated with.
class_id: The ID of the actor class.
class_name: The name of the actor class.
actor_method_names: The names of the methods to register.
actor_method_num_return_vals: A list of the number of return values for
each of the actor's methods.
actor_creation_resources: The resources required by the actor creation
task.
actor_method_cpus: The number of CPUs required by each actor method.
"""
assert len(actor_method_names) == len(actor_method_num_return_vals)
for actor_method_name, num_return_vals in zip(
actor_method_names, actor_method_num_return_vals):
# TODO(rkn): When we create a second actor, we are probably overwriting
# the values from the first actor here. This may or may not be a
# problem.
function_id = compute_actor_method_function_id(class_name,
actor_method_name).id()
worker.function_properties[driver_id][function_id] = (
# The extra return value is an actor dummy object.
# In the cases where actor_method_cpus is None, that value should
# never be used.
FunctionProperties(
num_return_vals=num_return_vals + 1,
resources={"CPU": actor_method_cpus},
max_calls=0))
if actor_creation_resources is not None:
# Also register the actor creation task.
function_id = compute_actor_creation_function_id(class_id)
worker.function_properties[driver_id][function_id.id()] = (
# The extra return value is an actor dummy object.
FunctionProperties(
num_return_vals=0 + 1,
resources=actor_creation_resources,
max_calls=0))
def publish_actor_class_to_key(key, actor_class_info, worker):
"""Push an actor class definition to Redis.
@ -424,17 +373,14 @@ def publish_actor_class_to_key(key, actor_class_info, worker):
def export_actor_class(class_id, Class, actor_method_names,
actor_method_num_return_vals, checkpoint_interval,
worker):
checkpoint_interval, worker):
key = b"ActorClass:" + class_id
actor_class_info = {
"class_name": Class.__name__,
"module": Class.__module__,
"class": pickle.dumps(Class),
"checkpoint_interval": checkpoint_interval,
"actor_method_names": json.dumps(list(actor_method_names)),
"actor_method_num_return_vals":
json.dumps(actor_method_num_return_vals)
"actor_method_names": json.dumps(list(actor_method_names))
}
if worker.mode is None:
@ -455,43 +401,6 @@ def export_actor_class(class_id, Class, actor_method_names,
# https://github.com/ray-project/ray/issues/1146.
def export_actor(actor_id, class_id, class_name, actor_method_names,
actor_method_num_return_vals, actor_creation_resources,
actor_method_cpus, worker):
"""Export an actor to redis.
Args:
actor_id (common.ObjectID): The ID of the actor.
class_id (str): A random ID for the actor class.
class_name (str): The actor class name.
actor_method_names (list): A list of the names of this actor's methods.
actor_method_num_return_vals: A list of the number of return values for
each of the actor's methods.
actor_creation_resources: A dictionary mapping resource name to the
quantity of that resource required by the actor.
actor_method_cpus: The number of CPUs required by actor methods.
"""
ray.worker.check_main_thread()
if worker.mode is None:
raise Exception("Actors cannot be created before Ray has been "
"started. You can start Ray with 'ray.init()'.")
driver_id = worker.task_driver_id.id()
register_actor_signatures(
worker,
driver_id,
class_id,
class_name,
actor_method_names,
actor_method_num_return_vals,
actor_creation_resources=actor_creation_resources,
actor_method_cpus=actor_method_cpus)
args = [class_id]
function_id = compute_actor_creation_function_id(class_id)
return worker.submit_task(function_id, args, actor_creation_id=actor_id)[0]
def method(*args, **kwargs):
assert len(args) == 0
assert len(kwargs) == 1
@ -508,9 +417,10 @@ def method(*args, **kwargs):
# Create objects to wrap method invocations. This is done so that we can
# invoke methods with actor.method.remote() instead of actor.method().
class ActorMethod(object):
def __init__(self, actor, method_name):
def __init__(self, actor, method_name, num_return_vals):
self._actor = actor
self._method_name = method_name
self._num_return_vals = num_return_vals
def __call__(self, *args, **kwargs):
raise Exception("Actor methods cannot be called directly. Instead "
@ -519,10 +429,17 @@ class ActorMethod(object):
self._method_name))
def remote(self, *args, **kwargs):
return self._submit(args, kwargs)
def _submit(self, args, kwargs, num_return_vals=None):
if num_return_vals is None:
num_return_vals = self._num_return_vals
return self._actor._actor_method_call(
self._method_name,
args=args,
kwargs=kwargs,
num_return_vals=num_return_vals,
dependency=self._actor._ray_actor_cursor)
@ -537,23 +454,71 @@ class ActorClass(object):
_class_id: The ID of this actor class.
_class_name: The name of this class.
_checkpoint_interval: The interval at which to checkpoint actor state.
_actor_creation_resources: The default resources required by the actor
creation task.
_num_cpus: The default number of CPUs required by the actor creation
task.
_num_gpus: The default number of GPUs required by the actor creation
task.
_resources: The default resources required by the actor creation task.
_actor_method_cpus: The number of CPUs required by actor method tasks.
_exported: True if the actor class has been exported and false
otherwise.
_actor_methods: The actor methods.
_method_signatures: The signatures of the methods.
_actor_method_names: The names of the actor methods.
_actor_method_num_return_vals: The default number of return values for
each actor method.
"""
def __init__(self, modified_class, class_id, checkpoint_interval,
actor_creation_resources, actor_method_cpus):
def __init__(self, modified_class, class_id, checkpoint_interval, num_cpus,
num_gpus, resources, actor_method_cpus):
self._modified_class = modified_class
self._class_id = class_id
self._class_name = modified_class.__name__.encode("ascii")
self._class_name = modified_class.__name__
self._checkpoint_interval = checkpoint_interval
self._actor_creation_resources = actor_creation_resources
self._num_cpus = num_cpus
self._num_gpus = num_gpus
self._resources = resources
self._actor_method_cpus = actor_method_cpus
self._exported = False
# Get the actor methods of the given class.
def pred(x):
return (inspect.isfunction(x) or inspect.ismethod(x)
or is_cython(x))
self._actor_methods = inspect.getmembers(
self._modified_class, predicate=pred)
# Extract the signatures of each of the methods. This will be used
# to catch some errors if the methods are called with inappropriate
# arguments.
self._method_signatures = dict()
self._actor_method_num_return_vals = dict()
for method_name, method in self._actor_methods:
# Print a warning message if the method signature is not
# supported. We don't raise an exception because if the actor
# inherits from a class that has a method whose signature we
# don't support, there may not be much the user can do about it.
signature.check_signature_supported(method, warn=True)
self._method_signatures[method_name] = signature.extract_signature(
method, ignore_first=True)
# Set the default number of return values for this method.
if hasattr(method, "__ray_num_return_vals__"):
self._actor_method_num_return_vals[method_name] = (
method.__ray_num_return_vals__)
else:
self._actor_method_num_return_vals[method_name] = (
DEFAULT_ACTOR_METHOD_NUM_RETURN_VALS)
self._actor_method_names = [
method_name for method_name, _ in self._actor_methods
]
def __call__(self, *args, **kwargs):
raise Exception("Actors methods cannot be instantiated directly. "
"Instead of running '{}()', try '{}.remote()'.".format(
self._class_name, self._class_name))
def remote(self, *args, **kwargs):
"""Create an actor.
@ -591,72 +556,54 @@ class ActorClass(object):
Returns:
A handle to the newly created actor.
"""
if ray.worker.global_worker.mode is None:
worker = ray.worker.get_global_worker()
ray.worker.check_main_thread()
if worker.mode is None:
raise Exception("Actors cannot be created before ray.init() "
"has been called.")
actor_id = ray.local_scheduler.ObjectID(_random_string())
actor_id = ray.ObjectID(_random_string())
# The actor cursor is a dummy object representing the most recent
# actor method invocation. For each subsequent method invocation,
# the current cursor should be added as a dependency, and then
# updated to reflect the new invocation.
actor_cursor = None
# Get the actor methods of the given class.
def pred(x):
return (inspect.isfunction(x) or inspect.ismethod(x)
or is_cython(x))
actor_methods = inspect.getmembers(
self._modified_class, predicate=pred)
# Extract the signatures of each of the methods. This will be used
# to catch some errors if the methods are called with inappropriate
# arguments.
method_signatures = dict()
for k, v in actor_methods:
# Print a warning message if the method signature is not
# supported. We don't raise an exception because if the actor
# inherits from a class that has a method whose signature we
# don't support, there may not be much the user can do about it.
signature.check_signature_supported(v, warn=True)
method_signatures[k] = signature.extract_signature(
v, ignore_first=True)
actor_method_names = [method_name for method_name, _ in actor_methods]
actor_method_num_return_vals = []
for _, method in actor_methods:
if hasattr(method, "__ray_num_return_vals__"):
actor_method_num_return_vals.append(
method.__ray_num_return_vals__)
else:
actor_method_num_return_vals.append(1)
# Do not export the actor class or the actor if run in PYTHON_MODE
# Instead, instantiate the actor locally and add it to
# global_worker's dictionary
if ray.worker.global_worker.mode == ray.PYTHON_MODE:
ray.worker.global_worker.actors[actor_id] = (
self._modified_class.__new__(self._modified_class))
# Instead, instantiate the actor locally and add it to the worker's
# dictionary
if worker.mode == ray.PYTHON_MODE:
worker.actors[actor_id] = self._modified_class.__new__(
self._modified_class)
else:
# Export the actor.
if not self._exported:
export_actor_class(
self._class_id, self._modified_class, actor_method_names,
actor_method_num_return_vals, self._checkpoint_interval,
ray.worker.global_worker)
export_actor_class(self._class_id, self._modified_class,
self._actor_method_names,
self._checkpoint_interval, worker)
self._exported = True
actor_cursor = export_actor(
actor_id, self._class_id, self._class_name, actor_method_names,
actor_method_num_return_vals, self._actor_creation_resources,
self._actor_method_cpus, ray.worker.global_worker)
resources = ray.utils.resources_from_resource_arguments(
self._num_cpus, self._num_gpus, self._resources, num_cpus,
num_gpus, resources)
creation_args = [self._class_id]
function_id = compute_actor_creation_function_id(self._class_id)
[actor_cursor] = worker.submit_task(
function_id,
creation_args,
actor_creation_id=actor_id,
num_return_vals=1,
resources=resources)
# We initialize the actor counter at 1 to account for the actor
# creation task.
actor_counter = 1
actor_handle = ActorHandle(
actor_id, self._class_name, actor_cursor, actor_counter,
actor_method_names, actor_method_num_return_vals,
method_signatures, actor_cursor, self._actor_method_cpus,
ray.worker.global_worker.task_driver_id)
self._actor_method_names, self._method_signatures,
self._actor_method_num_return_vals, actor_cursor,
self._actor_method_cpus, worker.task_driver_id)
# Call __init__ as a remote function.
if "__init__" in actor_handle._ray_actor_method_names:
@ -703,9 +650,9 @@ class ActorHandle(object):
_ray_actor_counter: The number of actor method invocations that we've
called so far.
_ray_actor_method_names: The names of the actor methods.
_ray_actor_method_num_return_vals: The number of return values for each
actor method.
_ray_method_signatures: The signatures of the actor methods.
_ray_method_num_return_vals: The default number of return values for
each method.
_ray_class_name: The name of the actor class.
_ray_actor_forks: The number of times this handle has been forked.
_ray_actor_creation_dummy_object_id: The dummy object ID from the actor
@ -729,8 +676,8 @@ class ActorHandle(object):
actor_cursor,
actor_counter,
actor_method_names,
actor_method_num_return_vals,
method_signatures,
method_num_return_vals,
actor_creation_dummy_object_id,
actor_method_cpus,
actor_driver_id,
@ -742,15 +689,15 @@ class ActorHandle(object):
self._ray_actor_id = actor_id
if self._ray_original_handle:
self._ray_actor_handle_id = ray.local_scheduler.ObjectID(
self._ray_actor_handle_id = ray.ObjectID(
ray.worker.NIL_ACTOR_HANDLE_ID)
else:
self._ray_actor_handle_id = actor_handle_id
self._ray_actor_cursor = actor_cursor
self._ray_actor_counter = actor_counter
self._ray_actor_method_names = actor_method_names
self._ray_actor_method_num_return_vals = actor_method_num_return_vals
self._ray_method_signatures = method_signatures
self._ray_method_num_return_vals = method_num_return_vals
self._ray_class_name = class_name
self._ray_actor_forks = 0
self._ray_actor_creation_dummy_object_id = (
@ -786,8 +733,11 @@ class ActorHandle(object):
object_ids: A list of object IDs returned by the remote actor
method.
"""
ray.worker.check_connected()
worker = ray.worker.get_global_worker()
worker.check_connected()
ray.worker.check_main_thread()
function_signature = self._ray_method_signatures[method_name]
if args is None:
args = []
@ -797,8 +747,8 @@ class ActorHandle(object):
# Execute functions locally if Ray is run in PYTHON_MODE
# Copy args to prevent the function from mutating them.
if ray.worker.global_worker.mode == ray.PYTHON_MODE:
return getattr(ray.worker.global_worker.actors[self._ray_actor_id],
if worker.mode == ray.PYTHON_MODE:
return getattr(worker.actors[self._ray_actor_id],
method_name)(*copy.deepcopy(args))
# Add the execution dependency.
@ -812,13 +762,13 @@ class ActorHandle(object):
if self._ray_actor_handle_id is None:
actor_handle_id = compute_actor_handle_id_non_forked(
self._ray_actor_id, self._ray_previous_actor_handle_id,
ray.worker.global_worker.current_task_id)
worker.current_task_id)
else:
actor_handle_id = self._ray_actor_handle_id
function_id = compute_actor_method_function_id(self._ray_class_name,
method_name)
object_ids = ray.worker.global_worker.submit_task(
object_ids = worker.submit_task(
function_id,
args,
actor_id=self._ray_actor_id,
@ -828,7 +778,9 @@ class ActorHandle(object):
actor_creation_dummy_object_id=(
self._ray_actor_creation_dummy_object_id),
execution_dependencies=execution_dependencies,
num_return_vals=num_return_vals,
# We add one for the dummy return ID.
num_return_vals=num_return_vals + 1,
resources={"CPU": self._ray_actor_method_cpus},
driver_id=self._ray_actor_driver_id)
# Update the actor counter and cursor to reflect the most recent
# invocation.
@ -860,8 +812,8 @@ class ActorHandle(object):
# this was causing cyclic references which were prevent
# object deallocation from behaving in a predictable
# manner.
actor_method_cls = ActorMethod
return actor_method_cls(self, attr)
return ActorMethod(self, attr,
self._ray_method_num_return_vals[attr])
except AttributeError:
pass
@ -880,11 +832,11 @@ class ActorHandle(object):
# this is not the right policy. the actor should be alive as long as
# there are ANY handles in scope in the process that created the actor,
# not just the first one.
if ray.worker.global_worker.connected and self._ray_original_handle:
worker = ray.worker.get_global_worker()
if worker.connected and self._ray_original_handle:
# TODO(rkn): Should we be passing in the actor cursor as a
# dependency here?
# self.__ray__terminate__.remote()
self._actor_method_call("__ray_terminate__")
self.__ray_terminate__.remote()
@property
def _actor_id(self):
@ -917,10 +869,10 @@ class ActorHandle(object):
0, # Reset the actor counter.
"actor_method_names":
self._ray_actor_method_names,
"actor_method_num_return_vals":
self._ray_actor_method_num_return_vals,
"method_signatures":
self._ray_method_signatures,
"method_num_return_vals":
self._ray_method_num_return_vals,
"actor_creation_dummy_object_id":
self._ray_actor_creation_dummy_object_id.id(),
"actor_method_cpus":
@ -946,43 +898,36 @@ class ActorHandle(object):
ray_forking: True if this is being called because Ray is forking
the actor handle and false if it is being called by pickling.
"""
ray.worker.check_connected()
worker = ray.worker.get_global_worker()
worker.check_connected()
ray.worker.check_main_thread()
if state["ray_forking"]:
actor_handle_id = compute_actor_handle_id(
ray.local_scheduler.ObjectID(
state["previous_actor_handle_id"]), state["actor_forks"])
ray.ObjectID(state["previous_actor_handle_id"]),
state["actor_forks"])
else:
actor_handle_id = None
# This is the driver ID of the driver that owns the actor, not
# necessarily the driver that owns this actor handle.
actor_driver_id = ray.local_scheduler.ObjectID(
state["actor_driver_id"])
actor_driver_id = ray.ObjectID(state["actor_driver_id"])
self.__init__(
ray.local_scheduler.ObjectID(state["actor_id"]),
ray.ObjectID(state["actor_id"]),
state["class_name"],
ray.local_scheduler.ObjectID(state["actor_cursor"]),
ray.ObjectID(state["actor_cursor"]),
state["actor_counter"],
state["actor_method_names"],
state["actor_method_num_return_vals"],
state["method_signatures"],
ray.local_scheduler.ObjectID(
state["actor_creation_dummy_object_id"]),
state["method_num_return_vals"],
ray.ObjectID(state["actor_creation_dummy_object_id"]),
state["actor_method_cpus"],
actor_driver_id,
actor_handle_id=actor_handle_id,
previous_actor_handle_id=ray.local_scheduler.ObjectID(
previous_actor_handle_id=ray.ObjectID(
state["previous_actor_handle_id"]))
register_actor_signatures(
ray.worker.global_worker, actor_driver_id.id(), None,
self._ray_class_name, self._ray_actor_method_names,
self._ray_actor_method_num_return_vals, None,
self._ray_actor_method_cpus)
def __getstate__(self):
"""This code path is used by pickling but not by Ray forking."""
return self._serialization_helper(False)
@ -992,7 +937,11 @@ class ActorHandle(object):
return self._deserialization_helper(state, False)
def make_actor(cls, resources, checkpoint_interval, actor_method_cpus):
def make_actor(cls, num_cpus, num_gpus, resources, actor_method_cpus,
checkpoint_interval):
if checkpoint_interval is None:
checkpoint_interval = -1
if checkpoint_interval == 0:
raise Exception("checkpoint_interval must be greater than 0.")
@ -1000,12 +949,14 @@ def make_actor(cls, resources, checkpoint_interval, actor_method_cpus):
# terminating the worker.
class Class(cls):
def __ray_terminate__(self):
# Disconnect the worker from the local scheduler. The point of this
# is so that when the worker kills itself below, the local
# scheduler won't push an error message to the driver.
ray.worker.global_worker.local_scheduler_client.disconnect()
import os
os._exit(0)
worker = ray.worker.get_global_worker()
if worker.mode != ray.PYTHON_MODE:
# Disconnect the worker from the local scheduler. The point of
# this is so that when the worker kills itself below, the local
# scheduler won't push an error message to the driver.
worker.local_scheduler_client.disconnect()
import os
os._exit(0)
def __ray_save_checkpoint__(self):
if hasattr(self, "__ray_save__"):
@ -1043,7 +994,7 @@ def make_actor(cls, resources, checkpoint_interval, actor_method_cpus):
# scheduler has seen. Handle IDs for which no task has yet reached
# the local scheduler will not be included, and may not be runnable
# on checkpoint resumption.
actor_id = ray.local_scheduler.ObjectID(worker.actor_id)
actor_id = ray.ObjectID(worker.actor_id)
frontier = worker.local_scheduler_client.get_actor_frontier(
actor_id)
# Save the checkpoint in Redis. TODO(rkn): Checkpoints
@ -1085,8 +1036,8 @@ def make_actor(cls, resources, checkpoint_interval, actor_method_cpus):
class_id = _random_string()
return ActorClass(Class, class_id, checkpoint_interval, resources,
actor_method_cpus)
return ActorClass(Class, class_id, checkpoint_interval, num_cpus, num_gpus,
resources, actor_method_cpus)
ray.worker.global_worker.fetch_and_register_actor = fetch_and_register_actor

View file

@ -459,7 +459,7 @@ class DataFrame(object):
if isinstance(self._dtypes_cache, list) and \
isinstance(self._dtypes_cache[0],
ray.local_scheduler.ObjectID):
ray.ObjectID):
self._dtypes_cache = pd.concat(ray.get(self._dtypes_cache))
self._dtypes_cache.index = self.columns

View file

@ -55,9 +55,9 @@ class _IndexMetadata(object):
self._cached_index = False
def _get__lengths(self):
if isinstance(self._lengths_cache, ray.local_scheduler.ObjectID) or \
if isinstance(self._lengths_cache, ray.ObjectID) or \
(isinstance(self._lengths_cache, list) and
isinstance(self._lengths_cache[0], ray.local_scheduler.ObjectID)):
isinstance(self._lengths_cache[0], ray.ObjectID)):
self._lengths_cache = ray.get(self._lengths_cache)
return self._lengths_cache
@ -72,7 +72,7 @@ class _IndexMetadata(object):
Since we may have had an index set before our coord_df was
materialized, we'll have to apply it to the newly materialized df
"""
if isinstance(self._coord_df_cache, ray.local_scheduler.ObjectID):
if isinstance(self._coord_df_cache, ray.ObjectID):
self._coord_df_cache = ray.get(self._coord_df_cache)
if self._cached_index:
self._coord_df_cache.index = self._index_cache
@ -89,7 +89,7 @@ class _IndexMetadata(object):
If the set _IndexMetadata is an OID instead (due to a copy or whatever
reason), we fall back relying on `_index_cache`.
"""
if not isinstance(coord_df, ray.local_scheduler.ObjectID):
if not isinstance(coord_df, ray.ObjectID):
self._index_cache = coord_df.index
self._coord_df_cache = coord_df
@ -102,7 +102,7 @@ class _IndexMetadata(object):
_IndexMetadata object without a specified `index` parameter (See the
_IndexMetadata constructor for more details)
"""
if isinstance(self._coord_df_cache, ray.local_scheduler.ObjectID):
if isinstance(self._coord_df_cache, ray.ObjectID):
return self._index_cache
else:
return self._coord_df_cache.index
@ -119,7 +119,7 @@ class _IndexMetadata(object):
assert len(new_index) == len(self)
self._index_cache = new_index
if isinstance(self._coord_df_cache, ray.local_scheduler.ObjectID):
if isinstance(self._coord_df_cache, ray.ObjectID):
self._cached_index = True
else:
self._coord_df_cache.index = new_index
@ -140,7 +140,7 @@ class _IndexMetadata(object):
if self._index_cache_validator is None:
self._index_cache_validator = pd.RangeIndex(len(self))
elif isinstance(self._index_cache_validator,
ray.local_scheduler.ObjectID):
ray.ObjectID):
self._index_cache_validator = ray.get(self._index_cache_validator)
return self._index_cache_validator
@ -296,11 +296,11 @@ class _IndexMetadata(object):
def copy(self):
# TODO: Investigate copy-on-write wrapper for metadata objects
coord_df_copy = self._coord_df_cache
if not isinstance(self._coord_df_cache, ray.local_scheduler.ObjectID):
if not isinstance(self._coord_df_cache, ray.ObjectID):
coord_df_copy = self._coord_df_cache.copy()
lengths_copy = self._lengths_cache
if not isinstance(self._lengths_cache, ray.local_scheduler.ObjectID):
if not isinstance(self._lengths_cache, ray.ObjectID):
lengths_copy = self._lengths_cache.copy()
index_copy = self._index_cache

View file

@ -184,8 +184,8 @@ class GlobalState(object):
A dictionary with information about the object ID in question.
"""
# Allow the argument to be either an ObjectID or a hex string.
if not isinstance(object_id, ray.local_scheduler.ObjectID):
object_id = ray.local_scheduler.ObjectID(hex_to_binary(object_id))
if not isinstance(object_id, ray.ObjectID):
object_id = ray.ObjectID(hex_to_binary(object_id))
# Return information about a single object ID.
object_locations = self._execute_command(object_id,
@ -297,7 +297,7 @@ class GlobalState(object):
TaskExecutionDependencies.GetRootAsTaskExecutionDependencies(
task_table_message.ExecutionDependencies(), 0))
execution_dependencies = [
ray.local_scheduler.ObjectID(
ray.ObjectID(
execution_dependencies_message.ExecutionDependencies(i))
for i in range(
execution_dependencies_message.ExecutionDependenciesLength())
@ -335,7 +335,7 @@ class GlobalState(object):
"""
self._check_connected()
if task_id is not None:
task_id = ray.local_scheduler.ObjectID(hex_to_binary(task_id))
task_id = ray.ObjectID(hex_to_binary(task_id))
return self._task_table(task_id)
else:
task_table_keys = self._keys(TASK_PREFIX + "*")
@ -343,7 +343,7 @@ class GlobalState(object):
for key in task_table_keys:
task_id_binary = key[len(TASK_PREFIX):]
results[binary_to_hex(task_id_binary)] = self._task_table(
ray.local_scheduler.ObjectID(task_id_binary))
ray.ObjectID(task_id_binary))
return results
def function_table(self, function_id=None):
@ -628,8 +628,7 @@ class GlobalState(object):
# modify it in place since we will use the original values later.
total_info = copy.copy(task_table[task_id]["TaskSpec"])
total_info["Args"] = [
oid.hex()
if isinstance(oid, ray.local_scheduler.ObjectID) else oid
oid.hex() if isinstance(oid, ray.ObjectID) else oid
for oid in task_t_info["TaskSpec"]["Args"]
]
total_info["ReturnObjectIDs"] = [
@ -855,7 +854,7 @@ class GlobalState(object):
args = task_table[task_id]["TaskSpec"]["Args"]
for arg in args:
# Don't visualize arguments that are not object IDs.
if isinstance(arg, ray.local_scheduler.ObjectID):
if isinstance(arg, ray.ObjectID):
object_info = self._object_table(arg)
# Don't visualize objects that were created by calls to
# put.

View file

@ -0,0 +1,158 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import hashlib
import inspect
import ray.signature
# Default parameters for remote functions.
DEFAULT_REMOTE_FUNCTION_CPUS = 1
DEFAULT_REMOTE_FUNCTION_NUM_RETURN_VALS = 1
DEFAULT_REMOTE_FUNCTION_MAX_CALLS = 0
def in_ipython():
"""Return true if we are in an IPython interpreter and false otherwise."""
try:
__IPYTHON__
return True
except NameError:
return False
def compute_function_id(function):
"""Compute an function ID for a function.
Args:
func: The actual function.
Returns:
This returns the function ID.
"""
function_id_hash = hashlib.sha1()
# Include the function module and name in the hash.
function_id_hash.update(function.__module__.encode("ascii"))
function_id_hash.update(function.__name__.encode("ascii"))
# If we are running a script or are in IPython, include the source code in
# the hash. If we are in a regular Python interpreter we skip this part
# because the source code is not accessible. If the function is a built-in
# (e.g., Cython), the source code is not accessible.
import __main__ as main
if (hasattr(main, "__file__") or in_ipython()) \
and inspect.isfunction(function):
function_id_hash.update(inspect.getsource(function).encode("ascii"))
# Compute the function ID.
function_id = function_id_hash.digest()
assert len(function_id) == 20
function_id = ray.ObjectID(function_id)
return function_id
class RemoteFunction(object):
"""A remote function.
This is a decorated function. It can be used to spawn tasks.
Attributes:
_function: The original function.
_function_id: The ID of the function.
_function_name: The module and function name.
_num_cpus: The default number of CPUs to use for invocations of this
remote function.
_num_gpus: The default number of GPUs to use for invocations of this
remote function.
_resources: The default custom resource requirements for invocations of
this remote function.
_num_return_vals: The default number of return values for invocations
of this remote function.
_max_calls: The number of times a worker can execute this function
before executing.
_function_signature: The function signature.
"""
def __init__(self, function, num_cpus, num_gpus, resources,
num_return_vals, max_calls):
self._function = function
# TODO(rkn): We store the function ID as a string, so that
# RemoteFunction objects can be pickled. We should undo this when
# we allow ObjectIDs to be pickled.
self._function_id = compute_function_id(self._function).id()
self._function_name = (
self._function.__module__ + '.' + self._function.__name__)
self._num_cpus = (DEFAULT_REMOTE_FUNCTION_CPUS
if num_cpus is None else num_cpus)
self._num_gpus = num_gpus
self._resources = resources
self._num_return_vals = (DEFAULT_REMOTE_FUNCTION_NUM_RETURN_VALS if
num_return_vals is None else num_return_vals)
self._max_calls = (DEFAULT_REMOTE_FUNCTION_MAX_CALLS
if max_calls is None else max_calls)
ray.signature.check_signature_supported(self._function)
self._function_signature = ray.signature.extract_signature(
self._function)
# # Export the function.
worker = ray.worker.get_global_worker()
if worker.mode in [ray.worker.SCRIPT_MODE, ray.worker.SILENT_MODE]:
self._export()
elif worker.mode is None:
worker.cached_remote_functions_and_actors.append(
("remote_function", self))
def __call__(self, *args, **kwargs):
raise Exception("Remote functions cannot be called directly. Instead "
"of running '{}()', try '{}.remote()'.".format(
self._function_name, self._function_name))
def remote(self, *args, **kwargs):
"""This runs immediately when a remote function is called."""
return self._submit(args=args, kwargs=kwargs)
def _submit(self,
args=None,
kwargs=None,
num_return_vals=None,
num_cpus=None,
num_gpus=None,
resources=None):
"""An experimental alternate way to submit remote functions."""
worker = ray.worker.get_global_worker()
worker.check_connected()
ray.worker.check_main_thread()
kwargs = {} if kwargs is None else kwargs
args = ray.signature.extend_args(self._function_signature, args,
kwargs)
if num_return_vals is None:
num_return_vals = self._num_return_vals
resources = ray.utils.resources_from_resource_arguments(
self._num_cpus, self._num_gpus, self._resources, num_cpus,
num_gpus, resources)
if worker.mode == ray.worker.PYTHON_MODE:
# In PYTHON_MODE, remote calls simply execute the function.
# We copy the arguments to prevent the function call from
# mutating them and to match the usual behavior of
# immutable remote objects.
result = self._function(*copy.deepcopy(args))
return result
object_ids = worker.submit_task(
ray.ObjectID(self._function_id),
args,
num_return_vals=num_return_vals,
resources=resources)
if len(object_ids) == 1:
return object_ids[0]
elif len(object_ids) > 1:
return object_ids
def _export(self):
worker = ray.worker.get_global_worker()
worker.export_remote_function(
ray.ObjectID(self._function_id), self._function_name,
self._function, self._max_calls, self)

View file

@ -3,7 +3,6 @@ from __future__ import division
from __future__ import print_function
import binascii
import collections
import hashlib
import numpy as np
import os
@ -125,7 +124,7 @@ def decode(byte_str):
def binary_to_object_id(binary_object_id):
return ray.local_scheduler.ObjectID(binary_object_id)
return ray.ObjectID(binary_object_id)
def binary_to_hex(identifier):
@ -139,11 +138,6 @@ def hex_to_binary(hex_identifier):
return binascii.unhexlify(hex_identifier)
FunctionProperties = collections.namedtuple(
"FunctionProperties", ["num_return_vals", "resources", "max_calls"])
"""FunctionProperties: A named tuple storing remote functions information."""
def get_cuda_visible_devices():
"""Get the device IDs in the CUDA_VISIBLE_DEVICES environment variable.
@ -169,3 +163,48 @@ def set_cuda_visible_devices(gpu_ids):
gpu_ids: This is a list of integers representing GPU IDs.
"""
os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(i) for i in gpu_ids])
def resources_from_resource_arguments(default_num_cpus, default_num_gpus,
default_resources, runtime_num_cpus,
runtime_num_gpus, runtime_resources):
"""Determine a task's resource requirements.
Args:
default_num_cpus: The default number of CPUs required by this function
or actor method.
default_num_gpus: The default number of GPUs required by this function
or actor method.
default_resources: The default custom resources required by this
function or actor method.
runtime_num_cpus: The number of CPUs requested when the task was
invoked.
runtime_num_gpus: The number of GPUs requested when the task was
invoked.
runtime_resources: The custom resources requested when the task was
invoked.
Returns:
A dictionary of the resource requirements for the task.
"""
if runtime_resources is not None:
resources = runtime_resources.copy()
elif default_resources is not None:
resources = default_resources.copy()
else:
resources = {}
if "CPU" in resources or "GPU" in resources:
raise ValueError("The resources dictionary must not "
"contain the key 'CPU' or 'GPU'")
assert default_num_cpus is not None
resources["CPU"] = (default_num_cpus
if runtime_num_cpus is None else runtime_num_cpus)
if runtime_num_gpus is not None:
resources["GPU"] = runtime_num_gpus
elif default_num_gpus is not None:
resources["GPU"] = default_num_gpus
return resources

View file

@ -5,7 +5,6 @@ from __future__ import print_function
import atexit
import collections
import colorama
import copy
import hashlib
import inspect
import json
@ -23,13 +22,13 @@ import pyarrow
import pyarrow.plasma as plasma
import ray.cloudpickle as pickle
import ray.experimental.state as state
import ray.remote_function
import ray.serialization as serialization
import ray.services as services
import ray.signature as signature
import ray.signature
import ray.local_scheduler
import ray.plasma
from ray.utils import (FunctionProperties, random_string, binary_to_hex,
is_cython)
from ray.utils import random_string, binary_to_hex, is_cython
# Import flatbuffer bindings.
from ray.core.generated.ClientTableData import ClientTableData
@ -63,9 +62,6 @@ PUT_RECONSTRUCTION_ERROR_TYPE = b"put_reconstruction"
# This must be kept in sync with the `scheduling_state` enum in common/task.h.
TASK_STATUS_RUNNING = 8
# Default resource requirements for remote functions.
DEFAULT_REMOTE_FUNCTION_CPUS = 1
DEFAULT_REMOTE_FUNCTION_GPUS = 0
# Default resource requirements for actors when no resource requirements are
# specified.
DEFAULT_ACTOR_METHOD_CPUS_SIMPLE_CASE = 1
@ -74,15 +70,6 @@ DEFAULT_ACTOR_CREATION_CPUS_SIMPLE_CASE = 0
# specified.
DEFAULT_ACTOR_METHOD_CPUS_SPECIFIED_CASE = 0
DEFAULT_ACTOR_CREATION_CPUS_SPECIFIED_CASE = 1
DEFAULT_ACTOR_CREATION_GPUS_SPECIFIED_CASE = 0
class FunctionID(object):
def __init__(self, function_id):
self.function_id = function_id
def id(self):
return self.function_id
class RayTaskError(Exception):
@ -182,6 +169,11 @@ class RayGetArgumentError(Exception):
self.task_error))
FunctionExecutionInfo = collections.namedtuple(
"FunctionExecutionInfo", ["function", "function_name", "max_calls"])
"""FunctionExecutionInfo: A named tuple storing remote function information."""
class Worker(object):
"""A class used to define the control flow of a worker process.
@ -190,9 +182,10 @@ class Worker(object):
functions outside of this class are considered exposed.
Attributes:
functions (Dict[str, Callable]): A dictionary mapping the name of a
remote function to the remote function itself. This is the set of
remote functions that can be executed by this worker.
function_execution_info (Dict[str, FunctionExecutionInfo]): A
dictionary mapping the name of a remote function to the remote
function itself. This is the set of remote functions that can be
executed by this worker.
connected (bool): True if Ray has been started and False otherwise.
mode: The mode of the worker. One of SCRIPT_MODE, PYTHON_MODE,
SILENT_MODE, and WORKER_MODE.
@ -208,20 +201,12 @@ class Worker(object):
def __init__(self):
"""Initialize a Worker object."""
# The functions field is a dictionary that maps a driver ID to a
# dictionary of functions that have been registered for that driver
# (this inner dictionary maps function IDs to a tuple of the function
# name and the function itself). This should only be used on workers
# that execute remote functions.
self.functions = collections.defaultdict(lambda: {})
# The function_properties field is a dictionary that maps a driver ID
# to a dictionary of functions that have been registered for that
# driver (this inner dictionary maps function IDs to a tuple of the
# number of values returned by that function, the number of CPUs
# required by that function, and the number of GPUs required by that
# function). This is used when submitting a function (which can be done
# both on workers and on drivers).
self.function_properties = collections.defaultdict(lambda: {})
# This field is a dictionary that maps a driver ID to a dictionary of
# functions (and information about those functions) that have been
# registered for that driver (this inner dictionary maps function IDs
# to a FunctionExecutionInfo object. This should only be used on
# workers that execute remote functions.
self.function_execution_info = collections.defaultdict(lambda: {})
# This is a dictionary mapping driver ID to a dictionary that maps
# remote function IDs for that driver to a counter of the number of
# times that remote function has been executed on this worker. The
@ -248,6 +233,16 @@ class Worker(object):
# CUDA_VISIBLE_DEVICES environment variable.
self.original_gpu_ids = ray.utils.get_cuda_visible_devices()
def check_connected(self):
"""Check if the worker is connected.
Raises:
Exception: An exception is raised if the worker is not connected.
"""
if not self.connected:
raise RayConnectionError("Ray has not been started yet. You can "
"start Ray with 'ray.init()'.")
def set_mode(self, mode):
"""Set the mode of the worker.
@ -356,7 +351,7 @@ class Worker(object):
full.
"""
# Make sure that the value is not an object ID.
if isinstance(value, ray.local_scheduler.ObjectID):
if isinstance(value, ray.ObjectID):
raise Exception("Calling 'put' on an ObjectID is not allowed "
"(similarly, returning an ObjectID from a remote "
"function is not allowed). If you really want to "
@ -438,7 +433,7 @@ class Worker(object):
"""
# Make sure that the values are object IDs.
for object_id in object_ids:
if not isinstance(object_id, ray.local_scheduler.ObjectID):
if not isinstance(object_id, ray.ObjectID):
raise Exception("Attempting to call `get` on the value {}, "
"which is not an ObjectID.".format(object_id))
# Do an initial fetch for remote objects. We divide the fetch into
@ -518,8 +513,6 @@ class Worker(object):
actor_creation_dummy_object_id=None,
execution_dependencies=None,
num_return_vals=None,
num_cpus=None,
num_gpus=None,
resources=None,
driver_id=None):
"""Submit a remote task to the scheduler.
@ -545,8 +538,6 @@ class Worker(object):
execution_dependencies: The execution dependencies for this task.
num_return_vals: The number of return values this function should
have.
num_cpus: The number of CPUs required by this task.
num_gpus: The number of GPUs required by this task.
resources: The resource requirements for this task.
driver_id: The ID of the relevant driver. This is almost always the
driver ID of the driver that is currently running. However, in
@ -561,24 +552,22 @@ class Worker(object):
check_main_thread()
if actor_id is None:
assert actor_handle_id is None
actor_id = ray.local_scheduler.ObjectID(NIL_ACTOR_ID)
actor_handle_id = ray.local_scheduler.ObjectID(
NIL_ACTOR_HANDLE_ID)
actor_id = ray.ObjectID(NIL_ACTOR_ID)
actor_handle_id = ray.ObjectID(NIL_ACTOR_HANDLE_ID)
else:
assert actor_handle_id is not None
if actor_creation_id is None:
actor_creation_id = ray.local_scheduler.ObjectID(NIL_ACTOR_ID)
actor_creation_id = ray.ObjectID(NIL_ACTOR_ID)
if actor_creation_dummy_object_id is None:
actor_creation_dummy_object_id = (
ray.local_scheduler.ObjectID(NIL_ID))
actor_creation_dummy_object_id = (ray.ObjectID(NIL_ID))
# Put large or complex arguments that are passed by value in the
# object store first.
args_for_local_scheduler = []
for arg in args:
if isinstance(arg, ray.local_scheduler.ObjectID):
if isinstance(arg, ray.ObjectID):
args_for_local_scheduler.append(arg)
elif ray.local_scheduler.check_simple_value(arg):
args_for_local_scheduler.append(arg)
@ -592,26 +581,12 @@ class Worker(object):
if driver_id is None:
driver_id = self.task_driver_id
# Look up the various function properties.
function_properties = self.function_properties[driver_id.id()][
function_id.id()]
if num_return_vals is None:
num_return_vals = function_properties.num_return_vals
if resources is None and num_cpus is None and num_gpus is None:
resources = function_properties.resources
else:
resources = {} if resources is None else resources
if "CPU" in resources or "GPU" in resources:
raise ValueError("The resources dictionary must not "
"contain the keys 'CPU' or 'GPU'")
resources["CPU"] = num_cpus
resources["GPU"] = num_gpus
if resources is None:
raise ValueError("The resources dictionary is required.")
# Submit the task to local scheduler.
task = ray.local_scheduler.Task(
driver_id, ray.local_scheduler.ObjectID(
driver_id, ray.ObjectID(
function_id.id()), args_for_local_scheduler,
num_return_vals, self.current_task_id, self.task_index,
actor_creation_id, actor_creation_dummy_object_id, actor_id,
@ -624,6 +599,55 @@ class Worker(object):
return task.returns()
def export_remote_function(self, function_id, function_name, function,
max_calls, decorated_function):
"""Export a remote function.
Args:
function_id: The ID of the function.
function_name: The name of the function.
function: The raw undecorated function to export.
max_calls: The maximum number of times a given worker can execute
this function before exiting.
decorated_function: The decorated function (this is used to enable
the remote function to recursively call itself).
"""
check_main_thread()
if self.mode not in [SCRIPT_MODE, SILENT_MODE]:
raise Exception("export_remote_function can only be called on a "
"driver.")
key = (b"RemoteFunction:" + self.task_driver_id.id() + b":" +
function_id.id())
# Work around limitations of Python pickling.
function_name_global_valid = function.__name__ in function.__globals__
function_name_global_value = function.__globals__.get(
function.__name__)
# Allow the function to reference itself as a global variable
if not is_cython(function):
function.__globals__[function.__name__] = decorated_function
try:
pickled_function = pickle.dumps(function)
finally:
# Undo our changes
if function_name_global_valid:
function.__globals__[function.__name__] = (
function_name_global_value)
else:
del function.__globals__[function.__name__]
self.redis_client.hmset(
key, {
"driver_id": self.task_driver_id.id(),
"function_id": function_id.id(),
"name": function_name,
"module": function.__module__,
"function": pickled_function,
"max_calls": max_calls
})
self.redis_client.rpush("Exports", key)
def run_function_on_all_workers(self, function):
"""Run arbitrary code on all of the workers.
@ -697,7 +721,8 @@ class Worker(object):
while True:
with self.lock:
if (self.actor_id == NIL_ACTOR_ID
and (function_id.id() in self.functions[driver_id])):
and (function_id.id() in
self.function_execution_info[driver_id])):
break
elif self.actor_id != NIL_ACTOR_ID and (
self.actor_id in self.actors):
@ -741,7 +766,7 @@ class Worker(object):
"""
arguments = []
for (i, arg) in enumerate(serialized_args):
if isinstance(arg, ray.local_scheduler.ObjectID):
if isinstance(arg, ray.ObjectID):
# get the object from the local object store
argument = self.get_object([arg])[0]
if isinstance(argument, RayTaskError):
@ -798,7 +823,6 @@ class Worker(object):
# message to the correct driver.
self.task_driver_id = task.driver_id()
self.current_task_id = task.task_id()
self.current_function_id = task.function_id().id()
self.task_index = 0
self.put_index = 1
function_id = task.function_id()
@ -806,8 +830,10 @@ class Worker(object):
return_object_ids = task.returns()
if task.actor_id().id() != NIL_ACTOR_ID:
dummy_return_id = return_object_ids.pop()
function_name, function_executor = (
self.functions[self.task_driver_id.id()][function_id.id()])
function_executor = self.function_execution_info[
self.task_driver_id.id()][function_id.id()].function
function_name = self.function_execution_info[self.task_driver_id.id()][
function_id.id()].function_name
# Get task arguments from the object store.
try:
@ -829,7 +855,7 @@ class Worker(object):
try:
with log_span("ray:task:execute", worker=self):
if task.actor_id().id() == NIL_ACTOR_ID:
outputs = function_executor.executor(arguments)
outputs = function_executor(*arguments)
else:
outputs = function_executor(
dummy_return_id, self.actors[task.actor_id().id()],
@ -862,8 +888,8 @@ class Worker(object):
def _handle_process_task_failure(self, function_id, return_object_ids,
error, backtrace):
function_name, _ = self.functions[self.task_driver_id.id()][
function_id.id()]
function_name = self.function_execution_info[self.task_driver_id.id()][
function_id.id()].function_name
failure_object = RayTaskError(function_name, error, backtrace)
failure_objects = [
failure_object for _ in range(len(return_object_ids))
@ -902,7 +928,7 @@ class Worker(object):
time.sleep(0.001)
with self.lock:
self.fetch_and_register_actor(key, task.required_resources(), self)
self.fetch_and_register_actor(key, self)
def _wait_for_and_process_task(self, task):
"""Wait for a task to be ready and process the task.
@ -911,11 +937,11 @@ class Worker(object):
task: The task to execute.
"""
function_id = task.function_id()
driver_id = task.driver_id().id()
# TODO(rkn): It would be preferable for actor creation tasks to share
# more of the code path with regular task execution.
if (task.actor_creation_id() !=
ray.local_scheduler.ObjectID(NIL_ACTOR_ID)):
if (task.actor_creation_id() != ray.ObjectID(NIL_ACTOR_ID)):
self._become_actor(task)
return
@ -923,7 +949,7 @@ class Worker(object):
# on this worker. We will push warnings to the user if we spend too
# long in this loop.
with log_span("ray:wait_for_function", worker=self):
self._wait_for_function(function_id, task.driver_id().id())
self._wait_for_function(function_id, driver_id)
# Execute the task.
# TODO(rkn): Consider acquiring this lock with a timeout and pushing a
@ -934,8 +960,8 @@ class Worker(object):
with self.lock:
log(event_type="ray:acquire_lock", kind=LOG_SPAN_END, worker=self)
function_name, _ = (
self.functions[task.driver_id().id()][function_id.id()])
function_name = (self.function_execution_info[driver_id][
function_id.id()]).function_name
contents = {
"function_name": function_name,
"task_id": task.task_id().hex(),
@ -948,14 +974,13 @@ class Worker(object):
flush_log()
# Increase the task execution counter.
(self.num_task_executions[task.driver_id().id()][function_id.id()]
) += 1
self.num_task_executions[driver_id][function_id.id()] += 1
reached_max_executions = (self.num_task_executions[task.driver_id().id(
)][function_id.id()] == self.function_properties[task.driver_id().id()]
[function_id.id()].max_calls)
reached_max_executions = (
self.num_task_executions[driver_id][function_id.id()] == self.
function_execution_info[driver_id][function_id.id()].max_calls)
if reached_max_executions:
ray.worker.global_worker.local_scheduler_client.disconnect()
self.local_scheduler_client.disconnect()
os._exit(0)
def _get_next_task_from_local_scheduler(self):
@ -1069,18 +1094,6 @@ def check_main_thread():
.format(threading.current_thread().getName()))
def check_connected(worker=global_worker):
"""Check if the worker is connected.
Raises:
Exception: An exception is raised if the worker is not connected.
"""
if not worker.connected:
raise RayConnectionError("This command cannot be called before Ray "
"has been started. You can start Ray with "
"'ray.init()'.")
def print_failed_task(task_status):
"""Print information about failed tasks.
@ -1114,7 +1127,7 @@ def error_applies_to_driver(error_key, worker=global_worker):
def error_info(worker=global_worker):
"""Return information about failed tasks."""
check_connected(worker)
worker.check_connected()
check_main_thread()
error_keys = worker.redis_client.lrange("ErrorKeys", 0, -1)
errors = []
@ -1143,13 +1156,13 @@ def _initialize_serialization(worker=global_worker):
return obj.id()
def object_id_custom_deserializer(serialized_obj):
return ray.local_scheduler.ObjectID(serialized_obj)
return ray.ObjectID(serialized_obj)
# We register this serializer on each worker instead of calling
# register_custom_serializer from the driver so that isinstance still
# works.
worker.serialization_context.register_type(
ray.local_scheduler.ObjectID,
ray.ObjectID,
"ray.ObjectID",
pickle=False,
custom_serializer=object_id_custom_serializer,
@ -1786,12 +1799,9 @@ def fetch_and_register_remote_function(key, worker=global_worker):
"driver_id", "function_id", "name", "function", "num_return_vals",
"module", "resources", "max_calls"
])
function_id = ray.local_scheduler.ObjectID(function_id_str)
function_id = ray.ObjectID(function_id_str)
function_name = function_name.decode("ascii")
function_properties = FunctionProperties(
num_return_vals=int(num_return_vals),
resources=json.loads(resources.decode("ascii")),
max_calls=int(max_calls))
max_calls = int(max_calls)
module = module.decode("ascii")
# This is a placeholder in case the function can't be unpickled. This will
@ -1799,11 +1809,9 @@ def fetch_and_register_remote_function(key, worker=global_worker):
def f():
raise Exception("This function was not imported properly.")
remote_f_placeholder = remote(function_id=function_id)(lambda *xs: f())
worker.functions[driver_id][function_id.id()] = (function_name,
remote_f_placeholder)
worker.function_properties[driver_id][function_id.id()] = (
function_properties)
worker.function_execution_info[driver_id][function_id.id()] = (
FunctionExecutionInfo(
function=f, function_name=function_name, max_calls=max_calls))
worker.num_task_executions[driver_id][function_id.id()] = 0
try:
@ -1825,8 +1833,11 @@ def fetch_and_register_remote_function(key, worker=global_worker):
else:
# TODO(rkn): Why is the below line necessary?
function.__module__ = module
worker.functions[driver_id][function_id.id()] = (
function_name, remote(function_id=function_id)(function))
worker.function_execution_info[driver_id][function_id.id()] = (
FunctionExecutionInfo(
function=function,
function_name=function_name,
max_calls=max_calls))
# Add the function to the function table.
worker.redis_client.rpush(b"FunctionTable:" + function_id.id(),
worker.worker_id)
@ -1973,6 +1984,14 @@ def connect(info,
assert worker.cached_remote_functions_and_actors is not None, error_message
# Initialize some fields.
worker.worker_id = random_string()
# When tasks are executed on remote workers in the context of multiple
# drivers, the task driver ID is used to keep track of which driver is
# responsible for the task so that error messages will be propagated to
# the correct driver.
if mode != WORKER_MODE:
worker.task_driver_id = ray.ObjectID(worker.worker_id)
# All workers start out as non-actors. A worker can be turned into an actor
# after it is created.
worker.actor_id = NIL_ACTOR_ID
@ -2102,13 +2121,7 @@ def connect(info,
else:
# Try to use true randomness.
np.random.seed(None)
worker.current_task_id = ray.local_scheduler.ObjectID(
np.random.bytes(20))
# When tasks are executed on remote workers in the context of multiple
# drivers, the task driver ID is used to keep track of which driver is
# responsible for the task so that error messages will be propagated to
# the correct driver.
worker.task_driver_id = ray.local_scheduler.ObjectID(worker.worker_id)
worker.current_task_id = ray.ObjectID(np.random.bytes(20))
# Reset the state of the numpy random number generator.
np.random.set_state(numpy_state)
# Set other fields needed for computing task IDs.
@ -2124,14 +2137,11 @@ def connect(info,
nil_actor_counter = 0
driver_task = ray.local_scheduler.Task(
worker.task_driver_id,
ray.local_scheduler.ObjectID(NIL_FUNCTION_ID), [], 0,
worker.task_driver_id, ray.ObjectID(NIL_FUNCTION_ID), [], 0,
worker.current_task_id, worker.task_index,
ray.local_scheduler.ObjectID(NIL_ACTOR_ID),
ray.local_scheduler.ObjectID(NIL_ACTOR_ID),
ray.local_scheduler.ObjectID(NIL_ACTOR_ID),
ray.local_scheduler.ObjectID(NIL_ACTOR_ID), nil_actor_counter,
False, [], {"CPU": 0}, worker.use_raylet)
ray.ObjectID(NIL_ACTOR_ID), ray.ObjectID(NIL_ACTOR_ID),
ray.ObjectID(NIL_ACTOR_ID), ray.ObjectID(NIL_ACTOR_ID),
nil_actor_counter, False, [], {"CPU": 0}, worker.use_raylet)
global_state._execute_command(
driver_task.task_id(), "RAY.TASK_TABLE_ADD",
driver_task.task_id().id(),
@ -2194,11 +2204,7 @@ def connect(info,
# Export cached remote functions to the workers.
for cached_type, info in worker.cached_remote_functions_and_actors:
if cached_type == "remote_function":
(function_id, func_name, func, func_invoker,
function_properties) = info
export_remote_function(function_id, func_name, func,
func_invoker, function_properties,
worker)
info._export()
elif cached_type == "actor":
(key, actor_class_info) = info
ray.actor.publish_actor_class_to_key(key, actor_class_info,
@ -2450,7 +2456,7 @@ def get(object_ids, worker=global_worker):
Returns:
A Python object or a list of Python objects.
"""
check_connected(worker)
worker.check_connected()
with log_span("ray:get", worker=worker):
check_main_thread()
@ -2483,7 +2489,7 @@ def put(value, worker=global_worker):
Returns:
The object ID assigned to this value.
"""
check_connected(worker)
worker.check_connected()
with log_span("ray:put", worker=worker):
check_main_thread()
@ -2524,7 +2530,7 @@ def wait(object_ids, num_returns=1, timeout=None, worker=global_worker):
print("plasma_client.wait has not been implemented yet")
return
if isinstance(object_ids, ray.local_scheduler.ObjectID):
if isinstance(object_ids, ray.ObjectID):
raise TypeError(
"wait() expected a list of ObjectID, got a single ObjectID")
@ -2534,12 +2540,12 @@ def wait(object_ids, num_returns=1, timeout=None, worker=global_worker):
if worker.mode != PYTHON_MODE:
for object_id in object_ids:
if not isinstance(object_id, ray.local_scheduler.ObjectID):
if not isinstance(object_id, ray.ObjectID):
raise TypeError("wait() expected a list of ObjectID, "
"got list containing {}".format(
type(object_id)))
check_connected(worker)
worker.check_connected()
with log_span("ray:wait", worker=worker):
check_main_thread()
@ -2561,27 +2567,14 @@ def wait(object_ids, num_returns=1, timeout=None, worker=global_worker):
ready_ids, remaining_ids = worker.plasma_client.wait(
object_id_strs, timeout, num_returns)
ready_ids = [
ray.local_scheduler.ObjectID(object_id.binary())
for object_id in ready_ids
ray.ObjectID(object_id.binary()) for object_id in ready_ids
]
remaining_ids = [
ray.local_scheduler.ObjectID(object_id.binary())
for object_id in remaining_ids
ray.ObjectID(object_id.binary()) for object_id in remaining_ids
]
return ready_ids, remaining_ids
def _submit_task(function_id, *args, **kwargs):
"""This is a wrapper around worker.submit_task.
We use this wrapper so that in the remote decorator, we can call
_submit_task instead of worker.submit_task. The difference is that when we
attempt to serialize remote functions, we don't attempt to serialize the
worker object, which cannot be serialized.
"""
return global_worker.submit_task(function_id, *args, **kwargs)
def _mode(worker=global_worker):
"""This is a wrapper around worker.mode.
@ -2593,278 +2586,104 @@ def _mode(worker=global_worker):
return worker.mode
def export_remote_function(function_id,
func_name,
func,
func_invoker,
function_properties,
worker=global_worker):
check_main_thread()
if _mode(worker) not in [SCRIPT_MODE, SILENT_MODE]:
raise Exception("export_remote_function can only be called on a "
"driver.")
worker.function_properties[worker.task_driver_id.id()][
function_id.id()] = function_properties
task_driver_id = worker.task_driver_id
key = b"RemoteFunction:" + task_driver_id.id() + b":" + function_id.id()
# Work around limitations of Python pickling.
func_name_global_valid = func.__name__ in func.__globals__
func_name_global_value = func.__globals__.get(func.__name__)
# Allow the function to reference itself as a global variable
if not is_cython(func):
func.__globals__[func.__name__] = func_invoker
try:
pickled_func = pickle.dumps(func)
finally:
# Undo our changes
if func_name_global_valid:
func.__globals__[func.__name__] = func_name_global_value
else:
del func.__globals__[func.__name__]
worker.redis_client.hmset(
key, {
"driver_id": worker.task_driver_id.id(),
"function_id": function_id.id(),
"name": func_name,
"module": func.__module__,
"function": pickled_func,
"num_return_vals": function_properties.num_return_vals,
"resources": json.dumps(function_properties.resources),
"max_calls": function_properties.max_calls
})
worker.redis_client.rpush("Exports", key)
def get_global_worker():
return global_worker
def in_ipython():
"""Return true if we are in an IPython interpreter and false otherwise."""
try:
__IPYTHON__
return True
except NameError:
return False
def make_decorator(num_return_vals=None,
num_cpus=None,
num_gpus=None,
resources=None,
max_calls=None,
checkpoint_interval=None,
worker=None):
def decorator(function_or_class):
if (inspect.isfunction(function_or_class)
or is_cython(function_or_class)):
# Set the remote function default resources.
if checkpoint_interval is not None:
raise Exception("The keyword 'checkpoint_interval' is not "
"allowed for remote functions.")
return ray.remote_function.RemoteFunction(
function_or_class, num_cpus, num_gpus, resources,
num_return_vals, max_calls)
def compute_function_id(func_name, func):
"""Compute an function ID for a function.
if inspect.isclass(function_or_class):
if num_return_vals is not None:
raise Exception("The keyword 'num_return_vals' is not allowed "
"for actors.")
if max_calls is not None:
raise Exception("The keyword 'max_calls' is not allowed for "
"actors.")
Args:
func_name: The name of the function (this includes the module name plus
the function name).
func: The actual function.
# Set the actor default resources.
if num_cpus is None and num_gpus is None and resources is None:
# In the default case, actors acquire no resources for
# their lifetime, and actor methods will require 1 CPU.
cpus_to_use = DEFAULT_ACTOR_CREATION_CPUS_SIMPLE_CASE
actor_method_cpus = DEFAULT_ACTOR_METHOD_CPUS_SIMPLE_CASE
else:
# If any resources are specified, then all resources are
# acquired for the actor's lifetime and no resources are
# associated with methods.
cpus_to_use = (DEFAULT_ACTOR_CREATION_CPUS_SPECIFIED_CASE
if num_cpus is None else num_cpus)
actor_method_cpus = DEFAULT_ACTOR_METHOD_CPUS_SPECIFIED_CASE
Returns:
This returns the function ID.
"""
function_id_hash = hashlib.sha1()
# Include the function name in the hash.
function_id_hash.update(func_name.encode("ascii"))
# If we are running a script or are in IPython, include the source code in
# the hash. If we are in a regular Python interpreter we skip this part
# because the source code is not accessible. If the function is a built-in
# (e.g., Cython), the source code is not accessible.
import __main__ as main
if (hasattr(main, "__file__") or in_ipython()) \
and inspect.isfunction(func):
function_id_hash.update(inspect.getsource(func).encode("ascii"))
# Compute the function ID.
function_id = function_id_hash.digest()
assert len(function_id) == 20
function_id = FunctionID(function_id)
return worker.make_actor(function_or_class, cpus_to_use, num_gpus,
resources, actor_method_cpus,
checkpoint_interval)
return function_id
raise Exception("The @ray.remote decorator must be applied to "
"either a function or to a class.")
return decorator
def remote(*args, **kwargs):
"""This decorator is used to define remote functions and to define actors.
Args:
num_return_vals (int): The number of object IDs that a call to this
function should return.
num_cpus (int): The number of CPUs needed to execute this function.
num_gpus (int): The number of GPUs needed to execute this function.
resources: A dictionary mapping resource name to the required quantity
of that resource.
max_calls (int): The maximum number of tasks of this kind that can be
run on a worker before the worker needs to be restarted.
checkpoint_interval (int): The number of tasks to run between
checkpoints of the actor state.
"""
worker = global_worker
def make_remote_decorator(num_return_vals,
num_cpus,
num_gpus,
resources,
max_calls,
checkpoint_interval,
func_id=None):
def remote_decorator(func_or_class):
if inspect.isfunction(func_or_class) or is_cython(func_or_class):
# Set the remote function default resources.
resources["CPU"] = (DEFAULT_REMOTE_FUNCTION_CPUS
if num_cpus is None else num_cpus)
resources["GPU"] = (DEFAULT_REMOTE_FUNCTION_GPUS
if num_gpus is None else num_gpus)
function_properties = FunctionProperties(
num_return_vals=num_return_vals,
resources=resources,
max_calls=max_calls)
return remote_function_decorator(func_or_class,
function_properties)
if inspect.isclass(func_or_class):
# Set the actor default resources.
if num_cpus is None and num_gpus is None and resources == {}:
# In the default case, actors acquire no resources for
# their lifetime, and actor methods will require 1 CPU.
resources["CPU"] = DEFAULT_ACTOR_CREATION_CPUS_SIMPLE_CASE
actor_method_cpus = DEFAULT_ACTOR_METHOD_CPUS_SIMPLE_CASE
else:
# If any resources are specified, then all resources are
# acquired for the actor's lifetime and no resources are
# associated with methods.
resources["CPU"] = (
DEFAULT_ACTOR_CREATION_CPUS_SPECIFIED_CASE
if num_cpus is None else num_cpus)
resources["GPU"] = (
DEFAULT_ACTOR_CREATION_GPUS_SPECIFIED_CASE
if num_gpus is None else num_gpus)
actor_method_cpus = (
DEFAULT_ACTOR_METHOD_CPUS_SPECIFIED_CASE)
return worker.make_actor(func_or_class, resources,
checkpoint_interval,
actor_method_cpus)
raise Exception("The @ray.remote decorator must be applied to "
"either a function or to a class.")
def remote_function_decorator(func, function_properties):
func_name = "{}.{}".format(func.__module__, func.__name__)
if func_id is None:
function_id = compute_function_id(func_name, func)
else:
function_id = func_id
def func_call(*args, **kwargs):
"""This runs immediately when a remote function is called."""
return _submit(args=args, kwargs=kwargs)
def _submit(args=None,
kwargs=None,
num_return_vals=None,
num_cpus=None,
num_gpus=None,
resources=None):
"""An experimental alternate way to submit remote functions."""
check_connected()
check_main_thread()
kwargs = {} if kwargs is None else kwargs
args = signature.extend_args(function_signature, args, kwargs)
if _mode() == PYTHON_MODE:
# In PYTHON_MODE, remote calls simply execute the function.
# We copy the arguments to prevent the function call from
# mutating them and to match the usual behavior of
# immutable remote objects.
result = func(*copy.deepcopy(args))
return result
object_ids = _submit_task(
function_id,
args,
num_return_vals=num_return_vals,
num_cpus=num_cpus,
num_gpus=num_gpus,
resources=resources)
if len(object_ids) == 1:
return object_ids[0]
elif len(object_ids) > 1:
return object_ids
def func_executor(arguments):
"""This gets run when the remote function is executed."""
result = func(*arguments)
return result
def func_invoker(*args, **kwargs):
"""This is used to invoke the function."""
raise Exception("Remote functions cannot be called directly. "
"Instead of running '{}()', try '{}.remote()'."
.format(func_name, func_name))
func_invoker.remote = func_call
func_invoker._submit = _submit
func_invoker.executor = func_executor
func_invoker.is_remote = True
func_name = "{}.{}".format(func.__module__, func.__name__)
func_invoker.func_name = func_name
if sys.version_info >= (3, 0) or is_cython(func):
func_invoker.__doc__ = func.__doc__
else:
func_invoker.func_doc = func.func_doc
signature.check_signature_supported(func)
function_signature = signature.extract_signature(func)
# Everything ready - export the function
if worker.mode in [SCRIPT_MODE, SILENT_MODE]:
export_remote_function(function_id, func_name, func,
func_invoker, function_properties)
elif worker.mode is None:
worker.cached_remote_functions_and_actors.append(
("remote_function", (function_id, func_name, func,
func_invoker, function_properties)))
return func_invoker
return remote_decorator
# Handle resource arguments
num_cpus = kwargs["num_cpus"] if "num_cpus" in kwargs else None
num_gpus = kwargs["num_gpus"] if "num_gpus" in kwargs else None
resources = kwargs.get("resources", {})
if not isinstance(resources, dict):
raise Exception("The 'resources' keyword argument must be a "
"dictionary, but received type {}.".format(
type(resources)))
assert "CPU" not in resources, "Use the 'num_cpus' argument."
assert "GPU" not in resources, "Use the 'num_gpus' argument."
# Handle other arguments.
num_return_vals = (kwargs["num_return_vals"]
if "num_return_vals" in kwargs else 1)
max_calls = kwargs["max_calls"] if "max_calls" in kwargs else 0
checkpoint_interval = (kwargs["checkpoint_interval"]
if "checkpoint_interval" in kwargs else -1)
if _mode() == WORKER_MODE:
if "function_id" in kwargs:
function_id = kwargs["function_id"]
return make_remote_decorator(num_return_vals, num_cpus, num_gpus,
resources, max_calls,
checkpoint_interval, function_id)
worker = get_global_worker()
if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
# This is the case where the decorator is just @ray.remote.
return make_remote_decorator(num_return_vals, num_cpus, num_gpus,
resources, max_calls,
checkpoint_interval)(args[0])
else:
# This is the case where the decorator is something like
# @ray.remote(num_return_vals=2).
error_string = ("The @ray.remote decorator must be applied either "
"with no arguments and no parentheses, for example "
"'@ray.remote', or it must be applied using some of "
"the arguments 'num_return_vals', 'resources', "
"or 'max_calls', like "
"'@ray.remote(num_return_vals=2, "
"resources={\"GPU\": 1})'.")
assert len(args) == 0 and len(kwargs) > 0, error_string
for key in kwargs:
assert key in [
"num_return_vals", "num_cpus", "num_gpus", "resources",
"max_calls", "checkpoint_interval"
], error_string
assert "function_id" not in kwargs
return make_remote_decorator(num_return_vals, num_cpus, num_gpus,
resources, max_calls, checkpoint_interval)
return make_decorator(worker=worker)(args[0])
# Parse the keyword arguments from the decorator.
error_string = ("The @ray.remote decorator must be applied either "
"with no arguments and no parentheses, for example "
"'@ray.remote', or it must be applied using some of "
"the arguments 'num_return_vals', 'num_cpus', 'num_gpus', "
"'resources', 'max_calls', or 'checkpoint_interval', like "
"'@ray.remote(num_return_vals=2, "
"resources={\"CustomResource\": 1})'.")
assert len(args) == 0 and len(kwargs) > 0, error_string
for key in kwargs:
assert key in [
"num_return_vals", "num_cpus", "num_gpus", "resources",
"max_calls", "checkpoint_interval"
], error_string
num_cpus = kwargs["num_cpus"] if "num_cpus" in kwargs else None
num_gpus = kwargs["num_gpus"] if "num_gpus" in kwargs else None
resources = kwargs.get("resources")
if not isinstance(resources, dict) and resources is not None:
raise Exception("The 'resources' keyword argument must be a "
"dictionary, but received type {}.".format(
type(resources)))
if resources is not None:
assert "CPU" not in resources, "Use the 'num_cpus' argument."
assert "GPU" not in resources, "Use the 'num_gpus' argument."
# Handle other arguments.
num_return_vals = kwargs.get("num_return_vals")
max_calls = kwargs.get("max_calls")
checkpoint_interval = kwargs.get("checkpoint_interval")
return make_decorator(
num_return_vals=num_return_vals,
num_cpus=num_cpus,
num_gpus=num_gpus,
resources=resources,
max_calls=max_calls,
checkpoint_interval=checkpoint_interval,
worker=worker)

View file

@ -1724,7 +1724,7 @@ class WorkerPoolTests(unittest.TestCase):
os.environ.get("RAY_USE_XRAY") == "1",
"This test does not work with xray yet.")
def testBlockingTasks(self):
ray.init(num_workers=1)
ray.init(num_cpus=1)
@ray.remote
def f(i, j):
@ -1734,20 +1734,20 @@ class WorkerPoolTests(unittest.TestCase):
def g(i):
# Each instance of g submits and blocks on the result of another
# remote task.
object_ids = [f.remote(i, j) for j in range(10)]
object_ids = [f.remote(i, j) for j in range(2)]
return ray.get(object_ids)
ray.get([g.remote(i) for i in range(100)])
ray.get([g.remote(i) for i in range(4)])
@ray.remote
def _sleep(i):
time.sleep(1)
time.sleep(0.01)
return (i)
@ray.remote
def sleep():
# Each instance of sleep submits and blocks on the result of
# another remote task, which takes one second to execute.
# another remote task, which takes some time to execute.
ray.get([_sleep.remote(i) for i in range(10)])
ray.get(sleep.remote())