ray/test/failure_test.py
Robert Nishihara ab8c3432f7 Add driver ID to task spec and add driver ID to Python error handling. (#225)
* Add driver ID to task spec and add driver ID to Python error handling.

* Make constants global variables.

* Add test for error isolation.
2017-01-25 22:53:48 -08:00

169 lines
5.4 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
import ray
import sys
import time
if sys.version_info >= (3, 0):
from importlib import reload
import ray.test.test_functions as test_functions
def relevant_errors(error_type):
    """Return the entries of ``ray.error_info()`` whose type is *error_type*.

    Args:
        error_type: Value compared against each entry's ``b"type"`` field.

    Returns:
        A list of the matching error entries, in the order they were reported.
    """
    matching = []
    for entry in ray.error_info():
        if entry[b"type"] == error_type:
            matching.append(entry)
    return matching
def wait_for_errors(error_type, num_errors, timeout=10):
    """Poll until enough errors of *error_type* exist or *timeout* elapses.

    Args:
        error_type: Error type forwarded to ``relevant_errors``.
        num_errors: Minimum number of matching errors to wait for.
        timeout: Maximum number of seconds to keep polling.

    Returns:
        None in both cases. On timeout a notice is printed instead of raising,
        so callers must assert on the error list themselves.
    """
    began = time.time()
    while True:
        if time.time() - began >= timeout:
            break
        if len(relevant_errors(error_type)) >= num_errors:
            return
        # Pause between polls.
        time.sleep(0.1)
    print("Timing out of wait.")
class FailureTest(unittest.TestCase):
    """Checks error reporting for the ``test_unknown_type`` remote function."""

    def testUnknownSerialization(self):
        """Submit ``test_unknown_type`` and expect exactly one task error."""
        reload(test_functions)
        ray.init(num_workers=1, driver_mode=ray.SILENT_MODE)
        # Registered right after init so ray.worker.cleanup() still runs when
        # an assertion below fails; otherwise later tests inherit this state.
        self.addCleanup(ray.worker.cleanup)

        test_functions.test_unknown_type.remote()
        wait_for_errors(b"task", 1)
        self.assertEqual(len(relevant_errors(b"task")), 1)
class TaskSerializationTest(unittest.TestCase):
    """Checks that unknown types cannot be returned from or passed to tasks."""

    def testReturnAndPassUnknownType(self):
        """Returning or passing an instance of a local class must raise."""
        ray.init(num_workers=1, driver_mode=ray.SILENT_MODE)
        # Registered right after init so ray.worker.cleanup() still runs when
        # an assertion below fails.
        self.addCleanup(ray.worker.cleanup)

        class Foo(object):
            pass

        # Check that returning an unknown type from a remote function raises an
        # exception.
        @ray.remote
        def f():
            return Foo()

        with self.assertRaises(Exception):
            ray.get(f.remote())

        # Check that passing an unknown type into a remote function raises an
        # exception.
        @ray.remote
        def g(x):
            return 1

        with self.assertRaises(Exception):
            g.remote(Foo())
class TaskStatusTest(unittest.TestCase):
    """Checks that failures on workers are reported back to the driver.

    Every test calls ``ray.init`` itself and registers ``ray.worker.cleanup``
    with ``addCleanup`` so the cleanup runs even when an assertion fails.
    """

    def testFailedTask(self):
        """Failing remote functions report errors and make ``ray.get`` raise."""
        reload(test_functions)
        ray.init(num_workers=3, driver_mode=ray.SILENT_MODE)
        self.addCleanup(ray.worker.cleanup)

        test_functions.throw_exception_fct1.remote()
        test_functions.throw_exception_fct1.remote()
        wait_for_errors(b"task", 2)
        # Take one snapshot so the count and the messages are checked against
        # the same list.
        task_errors = relevant_errors(b"task")
        self.assertEqual(len(task_errors), 2)
        for task in task_errors:
            self.assertIn(b"Test function 1 intentionally failed.",
                          task.get(b"message"))

        # ray.get should throw an exception.
        x = test_functions.throw_exception_fct2.remote()
        with self.assertRaises(Exception) as raised:
            ray.get(x)
        self.assertIn("Test function 2 intentionally failed.",
                      str(raised.exception))

        # ray.get should throw an exception for each of the three results.
        x, y, z = test_functions.throw_exception_fct3.remote(1.0)
        for ref in [x, y, z]:
            with self.assertRaises(Exception) as raised:
                ray.get(ref)
            self.assertIn("Test function 3 intentionally failed.",
                          str(raised.exception))

    def testFailImportingRemoteFunction(self):
        """A remote function that fails to unpickle reports errors, not hangs."""
        ray.init(num_workers=2, driver_mode=ray.SILENT_MODE)
        self.addCleanup(ray.worker.cleanup)

        # This example is somewhat contrived. It should be successfully
        # pickled, and then it should throw an exception when it is unpickled.
        # This may depend a bit on the specifics of our pickler.
        def reducer(*args):
            raise Exception("There is a problem here.")

        class Foo(object):
            def __init__(self):
                # NOTE(review): these attributes presumably make the instance
                # look enough like a function for ray.remote -- confirm.
                self.__name__ = "Foo_object"
                self.func_doc = ""
                self.__globals__ = {}

            def __reduce__(self):
                return reducer, ()

            def __call__(self):
                return

        f = ray.remote(Foo())
        wait_for_errors(b"register_remote_function", 2)
        self.assertIn(b"There is a problem here.",
                      ray.error_info()[0][b"message"])

        # Check that if we try to call the function it throws an exception and
        # does not hang.
        for _ in range(10):
            with self.assertRaises(Exception):
                ray.get(f.remote())

    def testFailImportingEnvironmentVariable(self):
        """An initializer that raises on workers is reported as an error."""
        ray.init(num_workers=2, driver_mode=ray.SILENT_MODE)
        self.addCleanup(ray.worker.cleanup)

        # This will throw an exception when the environment variable is
        # imported on the workers.
        def initializer():
            if ray.worker.global_worker.mode == ray.WORKER_MODE:
                raise Exception("The initializer failed.")
            return 0

        ray.env.foo = ray.EnvironmentVariable(initializer)
        wait_for_errors(b"register_environment_variable", 2)
        # Check that the error message is in the task info.
        self.assertIn(b"The initializer failed.",
                      ray.error_info()[0][b"message"])

    def testFailReinitializingVariable(self):
        """A reinitializer that raises is reported as an error."""
        ray.init(num_workers=2, driver_mode=ray.SILENT_MODE)
        self.addCleanup(ray.worker.cleanup)

        def initializer():
            return 0

        def reinitializer(foo):
            raise Exception("The reinitializer failed.")

        ray.env.foo = ray.EnvironmentVariable(initializer, reinitializer)

        @ray.remote
        def use_foo():
            # The bare attribute access is intentional. NOTE(review):
            # presumably using the variable in a task is what causes the
            # reinitializer to run afterwards -- confirm.
            ray.env.foo

        use_foo.remote()
        wait_for_errors(b"reinitialize_environment_variable", 1)
        # Check that the error message is in the task info.
        self.assertIn(b"The reinitializer failed.",
                      ray.error_info()[0][b"message"])

    def testFailedFunctionToRun(self):
        """A function that raises on every worker yields one error each."""
        ray.init(num_workers=2, driver_mode=ray.SILENT_MODE)
        self.addCleanup(ray.worker.cleanup)

        def f(worker):
            if ray.worker.global_worker.mode == ray.WORKER_MODE:
                raise Exception("Function to run failed.")

        ray.worker.global_worker.run_function_on_all_workers(f)
        wait_for_errors(b"function_to_run", 2)
        # Check that the error message is in the task info. Use one snapshot
        # so the length check and the indexing see the same list.
        errors = ray.error_info()
        self.assertEqual(len(errors), 2)
        for error in errors:
            self.assertIn(b"Function to run failed.", error[b"message"])
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main(verbosity=2)