ray/test/component_failures_test.py
Stephanie Wang 2b8e6485e3 Start and clean up workers from the local scheduler. (#250)
* Start and clean up workers from the local scheduler

Ability to kill workers in photon scheduler

Test for old method of starting workers

Common codepath for killing workers

Common codepath for killing workers

Photon test case for starting and killing workers

fix build

Fix component failure test

Register a worker's pid as part of initial connection

Address comments and revert photon_connect

Set PATH during travis install

Fix

* Fix photon test case to accept clients on plasma manager fd
2017-02-10 12:46:23 -08:00

74 lines
2.3 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import ray
import sys
import time
import unittest
class ComponentFailureTest(unittest.TestCase):
# This test checks that when a worker dies in the middle of a get, the plasma
# store and manager will not die.
def testDyingWorkerGet(self):
obj_id = 20 * b"a"
@ray.remote
def f():
ray.worker.global_worker.plasma_client.get(obj_id)
ray.worker._init(num_workers=1,
driver_mode=ray.SILENT_MODE,
start_workers_from_local_scheduler=False,
start_ray_local=True)
# Have the worker wait in a get call.
f.remote()
# Kill the worker.
time.sleep(1)
ray.services.all_processes[ray.services.PROCESS_TYPE_WORKER][0].terminate()
time.sleep(0.1)
# Seal the object so the store attempts to notify the worker that the get
# has been fulfilled.
ray.worker.global_worker.plasma_client.create(obj_id, 100)
ray.worker.global_worker.plasma_client.seal(obj_id)
time.sleep(0.1)
# Make sure that nothing has died.
self.assertTrue(ray.services.all_processes_alive(exclude=[ray.services.PROCESS_TYPE_WORKER]))
ray.worker.cleanup()
# This test checks that when a worker dies in the middle of a wait, the plasma
# store and manager will not die.
def testDyingWorkerWait(self):
obj_id = 20 * b"a"
@ray.remote
def f():
ray.worker.global_worker.plasma_client.wait([obj_id])
ray.worker._init(num_workers=1,
driver_mode=ray.SILENT_MODE,
start_workers_from_local_scheduler=False,
start_ray_local=True)
# Have the worker wait in a get call.
f.remote()
# Kill the worker.
time.sleep(1)
ray.services.all_processes[ray.services.PROCESS_TYPE_WORKER][0].terminate()
time.sleep(0.1)
# Seal the object so the store attempts to notify the worker that the get
# has been fulfilled.
ray.worker.global_worker.plasma_client.create(obj_id, 100)
ray.worker.global_worker.plasma_client.seal(obj_id)
time.sleep(0.1)
# Make sure that nothing has died.
self.assertTrue(ray.services.all_processes_alive(exclude=[ray.services.PROCESS_TYPE_WORKER]))
ray.worker.cleanup()
if __name__ == "__main__":
unittest.main(verbosity=2)