mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00

* Start and clean up workers from the local scheduler: add the ability to kill workers in the photon scheduler; add a test for the old method of starting workers; use a common codepath for killing workers; add a photon test case for starting and killing workers; fix the build; fix the component failure test; register a worker's pid as part of the initial connection; address comments and revert photon_connect; set PATH during the Travis install. * Fix the photon test case to accept clients on the plasma manager fd.
74 lines
2.3 KiB
Python
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import ray
|
|
import sys
|
|
import time
|
|
import unittest
|
|
|
|
class ComponentFailureTest(unittest.TestCase):
  """Tests that killing a worker mid-operation does not take down other
  Ray components (in particular the plasma store and plasma manager)."""

  # Fixed object ID used by both tests (20 bytes, the plasma ID length).
  _obj_id = 20 * b"a"

  def _check_dying_worker(self, remote_fn):
    """Run the shared kill-a-blocked-worker scenario.

    Starts a single worker, submits remote_fn (which must block on
    self._obj_id via the plasma client), kills the worker while it is
    blocked, then seals the object so the store tries to notify the dead
    worker. Asserts that every non-worker process is still alive.

    Args:
      remote_fn: An already-@ray.remote-decorated function that blocks on
        self._obj_id when invoked.
    """
    ray.worker._init(num_workers=1,
                     driver_mode=ray.SILENT_MODE,
                     start_workers_from_local_scheduler=False,
                     start_ray_local=True)

    # Have the worker block inside remote_fn.
    remote_fn.remote()

    # Give the task time to start, then kill the worker while it is blocked.
    time.sleep(1)
    ray.services.all_processes[ray.services.PROCESS_TYPE_WORKER][0].terminate()
    time.sleep(0.1)

    # Seal the object so the store attempts to notify the (now dead) worker
    # that its request has been fulfilled.
    ray.worker.global_worker.plasma_client.create(self._obj_id, 100)
    ray.worker.global_worker.plasma_client.seal(self._obj_id)
    time.sleep(0.1)

    # Make sure that nothing other than the killed worker has died.
    self.assertTrue(ray.services.all_processes_alive(
        exclude=[ray.services.PROCESS_TYPE_WORKER]))
    ray.worker.cleanup()

  # This test checks that when a worker dies in the middle of a get, the
  # plasma store and manager will not die.
  def testDyingWorkerGet(self):
    obj_id = self._obj_id

    @ray.remote
    def f():
      # Block until obj_id is sealed (it never is while this worker lives).
      ray.worker.global_worker.plasma_client.get(obj_id)

    self._check_dying_worker(f)

  # This test checks that when a worker dies in the middle of a wait, the
  # plasma store and manager will not die.
  def testDyingWorkerWait(self):
    obj_id = self._obj_id

    @ray.remote
    def f():
      # Block in a wait call on the unsealed object.
      ray.worker.global_worker.plasma_client.wait([obj_id])

    self._check_dying_worker(f)
|
|
|
|
if __name__ == "__main__":
  # Run the test suite with per-test output (verbosity=2) when this file is
  # executed directly.
  unittest.main(verbosity=2)
|