mirror of
https://github.com/vale981/ray
synced 2025-03-06 02:21:39 -05:00
commit
b5a210b715
5 changed files with 121 additions and 128 deletions
|
@ -9,24 +9,37 @@ all_processes = []
|
||||||
|
|
||||||
def cleanup():
|
def cleanup():
|
||||||
global all_processes
|
global all_processes
|
||||||
timeout_sec = 5
|
for p, port in all_processes:
|
||||||
for p in all_processes:
|
if p.poll() is not None: # process has already terminated
|
||||||
p_sec = 0
|
print "Process at port " + str(port) + " has already terminated."
|
||||||
for second in range(timeout_sec):
|
continue
|
||||||
if p.poll() == None:
|
print "Attempting to kill process at port " + str(port) + "."
|
||||||
time.sleep(0.1)
|
p.kill()
|
||||||
p_sec += 1
|
time.sleep(0.05) # is this necessary?
|
||||||
if p_sec >= timeout_sec:
|
if p.poll() is not None:
|
||||||
p.kill() # supported from python 2.6
|
print "Successfully killed process at port " + str(port) + "."
|
||||||
print 'helper processes shut down!'
|
continue
|
||||||
|
print "Kill attempt failed, attempting to terminate process at port " + str(port) + "."
|
||||||
|
p.terminate()
|
||||||
|
time.sleep(0.05) # is this necessary?
|
||||||
|
if p.poll is not None:
|
||||||
|
print "Successfully terminated process at port " + str(port) + "."
|
||||||
|
continue
|
||||||
|
print "Termination attempt failed, giving up."
|
||||||
all_processes = []
|
all_processes = []
|
||||||
|
|
||||||
atexit.register(cleanup)
|
atexit.register(cleanup)
|
||||||
|
|
||||||
def start_scheduler(scheduler_address):
|
def start_scheduler(host, port):
|
||||||
|
scheduler_address = host + ":" + str(port)
|
||||||
p = subprocess.Popen([os.path.join(_services_path, "scheduler"), str(scheduler_address)])
|
p = subprocess.Popen([os.path.join(_services_path, "scheduler"), str(scheduler_address)])
|
||||||
all_processes.append(p)
|
all_processes.append((p, port))
|
||||||
|
|
||||||
def start_objstore(objstore_address):
|
def start_objstore(host, port):
|
||||||
|
objstore_address = host + ":" + str(port)
|
||||||
p = subprocess.Popen([os.path.join(_services_path, "objstore"), str(objstore_address)])
|
p = subprocess.Popen([os.path.join(_services_path, "objstore"), str(objstore_address)])
|
||||||
all_processes.append(p)
|
all_processes.append((p, port))
|
||||||
|
|
||||||
|
def start_worker(test_path, host, scheduler_port, worker_port, objstore_port):
|
||||||
|
p = subprocess.Popen(["python", test_path, host, str(scheduler_port), str(worker_port), str(objstore_port)])
|
||||||
|
all_processes.append((p, worker_port))
|
||||||
|
|
|
@ -269,8 +269,8 @@ cdef class Worker:
|
||||||
print "after get data"
|
print "after get data"
|
||||||
return unison.deserialize_from_string(data)
|
return unison.deserialize_from_string(data)
|
||||||
|
|
||||||
cpdef register_function(self, func_name, function, num_args):
|
cpdef register_function(self, func_name, function, num_return_vals):
|
||||||
orch_register_function(self.context, func_name, num_args)
|
orch_register_function(self.context, func_name, num_return_vals)
|
||||||
self.functions[func_name] = function
|
self.functions[func_name] = function
|
||||||
|
|
||||||
cpdef wait_for_next_task(self):
|
cpdef wait_for_next_task(self):
|
||||||
|
@ -314,7 +314,7 @@ def distributed(types, return_types, worker=global_worker):
|
||||||
if i < len(types) - 1:
|
if i < len(types) - 1:
|
||||||
arguments.append(worker.pull(arg))
|
arguments.append(worker.pull(arg))
|
||||||
elif i == len(types) - 1 and types[-1] is not None:
|
elif i == len(types) - 1 and types[-1] is not None:
|
||||||
arguments.append(global_worker.pull(arg))
|
arguments.append(worker.pull(arg))
|
||||||
elif types[-1] is None:
|
elif types[-1] is None:
|
||||||
arguments.append(worker.pull(arg))
|
arguments.append(worker.pull(arg))
|
||||||
else:
|
else:
|
||||||
|
|
154
test/runtest.py
154
test/runtest.py
|
@ -13,37 +13,9 @@ from grpc.beta import implementations
|
||||||
import orchestra_pb2
|
import orchestra_pb2
|
||||||
import types_pb2
|
import types_pb2
|
||||||
|
|
||||||
"""
|
IP_ADDRESS = "127.0.0.1"
|
||||||
class UnisonTest(unittest.TestCase):
|
|
||||||
|
|
||||||
def testSerialize(self):
|
|
||||||
d = [1, 2L, "hello", 3.0]
|
|
||||||
res = unison.serialize_args(d)
|
|
||||||
c = unison.deserialize_args(res)
|
|
||||||
self.assertEqual(c, d)
|
|
||||||
|
|
||||||
d = [{'hello': 'world'}]
|
|
||||||
res = unison.serialize_args(d)
|
|
||||||
c = unison.deserialize_args(res)
|
|
||||||
self.assertEqual(c, d)
|
|
||||||
|
|
||||||
a = np.zeros((100, 100))
|
|
||||||
res = unison.serialize_args(a)
|
|
||||||
b = unison.deserialize_args(res)
|
|
||||||
self.assertTrue((a == b).all())
|
|
||||||
|
|
||||||
a = [unison.ObjRef(42, int)]
|
|
||||||
res = unison.serialize_args(a)
|
|
||||||
b = unison.deserialize_args(res)
|
|
||||||
self.assertEqual(a, b)
|
|
||||||
"""
|
|
||||||
|
|
||||||
TIMEOUT_SECONDS = 5
|
TIMEOUT_SECONDS = 5
|
||||||
|
|
||||||
def produce_data(num_chunks):
|
|
||||||
for _ in range(num_chunks):
|
|
||||||
yield orchestra_pb2.ObjChunk(objref=1, totalsize=1000, data=b"hello world")
|
|
||||||
|
|
||||||
def connect_to_scheduler(host, port):
|
def connect_to_scheduler(host, port):
|
||||||
channel = implementations.insecure_channel(host, port)
|
channel = implementations.insecure_channel(host, port)
|
||||||
return orchestra_pb2.beta_create_Scheduler_stub(channel)
|
return orchestra_pb2.beta_create_Scheduler_stub(channel)
|
||||||
|
@ -52,96 +24,104 @@ def connect_to_objstore(host, port):
|
||||||
channel = implementations.insecure_channel(host, port)
|
channel = implementations.insecure_channel(host, port)
|
||||||
return orchestra_pb2.beta_create_ObjStore_stub(channel)
|
return orchestra_pb2.beta_create_ObjStore_stub(channel)
|
||||||
|
|
||||||
|
def address(host, port):
|
||||||
|
return host + ":" + str(port)
|
||||||
|
|
||||||
|
scheduler_port_counter = 0
|
||||||
|
def new_scheduler_port():
|
||||||
|
global scheduler_port_counter
|
||||||
|
scheduler_port_counter += 1
|
||||||
|
return 10000 + scheduler_port_counter
|
||||||
|
|
||||||
|
worker_port_counter = 0
|
||||||
|
def new_worker_port():
|
||||||
|
global worker_port_counter
|
||||||
|
worker_port_counter += 1
|
||||||
|
return 40000 + worker_port_counter
|
||||||
|
|
||||||
|
objstore_port_counter = 0
|
||||||
|
def new_objstore_port():
|
||||||
|
global objstore_port_counter
|
||||||
|
objstore_port_counter += 1
|
||||||
|
return 20000 + objstore_port_counter
|
||||||
|
|
||||||
|
|
||||||
class ObjStoreTest(unittest.TestCase):
|
class ObjStoreTest(unittest.TestCase):
|
||||||
|
|
||||||
"""Test setting up object stores, transfering data between them and retrieving data to a client"""
|
"""Test setting up object stores, transfering data between them and retrieving data to a client"""
|
||||||
def testObjStore(self):
|
def testObjStore(self):
|
||||||
services.start_scheduler("0.0.0.0:22221")
|
scheduler_port = new_scheduler_port()
|
||||||
services.start_objstore("0.0.0.0:22222")
|
objstore1_port = new_objstore_port()
|
||||||
services.start_objstore("0.0.0.0:22223")
|
objstore2_port = new_objstore_port()
|
||||||
|
worker1_port = new_worker_port()
|
||||||
|
worker2_port = new_worker_port()
|
||||||
|
|
||||||
|
services.start_scheduler(IP_ADDRESS, scheduler_port)
|
||||||
|
services.start_objstore(IP_ADDRESS, objstore1_port)
|
||||||
|
services.start_objstore(IP_ADDRESS, objstore2_port)
|
||||||
|
|
||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
|
|
||||||
scheduler_stub = connect_to_scheduler('localhost', 22221)
|
scheduler_stub = connect_to_scheduler(IP_ADDRESS, scheduler_port)
|
||||||
objstore1_stub = connect_to_objstore('localhost', 22222)
|
objstore1_stub = connect_to_objstore(IP_ADDRESS, objstore1_port)
|
||||||
objstore2_stub = connect_to_objstore('localhost', 22223)
|
objstore2_stub = connect_to_objstore(IP_ADDRESS, objstore2_port)
|
||||||
|
|
||||||
worker1 = worker.Worker()
|
worker1 = worker.Worker()
|
||||||
worker1.connect("127.0.0.1:22221", "127.0.0.1:40000", "127.0.0.1:22222")
|
worker1.connect(address(IP_ADDRESS, scheduler_port), address(IP_ADDRESS, worker1_port), address(IP_ADDRESS, objstore1_port))
|
||||||
|
|
||||||
worker2 = worker.Worker()
|
worker2 = worker.Worker()
|
||||||
worker2.connect("127.0.0.1:22221", "127.0.0.1:40001", "127.0.0.1:22223")
|
worker2.connect(address(IP_ADDRESS, scheduler_port), address(IP_ADDRESS, worker2_port), address(IP_ADDRESS, objstore2_port))
|
||||||
|
|
||||||
for i in range(1, 100):
|
# pushing and pulling an object shouldn't change it
|
||||||
l = i * 100 * "h"
|
for data in ["h", "h" * 10000, 0, 0.0]:
|
||||||
objref = worker1.push(l)
|
objref = worker1.push(data)
|
||||||
response = objstore1_stub.DeliverObj(orchestra_pb2.DeliverObjRequest(objref=objref.get_id(), objstore_address="0.0.0.0:22223"), TIMEOUT_SECONDS)
|
result = worker1.pull(objref)
|
||||||
s = worker2.get_serialized(objref.get_id())
|
self.assertEqual(result, data)
|
||||||
result = worker.unison.deserialize_from_string(s)
|
|
||||||
# result = worker1.pull(objref)
|
# pushing an object, shipping it to another worker, and pulling it shouldn't change it
|
||||||
self.assertEqual(len(result), 100 * i)
|
for data in ["h", "h" * 10000, 0, 0.0]:
|
||||||
|
objref = worker1.push(data)
|
||||||
|
response = objstore1_stub.DeliverObj(orchestra_pb2.DeliverObjRequest(objref=objref.get_id(), objstore_address=address(IP_ADDRESS, objstore2_port)), TIMEOUT_SECONDS)
|
||||||
|
result = worker2.pull(objref)
|
||||||
|
self.assertEqual(result, data)
|
||||||
|
|
||||||
services.cleanup()
|
services.cleanup()
|
||||||
|
|
||||||
class SchedulerTest(unittest.TestCase):
|
class SchedulerTest(unittest.TestCase):
|
||||||
|
|
||||||
def testCall(self):
|
def testCall(self):
|
||||||
services.start_scheduler("0.0.0.0:22221")
|
scheduler_port = new_scheduler_port()
|
||||||
services.start_objstore("0.0.0.0:22222")
|
objstore_port = new_objstore_port()
|
||||||
|
worker1_port = new_worker_port()
|
||||||
|
worker2_port = new_worker_port()
|
||||||
|
|
||||||
|
services.start_scheduler(IP_ADDRESS, scheduler_port)
|
||||||
|
services.start_objstore(IP_ADDRESS, objstore_port)
|
||||||
|
|
||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
|
|
||||||
scheduler_stub = connect_to_scheduler('localhost', 22221)
|
scheduler_stub = connect_to_scheduler(IP_ADDRESS, scheduler_port)
|
||||||
objstore_stub = connect_to_objstore('localhost', 22222)
|
objstore_stub = connect_to_objstore(IP_ADDRESS, objstore_port)
|
||||||
|
|
||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
|
|
||||||
w = worker.Worker()
|
worker1 = worker.Worker()
|
||||||
w.connect("127.0.0.1:22221", "127.0.0.1:40003", "127.0.0.1:22222")
|
worker1.connect(address(IP_ADDRESS, scheduler_port), address(IP_ADDRESS, worker1_port), address(IP_ADDRESS, objstore_port))
|
||||||
w.start_worker_service()
|
worker1.start_worker_service()
|
||||||
w2 = worker.Worker()
|
|
||||||
w2.connect("127.0.0.1:22221", "127.0.0.1:40004", "127.0.0.1:22222")
|
test_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
w2.start_worker_service()
|
test_path = os.path.join(test_dir, "testrecv.py")
|
||||||
|
services.start_worker(test_path, IP_ADDRESS, scheduler_port, worker2_port, objstore_port)
|
||||||
|
|
||||||
time.sleep(0.2)
|
time.sleep(0.2)
|
||||||
|
|
||||||
w.register_function("hello_world", None, 2)
|
worker1.call("hello_world", ["hi"])
|
||||||
w2.register_function("hello_world", None, 2)
|
|
||||||
|
|
||||||
time.sleep(0.1)
|
|
||||||
|
|
||||||
w.call("hello_world", ["hi"])
|
|
||||||
|
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
|
|
||||||
reply = scheduler_stub.GetDebugInfo(orchestra_pb2.GetDebugInfoRequest(), TIMEOUT_SECONDS)
|
reply = scheduler_stub.GetDebugInfo(orchestra_pb2.GetDebugInfoRequest(), TIMEOUT_SECONDS)
|
||||||
|
|
||||||
# self.assertEqual(reply.task[0].name, u'hello_world') # doesn't currently work, because scheduler is now invoked on every interaction
|
services.cleanup()
|
||||||
|
|
||||||
test_path = os.path.dirname(os.path.abspath(__file__))
|
|
||||||
|
|
||||||
p = subprocess.Popen(["python", os.path.join(test_path, "testrecv.py")])
|
|
||||||
|
|
||||||
time.sleep(0.2)
|
|
||||||
|
|
||||||
w.call("hello_world", ["hi"])
|
|
||||||
w.call("hello_world", ["hi"])
|
|
||||||
w.call("hello_world", ["hi"])
|
|
||||||
|
|
||||||
scheduler_stub.PushObj(orchestra_pb2.PushObjRequest(workerid=0), TIMEOUT_SECONDS)
|
|
||||||
|
|
||||||
# self.assertEqual(p.wait(), 0, "argument was not received by the test program") # todo: reactivate
|
|
||||||
|
|
||||||
# w.main_loop()
|
|
||||||
# w2.main_loop()
|
|
||||||
#
|
|
||||||
# reply = scheduler_stub.GetDebugInfo(orchestra_pb2.GetDebugInfoRequest(do_scheduling=True), TIMEOUT_SECONDS)
|
|
||||||
# time.sleep(0.1)
|
|
||||||
# reply = scheduler_stub.GetDebugInfo(orchestra_pb2.GetDebugInfoRequest(), TIMEOUT_SECONDS)
|
|
||||||
#
|
|
||||||
# self.assertEqual(list(reply.task), [])
|
|
||||||
#
|
|
||||||
# services.cleanup()
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -1,21 +0,0 @@
|
||||||
import orchpy.unison as unison
|
|
||||||
import orchpy.services as services
|
|
||||||
import orchpy.worker as worker
|
|
||||||
|
|
||||||
@worker.distributed([str], [str])
|
|
||||||
def print_string(string):
|
|
||||||
print "called print_string with", string
|
|
||||||
return string
|
|
||||||
|
|
||||||
@worker.distributed([int, int], [int, int])
|
|
||||||
def handle_int(a, b):
|
|
||||||
return a+1, b+1
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
worker.global_worker.connect("127.0.0.1:22221", "127.0.0.1:40000", "127.0.0.1:22222")
|
|
||||||
|
|
||||||
worker.global_worker.register_function("hello_world", print_string, 1)
|
|
||||||
worker.global_worker.register_function("handle_int", handle_int, 2)
|
|
||||||
worker.global_worker.start_worker_service()
|
|
||||||
|
|
||||||
worker.global_worker.main_loop()
|
|
|
@ -1,13 +1,34 @@
|
||||||
|
import sys
|
||||||
|
|
||||||
import orchpy.unison as unison
|
import orchpy.unison as unison
|
||||||
import orchpy.services as services
|
import orchpy.services as services
|
||||||
import orchpy.worker as worker
|
import orchpy.worker as worker
|
||||||
import sys
|
|
||||||
|
@worker.distributed([str], [str])
|
||||||
|
def print_string(string):
|
||||||
|
print "called print_string with", string
|
||||||
|
return string
|
||||||
|
|
||||||
|
@worker.distributed([int, int], [int, int])
|
||||||
|
def handle_int(a, b):
|
||||||
|
return a + 1, b + 1
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
ip_address = sys.argv[1]
|
||||||
|
scheduler_port = sys.argv[2]
|
||||||
|
worker_port = sys.argv[3]
|
||||||
|
objstore_port = sys.argv[4]
|
||||||
|
|
||||||
|
def address(host, port):
|
||||||
|
return host + ":" + str(port)
|
||||||
|
|
||||||
worker = worker.Worker()
|
worker = worker.Worker()
|
||||||
worker.connect("127.0.0.1:22221", "127.0.0.1:50000", "127.0.0.1:22222")
|
worker.connect(address(ip_address, scheduler_port), address(ip_address, worker_port), address(ip_address, objstore_port))
|
||||||
worker.start_worker_service()
|
worker.start_worker_service()
|
||||||
worker.register_function("hello_world", None, 0)
|
|
||||||
|
worker.register_function("print_string", print_string, 0)
|
||||||
|
worker.register_function("handle_int", handle_int, 0)
|
||||||
|
|
||||||
name, args, returnref = worker.wait_for_next_task()
|
name, args, returnref = worker.wait_for_next_task()
|
||||||
print "received args ", args
|
print "received args ", args
|
||||||
if args == ["hi"]:
|
if args == ["hi"]:
|
||||||
|
|
Loading…
Add table
Reference in a new issue