2016-11-02 00:39:35 -07:00
|
|
|
from __future__ import print_function
|
|
|
|
|
2016-02-22 13:55:06 -08:00
|
|
|
import os
|
2016-07-26 16:18:39 -07:00
|
|
|
import sys
|
2016-02-22 13:55:06 -08:00
|
|
|
import time
|
2016-11-04 00:41:20 -07:00
|
|
|
import signal
|
2016-11-02 00:39:35 -07:00
|
|
|
import subprocess
|
2016-08-15 11:02:54 -07:00
|
|
|
import string
|
|
|
|
import random
|
2016-02-22 13:55:06 -08:00
|
|
|
|
2016-08-01 17:55:38 -07:00
|
|
|
# Ray modules
|
|
|
|
import config
|
2016-11-02 16:40:37 -07:00
|
|
|
import plasma
|
2016-04-05 00:34:23 -07:00
|
|
|
|
2016-07-08 00:14:26 -07:00
|
|
|
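# all_processes is a list of the processes (Redis, the object store and its
# manager, the local scheduler, and the workers) that have been started by
# this services module with cleanup=True, i.e. when Ray is being used in local
# mode. services.cleanup() kills the processes in this list.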
|
2016-02-22 13:55:06 -08:00
|
|
|
all_processes = []
|
|
|
|
|
2016-11-04 00:41:20 -07:00
|
|
|
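# Profiler switches, one per process type. When a flag is True, the local
# scheduler / plasma store is launched under valgrind's callgrind tool; the
# plasma manager flag is forwarded to plasma.start_plasma_manager.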
|
|
|
|
RUN_PHOTON_PROFILER = False
|
|
|
|
RUN_PLASMA_MANAGER_PROFILER = False
|
|
|
|
RUN_PLASMA_STORE_PROFILER = False
|
|
|
|
|
2016-04-05 00:34:23 -07:00
|
|
|
def address(host, port):
    """Build a "host:port" address string.

    Args:
      host (str): The host name or IP address.
      port: The port number; it is converted to a string with str().

    Returns:
      The host and the port joined by a single colon.
    """
    return ":".join((host, str(port)))
|
|
|
|
|
2016-11-02 00:39:35 -07:00
|
|
|
def new_port():
    """Pick a pseudo-random port number between 10000 and 65535 inclusive.

    The port is only drawn at random; nothing here checks that it is free.

    Returns:
      The chosen port as an int.
    """
    lowest, highest = 10000, 65535
    # randrange excludes its upper bound, hence the + 1.
    return random.randrange(lowest, highest + 1)
|
2016-04-05 00:34:23 -07:00
|
|
|
|
2016-11-02 00:39:35 -07:00
|
|
|
def random_name():
    """Return a pseudo-random integer in [0, 99999999] as a decimal string.

    The result is used in this module as a suffix for names under /tmp.
    """
    suffix = random.randrange(0, 10 ** 8)
    return "{}".format(suffix)
|
|
|
|
|
2016-02-22 13:55:06 -08:00
|
|
|
def cleanup():
    """When running in local mode, shut down the Ray processes.

    This method is used to shut down processes that were started with
    services.start_ray_local(). It kills every process recorded in
    all_processes (the ones started by this services module with
    cleanup=True), newest first, and then empties the list. Driver processes
    are started and disconnected by worker.py.

    A message is printed saying whether every process was observed to exit.
    """
    global all_processes
    # If any profiler is enabled, each live process first receives SIGINT so
    # that it gets a chance to write its profiling data before being killed.
    profiling = (RUN_PHOTON_PROFILER or RUN_PLASMA_MANAGER_PROFILER or
                 RUN_PLASMA_STORE_PROFILER)
    successfully_shut_down = True
    # Terminate the processes in reverse order.
    for p in all_processes[::-1]:
        if p.poll() is not None:  # The process has already terminated.
            continue
        if profiling:
            os.kill(p.pid, signal.SIGINT)  # Signal to write profiler data.
            time.sleep(0.1)  # Wait for profiling data to be written.
        p.kill()
        time.sleep(0.05)  # Give the process a moment to exit.
        if p.poll() is not None:
            continue
        p.terminate()
        time.sleep(0.05)  # Give the process a moment to exit.
        # poll must be *called* here: comparing the bound method itself to
        # None is always true and would hide processes that are still alive.
        if p.poll() is not None:
            continue
        successfully_shut_down = False
    if successfully_shut_down:
        print("Successfully shut down Ray.")
    else:
        print("Ray did not shut down properly.")
    all_processes = []
|
2016-02-22 13:55:06 -08:00
|
|
|
|
2016-11-04 00:41:20 -07:00
|
|
|
def start_redis(port, cleanup=True):
    """Launch a redis-server process listening on the given port.

    The server binary is looked up relative to this file, under
    ../common/thirdparty/redis-3.2.3/src/, and is run with log level
    "warning".

    Args:
      port (int): The port that the Redis server should listen on.
      cleanup (bool): If true, the process handle is appended to
        all_processes so that services.cleanup() will kill it. This is True
        by default.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    redis_executable = os.path.join(
        module_dir, "../common/thirdparty/redis-3.2.3/src/redis-server")
    command = [redis_executable, "--port", str(port), "--loglevel", "warning"]
    redis_process = subprocess.Popen(command)
    if cleanup:
        all_processes.append(redis_process)
|
2016-02-22 13:55:06 -08:00
|
|
|
|
2016-11-04 00:41:20 -07:00
|
|
|
def start_local_scheduler(redis_address, plasma_store_name, cleanup=True):
    """Launch a local scheduler (photon_scheduler) process.

    The binary is looked up relative to this file, under ../photon/build/.
    When RUN_PHOTON_PROFILER is true, it is run under valgrind's callgrind
    tool.

    Args:
      redis_address (str): Passed to the scheduler with the -r option.
      plasma_store_name (str): Passed to the scheduler with the -p option.
      cleanup (bool): If true, the process handle is appended to
        all_processes so that services.cleanup() will kill it. This is True
        by default.

    Returns:
      The name passed to the scheduler with the -s option: "/tmp/scheduler"
      followed by a random numeric suffix.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    scheduler_executable = os.path.join(
        module_dir, "../photon/build/photon_scheduler")
    # Optionally wrap the executable in the profiler.
    command = ["valgrind", "--tool=callgrind"] if RUN_PHOTON_PROFILER else []
    command.append(scheduler_executable)
    local_scheduler_name = "/tmp/scheduler{}".format(random_name())
    command.extend(["-s", local_scheduler_name,
                    "-r", redis_address,
                    "-p", plasma_store_name])
    scheduler_process = subprocess.Popen(command)
    if cleanup:
        all_processes.append(scheduler_process)
    return local_scheduler_name
|
2016-07-07 14:05:25 -07:00
|
|
|
|
2016-11-04 00:41:20 -07:00
|
|
|
def start_objstore(node_ip_address, redis_address, cleanup=True):
    """Start a plasma object store process and its plasma manager.

    The store binary is looked up relative to this file, under
    ../plasma/build/. When RUN_PLASMA_STORE_PROFILER is true, it is run under
    valgrind's callgrind tool. The manager is started through
    plasma.start_plasma_manager, which receives RUN_PLASMA_MANAGER_PROFILER
    as its run_profiler argument.

    Args:
      node_ip_address (str): The IP address of the node running the object
        store. It is currently not used by this function.
      redis_address (str): Forwarded to plasma.start_plasma_manager.
      cleanup (bool): True if using Ray in local mode. If cleanup is true,
        then the started processes will be killed by services.cleanup() when
        the Python process that imported services exits. This is True by
        default.

    Returns:
      A tuple (store_name, manager_name, manager_port). The two names are
      "/tmp/ray_plasma_store" and "/tmp/ray_plasma_manager" followed by
      random numeric suffixes; manager_port is whatever
      plasma.start_plasma_manager returns as its second value.
    """
    plasma_store_filepath = os.path.join(
        os.path.abspath(os.path.dirname(__file__)),
        "../plasma/build/plasma_store")
    if RUN_PLASMA_STORE_PROFILER:
        plasma_store_prefix = ["valgrind", "--tool=callgrind",
                               plasma_store_filepath]
    else:
        plasma_store_prefix = [plasma_store_filepath]
    store_name = "/tmp/ray_plasma_store{}".format(random_name())
    p1 = subprocess.Popen(plasma_store_prefix + ["-s", store_name])
    if cleanup:
        # Register the store immediately so that it is still cleaned up if
        # starting the manager below raises.
        all_processes.append(p1)

    manager_name = "/tmp/ray_plasma_manager{}".format(random_name())
    p2, manager_port = plasma.start_plasma_manager(
        store_name, manager_name, redis_address,
        run_profiler=RUN_PLASMA_MANAGER_PROFILER)
    if cleanup:
        all_processes.append(p2)

    return store_name, manager_name, manager_port
|
|
|
|
|
|
|
|
def start_worker(address_info, worker_path, cleanup=True):
    """Launch a worker process running the given Python script.

    The script is run with the "python" command and receives the connection
    details from address_info as command-line options.

    Args:
      address_info (dict): This dictionary contains the node_ip_address,
        redis_port, object_store_name, object_store_manager_name, and
        local_scheduler_name.
      worker_path (str): The path of the source code which the worker process
        will run.
      cleanup (bool): True if using Ray in local mode. If cleanup is true,
        then this process will be killed by services.cleanup() when the
        Python process that imported services exits. This is True by default.
    """
    # (option prefix, value) pairs, in the order they appear on the command
    # line. Only the Redis port is converted with str(); the other values are
    # concatenated as they are.
    options = [
        ("--node-ip-address=", address_info["node_ip_address"]),
        ("--object-store-name=", address_info["object_store_name"]),
        ("--object-store-manager-name=",
         address_info["object_store_manager_name"]),
        ("--local-scheduler-name=", address_info["local_scheduler_name"]),
        ("--redis-port=", str(address_info["redis_port"])),
    ]
    command = ["python", worker_path]
    command.extend(prefix + value for prefix, value in options)
    worker_process = subprocess.Popen(command)
    if cleanup:
        all_processes.append(worker_process)
|
2016-04-05 00:34:23 -07:00
|
|
|
|
2016-11-02 00:39:35 -07:00
|
|
|
def start_ray_local(node_ip_address="127.0.0.1", num_workers=0,
                    worker_path=None):
    """Start Ray in local mode.

    Starts, in this order, a Redis server on a randomly chosen port, a plasma
    object store with its manager, a local scheduler, and num_workers worker
    processes. Every process is started with cleanup=True, and there is a
    short sleep after each stage.

    Args:
      node_ip_address (str): The IP address of this node. It is used to build
        the Redis address and is included in the returned dictionary.
      num_workers (int): The number of workers to start.
      worker_path (str): The path of the source code that will be run by the
        worker. If None, default_worker.py in this file's directory is used.

    Returns:
      A dictionary with the keys "node_ip_address", "redis_port",
      "object_store_name", "object_store_manager_name", and
      "local_scheduler_name".
    """
    if worker_path is None:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        worker_path = os.path.join(module_dir, "default_worker.py")

    # Start Redis.
    redis_port = new_port()
    redis_address = address(node_ip_address, redis_port)
    start_redis(redis_port, cleanup=True)
    time.sleep(0.1)

    # Start Plasma. The manager port is returned as well but is not needed
    # here.
    store_name, manager_name, _unused_manager_port = start_objstore(
        node_ip_address, redis_address, cleanup=True)
    time.sleep(0.1)

    # Start the local scheduler.
    scheduler_name = start_local_scheduler(redis_address, store_name,
                                           cleanup=True)
    time.sleep(0.2)

    # Aggregate the address information together.
    address_info = dict(
        node_ip_address=node_ip_address,
        redis_port=redis_port,
        object_store_name=store_name,
        object_store_manager_name=manager_name,
        local_scheduler_name=scheduler_name,
    )

    # Start the workers.
    started = 0
    while started < num_workers:
        start_worker(address_info, worker_path, cleanup=True)
        started += 1
    time.sleep(0.3)

    # Return the addresses of the relevant processes.
    return address_info
|