From 241c955707b27fa36239c4c2189066f3195e0124 Mon Sep 17 00:00:00 2001 From: Robert Nishihara Date: Fri, 23 Dec 2016 15:31:40 -0800 Subject: [PATCH] Determine node IP address programatically. (#151) * Determine node ip address programatically. * Factor out methods for getting node IP addresses. * Address comments. --- lib/python/ray/services.py | 16 ++++++++++++++++ lib/python/ray/worker.py | 7 ++++--- scripts/start_ray.py | 20 ++++++++++++++++---- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/lib/python/ray/services.py b/lib/python/ray/services.py index 9fd439c15..be13720a3 100644 --- a/lib/python/ray/services.py +++ b/lib/python/ray/services.py @@ -7,6 +7,7 @@ import os import random import redis import signal +import socket import string import subprocess import sys @@ -72,6 +73,21 @@ def cleanup(): def all_processes_alive(): return all([p.poll() is None for p in all_processes]) +def get_node_ip_address(address="8.8.8.8:53"): + """Determine the IP address of the local node. + + Args: + address (str): The IP address and port of any known live service on the + network you care about. + + Returns: + The IP address of the current node. + """ + host, port = address.split(":") + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect((host, int(port))) + return s.getsockname()[0] + def wait_for_redis_to_start(redis_host, redis_port, num_retries=5): """Wait for a Redis server to be available. diff --git a/lib/python/ray/worker.py b/lib/python/ray/worker.py index 880b2ce98..dc582a0c4 100644 --- a/lib/python/ray/worker.py +++ b/lib/python/ray/worker.py @@ -674,7 +674,7 @@ def get_address_info_from_redis(redis_address, node_ip_address, num_retries=5): time.sleep(1) counter += 1 -def init(node_ip_address="127.0.0.1", redis_address=None, start_ray_local=False, object_id_seed=None, num_workers=None, num_local_schedulers=None, driver_mode=SCRIPT_MODE): +def init(node_ip_address=None, redis_address=None, start_ray_local=False, object_id_seed=None, num_workers=None, num_local_schedulers=None, driver_mode=SCRIPT_MODE): """Either connect to an existing Ray cluster or start one and connect to it. This method handles two cases. Either a Ray cluster already exists and we @@ -735,12 +735,13 @@ def init(node_ip_address="127.0.0.1", redis_address=None, start_ray_local=False, else: if redis_address is None: raise Exception("If start_ray_local=False, then redis_address must be provided.") - if node_ip_address is None: - raise Exception("If start_ray_local=False, then node_ip_address must be provided.") if num_workers is not None: raise Exception("If start_ray_local=False, then num_workers must not be provided.") if num_local_schedulers is not None: raise Exception("If start_ray_local=False, then num_local_schedulers must not be provided.") + # Get the node IP address if one is not provided. + if node_ip_address is None: + node_ip_address = services.get_node_ip_address(redis_address) # Get the address info of the processes to connect to from Redis. info = get_address_info_from_redis(redis_address, node_ip_address) # Connect this driver to Redis, the object store, and the local scheduler. The diff --git a/scripts/start_ray.py b/scripts/start_ray.py index b2f119b19..1f6383fe2 100644 --- a/scripts/start_ray.py +++ b/scripts/start_ray.py @@ -8,7 +8,7 @@ import redis import ray.services as services parser = argparse.ArgumentParser(description="Parse addresses for the worker to connect to.") -parser.add_argument("--node-ip-address", required=True, type=str, help="the ip address of the worker's node") +parser.add_argument("--node-ip-address", required=False, type=str, help="the IP address of the worker's node") parser.add_argument("--redis-address", required=False, type=str, help="the address to use for Redis") parser.add_argument("--num-workers", default=10, required=False, type=int, help="the number of workers to start on this node") parser.add_argument("--head", action="store_true", help="provide this argument for the head node") @@ -41,7 +41,13 @@ if __name__ == "__main__": # Start Ray on the head node. if args.redis_address is not None: raise Exception("If --head is passed in, a Redis server will be started, so a Redis address should not be provided.") - address_info = services.start_ray_local(node_ip_address=args.node_ip_address, + # Get the node IP address if one is not provided. + if args.node_ip_address is None: + node_ip_address = services.get_node_ip_address() + else: + node_ip_address = args.node_ip_address + print("Using IP address {} for this node.".format(node_ip_address)) + address_info = services.start_ray_local(node_ip_address=node_ip_address, num_workers=args.num_workers, cleanup=False, redirect_output=True) @@ -53,11 +59,17 @@ if __name__ == "__main__": # Wait for the Redis server to be started. And throw an exception if we # can't connect to it. services.wait_for_redis_to_start(redis_host, int(redis_port)) + # Get the node IP address if one is not provided. + if args.node_ip_address is None: + node_ip_address = services.get_node_ip_address(args.redis_address) + else: + node_ip_addess = args.node_ip_address + print("Using IP address {} for this node.".format(node_ip_address)) # Check that there aren't already Redis clients with the same IP address # connected with this Redis instance. This raises an exception if the Redis # server already has clients on this node. - check_no_existing_redis_clients(args.node_ip_address, args.redis_address) - address_info = services.start_ray_node(node_ip_address=args.node_ip_address, + check_no_existing_redis_clients(node_ip_address, args.redis_address) + address_info = services.start_ray_node(node_ip_address=node_ip_address, redis_address=args.redis_address, num_workers=args.num_workers, cleanup=False,