mirror of
https://github.com/vale981/ray
synced 2025-03-05 10:01:43 -05:00
Determine node IP address programatically. (#151)
* Determine node ip address programatically. * Factor out methods for getting node IP addresses. * Address comments.
This commit is contained in:
parent
8d90c9f432
commit
241c955707
3 changed files with 36 additions and 7 deletions
|
@ -7,6 +7,7 @@ import os
|
|||
import random
|
||||
import redis
|
||||
import signal
|
||||
import socket
|
||||
import string
|
||||
import subprocess
|
||||
import sys
|
||||
|
@ -72,6 +73,21 @@ def cleanup():
|
|||
def all_processes_alive():
|
||||
return all([p.poll() is None for p in all_processes])
|
||||
|
||||
def get_node_ip_address(address="8.8.8.8:53"):
|
||||
"""Determine the IP address of the local node.
|
||||
|
||||
Args:
|
||||
address (str): The IP address and port of any known live service on the
|
||||
network you care about.
|
||||
|
||||
Returns:
|
||||
The IP address of the current node.
|
||||
"""
|
||||
host, port = address.split(":")
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||
s.connect((host, int(port)))
|
||||
return s.getsockname()[0]
|
||||
|
||||
def wait_for_redis_to_start(redis_host, redis_port, num_retries=5):
|
||||
"""Wait for a Redis server to be available.
|
||||
|
||||
|
|
|
@ -674,7 +674,7 @@ def get_address_info_from_redis(redis_address, node_ip_address, num_retries=5):
|
|||
time.sleep(1)
|
||||
counter += 1
|
||||
|
||||
def init(node_ip_address="127.0.0.1", redis_address=None, start_ray_local=False, object_id_seed=None, num_workers=None, num_local_schedulers=None, driver_mode=SCRIPT_MODE):
|
||||
def init(node_ip_address=None, redis_address=None, start_ray_local=False, object_id_seed=None, num_workers=None, num_local_schedulers=None, driver_mode=SCRIPT_MODE):
|
||||
"""Either connect to an existing Ray cluster or start one and connect to it.
|
||||
|
||||
This method handles two cases. Either a Ray cluster already exists and we
|
||||
|
@ -735,12 +735,13 @@ def init(node_ip_address="127.0.0.1", redis_address=None, start_ray_local=False,
|
|||
else:
|
||||
if redis_address is None:
|
||||
raise Exception("If start_ray_local=False, then redis_address must be provided.")
|
||||
if node_ip_address is None:
|
||||
raise Exception("If start_ray_local=False, then node_ip_address must be provided.")
|
||||
if num_workers is not None:
|
||||
raise Exception("If start_ray_local=False, then num_workers must not be provided.")
|
||||
if num_local_schedulers is not None:
|
||||
raise Exception("If start_ray_local=False, then num_local_schedulers must not be provided.")
|
||||
# Get the node IP address if one is not provided.
|
||||
if node_ip_address is None:
|
||||
node_ip_address = services.get_node_ip_address(redis_address)
|
||||
# Get the address info of the processes to connect to from Redis.
|
||||
info = get_address_info_from_redis(redis_address, node_ip_address)
|
||||
# Connect this driver to Redis, the object store, and the local scheduler. The
|
||||
|
|
|
@ -8,7 +8,7 @@ import redis
|
|||
import ray.services as services
|
||||
|
||||
parser = argparse.ArgumentParser(description="Parse addresses for the worker to connect to.")
|
||||
parser.add_argument("--node-ip-address", required=True, type=str, help="the ip address of the worker's node")
|
||||
parser.add_argument("--node-ip-address", required=False, type=str, help="the IP address of the worker's node")
|
||||
parser.add_argument("--redis-address", required=False, type=str, help="the address to use for Redis")
|
||||
parser.add_argument("--num-workers", default=10, required=False, type=int, help="the number of workers to start on this node")
|
||||
parser.add_argument("--head", action="store_true", help="provide this argument for the head node")
|
||||
|
@ -41,7 +41,13 @@ if __name__ == "__main__":
|
|||
# Start Ray on the head node.
|
||||
if args.redis_address is not None:
|
||||
raise Exception("If --head is passed in, a Redis server will be started, so a Redis address should not be provided.")
|
||||
address_info = services.start_ray_local(node_ip_address=args.node_ip_address,
|
||||
# Get the node IP address if one is not provided.
|
||||
if args.node_ip_address is None:
|
||||
node_ip_address = services.get_node_ip_address()
|
||||
else:
|
||||
node_ip_address = args.node_ip_address
|
||||
print("Using IP address {} for this node.".format(node_ip_address))
|
||||
address_info = services.start_ray_local(node_ip_address=node_ip_address,
|
||||
num_workers=args.num_workers,
|
||||
cleanup=False,
|
||||
redirect_output=True)
|
||||
|
@ -53,11 +59,17 @@ if __name__ == "__main__":
|
|||
# Wait for the Redis server to be started. And throw an exception if we
|
||||
# can't connect to it.
|
||||
services.wait_for_redis_to_start(redis_host, int(redis_port))
|
||||
# Get the node IP address if one is not provided.
|
||||
if args.node_ip_address is None:
|
||||
node_ip_address = services.get_node_ip_address(args.redis_address)
|
||||
else:
|
||||
node_ip_addess = args.node_ip_address
|
||||
print("Using IP address {} for this node.".format(node_ip_address))
|
||||
# Check that there aren't already Redis clients with the same IP address
|
||||
# connected with this Redis instance. This raises an exception if the Redis
|
||||
# server already has clients on this node.
|
||||
check_no_existing_redis_clients(args.node_ip_address, args.redis_address)
|
||||
address_info = services.start_ray_node(node_ip_address=args.node_ip_address,
|
||||
check_no_existing_redis_clients(node_ip_address, args.redis_address)
|
||||
address_info = services.start_ray_node(node_ip_address=node_ip_address,
|
||||
redis_address=args.redis_address,
|
||||
num_workers=args.num_workers,
|
||||
cleanup=False,
|
||||
|
|
Loading…
Add table
Reference in a new issue