From 5bc4976550087b7183ef3e390c8780c660907c33 Mon Sep 17 00:00:00 2001 From: dHannasch Date: Wed, 18 Nov 2020 20:28:10 -0700 Subject: [PATCH] More informative error message if ray start fails to connect to Redis (#11880) * Chain original redis.ConnectionError. More importantly, print out the address so people don't have to dig out --logging-level debug to get the number wait_for_redis_to_start() already knows. Check the Redis password. * f --- python/ray/_private/services.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/python/ray/_private/services.py b/python/ray/_private/services.py index c2bcc9aaa..7a76e4e0e 100644 --- a/python/ray/_private/services.py +++ b/python/ray/_private/services.py @@ -605,7 +605,7 @@ def wait_for_redis_to_start(redis_ip_address, redis_port, password=None): # Wait for the Redis server to start. num_retries = ray_constants.START_REDIS_WAIT_RETRIES delay = 0.001 - for _ in range(num_retries): + for i in range(num_retries): try: # Run some random command and see if it worked. logger.debug( @@ -623,7 +623,17 @@ def wait_for_redis_to_start(redis_ip_address, redis_port, password=None): except redis.AuthenticationError as authEx: raise RuntimeError("Unable to connect to Redis at {}:{}.".format( redis_ip_address, redis_port)) from authEx - except redis.ConnectionError: + except redis.ConnectionError as connEx: + if i >= num_retries - 1: + raise RuntimeError( + f"Unable to connect to Redis at {redis_ip_address}:" + f"{redis_port} after {num_retries} retries. Check that " + f"{redis_ip_address}:{redis_port} is reachable from this " + "machine. If it is not, your firewall may be blocking " + "this port. If the problem is a flaky connection, try " + "setting the environment variable " + "`RAY_START_REDIS_WAIT_RETRIES` to increase the number of" + " attempts to ping the Redis server.") from connEx # Wait a little bit. time.sleep(delay) delay *= 2