From 50dbf1a3078b953ebaaad4ed9432a9302df2757b Mon Sep 17 00:00:00 2001
From: Richard Liaw
Date: Tue, 10 Nov 2020 14:57:57 -0800
Subject: [PATCH] [core] Support configurable number of "check for redis" attempts (#11902)

Signed-off-by: Richard Liaw
---
 python/ray/_private/services.py | 12 ++++++++----
 python/ray/ray_constants.py     |  4 ++++
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/python/ray/_private/services.py b/python/ray/_private/services.py
index 13116dff2..fa0545a2e 100644
--- a/python/ray/_private/services.py
+++ b/python/ray/_private/services.py
@@ -551,7 +551,7 @@ def wait_for_redis_to_start(redis_ip_address, redis_port, password=None):
     redis_client = redis.StrictRedis(
         host=redis_ip_address, port=redis_port, password=password)
     # Wait for the Redis server to start.
-    num_retries = 12
+    num_retries = ray_constants.START_REDIS_WAIT_RETRIES
     delay = 0.001
     for _ in range(num_retries):
         try:
@@ -567,9 +567,13 @@ def wait_for_redis_to_start(redis_ip_address, redis_port, password=None):
         else:
             break
     else:
-        raise RuntimeError("Unable to connect to Redis. If the Redis instance "
-                           "is on a different machine, check that your "
-                           "firewall is configured properly.")
+        raise RuntimeError(
+            f"Unable to connect to Redis (after {num_retries} retries). "
+            "If the Redis instance is on a different machine, check that "
+            "your firewall and relevant Ray ports are configured properly. "
+            "You can also set the environment variable "
+            "`RAY_START_REDIS_WAIT_RETRIES` to increase the number of "
+            "attempts to ping the Redis server.")
 
 
 def _compute_version_info():
diff --git a/python/ray/ray_constants.py b/python/ray/ray_constants.py
index 29f5387b5..c748d4b4f 100644
--- a/python/ray/ray_constants.py
+++ b/python/ray/ray_constants.py
@@ -137,6 +137,10 @@ RESOURCES_ENVIRONMENT_VARIABLE = "RAY_OVERRIDE_RESOURCES"
 # The reporter will report its statistics this often (milliseconds).
 REPORTER_UPDATE_INTERVAL_MS = env_integer("REPORTER_UPDATE_INTERVAL_MS", 2500)
 
+# Number of attempts to ping the Redis server. See
+# `services.py:wait_for_redis_to_start`.
+START_REDIS_WAIT_RETRIES = env_integer("RAY_START_REDIS_WAIT_RETRIES", 12)
+
 LOGGER_FORMAT = (
     "%(asctime)s\t%(levelname)s %(filename)s:%(lineno)s -- %(message)s")
 LOGGER_FORMAT_HELP = f"The logging format. default='{LOGGER_FORMAT}'"