From edf058d4f7c16706d8d40bc7beb3e3f91308cee6 Mon Sep 17 00:00:00 2001 From: ZhuSenlin Date: Mon, 25 Apr 2022 16:10:24 +0800 Subject: [PATCH] improve exponential backoff when connecting to the redis (#24150) --- python/ray/_private/services.py | 4 +++- python/ray/ray_constants.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/python/ray/_private/services.py b/python/ray/_private/services.py index 438031b58..6d496dbd7 100644 --- a/python/ray/_private/services.py +++ b/python/ray/_private/services.py @@ -861,7 +861,9 @@ def wait_for_redis_to_start(redis_ip_address, redis_port, password=None): ) from connEx # Wait a little bit. time.sleep(delay) - delay *= 2 + # Cap the retry interval at 1 second (time.sleep() takes seconds) so that + # repeated retries do not delay the delivery of the Ray cluster. + delay = 1 if i >= 10 else delay * 2 else: break else: diff --git a/python/ray/ray_constants.py b/python/ray/ray_constants.py index e460e4f01..620c33f64 100644 --- a/python/ray/ray_constants.py +++ b/python/ray/ray_constants.py @@ -181,7 +181,7 @@ REPORTER_UPDATE_INTERVAL_MS = env_integer("REPORTER_UPDATE_INTERVAL_MS", 2500) # Number of attempts to ping the Redis server. See # `services.py::wait_for_redis_to_start()` and # `services.py::create_redis_client()` -START_REDIS_WAIT_RETRIES = env_integer("RAY_START_REDIS_WAIT_RETRIES", 16) +START_REDIS_WAIT_RETRIES = env_integer("RAY_START_REDIS_WAIT_RETRIES", 60) LOGGER_FORMAT = "%(asctime)s\t%(levelname)s %(filename)s:%(lineno)s -- %(message)s" LOGGER_FORMAT_HELP = f"The logging format. default='{LOGGER_FORMAT}'"