[core] Support configurable number of "check for redis" attempts (#11902)

Signed-off-by: Richard Liaw <rliaw@berkeley.edu>
This commit is contained in:
Richard Liaw 2020-11-10 14:57:57 -08:00 committed by GitHub
parent 1d158dda32
commit 50dbf1a307
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 4 deletions

View file

@ -551,7 +551,7 @@ def wait_for_redis_to_start(redis_ip_address, redis_port, password=None):
redis_client = redis.StrictRedis(
host=redis_ip_address, port=redis_port, password=password)
# Wait for the Redis server to start.
num_retries = 12
num_retries = ray_constants.START_REDIS_WAIT_RETRIES
delay = 0.001
for _ in range(num_retries):
try:
@ -567,9 +567,13 @@ def wait_for_redis_to_start(redis_ip_address, redis_port, password=None):
else:
break
else:
raise RuntimeError("Unable to connect to Redis. If the Redis instance "
"is on a different machine, check that your "
"firewall is configured properly.")
raise RuntimeError(
f"Unable to connect to Redis (after {num_retries} retries). "
"If the Redis instance is on a different machine, check that "
"your firewall and relevant Ray ports are configured properly. "
"You can also set the environment variable "
"`RAY_START_REDIS_WAIT_RETRIES` to increase the number of "
"attempts to ping the Redis server.")
def _compute_version_info():

View file

@ -137,6 +137,10 @@ RESOURCES_ENVIRONMENT_VARIABLE = "RAY_OVERRIDE_RESOURCES"
# The reporter will report its statistics this often (milliseconds).
# NOTE(review): `env_integer` is defined outside this view; it presumably
# reads the named environment variable and falls back to the second argument
# (2500 here) when the variable is unset -- confirm against its definition.
REPORTER_UPDATE_INTERVAL_MS = env_integer("REPORTER_UPDATE_INTERVAL_MS", 2500)
# Number of attempts to ping the Redis server. See
# `services.py:wait_for_redis_to_start`, which uses this value as its retry
# count and reports it in the error raised when Redis cannot be reached.
# Defaults to 12; set the `RAY_START_REDIS_WAIT_RETRIES` environment variable
# to raise the number of attempts (e.g. when Redis is slow to come up).
START_REDIS_WAIT_RETRIES = env_integer("RAY_START_REDIS_WAIT_RETRIES", 12)
# %-style `logging` format string: timestamp, a tab, then the level name,
# `filename:lineno`, and the log message.
LOGGER_FORMAT = (
"%(asctime)s\t%(levelname)s %(filename)s:%(lineno)s -- %(message)s")
# Help text that embeds the default format above.
LOGGER_FORMAT_HELP = f"The logging format. default='{LOGGER_FORMAT}'"