Revert "[KubeRay][Autoscaler][Core] Add a flag to disable ray status version check (#26584)" (#26597)

Reverts #26584

Seems it breaking test_advanced_4
This commit is contained in:
Chen Shen 2022-07-15 15:47:28 -07:00 committed by GitHub
parent 5ad4e75831
commit fe9a12aa92
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 4 additions and 42 deletions

View file

@ -1,7 +1,6 @@
import enum
import logging
import time
import traceback
from functools import wraps
from typing import List, Optional
@ -97,15 +96,13 @@ def create_gcs_channel(address: str, aio=False):
return init_grpc_channel(address, options=_GRPC_OPTIONS, asynchronous=aio)
def check_health(address: str, timeout=2, skip_version_check=False) -> bool:
def check_health(address: str, timeout=2) -> bool:
"""Checks Ray cluster health, before / without actually connecting to the
cluster via ray.init().
Args:
address: Ray cluster / GCS address string, e.g. ip:port.
timeout: request timeout.
skip_version_check: If True, will skip comparision of GCS Ray version with local
Ray version. If False (default), will raise exception on mismatch.
Returns:
Returns True if the cluster is running and has matching Ray version.
Returns False if no service is running.
@ -117,14 +114,9 @@ def check_health(address: str, timeout=2, skip_version_check=False) -> bool:
stub = gcs_service_pb2_grpc.HeartbeatInfoGcsServiceStub(channel)
resp = stub.CheckAlive(req, timeout=timeout)
except grpc.RpcError:
traceback.print_exc()
return False
if resp.status.code != GcsCode.OK:
raise RuntimeError(f"GCS running at {address} is unhealthy: {resp.status}")
if skip_version_check:
return True
# Otherwise, continue to check for Ray version match.
if resp.ray_version is None:
resp.ray_version = "<= 1.12"
if resp.ray_version != ray.__version__:

View file

@ -21,17 +21,7 @@ def run_kuberay_autoscaler(cluster_name: str, cluster_namespace: str):
ray_address = f"{head_ip}:6379"
while True:
try:
# Autoscaler Ray version might not exactly match GCS version, so skip the
# version check when checking GCS status.
subprocess.check_call(
[
"ray",
"health-check",
"--address",
ray_address,
"--skip-version-check",
]
)
subprocess.check_call(["ray", "health-check", "--address", ray_address])
# Logging is not ready yet. Print to stdout for now.
print("The Ray head is ready. Starting the autoscaler.")
break

View file

@ -6,7 +6,6 @@ import signal
import subprocess
import sys
import time
import traceback
import urllib
import urllib.parse
from datetime import datetime
@ -2324,13 +2323,7 @@ def kuberay_autoscaler(cluster_name: str, cluster_namespace: str) -> None:
help="Health check for a specific component. Currently supports: "
"[ray_client_server]",
)
@click.option(
"--skip-version-check",
is_flag=True,
default=False,
help="Skip comparison of GCS version with local Ray version.",
)
def healthcheck(address, redis_password, component, skip_version_check):
def healthcheck(address, redis_password, component):
"""
This is NOT a public api.
@ -2341,12 +2334,9 @@ def healthcheck(address, redis_password, component, skip_version_check):
if not component:
try:
if ray._private.gcs_utils.check_health(
address, skip_version_check=skip_version_check
):
if ray._private.gcs_utils.check_health(address):
sys.exit(0)
except Exception:
traceback.print_exc()
pass
sys.exit(1)

View file

@ -1,4 +1,3 @@
import mock
import subprocess
import sys
@ -113,15 +112,6 @@ def test_check_health(shutdown_only):
assert check_health(addr)
def test_check_health_version_check():
with mock.patch("ray.__version__", "FOO-VERSION"):
conn = ray.init()
addr = conn.address_info["address"]
assert check_health(addr, skip_version_check=True)
with pytest.raises(RuntimeError):
check_health(addr)
def test_back_pressure(shutdown_only_with_initialization_check):
ray.init()