mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
Revert "[KubeRay][Autoscaler][Core] Add a flag to disable ray status version check (#26584)" (#26597)
Reverts #26584 Seems it breaking test_advanced_4
This commit is contained in:
parent
5ad4e75831
commit
fe9a12aa92
4 changed files with 4 additions and 42 deletions
|
@ -1,7 +1,6 @@
|
|||
import enum
|
||||
import logging
|
||||
import time
|
||||
import traceback
|
||||
from functools import wraps
|
||||
from typing import List, Optional
|
||||
|
||||
|
@ -97,15 +96,13 @@ def create_gcs_channel(address: str, aio=False):
|
|||
return init_grpc_channel(address, options=_GRPC_OPTIONS, asynchronous=aio)
|
||||
|
||||
|
||||
def check_health(address: str, timeout=2, skip_version_check=False) -> bool:
|
||||
def check_health(address: str, timeout=2) -> bool:
|
||||
"""Checks Ray cluster health, before / without actually connecting to the
|
||||
cluster via ray.init().
|
||||
|
||||
Args:
|
||||
address: Ray cluster / GCS address string, e.g. ip:port.
|
||||
timeout: request timeout.
|
||||
skip_version_check: If True, will skip comparision of GCS Ray version with local
|
||||
Ray version. If False (default), will raise exception on mismatch.
|
||||
Returns:
|
||||
Returns True if the cluster is running and has matching Ray version.
|
||||
Returns False if no service is running.
|
||||
|
@ -117,14 +114,9 @@ def check_health(address: str, timeout=2, skip_version_check=False) -> bool:
|
|||
stub = gcs_service_pb2_grpc.HeartbeatInfoGcsServiceStub(channel)
|
||||
resp = stub.CheckAlive(req, timeout=timeout)
|
||||
except grpc.RpcError:
|
||||
traceback.print_exc()
|
||||
return False
|
||||
if resp.status.code != GcsCode.OK:
|
||||
raise RuntimeError(f"GCS running at {address} is unhealthy: {resp.status}")
|
||||
|
||||
if skip_version_check:
|
||||
return True
|
||||
# Otherwise, continue to check for Ray version match.
|
||||
if resp.ray_version is None:
|
||||
resp.ray_version = "<= 1.12"
|
||||
if resp.ray_version != ray.__version__:
|
||||
|
|
|
@ -21,17 +21,7 @@ def run_kuberay_autoscaler(cluster_name: str, cluster_namespace: str):
|
|||
ray_address = f"{head_ip}:6379"
|
||||
while True:
|
||||
try:
|
||||
# Autoscaler Ray version might not exactly match GCS version, so skip the
|
||||
# version check when checking GCS status.
|
||||
subprocess.check_call(
|
||||
[
|
||||
"ray",
|
||||
"health-check",
|
||||
"--address",
|
||||
ray_address,
|
||||
"--skip-version-check",
|
||||
]
|
||||
)
|
||||
subprocess.check_call(["ray", "health-check", "--address", ray_address])
|
||||
# Logging is not ready yet. Print to stdout for now.
|
||||
print("The Ray head is ready. Starting the autoscaler.")
|
||||
break
|
||||
|
|
|
@ -6,7 +6,6 @@ import signal
|
|||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import traceback
|
||||
import urllib
|
||||
import urllib.parse
|
||||
from datetime import datetime
|
||||
|
@ -2324,13 +2323,7 @@ def kuberay_autoscaler(cluster_name: str, cluster_namespace: str) -> None:
|
|||
help="Health check for a specific component. Currently supports: "
|
||||
"[ray_client_server]",
|
||||
)
|
||||
@click.option(
|
||||
"--skip-version-check",
|
||||
is_flag=True,
|
||||
default=False,
|
||||
help="Skip comparison of GCS version with local Ray version.",
|
||||
)
|
||||
def healthcheck(address, redis_password, component, skip_version_check):
|
||||
def healthcheck(address, redis_password, component):
|
||||
"""
|
||||
This is NOT a public api.
|
||||
|
||||
|
@ -2341,12 +2334,9 @@ def healthcheck(address, redis_password, component, skip_version_check):
|
|||
|
||||
if not component:
|
||||
try:
|
||||
if ray._private.gcs_utils.check_health(
|
||||
address, skip_version_check=skip_version_check
|
||||
):
|
||||
if ray._private.gcs_utils.check_health(address):
|
||||
sys.exit(0)
|
||||
except Exception:
|
||||
traceback.print_exc()
|
||||
pass
|
||||
sys.exit(1)
|
||||
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import mock
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
@ -113,15 +112,6 @@ def test_check_health(shutdown_only):
|
|||
assert check_health(addr)
|
||||
|
||||
|
||||
def test_check_health_version_check():
|
||||
with mock.patch("ray.__version__", "FOO-VERSION"):
|
||||
conn = ray.init()
|
||||
addr = conn.address_info["address"]
|
||||
assert check_health(addr, skip_version_check=True)
|
||||
with pytest.raises(RuntimeError):
|
||||
check_health(addr)
|
||||
|
||||
|
||||
def test_back_pressure(shutdown_only_with_initialization_check):
|
||||
ray.init()
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue