mirror of
https://github.com/vale981/ray
synced 2025-03-05 10:01:43 -05:00
[kuberay][autoscaler] Improve CPU, GPU, and memory detection. (#26219)
This PR improves the autoscaler's resource detection logic
This commit is contained in:
parent
34d1e580cb
commit
7d3ceb222c
4 changed files with 110 additions and 59 deletions
|
@ -1,10 +1,10 @@
|
|||
import decimal
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import time
|
||||
from contextlib import suppress
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import kubernetes
|
||||
import requests
|
||||
|
||||
from ray.autoscaler._private.constants import (
|
||||
|
@ -29,8 +29,6 @@ RAYCLUSTER_FETCH_RETRY_S = 5
|
|||
# Used as the name of the "head node type" by the autoscaler.
|
||||
_HEAD_GROUP_NAME = "head-group"
|
||||
|
||||
_GPU_WARNING_LOGGED = False
|
||||
|
||||
|
||||
class AutoscalingConfigProducer:
|
||||
"""Produces an autoscaling config by reading data from the RayCluster CR.
|
||||
|
@ -259,15 +257,14 @@ def _get_num_cpus(
|
|||
k8s_resource_limits: Dict[str, str],
|
||||
group_name: str,
|
||||
) -> int:
|
||||
if "num_cpus" in ray_start_params:
|
||||
return int(ray_start_params["num_cpus"])
|
||||
"""Get CPU annotation from ray_start_params or k8s_resource_limits,
|
||||
with priority for ray_start_params.
|
||||
"""
|
||||
if "num-cpus" in ray_start_params:
|
||||
return int(ray_start_params["num-cpus"])
|
||||
elif "cpu" in k8s_resource_limits:
|
||||
cpu_str = str(k8s_resource_limits["cpu"])
|
||||
if cpu_str[-1] == "m":
|
||||
# For example, '500m' rounds up to 1.
|
||||
return math.ceil(int(cpu_str[:-1]) / 1000)
|
||||
else:
|
||||
return int(cpu_str)
|
||||
cpu_quantity: str = k8s_resource_limits["cpu"]
|
||||
return _round_up_k8s_quantity(cpu_quantity)
|
||||
else:
|
||||
# Getting the number of CPUs is important, so raise an error if we can't do it.
|
||||
raise ValueError(
|
||||
|
@ -280,13 +277,14 @@ def _get_num_cpus(
|
|||
def _get_memory(
|
||||
ray_start_params: Dict[str, str], k8s_resource_limits: Dict[str, Any]
|
||||
) -> Optional[int]:
|
||||
"""Get memory resource annotation from ray_start_params, if it is set there.
|
||||
|
||||
TODO, maybe: Consider container resource limits as in
|
||||
https://github.com/ray-project/ray/pull/14567/files
|
||||
"""Get memory resource annotation from ray_start_params or k8s_resource_limits,
|
||||
with priority for ray_start_params.
|
||||
"""
|
||||
if "memory" in ray_start_params:
|
||||
return int(ray_start_params["memory"])
|
||||
elif "memory" in k8s_resource_limits:
|
||||
memory_quantity: str = k8s_resource_limits["memory"]
|
||||
return _round_up_k8s_quantity(memory_quantity)
|
||||
return None
|
||||
|
||||
|
||||
|
@ -295,34 +293,44 @@ def _get_num_gpus(
|
|||
k8s_resource_limits: Dict[str, Any],
|
||||
group_name: str,
|
||||
) -> Optional[int]:
|
||||
"""Read the number of GPUs from the Ray start params.
|
||||
|
||||
Potential TODO: Read GPU info from the container spec, here and in the
|
||||
Ray Operator.
|
||||
"""Get GPU annotation from ray_start_params or k8s_resource_limits,
|
||||
with priority for ray_start_params.
|
||||
"""
|
||||
|
||||
if "num-gpus" in ray_start_params:
|
||||
return int(ray_start_params["num-gpus"])
|
||||
|
||||
# Issue a warning if GPUs are present in the container spec but not in the
|
||||
# ray start params.
|
||||
# TODO: Consider reading GPU info from container spec.
|
||||
else:
|
||||
for key in k8s_resource_limits:
|
||||
global _GPU_WARNING_LOGGED
|
||||
if "gpu" in key and not _GPU_WARNING_LOGGED:
|
||||
with suppress(Exception):
|
||||
if int(k8s_resource_limits[key]) > 0:
|
||||
logger.warning(
|
||||
f"Detected GPUs in container resources for group {group_name}."
|
||||
"To ensure Ray and the autoscaler are aware of the GPUs,"
|
||||
" set the `--num-gpus` rayStartParam."
|
||||
)
|
||||
_GPU_WARNING_LOGGED = True
|
||||
break
|
||||
|
||||
# e.g. nvidia.com/gpu
|
||||
if key.endswith("gpu"):
|
||||
# Typically, this is a string representing an integer, e.g. "1".
|
||||
gpu_resource_quantity = k8s_resource_limits[key]
|
||||
# Convert to int, making no assumptions on the gpu_resource_quantity,
|
||||
# besides that it's valid as a K8s resource quantity.
|
||||
num_gpus = _round_up_k8s_quantity(gpu_resource_quantity)
|
||||
if num_gpus > 0:
|
||||
# Only one GPU type supported for now, break out on first
|
||||
# "/gpu" match.
|
||||
return num_gpus
|
||||
return None
|
||||
|
||||
|
||||
def _round_up_k8s_quantity(quantity: str) -> int:
|
||||
"""Rounds a Kubernetes resource quantity up to the nearest integer.
|
||||
|
||||
Args:
|
||||
quantity: Resource quantity as a string in the canonical K8s form.
|
||||
|
||||
Returns:
|
||||
The quantity, rounded up, as an integer.
|
||||
"""
|
||||
resource_decimal: decimal.Decimal = kubernetes.utils.quantity.parse_quantity(
|
||||
quantity
|
||||
)
|
||||
rounded = resource_decimal.to_integral_value(rounding=decimal.ROUND_UP)
|
||||
return int(rounded)
|
||||
|
||||
|
||||
def _get_custom_resources(
|
||||
ray_start_params: Dict[str, Any], group_name: str
|
||||
) -> Dict[str, int]:
|
||||
|
|
|
@ -71,7 +71,7 @@ def _setup_logging() -> None:
|
|||
filename=ray_constants.MONITOR_LOG_FILE_NAME, # monitor.log
|
||||
max_bytes=ray_constants.LOGGING_ROTATE_BYTES,
|
||||
backup_count=ray_constants.LOGGING_ROTATE_BACKUP_COUNT,
|
||||
logger_name="ray", # Root of the logging hierachy for Ray code.
|
||||
logger_name="ray", # Root of the logging hierarchy for Ray code.
|
||||
)
|
||||
# Logs will also be written to the container's stdout.
|
||||
# The stdout handler was set up in the cli entry point.
|
||||
|
|
|
@ -41,7 +41,6 @@ from ray.autoscaler._private.commands import (
|
|||
)
|
||||
from ray.autoscaler._private.constants import RAY_PROCESSES
|
||||
from ray.autoscaler._private.fake_multi_node.node_provider import FAKE_HEAD_NODE_ID
|
||||
from ray.autoscaler._private.kuberay.run_autoscaler import run_kuberay_autoscaler
|
||||
from ray.dashboard.modules.job.cli import job_cli_group
|
||||
from ray.experimental.state.api import get_log, list_logs
|
||||
from ray.experimental.state.common import DEFAULT_RPC_TIMEOUT, DEFAULT_LOG_LIMIT
|
||||
|
@ -2292,6 +2291,10 @@ def kuberay_autoscaler(cluster_name: str, cluster_namespace: str) -> None:
|
|||
KubeRay cluster configs.
|
||||
`ray kuberay-autoscaler` is NOT a public CLI.
|
||||
"""
|
||||
# Delay import to avoid introducing Ray core dependency on the Python Kubernetes
|
||||
# client.
|
||||
from ray.autoscaler._private.kuberay.run_autoscaler import run_kuberay_autoscaler
|
||||
|
||||
run_kuberay_autoscaler(cluster_name, cluster_namespace)
|
||||
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import copy
|
||||
from pathlib import Path
|
||||
import requests
|
||||
from typing import Any, Dict, Optional
|
||||
|
@ -10,20 +11,30 @@ import yaml
|
|||
from ray.autoscaler._private.kuberay.autoscaling_config import (
|
||||
_derive_autoscaling_config_from_ray_cr,
|
||||
AutoscalingConfigProducer,
|
||||
_round_up_k8s_quantity,
|
||||
)
|
||||
|
||||
AUTOSCALING_CONFIG_MODULE_PATH = "ray.autoscaler._private.kuberay.autoscaling_config"
|
||||
|
||||
|
||||
def _get_basic_ray_cr() -> dict:
|
||||
"""Returns the example Ray CR included in the Ray documentation."""
|
||||
"""Returns the example Ray CR included in the Ray documentation,
|
||||
modified to include a GPU worker group.
|
||||
"""
|
||||
cr_path = str(
|
||||
Path(__file__).resolve().parents[2]
|
||||
/ "autoscaler"
|
||||
/ "kuberay"
|
||||
/ "ray-cluster.complete.yaml"
|
||||
)
|
||||
return yaml.safe_load(open(cr_path).read())
|
||||
config = yaml.safe_load(open(cr_path).read())
|
||||
gpu_group = copy.deepcopy(config["spec"]["workerGroupSpecs"][0])
|
||||
gpu_group["groupName"] = "gpu-group"
|
||||
gpu_group["template"]["spec"]["containers"][0]["resources"]["limits"].setdefault(
|
||||
"nvidia.com/gpu", 3
|
||||
)
|
||||
config["spec"]["workerGroupSpecs"].append(gpu_group)
|
||||
return config
|
||||
|
||||
|
||||
def _get_basic_autoscaling_config() -> dict:
|
||||
|
@ -44,6 +55,7 @@ def _get_basic_autoscaling_config() -> dict:
|
|||
"node_config": {},
|
||||
"resources": {
|
||||
"CPU": 1,
|
||||
"memory": 1000000000,
|
||||
"Custom1": 1,
|
||||
"Custom2": 5,
|
||||
},
|
||||
|
@ -54,10 +66,24 @@ def _get_basic_autoscaling_config() -> dict:
|
|||
"node_config": {},
|
||||
"resources": {
|
||||
"CPU": 1,
|
||||
"memory": 536870912,
|
||||
"Custom2": 5,
|
||||
"Custom3": 1,
|
||||
},
|
||||
},
|
||||
# Same as "small-group" with a GPU entry added.
|
||||
"gpu-group": {
|
||||
"max_workers": 300,
|
||||
"min_workers": 1,
|
||||
"node_config": {},
|
||||
"resources": {
|
||||
"CPU": 1,
|
||||
"memory": 536870912,
|
||||
"Custom2": 5,
|
||||
"Custom3": 1,
|
||||
"GPU": 3,
|
||||
},
|
||||
},
|
||||
},
|
||||
"auth": {},
|
||||
"cluster_synced_files": [],
|
||||
|
@ -69,7 +95,7 @@ def _get_basic_autoscaling_config() -> dict:
|
|||
"head_start_ray_commands": [],
|
||||
"idle_timeout_minutes": 5,
|
||||
"initialization_commands": [],
|
||||
"max_workers": 300,
|
||||
"max_workers": 600,
|
||||
"setup_commands": [],
|
||||
"upscaling_speed": 1,
|
||||
"worker_nodes": {},
|
||||
|
@ -99,19 +125,25 @@ def _get_no_cpu_error() -> str:
|
|||
)
|
||||
|
||||
|
||||
def _get_ray_cr_memory_and_gpu() -> dict:
|
||||
"""CR with memory and gpu rayStartParams."""
|
||||
def _get_ray_cr_with_overrides() -> dict:
|
||||
"""CR with memory, cpu, and gpu overrides from rayStartParams."""
|
||||
cr = _get_basic_ray_cr()
|
||||
cr["spec"]["workerGroupSpecs"][0]["rayStartParams"]["memory"] = "300000000"
|
||||
cr["spec"]["workerGroupSpecs"][0]["rayStartParams"]["num-gpus"] = "1"
|
||||
# num-gpus rayStartParam with no gpus in container limits
|
||||
cr["spec"]["workerGroupSpecs"][0]["rayStartParams"]["num-gpus"] = "100"
|
||||
# num-gpus rayStartParam overriding gpus in container limits
|
||||
cr["spec"]["workerGroupSpecs"][1]["rayStartParams"]["num-gpus"] = "100"
|
||||
cr["spec"]["workerGroupSpecs"][0]["rayStartParams"]["num-cpus"] = "100"
|
||||
return cr
|
||||
|
||||
|
||||
def _get_autoscaling_config_memory_and_gpu() -> dict:
|
||||
def _get_autoscaling_config_with_overrides() -> dict:
|
||||
"""Autoscaling config with memory, cpu, and gpu overrides."""
|
||||
config = _get_basic_autoscaling_config()
|
||||
config["available_node_types"]["small-group"]["resources"]["memory"] = 300000000
|
||||
config["available_node_types"]["small-group"]["resources"]["GPU"] = 1
|
||||
config["available_node_types"]["small-group"]["resources"]["GPU"] = 100
|
||||
config["available_node_types"]["small-group"]["resources"]["CPU"] = 100
|
||||
config["available_node_types"]["gpu-group"]["resources"]["GPU"] = 100
|
||||
return config
|
||||
|
||||
|
||||
|
@ -151,6 +183,21 @@ def _get_autoscaling_config_with_options() -> dict:
|
|||
return config
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"input,output",
|
||||
[
|
||||
# There's no particular discipline to these test cases.
|
||||
("100m", 1),
|
||||
("15001m", 16),
|
||||
("2", 2),
|
||||
("100Mi", 104857600),
|
||||
("1G", 1000000000),
|
||||
],
|
||||
)
|
||||
def test_resource_quantity(input: str, output: int):
|
||||
assert _round_up_k8s_quantity(input) == output, output
|
||||
|
||||
|
||||
PARAM_ARGS = ",".join(
|
||||
[
|
||||
"ray_cr_in",
|
||||
|
@ -182,20 +229,12 @@ TEST_DATA = (
|
|||
id="no-cpu-error",
|
||||
),
|
||||
pytest.param(
|
||||
_get_ray_cr_memory_and_gpu(),
|
||||
_get_autoscaling_config_memory_and_gpu(),
|
||||
_get_ray_cr_with_overrides(),
|
||||
_get_autoscaling_config_with_overrides(),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
id="memory-and-gpu",
|
||||
),
|
||||
pytest.param(
|
||||
_get_ray_cr_missing_gpu_arg(),
|
||||
_get_basic_autoscaling_config(),
|
||||
None,
|
||||
None,
|
||||
_get_gpu_complaint(),
|
||||
id="gpu-complaint",
|
||||
id="overrides",
|
||||
),
|
||||
pytest.param(
|
||||
_get_ray_cr_with_autoscaler_options(),
|
||||
|
@ -239,7 +278,8 @@ def test_cr_image_consistency():
|
|||
cr = _get_basic_ray_cr()
|
||||
|
||||
group_specs = [cr["spec"]["headGroupSpec"]] + cr["spec"]["workerGroupSpecs"]
|
||||
assert len(group_specs) == 2
|
||||
# Head, CPU group, GPU group.
|
||||
assert len(group_specs) == 3
|
||||
|
||||
ray_containers = [
|
||||
group_spec["template"]["spec"]["containers"][0] for group_spec in group_specs
|
||||
|
|
Loading…
Add table
Reference in a new issue