Mirror of https://github.com/vale981/ray, synced 2025-03-06 10:31:39 -05:00
[serve] Implement experimental deploy_group API (#22039)
When the declarative API issues a code change to a group of deployments at once, it needs to deploy the updated group atomically. This ensures that any deployment using another deployment's handle inside its own __init__() function can access that handle regardless of deployment order. This change adds deploy_group() to the ServeController class, allowing it to deploy a list of deployments atomically. It also adds a new public API command, serve.deploy_group(), exposing the controller's functionality so atomic deployments can also be executed via the Python API. Closes #21873.
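For reference, a minimal usage sketch of the new Python API, modeled on the tests added in this commit (the hello and Echo deployments and the local serve.start() instance are illustrative assumptions, not part of the change):

    import ray
    from ray import serve
    from ray.serve.api import deploy_group

    serve.start()  # assumes a local, non-detached Serve instance

    @serve.deployment
    def hello():
        return "hello reached"

    @serve.deployment
    class Echo:
        async def __call__(self):
            return "Echo reached"

    # Both deployments are submitted to the controller in a single atomic
    # call, so either one could safely fetch the other's handle in __init__().
    deploy_group([hello, Echo], _blocking=True)

    assert ray.get(hello.get_handle().remote()) == "hello reached"
    assert ray.get(Echo.get_handle().remote()) == "Echo reached"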
Parent: a692e7d05e
Commit: a61d974dd5
3 changed files with 353 additions and 65 deletions
@@ -8,7 +8,7 @@ import re
 import time
 from dataclasses import dataclass
 from functools import wraps
-from typing import Any, Callable, Dict, Optional, Tuple, Type, Union, overload
+from typing import Any, Callable, Dict, Optional, Tuple, Type, Union, List, overload

 from fastapi import APIRouter, FastAPI
 from starlette.requests import Request
@@ -116,7 +116,7 @@ class Client:
     def __init__(
         self, controller: ActorHandle, controller_name: str, detached: bool = False
     ):
-        self._controller = controller
+        self._controller: ServeController = controller
         self._controller_name = controller_name
         self._detached = detached
         self._shutdown = False
@@ -227,83 +227,77 @@ class Client:
         url: Optional[str] = None,
         _blocking: Optional[bool] = True,
     ) -> Optional[GoalId]:
-        if config is None:
-            config = {}
-        if ray_actor_options is None:
-            ray_actor_options = {}
-
-        curr_job_env = ray.get_runtime_context().runtime_env
-        if "runtime_env" in ray_actor_options:
-            ray_actor_options["runtime_env"].setdefault(
-                "working_dir", curr_job_env.get("working_dir")
-            )
-        else:
-            ray_actor_options["runtime_env"] = curr_job_env
-
-        replica_config = ReplicaConfig(
-            deployment_def,
+        controller_deploy_args = self.get_deploy_args(
+            name=name,
+            deployment_def=deployment_def,
             init_args=init_args,
             init_kwargs=init_kwargs,
             ray_actor_options=ray_actor_options,
-        )
-
-        if isinstance(config, dict):
-            deployment_config = DeploymentConfig.parse_obj(config)
-        elif isinstance(config, DeploymentConfig):
-            deployment_config = config
-        else:
-            raise TypeError("config must be a DeploymentConfig or a dictionary.")
-
-        if (
-            deployment_config.autoscaling_config is not None
-            and deployment_config.max_concurrent_queries
-            < deployment_config.autoscaling_config.target_num_ongoing_requests_per_replica  # noqa: E501
-        ):
-            logger.warning(
-                "Autoscaling will never happen, "
-                "because 'max_concurrent_queries' is less than "
-                "'target_num_ongoing_requests_per_replica' now."
-            )
+            config=config,
+            version=version,
+            prev_version=prev_version,
+            route_prefix=route_prefix,
+        )

         goal_id, updating = ray.get(
-            self._controller.deploy.remote(
-                name,
-                deployment_config.to_proto_bytes(),
-                replica_config,
-                version,
-                prev_version,
-                route_prefix,
-                ray.get_runtime_context().job_id,
-            )
+            self._controller.deploy.remote(**controller_deploy_args)
         )

-        tag = f"component=serve deployment={name}"
-
-        if updating:
-            msg = f"Updating deployment '{name}'"
-            if version is not None:
-                msg += f" to version '{version}'"
-            logger.info(f"{msg}. {tag}")
-        else:
-            logger.info(
-                f"Deployment '{name}' is already at version "
-                f"'{version}', not updating. {tag}"
-            )
+        tag = self.log_deployment_update_status(name, version, updating)

         if _blocking:
             self._wait_for_goal(goal_id)
-
-            if url is not None:
-                url_part = f" at `{url}`"
-            else:
-                url_part = ""
-            logger.info(
-                f"Deployment '{name}{':'+version if version else ''}' is ready"
-                f"{url_part}. {tag}"
-            )
+            self.log_deployment_ready(name, version, url, tag)
         else:
             return goal_id

+    @_ensure_connected
+    def deploy_group(
+        self, deployments: List[Dict], _blocking: bool = True
+    ) -> List[GoalId]:
+        deployment_args_list = []
+        for deployment in deployments:
+            deployment_args_list.append(
+                self.get_deploy_args(
+                    deployment["name"],
+                    deployment["func_or_class"],
+                    deployment["init_args"],
+                    deployment["init_kwargs"],
+                    ray_actor_options=deployment["ray_actor_options"],
+                    config=deployment["config"],
+                    version=deployment["version"],
+                    prev_version=deployment["prev_version"],
+                    route_prefix=deployment["route_prefix"],
+                )
+            )
+
+        update_goals = ray.get(
+            self._controller.deploy_group.remote(deployment_args_list)
+        )
+
+        tags = []
+        for i in range(len(deployments)):
+            deployment = deployments[i]
+            name, version = deployment["name"], deployment["version"]
+            updating = update_goals[i][1]
+
+            tags.append(self.log_deployment_update_status(name, version, updating))
+
+        nonblocking_goal_ids = []
+        for i in range(len(deployments)):
+            deployment = deployments[i]
+            url = deployment["url"]
+            goal_id = update_goals[i][0]
+
+            if _blocking:
+                self._wait_for_goal(goal_id)
+                self.log_deployment_ready(name, version, url, tags[i])
+            else:
+                nonblocking_goal_ids.append(goal_id)
+
+        return nonblocking_goal_ids
+
     @_ensure_connected
     def delete_deployment(self, name: str) -> None:
         self._wait_for_goal(ray.get(self._controller.delete_deployment.remote(name)))
@@ -406,6 +400,104 @@ class Client:

         return handle

+    @_ensure_connected
+    def get_deploy_args(
+        self,
+        name: str,
+        deployment_def: Union[Callable, Type[Callable], str],
+        init_args: Tuple[Any],
+        init_kwargs: Dict[Any, Any],
+        ray_actor_options: Optional[Dict] = None,
+        config: Optional[Union[DeploymentConfig, Dict[str, Any]]] = None,
+        version: Optional[str] = None,
+        prev_version: Optional[str] = None,
+        route_prefix: Optional[str] = None,
+    ) -> Dict:
+        """
+        Takes a deployment's configuration, and returns the arguments needed
+        for the controller to deploy it.
+        """
+
+        if config is None:
+            config = {}
+        if ray_actor_options is None:
+            ray_actor_options = {}
+
+        curr_job_env = ray.get_runtime_context().runtime_env
+        if "runtime_env" in ray_actor_options:
+            ray_actor_options["runtime_env"].setdefault(
+                "working_dir", curr_job_env.get("working_dir")
+            )
+        else:
+            ray_actor_options["runtime_env"] = curr_job_env
+
+        replica_config = ReplicaConfig(
+            deployment_def,
+            init_args=init_args,
+            init_kwargs=init_kwargs,
+            ray_actor_options=ray_actor_options,
+        )
+
+        if isinstance(config, dict):
+            deployment_config = DeploymentConfig.parse_obj(config)
+        elif isinstance(config, DeploymentConfig):
+            deployment_config = config
+        else:
+            raise TypeError("config must be a DeploymentConfig or a dictionary.")
+
+        if (
+            deployment_config.autoscaling_config is not None
+            and deployment_config.max_concurrent_queries
+            < deployment_config.autoscaling_config.target_num_ongoing_requests_per_replica  # noqa: E501
+        ):
+            logger.warning(
+                "Autoscaling will never happen, "
+                "because 'max_concurrent_queries' is less than "
+                "'target_num_ongoing_requests_per_replica' now."
+            )
+
+        controller_deploy_args = {
+            "name": name,
+            "deployment_config_proto_bytes": deployment_config.to_proto_bytes(),
+            "replica_config": replica_config,
+            "version": version,
+            "prev_version": prev_version,
+            "route_prefix": route_prefix,
+            "deployer_job_id": ray.get_runtime_context().job_id,
+        }
+
+        return controller_deploy_args
+
+    @_ensure_connected
+    def log_deployment_update_status(
+        self, name: str, version: str, updating: bool
+    ) -> str:
+        tag = f"component=serve deployment={name}"
+
+        if updating:
+            msg = f"Updating deployment '{name}'"
+            if version is not None:
+                msg += f" to version '{version}'"
+            logger.info(f"{msg}. {tag}")
+        else:
+            logger.info(
+                f"Deployment '{name}' is already at version "
+                f"'{version}', not updating. {tag}"
+            )
+
+        return tag
+
+    @_ensure_connected
+    def log_deployment_ready(self, name: str, version: str, url: str, tag: str) -> None:
+        if url is not None:
+            url_part = f" at `{url}`"
+        else:
+            url_part = ""
+        logger.info(
+            f"Deployment '{name}{':'+version if version else ''}' is ready"
+            f"{url_part}. {tag}"
+        )
+

 def _check_http_and_checkpoint_options(
     client: Client,
@@ -865,7 +957,7 @@ class Deployment:
     @property
     def init_kwargs(self) -> Tuple[Any]:
         """Keyword args passed to the underlying class's constructor."""
-        return self._init_args
+        return self._init_kwargs

     @property
     def url(self) -> Optional[str]:
@@ -1236,3 +1328,45 @@ def list_deployments() -> Dict[str, Deployment]:
     )

     return deployments
+
+
+def deploy_group(deployments: List[Deployment], _blocking: bool = True) -> List[GoalId]:
+    """
+    EXPERIMENTAL API
+
+    Takes in a list of deployment object, and deploys them atomically.
+
+    Args:
+        deployments(List[Deployment]): a list of deployments to deploy.
+        _blocking(bool): whether to wait for the deployments to finish
+            deploying or not.
+    """
+
+    if len(deployments) == 0:
+        return []
+
+    parameter_group = []
+
+    for deployment in deployments:
+        if not isinstance(deployment, Deployment):
+            raise TypeError(
+                f"deploy_group only accepts Deployments, but got unexpected "
+                f"type {type(deployment)}."
+            )
+
+        deployment_parameters = {
+            "name": deployment._name,
+            "func_or_class": deployment._func_or_class,
+            "init_args": deployment.init_args,
+            "init_kwargs": deployment.init_kwargs,
+            "ray_actor_options": deployment._ray_actor_options,
+            "config": deployment._config,
+            "version": deployment._version,
+            "prev_version": deployment._prev_version,
+            "route_prefix": deployment.route_prefix,
+            "url": deployment.url,
+        }
+
+        parameter_group.append(deployment_parameters)
+
+    return _get_global_client().deploy_group(parameter_group, _blocking=_blocking)
@@ -358,6 +358,22 @@ class ServeController:

         return goal_id, updating

+    def deploy_group(
+        self, deployment_args_list: List[Dict]
+    ) -> List[Tuple[Optional[GoalId], bool]]:
+        """
+        Takes in a list of dictionaries that contain keyword arguments for the
+        controller's deploy() function. Calls deploy on all the argument
+        dictionaries in the list. Effectively executes an atomic deploy on a
+        group of deployments.
+        """
+
+        update_goals: List[Tuple[Optional[GoalId], bool]] = []
+        for deployment_args in deployment_args_list:
+            update_goals.append(self.deploy(**deployment_args))
+
+        return update_goals
+
     def delete_deployment(self, name: str) -> Optional[GoalId]:
         self.endpoint_state.delete_endpoint(name)
         return self.deployment_state_manager.delete_deployment(name)
@@ -16,6 +16,8 @@ from ray import serve
 from ray.serve.exceptions import RayServeException
 from ray.serve.utils import get_random_letters

+from ray.serve.api import deploy_group
+

 @pytest.mark.parametrize("use_handle", [True, False])
 def test_deploy(serve_instance, use_handle):
@@ -1193,6 +1195,142 @@ def test_http_proxy_request_cancellation(serve_instance):
     assert requests.get(url).text == "2"


+class TestDeployGroup:
+    @serve.deployment
+    def f():
+        return "f reached"
+
+    @serve.deployment
+    def g():
+        return "g reached"
+
+    @serve.deployment
+    class C:
+        async def __call__(self):
+            return "C reached"
+
+    @serve.deployment
+    class D:
+        async def __call__(self):
+            return "D reached"
+
+    def deploy_and_check_responses(
+        self, deployments, responses, blocking=True, client=None
+    ):
+        """
+        Helper function that deploys the list of deployments, calls them with
+        their handles, and checks whether they return the objects in responses.
+        If blocking is False, this function uses a non-blocking deploy and uses
+        the client to wait until the deployments finish deploying.
+        """
+
+        goal_ids = deploy_group(deployments, _blocking=blocking)
+
+        if blocking:
+            assert len(goal_ids) == 0
+        else:
+            assert len(goal_ids) == len(deployments)
+            if client:
+                for id in goal_ids:
+                    client._wait_for_goal(id)
+
+        for deployment, response in zip(deployments, responses):
+            assert ray.get(deployment.get_handle().remote()) == response
+
+    def test_basic_deploy_group(self, serve_instance):
+        """
+        Atomically deploys a group of deployments, including both functions and
+        classes. Checks whether they deploy correctly.
+        """
+
+        deployments = [self.f, self.g, self.C, self.D]
+        responses = ["f reached", "g reached", "C reached", "D reached"]
+
+        self.deploy_and_check_responses(deployments, responses)
+
+    def test_non_blocking_deploy_group(self, serve_instance):
+        """Checks deploy_group's behavior when _blocking=False."""
+
+        deployments = [self.f, self.g, self.C, self.D]
+        responses = ["f reached", "g reached", "C reached", "D reached"]
+        self.deploy_and_check_responses(
+            deployments, responses, blocking=False, client=serve_instance
+        )
+
+    def test_mutual_handles(self, serve_instance):
+        """
+        Atomically deploys a group of deployments that get handles to other
+        deployments in the group inside their __init__ functions. The handle
+        references should fail in a non-atomic deployment. Checks whether the
+        deployments deploy correctly.
+        """
+
+        @serve.deployment
+        class MutualHandles:
+            async def __init__(self, handle_name):
+                self.handle = serve.get_deployment(handle_name).get_handle()
+
+            async def __call__(self, echo: str):
+                return await self.handle.request_echo.remote(echo)
+
+            async def request_echo(self, echo: str):
+                return echo
+
+        names = []
+        for i in range(10):
+            names.append("a" * i)
+
+        deployments = []
+        for idx in range(len(names)):
+            # Each deployment will hold a ServeHandle with the next name in
+            # the list
+            deployment_name = names[idx]
+            handle_name = names[(idx + 1) % len(names)]
+
+            deployments.append(
+                MutualHandles.options(name=deployment_name, init_args=(handle_name,))
+            )
+
+        deploy_group(deployments)
+
+        for deployment in deployments:
+            assert (ray.get(deployment.get_handle().remote("hello"))) == "hello"
+
+    def test_decorated_deployments(self, serve_instance):
+        """
+        Checks deploy_group's behavior when deployments have options set in
+        their @serve.deployment decorator.
+        """
+
+        @serve.deployment(num_replicas=2, max_concurrent_queries=5)
+        class DecoratedClass1:
+            async def __call__(self):
+                return "DecoratedClass1 reached"
+
+        @serve.deployment(num_replicas=4, max_concurrent_queries=2)
+        class DecoratedClass2:
+            async def __call__(self):
+                return "DecoratedClass2 reached"
+
+        deployments = [DecoratedClass1, DecoratedClass2]
+        responses = ["DecoratedClass1 reached", "DecoratedClass2 reached"]
+        self.deploy_and_check_responses(deployments, responses)
+
+    def test_empty_list(self, serve_instance):
+        """Checks deploy_group's behavior when deployment group is empty."""
+
+        self.deploy_and_check_responses([], [])
+
+    def test_invalid_input(self, serve_instance):
+        """
+        Checks deploy_group's behavior when deployment group contains
+        non-Deployment objects.
+        """
+
+        with pytest.raises(TypeError):
+            deploy_group([self.f, self.C, "not a Deployment object"])
+
+
 if __name__ == "__main__":
     import sys