mirror of
https://github.com/vale981/ray
synced 2025-03-08 19:41:38 -05:00
[serve] Add properties + docstring + test for Deployment class (#15917)
This commit is contained in:
parent
836c739fe5
commit
a116875abc
4 changed files with 135 additions and 55 deletions
|
@ -1056,7 +1056,7 @@ def ingress(app: Union["FastAPI", "APIRouter"]):
|
||||||
|
|
||||||
class Deployment:
|
class Deployment:
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
backend_def: Callable,
|
func_or_class: Callable,
|
||||||
name: str,
|
name: str,
|
||||||
config: BackendConfig,
|
config: BackendConfig,
|
||||||
version: Optional[str] = None,
|
version: Optional[str] = None,
|
||||||
|
@ -1075,7 +1075,7 @@ class Deployment:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
"The Deployment constructor should not be called "
|
"The Deployment constructor should not be called "
|
||||||
"directly. Use `@serve.deployment` instead.")
|
"directly. Use `@serve.deployment` instead.")
|
||||||
if not callable(backend_def):
|
if not callable(func_or_class):
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
"@serve.deployment must be called on a class or function.")
|
"@serve.deployment must be called on a class or function.")
|
||||||
if not isinstance(name, str):
|
if not isinstance(name, str):
|
||||||
|
@ -1101,43 +1101,92 @@ class Deployment:
|
||||||
if init_args is None:
|
if init_args is None:
|
||||||
init_args = ()
|
init_args = ()
|
||||||
|
|
||||||
self.backend_def = backend_def
|
self._func_or_class = func_or_class
|
||||||
self.name = name
|
self._name = name
|
||||||
self.version = version
|
self._version = version
|
||||||
self.config = config
|
self._config = config
|
||||||
self.init_args = init_args
|
self._init_args = init_args
|
||||||
self.route_prefix = route_prefix
|
self._route_prefix = route_prefix
|
||||||
self.ray_actor_options = ray_actor_options
|
self._ray_actor_options = ray_actor_options
|
||||||
|
|
||||||
|
@property
|
||||||
|
def name(self) -> str:
|
||||||
|
"""Unique name of this deployment."""
|
||||||
|
return self._name
|
||||||
|
|
||||||
|
@property
|
||||||
|
def version(self) -> Optional[str]:
|
||||||
|
"""Version of this deployment.
|
||||||
|
|
||||||
|
If None, will be redeployed every time `.deploy()` is called.
|
||||||
|
"""
|
||||||
|
|
||||||
|
return self._version
|
||||||
|
|
||||||
|
@property
|
||||||
|
def func_or_class(self) -> Callable:
|
||||||
|
"""Underlying class or function that this deployment wraps."""
|
||||||
|
return self._func_or_class
|
||||||
|
|
||||||
|
@property
|
||||||
|
def num_replicas(self) -> int:
|
||||||
|
"""Current target number of replicas."""
|
||||||
|
return self._config.num_replicas
|
||||||
|
|
||||||
|
@property
|
||||||
|
def user_config(self) -> Any:
|
||||||
|
"""Current dynamic user-provided config options."""
|
||||||
|
return self._config.user_config
|
||||||
|
|
||||||
|
@property
|
||||||
|
def max_concurrent_queries(self) -> int:
|
||||||
|
"""Current max outstanding queries from each handle."""
|
||||||
|
return self._config.max_concurrent_queries
|
||||||
|
|
||||||
|
@property
|
||||||
|
def route_prefix(self) -> Optional[str]:
|
||||||
|
"""HTTP route prefix that this deployment is exposed under."""
|
||||||
|
return self._route_prefix
|
||||||
|
|
||||||
|
@property
|
||||||
|
def ray_actor_options(self) -> Optional[Dict]:
|
||||||
|
"""Actor options such as resources required for each replica."""
|
||||||
|
return self._ray_actor_options
|
||||||
|
|
||||||
|
@property
|
||||||
|
def init_args(self) -> Tuple[Any]:
|
||||||
|
"""Arguments passed to the underlying class' constructor."""
|
||||||
|
return self._init_args
|
||||||
|
|
||||||
def __call__(self):
|
def __call__(self):
|
||||||
raise RuntimeError("Deployments cannot be constructed directly. "
|
raise RuntimeError("Deployments cannot be constructed directly. "
|
||||||
"Use `deployment.deploy() instead.`")
|
"Use `deployment.deploy() instead.`")
|
||||||
|
|
||||||
def deploy(self, *init_args, _blocking=True):
|
def deploy(self, *init_args, _blocking=True):
|
||||||
"""Deploy this deployment.
|
"""Deploy or update this deployment.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
*init_args (optional): args to pass to the class __init__
|
*init_args (optional): args to pass to the class __init__
|
||||||
method. Not valid if this deployment wraps a function.
|
method. Not valid if this deployment wraps a function.
|
||||||
"""
|
"""
|
||||||
if len(init_args) == 0 and self.init_args is not None:
|
if len(init_args) == 0 and self._init_args is not None:
|
||||||
init_args = self.init_args
|
init_args = self._init_args
|
||||||
|
|
||||||
return _get_global_client().deploy(
|
return _get_global_client().deploy(
|
||||||
self.name,
|
self._name,
|
||||||
self.backend_def,
|
self._func_or_class,
|
||||||
*init_args,
|
*init_args,
|
||||||
ray_actor_options=self.ray_actor_options,
|
ray_actor_options=self._ray_actor_options,
|
||||||
config=self.config,
|
config=self._config,
|
||||||
version=self.version,
|
version=self._version,
|
||||||
route_prefix=self.route_prefix,
|
route_prefix=self._route_prefix,
|
||||||
_blocking=_blocking,
|
_blocking=_blocking,
|
||||||
_internal=True)
|
_internal=True)
|
||||||
|
|
||||||
def delete(self):
|
def delete(self):
|
||||||
"""Delete this deployment."""
|
"""Delete this deployment."""
|
||||||
return _get_global_client().delete_deployment(
|
return _get_global_client().delete_deployment(
|
||||||
self.name, _internal=True)
|
self._name, _internal=True)
|
||||||
|
|
||||||
def get_handle(self, sync: Optional[bool] = True
|
def get_handle(self, sync: Optional[bool] = True
|
||||||
) -> Union[RayServeHandle, RayServeSyncHandle]:
|
) -> Union[RayServeHandle, RayServeSyncHandle]:
|
||||||
|
@ -1153,7 +1202,7 @@ class Deployment:
|
||||||
ServeHandle
|
ServeHandle
|
||||||
"""
|
"""
|
||||||
return _get_global_client().get_handle(
|
return _get_global_client().get_handle(
|
||||||
self.name,
|
self._name,
|
||||||
missing_ok=True,
|
missing_ok=True,
|
||||||
sync=sync,
|
sync=sync,
|
||||||
_internal_use_serve_request=False,
|
_internal_use_serve_request=False,
|
||||||
|
@ -1161,7 +1210,7 @@ class Deployment:
|
||||||
|
|
||||||
def options(
|
def options(
|
||||||
self,
|
self,
|
||||||
backend_def: Optional[Callable] = None,
|
func_or_class: Optional[Callable] = None,
|
||||||
name: Optional[str] = None,
|
name: Optional[str] = None,
|
||||||
version: Optional[str] = None,
|
version: Optional[str] = None,
|
||||||
init_args: Optional[Tuple[Any]] = None,
|
init_args: Optional[Tuple[Any]] = None,
|
||||||
|
@ -1176,7 +1225,7 @@ class Deployment:
|
||||||
Only those options passed in will be updated, all others will remain
|
Only those options passed in will be updated, all others will remain
|
||||||
unchanged from the existing deployment.
|
unchanged from the existing deployment.
|
||||||
"""
|
"""
|
||||||
new_config = self.config.copy()
|
new_config = self._config.copy()
|
||||||
if num_replicas is not None:
|
if num_replicas is not None:
|
||||||
new_config.num_replicas = num_replicas
|
new_config.num_replicas = num_replicas
|
||||||
if user_config is not None:
|
if user_config is not None:
|
||||||
|
@ -1184,29 +1233,29 @@ class Deployment:
|
||||||
if max_concurrent_queries is not None:
|
if max_concurrent_queries is not None:
|
||||||
new_config.max_concurrent_queries = max_concurrent_queries
|
new_config.max_concurrent_queries = max_concurrent_queries
|
||||||
|
|
||||||
if backend_def is None:
|
if func_or_class is None:
|
||||||
backend_def = self.backend_def
|
func_or_class = self._func_or_class
|
||||||
|
|
||||||
if name is None:
|
if name is None:
|
||||||
name = self.name
|
name = self._name
|
||||||
|
|
||||||
if version is None:
|
if version is None:
|
||||||
version = self.version
|
version = self._version
|
||||||
|
|
||||||
if init_args is None:
|
if init_args is None:
|
||||||
init_args = self.init_args
|
init_args = self._init_args
|
||||||
|
|
||||||
if route_prefix is None:
|
if route_prefix is None:
|
||||||
if self.route_prefix == f"/{self.name}":
|
if self._route_prefix == f"/{self._name}":
|
||||||
route_prefix = None
|
route_prefix = None
|
||||||
else:
|
else:
|
||||||
route_prefix = self.route_prefix
|
route_prefix = self._route_prefix
|
||||||
|
|
||||||
if ray_actor_options is None:
|
if ray_actor_options is None:
|
||||||
ray_actor_options = self.ray_actor_options
|
ray_actor_options = self._ray_actor_options
|
||||||
|
|
||||||
return Deployment(
|
return Deployment(
|
||||||
backend_def,
|
func_or_class,
|
||||||
name,
|
name,
|
||||||
new_config,
|
new_config,
|
||||||
version=version,
|
version=version,
|
||||||
|
@ -1218,21 +1267,21 @@ class Deployment:
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return all([
|
return all([
|
||||||
self.name == other.name,
|
self._name == other._name,
|
||||||
self.version == other.version,
|
self._version == other._version,
|
||||||
self.config == other.config,
|
self._config == other._config,
|
||||||
self.init_args == other.init_args,
|
self._init_args == other._init_args,
|
||||||
self.route_prefix == other.route_prefix,
|
self._route_prefix == other._route_prefix,
|
||||||
self.ray_actor_options == self.ray_actor_options,
|
self._ray_actor_options == self._ray_actor_options,
|
||||||
])
|
])
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
if self.route_prefix is None:
|
if self._route_prefix is None:
|
||||||
route_prefix = f"/{self.name}"
|
route_prefix = f"/{self._name}"
|
||||||
else:
|
else:
|
||||||
route_prefix = self.route_prefix
|
route_prefix = self._route_prefix
|
||||||
return (f"Deployment(name={self.name},"
|
return (f"Deployment(name={self._name},"
|
||||||
f"version={self.version},"
|
f"version={self._version},"
|
||||||
f"route_prefix={route_prefix})")
|
f"route_prefix={route_prefix})")
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
@ -1240,7 +1289,7 @@ class Deployment:
|
||||||
|
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
def deployment(backend_def: Callable) -> Deployment:
|
def deployment(func_or_class: Callable) -> Deployment:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@ -1257,7 +1306,7 @@ def deployment(name: Optional[str] = None,
|
||||||
|
|
||||||
|
|
||||||
def deployment(
|
def deployment(
|
||||||
_backend_def: Optional[Callable] = None,
|
_func_or_class: Optional[Callable] = None,
|
||||||
name: Optional[str] = None,
|
name: Optional[str] = None,
|
||||||
version: Optional[str] = None,
|
version: Optional[str] = None,
|
||||||
num_replicas: Optional[int] = None,
|
num_replicas: Optional[int] = None,
|
||||||
|
@ -1322,10 +1371,10 @@ def deployment(
|
||||||
if max_concurrent_queries is not None:
|
if max_concurrent_queries is not None:
|
||||||
config.max_concurrent_queries = max_concurrent_queries
|
config.max_concurrent_queries = max_concurrent_queries
|
||||||
|
|
||||||
def decorator(_backend_def):
|
def decorator(_func_or_class):
|
||||||
return Deployment(
|
return Deployment(
|
||||||
_backend_def,
|
_func_or_class,
|
||||||
name if name is not None else _backend_def.__name__,
|
name if name is not None else _func_or_class.__name__,
|
||||||
config,
|
config,
|
||||||
version=version,
|
version=version,
|
||||||
init_args=init_args,
|
init_args=init_args,
|
||||||
|
@ -1336,7 +1385,7 @@ def deployment(
|
||||||
|
|
||||||
# This handles both parametrized and non-parametrized usage of the
|
# This handles both parametrized and non-parametrized usage of the
|
||||||
# decorator. See the @serve.batch code for more details.
|
# decorator. See the @serve.batch code for more details.
|
||||||
return decorator(_backend_def) if callable(_backend_def) else decorator
|
return decorator(_func_or_class) if callable(_func_or_class) else decorator
|
||||||
|
|
||||||
|
|
||||||
def get_deployment(name: str) -> Deployment:
|
def get_deployment(name: str) -> Deployment:
|
||||||
|
|
|
@ -14,7 +14,7 @@ def test_serve_forceful_shutdown(serve_instance):
|
||||||
while True:
|
while True:
|
||||||
time.sleep(1000)
|
time.sleep(1000)
|
||||||
|
|
||||||
sleeper.config.experimental_graceful_shutdown_timeout_s = 0.1
|
sleeper._config.experimental_graceful_shutdown_timeout_s = 0.1
|
||||||
sleeper.deploy()
|
sleeper.deploy()
|
||||||
|
|
||||||
handle = sleeper.get_handle()
|
handle = sleeper.get_handle()
|
||||||
|
@ -34,8 +34,8 @@ def test_serve_graceful_shutdown(serve_instance):
|
||||||
await signal_actor.wait.remote()
|
await signal_actor.wait.remote()
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
Wait.config.experimental_graceful_shutdown_wait_loop_s = 0.5
|
Wait._config.experimental_graceful_shutdown_wait_loop_s = 0.5
|
||||||
Wait.config.experimental_graceful_shutdown_timeout_s = 1000
|
Wait._config.experimental_graceful_shutdown_timeout_s = 1000
|
||||||
Wait.deploy()
|
Wait.deploy()
|
||||||
handle = Wait.get_handle()
|
handle = Wait.get_handle()
|
||||||
refs = [handle.remote(signal) for _ in range(10)]
|
refs = [handle.remote(signal) for _ in range(10)]
|
||||||
|
|
|
@ -234,7 +234,7 @@ def test_redeploy_single_replica(serve_instance, use_handle):
|
||||||
|
|
||||||
# Redeploy new version. This should not go through until the old version
|
# Redeploy new version. This should not go through until the old version
|
||||||
# replica completely stops.
|
# replica completely stops.
|
||||||
V2 = V1.options(backend_def=V2, version="2")
|
V2 = V1.options(func_or_class=V2, version="2")
|
||||||
goal_ref = V2.deploy(_blocking=False)
|
goal_ref = V2.deploy(_blocking=False)
|
||||||
assert not client._wait_for_goal(goal_ref, timeout=0.1)
|
assert not client._wait_for_goal(goal_ref, timeout=0.1)
|
||||||
|
|
||||||
|
@ -353,7 +353,7 @@ def test_redeploy_multiple_replicas(serve_instance, use_handle):
|
||||||
|
|
||||||
# Redeploy new version. Since there is one replica blocking, only one new
|
# Redeploy new version. Since there is one replica blocking, only one new
|
||||||
# replica should be started up.
|
# replica should be started up.
|
||||||
V2 = V1.options(backend_def=V2, version="2")
|
V2 = V1.options(func_or_class=V2, version="2")
|
||||||
goal_ref = V2.deploy(_blocking=False)
|
goal_ref = V2.deploy(_blocking=False)
|
||||||
assert not client._wait_for_goal(goal_ref, timeout=0.1)
|
assert not client._wait_for_goal(goal_ref, timeout=0.1)
|
||||||
responses3, blocking3 = make_nonblocking_calls(
|
responses3, blocking3 = make_nonblocking_calls(
|
||||||
|
@ -652,6 +652,37 @@ def test_input_validation():
|
||||||
Base.options(max_concurrent_queries=-1)
|
Base.options(max_concurrent_queries=-1)
|
||||||
|
|
||||||
|
|
||||||
|
def test_deployment_properties():
|
||||||
|
class DClass():
|
||||||
|
pass
|
||||||
|
|
||||||
|
D = serve.deployment(
|
||||||
|
name="name",
|
||||||
|
init_args=("hello", 123),
|
||||||
|
version="version",
|
||||||
|
num_replicas=2,
|
||||||
|
user_config="hi",
|
||||||
|
max_concurrent_queries=100,
|
||||||
|
route_prefix="/hello",
|
||||||
|
ray_actor_options={"num_cpus": 2})(DClass)
|
||||||
|
|
||||||
|
assert D.name == "name"
|
||||||
|
assert D.init_args == ("hello", 123)
|
||||||
|
assert D.version == "version"
|
||||||
|
assert D.num_replicas == 2
|
||||||
|
assert D.user_config == "hi"
|
||||||
|
assert D.max_concurrent_queries == 100
|
||||||
|
assert D.route_prefix == "/hello"
|
||||||
|
assert D.ray_actor_options == {"num_cpus": 2}
|
||||||
|
|
||||||
|
D = serve.deployment(
|
||||||
|
version=None,
|
||||||
|
route_prefix=None,
|
||||||
|
)(DClass)
|
||||||
|
assert D.version is None
|
||||||
|
assert D.route_prefix is None
|
||||||
|
|
||||||
|
|
||||||
class TestGetDeployment:
|
class TestGetDeployment:
|
||||||
def get_deployment(self, name, use_list_api):
|
def get_deployment(self, name, use_list_api):
|
||||||
if use_list_api:
|
if use_list_api:
|
||||||
|
|
|
@ -47,7 +47,7 @@ def test_controller_failure(serve_instance):
|
||||||
|
|
||||||
ray.kill(serve.api._global_client._controller, no_restart=False)
|
ray.kill(serve.api._global_client._controller, no_restart=False)
|
||||||
|
|
||||||
function.options(backend_def=function2).deploy()
|
function.options(func_or_class=function2).deploy()
|
||||||
|
|
||||||
def check_controller_failure():
|
def check_controller_failure():
|
||||||
response = request_with_retries("/controller_failure/", timeout=30)
|
response = request_with_retries("/controller_failure/", timeout=30)
|
||||||
|
@ -96,7 +96,7 @@ def test_http_proxy_failure(serve_instance):
|
||||||
def function2(_):
|
def function2(_):
|
||||||
return "hello2"
|
return "hello2"
|
||||||
|
|
||||||
function.options(backend_def=function2).deploy()
|
function.options(func_or_class=function2).deploy()
|
||||||
|
|
||||||
def check_new():
|
def check_new():
|
||||||
for _ in range(10):
|
for _ in range(10):
|
||||||
|
|
Loading…
Add table
Reference in a new issue