mirror of
https://github.com/vale981/ray
synced 2025-03-06 02:21:39 -05:00
[serve] Handle None
in ReplicaConfig
's resource_dict
(#23851)
This change sets `"memory"`'s default to `0` in the `resource_dict` but keeps the default as `None` in `ray_actor_options`. It adds logic to both problematic lines to handle `None` in case of future settings updates. It also adds unit tests to prevent regressions.
This commit is contained in:
parent
a940e643db
commit
fab33fa730
6 changed files with 51 additions and 8 deletions
|
@ -252,7 +252,7 @@ py_test(
|
|||
|
||||
py_test(
|
||||
name = "test_standalone2",
|
||||
size = "medium",
|
||||
size = "large",
|
||||
srcs = serve_tests_srcs,
|
||||
tags = ["exclusive", "team:serve"],
|
||||
deps = [":serve_lib"],
|
||||
|
|
|
@ -297,7 +297,7 @@ class ReplicaConfig:
|
|||
|
||||
# TODO(suquark): reuse options validation of remote function/actor.
|
||||
# Ray defaults to zero CPUs for placement, we default to one here.
|
||||
if self.ray_actor_options.get("num_cpus", None) is None:
|
||||
if self.ray_actor_options.get("num_cpus") is None:
|
||||
self.ray_actor_options["num_cpus"] = 1
|
||||
num_cpus = self.ray_actor_options["num_cpus"]
|
||||
if not isinstance(num_cpus, (int, float)):
|
||||
|
@ -306,7 +306,7 @@ class ReplicaConfig:
|
|||
raise ValueError("num_cpus in ray_actor_options must be >= 0.")
|
||||
self.resource_dict["CPU"] = num_cpus
|
||||
|
||||
if self.ray_actor_options.get("num_gpus", None) is None:
|
||||
if self.ray_actor_options.get("num_gpus") is None:
|
||||
self.ray_actor_options["num_gpus"] = 0
|
||||
num_gpus = self.ray_actor_options["num_gpus"]
|
||||
if not isinstance(num_gpus, (int, float)):
|
||||
|
@ -315,6 +315,7 @@ class ReplicaConfig:
|
|||
raise ValueError("num_gpus in ray_actor_options must be >= 0.")
|
||||
self.resource_dict["GPU"] = num_gpus
|
||||
|
||||
# Serve deployments use Ray's default for actor memory.
|
||||
self.ray_actor_options.setdefault("memory", None)
|
||||
memory = self.ray_actor_options["memory"]
|
||||
if memory is not None and not isinstance(memory, (int, float)):
|
||||
|
@ -335,7 +336,7 @@ class ReplicaConfig:
|
|||
raise ValueError("object_store_memory in ray_actor_options must be >= 0.")
|
||||
self.resource_dict["object_store_memory"] = object_store_memory
|
||||
|
||||
if self.ray_actor_options.get("resources", None) is None:
|
||||
if self.ray_actor_options.get("resources") is None:
|
||||
self.ray_actor_options["resources"] = {}
|
||||
custom_resources = self.ray_actor_options["resources"]
|
||||
if not isinstance(custom_resources, dict):
|
||||
|
|
|
@ -267,9 +267,11 @@ class ActorReplicaWrapper:
|
|||
)
|
||||
|
||||
self._actor_resources = deployment_info.replica_config.resource_dict
|
||||
# it is currently not possiible to create a placement group
|
||||
# it is currently not possible to create a placement group
|
||||
# with no resources (https://github.com/ray-project/ray/issues/20401)
|
||||
has_resources_assigned = all((r > 0 for r in self._actor_resources.values()))
|
||||
has_resources_assigned = all(
|
||||
(r is not None and r > 0 for r in self._actor_resources.values())
|
||||
)
|
||||
if USE_PLACEMENT_GROUP and has_resources_assigned:
|
||||
self._placement_group = self.create_placement_group(
|
||||
self._placement_group_name, self._actor_resources
|
||||
|
@ -752,7 +754,11 @@ class DeploymentReplica(VersionedReplica):
|
|||
if self._actor.actor_resources is None:
|
||||
return "UNKNOWN", "UNKNOWN"
|
||||
|
||||
required = {k: v for k, v in self._actor.actor_resources.items() if v > 0}
|
||||
required = {
|
||||
k: v
|
||||
for k, v in self._actor.actor_resources.items()
|
||||
if v is not None and v > 0
|
||||
}
|
||||
available = {
|
||||
k: v for k, v in self._actor.available_resources.items() if k in required
|
||||
}
|
||||
|
|
|
@ -184,7 +184,12 @@ def test_replica_config_validation():
|
|||
|
||||
@pytest.mark.parametrize(
|
||||
"memory_omitted_options",
|
||||
[None, {}, {"CPU": 1, "GPU": 3}, {"CPU": 1, "GPU": 3, "memory": None}],
|
||||
[
|
||||
None,
|
||||
{},
|
||||
{"num_cpus": 1, "num_gpus": 3},
|
||||
{"num_cpus": 1, "num_gpus": 3, "memory": None},
|
||||
],
|
||||
)
|
||||
def test_replica_config_default_memory_none(memory_omitted_options):
|
||||
"""Checks that ReplicaConfig's default memory is None."""
|
||||
|
|
|
@ -21,6 +21,7 @@ from ray.serve.deployment_state import (
|
|||
DeploymentState,
|
||||
DeploymentStateManager,
|
||||
DeploymentVersion,
|
||||
DeploymentReplica,
|
||||
ReplicaStartupStatus,
|
||||
ReplicaState,
|
||||
ReplicaStateContainer,
|
||||
|
@ -2086,5 +2087,21 @@ def test_stopping_replicas_ranking():
|
|||
compare([2, 2, 3, 3], [2, 2, 3, 3]) # if equal, ordering should be kept
|
||||
|
||||
|
||||
def test_resource_requirements_none(mock_deployment_state):
|
||||
"""Ensure resource_requirements doesn't break if a requirement is None"""
|
||||
|
||||
class FakeActor:
|
||||
|
||||
actor_resources = {"num_cpus": 2, "fake": None}
|
||||
available_resources = {}
|
||||
|
||||
# Make a DeploymentReplica just to accesss its resource_requirement function
|
||||
replica = DeploymentReplica(None, None, None, None, None)
|
||||
replica._actor = FakeActor()
|
||||
|
||||
# resource_requirements() should not error
|
||||
replica.resource_requirements()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(pytest.main(["-v", "-s", __file__]))
|
||||
|
|
|
@ -52,6 +52,20 @@ def test_standalone_actor_outside_serve():
|
|||
ray.shutdown()
|
||||
|
||||
|
||||
def test_memory_omitted_option(ray_shutdown):
|
||||
"""Ensure that omitting memory doesn't break the deployment."""
|
||||
|
||||
@serve.deployment(ray_actor_options={"num_cpus": 1, "num_gpus": 1})
|
||||
def hello(*args, **kwargs):
|
||||
return "world"
|
||||
|
||||
ray.init(num_gpus=3, namespace="serve")
|
||||
serve.start()
|
||||
hello.deploy()
|
||||
|
||||
assert ray.get(hello.get_handle().remote()) == "world"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("detached", [True, False])
|
||||
def test_override_namespace(shutdown_ray, detached):
|
||||
"""Test the _override_controller_namespace flag in serve.start()."""
|
||||
|
|
Loading…
Add table
Reference in a new issue