mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
[Serve] Skip flaky test_autoscaling_policy on windows (#25526)
This commit is contained in:
parent
9dc0bb3d5e
commit
a9e7836e8c
1 changed files with 50 additions and 3 deletions
|
@ -170,7 +170,7 @@ def get_deployment_start_time(controller: ServeController, deployment: Deploymen
|
||||||
return deployment_info.start_time_ms
|
return deployment_info.start_time_ms
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("min_replicas", [0, 1])
|
@pytest.mark.parametrize("min_replicas", [1, 2])
|
||||||
def test_e2e_basic_scale_up_down(min_replicas, serve_instance):
|
def test_e2e_basic_scale_up_down(min_replicas, serve_instance):
|
||||||
"""Send 100 requests and check that we autoscale up, and then back down."""
|
"""Send 100 requests and check that we autoscale up, and then back down."""
|
||||||
|
|
||||||
|
@ -180,6 +180,52 @@ def test_e2e_basic_scale_up_down(min_replicas, serve_instance):
|
||||||
_autoscaling_config={
|
_autoscaling_config={
|
||||||
"metrics_interval_s": 0.1,
|
"metrics_interval_s": 0.1,
|
||||||
"min_replicas": min_replicas,
|
"min_replicas": min_replicas,
|
||||||
|
"max_replicas": 3,
|
||||||
|
"look_back_period_s": 0.2,
|
||||||
|
"downscale_delay_s": 0,
|
||||||
|
"upscale_delay_s": 0,
|
||||||
|
},
|
||||||
|
# We will send over a lot of queries. This will make sure replicas are
|
||||||
|
# killed quickly during cleanup.
|
||||||
|
_graceful_shutdown_timeout_s=1,
|
||||||
|
max_concurrent_queries=1000,
|
||||||
|
version="v1",
|
||||||
|
)
|
||||||
|
class A:
|
||||||
|
def __call__(self):
|
||||||
|
ray.get(signal.wait.remote())
|
||||||
|
|
||||||
|
A.deploy()
|
||||||
|
|
||||||
|
controller = serve_instance._controller
|
||||||
|
start_time = get_deployment_start_time(controller, A)
|
||||||
|
|
||||||
|
handle = A.get_handle()
|
||||||
|
[handle.remote() for _ in range(100)]
|
||||||
|
|
||||||
|
# scale up one more replica from min_replicas
|
||||||
|
wait_for_condition(
|
||||||
|
lambda: get_num_running_replicas(controller, A) >= min_replicas + 1
|
||||||
|
)
|
||||||
|
signal.send.remote()
|
||||||
|
|
||||||
|
# As the queue is drained, we should scale back down.
|
||||||
|
wait_for_condition(lambda: get_num_running_replicas(controller, A) <= min_replicas)
|
||||||
|
|
||||||
|
# Make sure start time did not change for the deployment
|
||||||
|
assert get_deployment_start_time(controller, A) == start_time
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.skipif(sys.platform == "win32", reason="Failing on Windows.")
|
||||||
|
def test_e2e_basic_scale_up_down_with_0_replica(serve_instance):
|
||||||
|
"""Send 100 requests and check that we autoscale up, and then back down."""
|
||||||
|
|
||||||
|
signal = SignalActor.remote()
|
||||||
|
|
||||||
|
@serve.deployment(
|
||||||
|
_autoscaling_config={
|
||||||
|
"metrics_interval_s": 0.1,
|
||||||
|
"min_replicas": 0,
|
||||||
"max_replicas": 2,
|
"max_replicas": 2,
|
||||||
"look_back_period_s": 0.2,
|
"look_back_period_s": 0.2,
|
||||||
"downscale_delay_s": 0,
|
"downscale_delay_s": 0,
|
||||||
|
@ -203,11 +249,12 @@ def test_e2e_basic_scale_up_down(min_replicas, serve_instance):
|
||||||
handle = A.get_handle()
|
handle = A.get_handle()
|
||||||
[handle.remote() for _ in range(100)]
|
[handle.remote() for _ in range(100)]
|
||||||
|
|
||||||
wait_for_condition(lambda: get_num_running_replicas(controller, A) >= 2)
|
# scale up one more replica from min_replicas
|
||||||
|
wait_for_condition(lambda: get_num_running_replicas(controller, A) >= 1)
|
||||||
signal.send.remote()
|
signal.send.remote()
|
||||||
|
|
||||||
# As the queue is drained, we should scale back down.
|
# As the queue is drained, we should scale back down.
|
||||||
wait_for_condition(lambda: get_num_running_replicas(controller, A) <= min_replicas)
|
wait_for_condition(lambda: get_num_running_replicas(controller, A) <= 0)
|
||||||
|
|
||||||
# Make sure start time did not change for the deployment
|
# Make sure start time did not change for the deployment
|
||||||
assert get_deployment_start_time(controller, A) == start_time
|
assert get_deployment_start_time(controller, A) == start_time
|
||||||
|
|
Loading…
Add table
Reference in a new issue