mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
[Serve] Skip flaky test_autoscaling_policy on windows (#25526)
This commit is contained in:
parent
9dc0bb3d5e
commit
a9e7836e8c
1 changed files with 50 additions and 3 deletions
|
@ -170,7 +170,7 @@ def get_deployment_start_time(controller: ServeController, deployment: Deploymen
|
|||
return deployment_info.start_time_ms
|
||||
|
||||
|
||||
@pytest.mark.parametrize("min_replicas", [0, 1])
|
||||
@pytest.mark.parametrize("min_replicas", [1, 2])
|
||||
def test_e2e_basic_scale_up_down(min_replicas, serve_instance):
|
||||
"""Send 100 requests and check that we autoscale up, and then back down."""
|
||||
|
||||
|
@ -180,6 +180,52 @@ def test_e2e_basic_scale_up_down(min_replicas, serve_instance):
|
|||
_autoscaling_config={
|
||||
"metrics_interval_s": 0.1,
|
||||
"min_replicas": min_replicas,
|
||||
"max_replicas": 3,
|
||||
"look_back_period_s": 0.2,
|
||||
"downscale_delay_s": 0,
|
||||
"upscale_delay_s": 0,
|
||||
},
|
||||
# We will send over a lot of queries. This will make sure replicas are
|
||||
# killed quickly during cleanup.
|
||||
_graceful_shutdown_timeout_s=1,
|
||||
max_concurrent_queries=1000,
|
||||
version="v1",
|
||||
)
|
||||
class A:
|
||||
def __call__(self):
|
||||
ray.get(signal.wait.remote())
|
||||
|
||||
A.deploy()
|
||||
|
||||
controller = serve_instance._controller
|
||||
start_time = get_deployment_start_time(controller, A)
|
||||
|
||||
handle = A.get_handle()
|
||||
[handle.remote() for _ in range(100)]
|
||||
|
||||
# scale up one more replica from min_replicas
|
||||
wait_for_condition(
|
||||
lambda: get_num_running_replicas(controller, A) >= min_replicas + 1
|
||||
)
|
||||
signal.send.remote()
|
||||
|
||||
# As the queue is drained, we should scale back down.
|
||||
wait_for_condition(lambda: get_num_running_replicas(controller, A) <= min_replicas)
|
||||
|
||||
# Make sure start time did not change for the deployment
|
||||
assert get_deployment_start_time(controller, A) == start_time
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="Failing on Windows.")
|
||||
def test_e2e_basic_scale_up_down_with_0_replica(serve_instance):
|
||||
"""Send 100 requests and check that we autoscale up, and then back down."""
|
||||
|
||||
signal = SignalActor.remote()
|
||||
|
||||
@serve.deployment(
|
||||
_autoscaling_config={
|
||||
"metrics_interval_s": 0.1,
|
||||
"min_replicas": 0,
|
||||
"max_replicas": 2,
|
||||
"look_back_period_s": 0.2,
|
||||
"downscale_delay_s": 0,
|
||||
|
@ -203,11 +249,12 @@ def test_e2e_basic_scale_up_down(min_replicas, serve_instance):
|
|||
handle = A.get_handle()
|
||||
[handle.remote() for _ in range(100)]
|
||||
|
||||
wait_for_condition(lambda: get_num_running_replicas(controller, A) >= 2)
|
||||
# scale up one more replica from min_replicas
|
||||
wait_for_condition(lambda: get_num_running_replicas(controller, A) >= 1)
|
||||
signal.send.remote()
|
||||
|
||||
# As the queue is drained, we should scale back down.
|
||||
wait_for_condition(lambda: get_num_running_replicas(controller, A) <= min_replicas)
|
||||
wait_for_condition(lambda: get_num_running_replicas(controller, A) <= 0)
|
||||
|
||||
# Make sure start time did not change for the deployment
|
||||
assert get_deployment_start_time(controller, A) == start_time
|
||||
|
|
Loading…
Add table
Reference in a new issue