[Serve] Skip flaky test_autoscaling_policy on windows (#25526)

This commit is contained in:
Sihan Wang 2022-06-08 10:33:40 -07:00 committed by GitHub
parent 9dc0bb3d5e
commit a9e7836e8c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -170,7 +170,7 @@ def get_deployment_start_time(controller: ServeController, deployment: Deploymen
return deployment_info.start_time_ms return deployment_info.start_time_ms
@pytest.mark.parametrize("min_replicas", [0, 1]) @pytest.mark.parametrize("min_replicas", [1, 2])
def test_e2e_basic_scale_up_down(min_replicas, serve_instance): def test_e2e_basic_scale_up_down(min_replicas, serve_instance):
"""Send 100 requests and check that we autoscale up, and then back down.""" """Send 100 requests and check that we autoscale up, and then back down."""
@ -180,6 +180,52 @@ def test_e2e_basic_scale_up_down(min_replicas, serve_instance):
_autoscaling_config={ _autoscaling_config={
"metrics_interval_s": 0.1, "metrics_interval_s": 0.1,
"min_replicas": min_replicas, "min_replicas": min_replicas,
"max_replicas": 3,
"look_back_period_s": 0.2,
"downscale_delay_s": 0,
"upscale_delay_s": 0,
},
# We will send over a lot of queries. This will make sure replicas are
# killed quickly during cleanup.
_graceful_shutdown_timeout_s=1,
max_concurrent_queries=1000,
version="v1",
)
class A:
def __call__(self):
ray.get(signal.wait.remote())
A.deploy()
controller = serve_instance._controller
start_time = get_deployment_start_time(controller, A)
handle = A.get_handle()
[handle.remote() for _ in range(100)]
# scale up one more replica from min_replicas
wait_for_condition(
lambda: get_num_running_replicas(controller, A) >= min_replicas + 1
)
signal.send.remote()
# As the queue is drained, we should scale back down.
wait_for_condition(lambda: get_num_running_replicas(controller, A) <= min_replicas)
# Make sure start time did not change for the deployment
assert get_deployment_start_time(controller, A) == start_time
@pytest.mark.skipif(sys.platform == "win32", reason="Failing on Windows.")
def test_e2e_basic_scale_up_down_with_0_replica(serve_instance):
"""Send 100 requests and check that we autoscale up, and then back down."""
signal = SignalActor.remote()
@serve.deployment(
_autoscaling_config={
"metrics_interval_s": 0.1,
"min_replicas": 0,
"max_replicas": 2, "max_replicas": 2,
"look_back_period_s": 0.2, "look_back_period_s": 0.2,
"downscale_delay_s": 0, "downscale_delay_s": 0,
@ -203,11 +249,12 @@ def test_e2e_basic_scale_up_down(min_replicas, serve_instance):
handle = A.get_handle() handle = A.get_handle()
[handle.remote() for _ in range(100)] [handle.remote() for _ in range(100)]
wait_for_condition(lambda: get_num_running_replicas(controller, A) >= 2) # scale up one more replica from min_replicas
wait_for_condition(lambda: get_num_running_replicas(controller, A) >= 1)
signal.send.remote() signal.send.remote()
# As the queue is drained, we should scale back down. # As the queue is drained, we should scale back down.
wait_for_condition(lambda: get_num_running_replicas(controller, A) <= min_replicas) wait_for_condition(lambda: get_num_running_replicas(controller, A) <= 0)
# Make sure start time did not change for the deployment # Make sure start time did not change for the deployment
assert get_deployment_start_time(controller, A) == start_time assert get_deployment_start_time(controller, A) == start_time