Add Pydantic validation to Serve AutoscalingConfig class (#20779)

This commit is contained in:
shrekris-anyscale 2021-12-06 11:51:02 -08:00 committed by GitHub
parent b9a418352b
commit 2d58664f98
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 41 additions and 10 deletions

View file

@ -5,7 +5,8 @@ from typing import Any, Callable, Dict, List, Optional, Tuple
import pydantic
from google.protobuf.json_format import MessageToDict
from pydantic import BaseModel, NonNegativeFloat, PositiveInt, validator
from pydantic import (BaseModel, NonNegativeFloat, PositiveFloat,
NonNegativeInt, PositiveInt, validator)
from ray.serve.constants import DEFAULT_HTTP_HOST, DEFAULT_HTTP_PORT
from ray.serve.generated.serve_pb2 import (
DeploymentConfig as DeploymentConfigProto, AutoscalingConfig as
@ -20,30 +21,38 @@ class AutoscalingConfig(BaseModel):
# `src/ray/protobuf/serve.proto`.
# Publicly exposed options
min_replicas: int = 1
max_replicas: int = 1
target_num_ongoing_requests_per_replica: int = 1
min_replicas: NonNegativeInt = 1
max_replicas: PositiveInt = 1
target_num_ongoing_requests_per_replica: NonNegativeInt = 1
# Private options below.
# Metrics scraping options
# How often to scrape for metrics
metrics_interval_s: float = 10.0
metrics_interval_s: PositiveFloat = 10.0
# Time window to average over for metrics.
look_back_period_s: float = 30.0
look_back_period_s: PositiveFloat = 30.0
# Internal autoscaling configuration options
# Multiplicative "gain" factor to limit scaling decisions
smoothing_factor: float = 1.0
smoothing_factor: PositiveFloat = 1.0
# How frequently to make autoscaling decisions
# loop_period_s: float = CONTROL_LOOP_PERIOD_S
# How long to wait before scaling down replicas
downscale_delay_s: float = 600.0
downscale_delay_s: NonNegativeFloat = 600.0
# How long to wait before scaling up replicas
upscale_delay_s: float = 30.0
upscale_delay_s: NonNegativeFloat = 30.0
@validator("max_replicas")
def max_replicas_greater_than_or_equal_to_min_replicas(cls, v, values):
    """Ensure max_replicas is not smaller than min_replicas.

    Args:
        v: The candidate max_replicas value.
        values: Mapping of other field values; consulted for
            "min_replicas".

    Returns:
        v, unchanged, when the check passes or cannot be performed.

    Raises:
        ValueError: If min_replicas is available and v is below it.
    """
    # Nothing to compare against when min_replicas is missing from
    # `values` (presumably because it failed its own validation --
    # confirm against pydantic's validator semantics).
    if "min_replicas" not in values:
        return v

    lower_bound = values["min_replicas"]
    if v < lower_bound:
        raise ValueError(f"max_replicas ({v}) must be greater than "
                         f"or equal to min_replicas "
                         f"({lower_bound})!")
    return v
# TODO(architkulkarni): implement below
# The number of replicas to start with when creating the deployment
@ -53,7 +62,6 @@ class AutoscalingConfig(BaseModel):
# panic_mode_threshold: float = 2.0
# TODO(architkulkarni): Add reasonable defaults
# TODO(architkulkarni): Add pydantic validation. E.g. max_replicas>=min
class DeploymentConfig(BaseModel):

View file

@ -6,6 +6,29 @@ from ray.serve.config import (DeploymentConfig, DeploymentMode, HTTPOptions,
from ray.serve.config import AutoscalingConfig
def test_autoscaling_config_validation():
    """Check that AutoscalingConfig rejects invalid publicly exposed options.

    Each invalid construction is expected to raise pydantic's
    ValidationError; valid boundary and default constructions must not
    raise.
    """
    # Check validation over publicly exposed options
    with pytest.raises(ValidationError):
        # min_replicas must be nonnegative
        AutoscalingConfig(min_replicas=-1)

    with pytest.raises(ValidationError):
        # max_replicas must be positive
        AutoscalingConfig(max_replicas=0)

    with pytest.raises(ValidationError):
        # target_num_ongoing_requests_per_replica must be nonnegative
        AutoscalingConfig(target_num_ongoing_requests_per_replica=-1)

    # The ValueError raised inside the max_replicas validator surfaces to
    # the caller as a ValidationError, so assert on that type like the
    # other cases instead of the broader ValueError.
    with pytest.raises(ValidationError):
        # max_replicas must be greater than or equal to min_replicas
        AutoscalingConfig(min_replicas=100, max_replicas=1)

    # Boundary: max_replicas equal to min_replicas is allowed
    AutoscalingConfig(min_replicas=5, max_replicas=5)

    # Default values should not raise an error
    AutoscalingConfig()
def test_deployment_config_validation():
# Test unknown key.
with pytest.raises(ValidationError):