mirror of
https://github.com/vale981/ray
synced 2025-03-07 02:51:39 -05:00
79 lines
1.7 KiB
Python
79 lines
1.7 KiB
Python
from ray import serve
|
|
import time
|
|
import os
|
|
|
|
|
|
# __updating_a_deployment_start__
|
|
@serve.deployment(
    num_replicas=1,
    name="my_deployment",
)
class SimpleDeployment:
    """Empty placeholder deployment named "my_deployment", one replica."""
|
|
|
|
|
|
# Deploying the bound application creates the one initial replica.
initial_app = SimpleDeployment.bind()
serve.run(initial_app)
|
|
|
|
|
|
# Redeploying under the same name with num_replicas=2 creates an additional
# replica. This could be the SAME Python script, edited and run again.
@serve.deployment(
    num_replicas=2,
    name="my_deployment",
)
class SimpleDeployment:
    """Empty placeholder deployment named "my_deployment", two replicas."""
|
|
|
|
|
|
redeployed_app = SimpleDeployment.bind()
serve.run(redeployed_app)

# Deployment.options() changes settings without redefining the class, which
# is handy when deployments are updated programmatically.
rescaled_deployment = SimpleDeployment.options(num_replicas=2)
serve.run(rescaled_deployment.bind())
|
|
# __updating_a_deployment_end__
|
|
|
|
|
|
# __scaling_out_start__
|
|
# Start out with exactly one replica.
@serve.deployment(num_replicas=1)
def func(*args):
    """Do nothing: accept any positional arguments and return None."""
    return None
|
|
|
|
|
|
single_replica_app = func.bind()
serve.run(single_replica_app)

# Redeploy with three replicas to scale up.
scaled_up = func.options(num_replicas=3)
serve.run(scaled_up.bind())

# Redeploy with one replica to scale back down.
scaled_down = func.options(num_replicas=1)
serve.run(scaled_down.bind())
|
|
# __scaling_out_end__
|
|
|
|
|
|
# __autoscaling_start__
|
|
# NOTE(review): newer Ray Serve releases appear to name the last key
# "target_ongoing_requests" -- confirm against the Ray version in use.
@serve.deployment(
    autoscaling_config={
        "min_replicas": 1,
        "max_replicas": 5,
        "target_num_ongoing_requests_per_replica": 10,
    },
)
def func(_):
    """Sleep for one second, then return an empty string.

    The single positional argument is ignored.
    """
    pause_seconds = 1
    time.sleep(pause_seconds)
    empty_response = ""
    return empty_response
|
|
|
|
|
|
# Once deployed, the func deployment autoscales based on request demand.
autoscaling_app = func.bind()
serve.run(autoscaling_app)
|
|
# __autoscaling_end__
|
|
|
|
|
|
# __configure_parallism_start__
|
|
@serve.deployment
class MyDeployment:
    """Deployment that sets OMP_NUM_THREADS when it is constructed."""

    def __init__(self, parallelism: str):
        """Store ``parallelism`` in the OMP_NUM_THREADS environment variable.

        Args:
            parallelism: Value written to the variable; environment values
                must be strings.
        """
        os.environ.update({"OMP_NUM_THREADS": parallelism})
        # Download model weights, initialize model, etc.

    def __call__(self):
        """Handle a call; this example does no work and returns None."""
        return None
|
|
|
|
|
|
# "12" is the argument bound for MyDeployment's parallelism parameter.
parallel_app = MyDeployment.bind("12")
serve.run(parallel_app)
|
|
# __configure_parallism_end__
|