mirror of https://github.com/vale981/ray, synced 2025-03-05 18:11:42 -05:00
[DOC] Added a full example how to access deployments (#22401)
parent 6eb805b357
commit 0246f3532e
7 changed files with 298 additions and 1 deletion
@@ -110,6 +110,7 @@ py_test(
    tags = ["exclusive", "pytorch", "team:ml"]
)

# Ray Serve
py_test(
    name = "doc_code_metrics_example",
    size = "small",
@@ -126,6 +127,14 @@ py_test(
    tags = ["exclusive", "post_wheel_build", "team:serve"]
)

py_test(
    name = "doc_code_create_deployment_example",
    size = "small",
    main = "create_deployment.py",
    srcs = ["//doc/source/serve/doc_code:serve_doc_code"],
    tags = ["exclusive", "post_wheel_build", "team:serve"]
)

# --------------------------------------------------------------------
# Test all doc/source/tune/examples notebooks.
# --------------------------------------------------------------------
doc/source/serve/_examples/doc_code/create_deployment.py (new file, 96 lines)
@@ -0,0 +1,96 @@
# flake8: noqa
# fmt: off
#
# __serve_example_begin__
#
# This brief example shows how to create and deploy models with the
# simple Ray Serve deployment APIs, and how to access the deployments.
# Once deployed, you can access a deployment via two methods:
# the ServeHandle API and HTTP.
#
import os
from random import random

import requests
from starlette.requests import Request

import ray
from ray import serve

#
# A simple example model, stored in a pickled format at an accessible path,
# that can be reloaded and deserialized into a model instance. Once deployed
# in Ray Serve, we can use it for prediction. The prediction is faked:
# any value above the threshold of 0.5 gets a random increment.
#


class Model:
    def __init__(self, path):
        self.path = path

    def predict(self, data):
        return random() + data if data > 0.5 else data


@serve.deployment
class Deployment:
    # Take in a path from which to load your desired model.
    def __init__(self, path: str) -> None:
        self.path = path
        self.model = Model(path)
        # Get the pid of the process in which this deployment replica runs.
        self.pid = os.getpid()

    # Deployments are callable. Here we simply return a prediction
    # for the request.
    def __call__(self, starlette_request) -> str:
        if isinstance(starlette_request, Request):
            # Request came in via HTTP.
            data = starlette_request.query_params['data']
        else:
            # Request came in via a ServeHandle API method call.
            data = starlette_request
        pred = self.model.predict(float(data))
        return f"(pid: {self.pid}); path: {self.path}; data: {float(data):.3f}; prediction: {pred:.3f}"


if __name__ == '__main__':

    # Start a Ray Serve instance. This will automatically start
    # or connect to an existing Ray cluster.
    serve.start()

    # Create two distinct deployments of the same class, each with
    # two replicas. Associate each deployment with a unique name,
    # which can be used to fetch its respective ServeHandle.
    # See the code below for method 1.
    Deployment.options(name="rep-1", num_replicas=2).deploy("/model/rep-1.pkl")
    Deployment.options(name="rep-2", num_replicas=2).deploy("/model/rep-2.pkl")

    # Get the current list of deployments.
    print(serve.list_deployments())

    print("ServeHandle API responses: " + "--" * 5)

    # Method 1) Access each deployment using the ServeHandle API.
    for _ in range(2):
        for d_name in ["rep-1", "rep-2"]:
            # Get a handle to each deployment and invoke its method.
            # Which replica the request is dispatched to is determined
            # by the Router actor.
            handle = serve.get_deployment(d_name).get_handle()
            print(f"handle name : {d_name}")
            print(f"prediction : {ray.get(handle.remote(random()))}")
            print("-" * 2)

    print("HTTP responses: " + "--" * 5)

    # Method 2) Access each deployment via an HTTP request.
    for _ in range(2):
        for d_name in ["rep-1", "rep-2"]:
            # Send an HTTP request along with the data payload.
            url = f"http://127.0.0.1:8000/{d_name}"
            print(f"handle name : {d_name}")
            print(f"prediction : {requests.get(url, params={'data': random()}).text}")
# __serve_example_end__
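
Note that the Model class above fakes the loading step its comments describe. A
version that actually reloads a pickled estimator from the given path might look
like the following sketch (it assumes the file holds a pickle of any object
exposing a predict method; the class name PickledModel is illustrative):

    import pickle

    class PickledModel:
        def __init__(self, path):
            self.path = path
            # Reload and deserialize the model instance from disk.
            with open(path, "rb") as f:
                self.model = pickle.load(f)

        def predict(self, data):
            # Delegate to the deserialized model's own predict method.
            return self.model.predict(data)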
@@ -99,6 +99,50 @@ We can also query the deployment using the :mod:`ServeHandle <ray.serve.handle.RayServeHandle>`

    print(ray.get(handle.remote()))

As noted above, there are two ways to expose deployments. The first is the
:mod:`ServeHandle <ray.serve.handle.RayServeHandle>` interface, which lets you access
deployments from within a Python script or program, making it convenient for Python
developers. The second is HTTP, which makes deployments accessible to web client
applications.

Let's look at a simple end-to-end example that exposes and accesses deployments both
ways. Your output may vary due to the random nature of how the prediction is computed;
however, the example illustrates two things: 1) how to expose and use deployments, and
2) how replicas work, i.e., to which replica each request is dispatched. Note that each
pid identifies a separate replica of the deployment named ``rep-1`` or ``rep-2``,
respectively.

.. literalinclude:: _examples/doc_code/create_deployment.py
    :language: python
    :start-after: __serve_example_begin__
    :end-before: __serve_example_end__

.. code-block:: python

    # Output:
    # {'rep-1': Deployment(name=rep-1,version=None,route_prefix=/rep-1),
    # 'rep-2': Deployment(name=rep-2,version=None,route_prefix=/rep-2)}
    #
    # ServeHandle API responses: ----------
    # handle name : rep-1
    # prediction : (pid: 62636); path: /model/rep-1.pkl; data: 0.600; prediction: 1.292
    # --
    # handle name : rep-2
    # prediction : (pid: 62635); path: /model/rep-2.pkl; data: 0.075; prediction: 0.075
    # --
    # handle name : rep-1
    # prediction : (pid: 62634); path: /model/rep-1.pkl; data: 0.186; prediction: 0.186
    # --
    # handle name : rep-2
    # prediction : (pid: 62637); path: /model/rep-2.pkl; data: 0.751; prediction: 1.444
    # --
    # HTTP responses: ----------
    # handle name : rep-1
    # prediction : (pid: 62636); path: /model/rep-1.pkl; data: 0.582; prediction: 1.481
    # handle name : rep-2
    # prediction : (pid: 62637); path: /model/rep-2.pkl; data: 0.778; prediction: 1.678
    # handle name : rep-1
    # prediction : (pid: 62634); path: /model/rep-1.pkl; data: 0.139; prediction: 0.139
    # handle name : rep-2
    # prediction : (pid: 62635); path: /model/rep-2.pkl; data: 0.569; prediction: 1.262
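
Stripped to their essentials, the two access methods reduce to a handle call and an
HTTP request. The following is a minimal sketch (it assumes a deployment named
``rep-1`` has already been deployed as above, and that Serve's HTTP proxy is
listening on the default port 8000):

.. code-block:: python

    import requests
    import ray
    from ray import serve

    # Method 1: ServeHandle -- query the deployment from within Python.
    handle = serve.get_deployment("rep-1").get_handle()
    print(ray.get(handle.remote(0.42)))

    # Method 2: HTTP -- query the deployment from any web client.
    print(requests.get("http://127.0.0.1:8000/rep-1", params={"data": 0.42}).text)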

Updating a Deployment
=====================

doc/source/serve/doc_code/BUILD (new file, 9 lines)
@@ -0,0 +1,9 @@
py_library(
    name = "serve_doc_code",
    srcs = glob(["**/*.py"]),
    visibility = [
        "//doc:__subpackages__",
        "//python/ray/serve:__subpackages__",
        "//python/ray/serve:__pkg__"
    ],
)
doc/source/serve/doc_code/create_deployment.py (new file, 96 lines)
@@ -0,0 +1,96 @@
# flake8: noqa
# fmt: off
#
# __serve_example_begin__
#
# This brief example shows how to create and deploy models with the
# simple Ray Serve deployment APIs, and how to access the deployments.
# Once deployed, you can access a deployment via two methods:
# the ServeHandle API and HTTP.
#
import os
from random import random

import requests
from starlette.requests import Request

import ray
from ray import serve

#
# A simple example model, stored in a pickled format at an accessible path,
# that can be reloaded and deserialized into a model instance. Once deployed
# in Ray Serve, we can use it for prediction. The prediction is faked:
# any value above the threshold of 0.5 gets a random increment.
#


class Model:
    def __init__(self, path):
        self.path = path

    def predict(self, data):
        return random() + data if data > 0.5 else data


@serve.deployment
class Deployment:
    # Take in a path from which to load your desired model.
    def __init__(self, path: str) -> None:
        self.path = path
        self.model = Model(path)
        # Get the pid of the process in which this deployment replica runs.
        self.pid = os.getpid()

    # Deployments are callable. Here we simply return a prediction
    # for the request.
    def __call__(self, starlette_request) -> str:
        if isinstance(starlette_request, Request):
            # Request came in via HTTP.
            data = starlette_request.query_params['data']
        else:
            # Request came in via a ServeHandle API method call.
            data = starlette_request
        pred = self.model.predict(float(data))
        return f"(pid: {self.pid}); path: {self.path}; data: {float(data):.3f}; prediction: {pred:.3f}"


if __name__ == '__main__':

    # Start a Ray Serve instance. This will automatically start
    # or connect to an existing Ray cluster.
    serve.start()

    # Create two distinct deployments of the same class, each with
    # two replicas. Associate each deployment with a unique name,
    # which can be used to fetch its respective ServeHandle.
    # See the code below for method 1.
    Deployment.options(name="rep-1", num_replicas=2).deploy("/model/rep-1.pkl")
    Deployment.options(name="rep-2", num_replicas=2).deploy("/model/rep-2.pkl")

    # Get the current list of deployments.
    print(serve.list_deployments())

    print("ServeHandle API responses: " + "--" * 5)

    # Method 1) Access each deployment using the ServeHandle API.
    for _ in range(2):
        for d_name in ["rep-1", "rep-2"]:
            # Get a handle to each deployment and invoke its method.
            # Which replica the request is dispatched to is determined
            # by the Router actor.
            handle = serve.get_deployment(d_name).get_handle()
            print(f"handle name : {d_name}")
            print(f"prediction : {ray.get(handle.remote(random()))}")
            print("-" * 2)

    print("HTTP responses: " + "--" * 5)

    # Method 2) Access each deployment via an HTTP request.
    for _ in range(2):
        for d_name in ["rep-1", "rep-2"]:
            # Send an HTTP request along with the data payload.
            url = f"http://127.0.0.1:8000/{d_name}"
            print(f"handle name : {d_name}")
            print(f"prediction : {requests.get(url, params={'data': random()}).text}")
# __serve_example_end__
doc/source/serve/doc_code/quick_start.py (new file, 43 lines)
@@ -0,0 +1,43 @@
# flake8: noqa
# fmt: off

# __serve_example_begin__
import requests

from sklearn.datasets import load_iris
from sklearn.ensemble import GradientBoostingClassifier

from ray import serve

serve.start()

# Train model.
iris_dataset = load_iris()
model = GradientBoostingClassifier()
model.fit(iris_dataset["data"], iris_dataset["target"])


@serve.deployment(route_prefix="/iris")
class BoostingModel:
    def __init__(self, model):
        self.model = model
        self.label_list = iris_dataset["target_names"].tolist()

    async def __call__(self, request):
        payload = (await request.json())["vector"]
        print(f"Received http request with data {payload}")

        prediction = self.model.predict([payload])[0]
        human_name = self.label_list[prediction]
        return {"result": human_name}


# Deploy model.
BoostingModel.deploy(model)

# Query it!
sample_request_input = {"vector": [1.2, 1.0, 1.1, 0.9]}
response = requests.get(
    "http://localhost:8000/iris", json=sample_request_input)
print(response.text)
# __serve_example_end__
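
As a quick follow-up, the JSON body of the response can also be parsed directly
(a sketch; it assumes the script above is still running so the /iris endpoint is
live, and the sample vector is illustrative):

    import requests

    # Another 4-feature sample; the deployment returns a JSON body
    # of the form {"result": "<species name>"}.
    resp = requests.get(
        "http://localhost:8000/iris",
        json={"vector": [5.1, 3.5, 1.4, 0.2]},
    )
    print(resp.json()["result"])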
@@ -397,7 +397,7 @@
    name = "tutorial_sklearn_ray_overview",
    size = "small",
    main = "quick_start.py",
-   srcs = ["//doc/source/serve/_examples/doc_code:serve_doc_code"],
+   srcs = ["//doc/source/serve/doc_code:serve_doc_code"],
    tags = ["exclusive", "team:serve"],
)