From 0246f3532e2499d603f7797ce00e5e04060499af Mon Sep 17 00:00:00 2001 From: "Jules S. Damji" Date: Mon, 14 Mar 2022 19:15:52 -0700 Subject: [PATCH] [DOC] Added a full example how to access deployments (#22401) --- doc/BUILD | 9 ++ .../_examples/doc_code/create_deployment.py | 96 +++++++++++++++++++ doc/source/serve/core-apis.rst | 44 +++++++++ doc/source/serve/doc_code/BUILD | 9 ++ .../serve/doc_code/create_deployment.py | 96 +++++++++++++++++++ doc/source/serve/doc_code/quick_start.py | 43 +++++++++ python/ray/serve/BUILD | 2 +- 7 files changed, 298 insertions(+), 1 deletion(-) create mode 100644 doc/source/serve/_examples/doc_code/create_deployment.py create mode 100644 doc/source/serve/doc_code/BUILD create mode 100644 doc/source/serve/doc_code/create_deployment.py create mode 100644 doc/source/serve/doc_code/quick_start.py diff --git a/doc/BUILD b/doc/BUILD index b02c0cfd9..d8b56df95 100644 --- a/doc/BUILD +++ b/doc/BUILD @@ -110,6 +110,7 @@ py_test( tags = ["exclusive", "pytorch", "team:ml"] ) +# Ray Serve py_test( name = "doc_code_metrics_example", size = "small", @@ -126,6 +127,14 @@ py_test( tags = ["exclusive", "post_wheel_build", "team:serve"] ) +py_test( + name = "doc_code_create_deployment_example", + size = "small", + main = "create_deployment.py", + srcs = ["//doc/source/serve/doc_code:serve_doc_code"], + tags = ["exclusive", "post_wheel_build", "team:serve"] +) + # -------------------------------------------------------------------- # Test all doc/source/tune/examples notebooks. 
# -------------------------------------------------------------------- diff --git a/doc/source/serve/_examples/doc_code/create_deployment.py b/doc/source/serve/_examples/doc_code/create_deployment.py new file mode 100644 index 000000000..b9c4d2ca0 --- /dev/null +++ b/doc/source/serve/_examples/doc_code/create_deployment.py @@ -0,0 +1,96 @@ +# flake8: noqa +# fmt: off +# +# __serve_example_begin__ +# +# This brief example shows how to create, deploy, and expose access to +# deployment models, using the simple Ray Serve deployment APIs. +# Once deployed, you can access deployment via two methods: +# ServerHandle API and HTTP +# +import os +from random import random + +import requests +import starlette +from starlette.requests import Request +import ray +from ray import serve + +# +# A simple example model stored in a pickled format at an accessible path +# that can be reloaded and deserialized into a model instance. Once deployed +# in Ray Serve, we can use it for prediction. The prediction is a fake condition, +# based on threshold of weight greater than 0.5. +# + + +class Model: + def __init__(self, path): + self.path = path + + def predict(self, data): + return random() + data if data > 0.5 else data + + +@serve.deployment +class Deployment: + # Take in a path to load your desired model + def __init__(self, path: str) -> None: + self.path = path + self.model = Model(path) + # Get the pid on which this deployment is running on + self.pid = os.getpid() + + # Deployments are callable. Here we simply return a prediction from + # our request + def __call__(self, starlette_request) -> str: + # Request came via an HTTP + if isinstance(starlette_request, starlette.requests.Request): + data = starlette_request.query_params['data'] + else: + # Request came via a ServerHandle API method call. 
+ data = starlette_request + pred = self.model.predict(float(data)) + return f"(pid: {self.pid}); path: {self.path}; data: {float(data):.3f}; prediction: {pred:.3f}" + + +if __name__ == '__main__': + + # Start a Ray Serve instance. This will automatically start + # or connect to an existing Ray cluster. + serve.start() + + # Create two distinct deployments of the same class as + # two replicas. Associate each deployment with a unique 'name'. + # This name can be used as to fetch its respective serve handle. + # See code below for method 1. + Deployment.options(name="rep-1", num_replicas=2).deploy("/model/rep-1.pkl") + Deployment.options(name="rep-2", num_replicas=2).deploy("/model/rep-2.pkl") + + # Get the current list of deployments + print(serve.list_deployments()) + + print("ServerHandle API responses: " + "--" * 5) + + # Method 1) Access each deployment using the ServerHandle API + for _ in range(2): + for d_name in ["rep-1", "rep-2"]: + # Get handle to the each deployment and invoke its method. + # Which replica the request is dispatched to is determined + # by the Router actor. + handle = serve.get_deployment(d_name).get_handle() + print(f"handle name : {d_name}") + print(f"prediction : {ray.get(handle.remote(random()))}") + print("-" * 2) + + print("HTTP responses: " + "--" * 5) + + # Method 2) Access deployment via HTTP Request + for _ in range(2): + for d_name in ["rep-1", "rep-2"]: + # Send HTTP request along with data payload + url = f"http://127.0.0.1:8000/{d_name}" + print(f"handle name : {d_name}") + print(f"prediction : {requests.get(url, params= {'data': random()}).text}") +# __serve_example_end__ diff --git a/doc/source/serve/core-apis.rst b/doc/source/serve/core-apis.rst index bfc1c7fff..c049a1216 100644 --- a/doc/source/serve/core-apis.rst +++ b/doc/source/serve/core-apis.rst @@ -99,6 +99,50 @@ We can also query the deployment using the :mod:`ServeHandle ` +interface. 
This method allows you to access deployments within a Python script or code, making it convenient for a +Python developer. And the second is by using HTTP requests, allowing access to deployments via a web client application. + +Let's look at a simple end-to-end example using both ways to expose and access deployments. Your output may +vary due to the random nature of how the prediction is computed; however, the example illustrates two things: +1) how to expose and use deployments and 2) how to use replicas, to which requests are sent. Note that each pid +is a separate replica associated with each deployment name, ``rep-1`` and ``rep-2`` respectively. + +.. literalinclude:: _examples/doc_code/create_deployment.py + :language: python + :start-after: __serve_example_begin__ + :end-before: __serve_example_end__ + +.. code-block:: python + + + # Output: + # {'rep-1': Deployment(name=rep-1,version=None,route_prefix=/rep-1), + # 'rep-2': Deployment(name=rep-2,version=None,route_prefix=/rep-2)} + # + # ServerHandle API responses: ---------- + # handle name : rep-1 + # prediction : (pid: 62636); path: /model/rep-1.pkl; data: 0.600; prediction: 1.292 + # -- + # handle name : rep-2 + # prediction : (pid: 62635); path: /model/rep-2.pkl; data: 0.075; prediction: 0.075 + # -- + # handle name : rep-1 + # prediction : (pid: 62634); path: /model/rep-1.pkl; data: 0.186; prediction: 0.186 + # -- + # handle name : rep-2 + # prediction : (pid: 62637); path: /model/rep-2.pkl; data: 0.751; prediction: 1.444 + # -- + # HTTP responses: ---------- + # handle name : rep-1 + # prediction : (pid: 62636); path: /model/rep-1.pkl; data: 0.582; prediction: 1.481 + # handle name : rep-2 + # prediction : (pid: 62637); path: /model/rep-2.pkl; data: 0.778; prediction: 1.678 + # handle name : rep-1 + # prediction : (pid: 62634); path: /model/rep-1.pkl; data: 0.139; prediction: 0.139 + # handle name : rep-2 + # prediction : (pid: 62635); path: /model/rep-2.pkl; data: 0.569; prediction: 1.262 + Updating
a Deployment ===================== diff --git a/doc/source/serve/doc_code/BUILD b/doc/source/serve/doc_code/BUILD new file mode 100644 index 000000000..e08d72a7b --- /dev/null +++ b/doc/source/serve/doc_code/BUILD @@ -0,0 +1,9 @@ +py_library( + name = "serve_doc_code", + srcs = glob(["**/*.py"]), + visibility = [ + "//doc:__subpackages__", + "//python/ray/serve:__subpackages__", + "//python/ray/serve:__pkg__" + ], +) diff --git a/doc/source/serve/doc_code/create_deployment.py b/doc/source/serve/doc_code/create_deployment.py new file mode 100644 index 000000000..b9c4d2ca0 --- /dev/null +++ b/doc/source/serve/doc_code/create_deployment.py @@ -0,0 +1,96 @@ +# flake8: noqa +# fmt: off +# +# __serve_example_begin__ +# +# This brief example shows how to create, deploy, and expose access to +# deployment models, using the simple Ray Serve deployment APIs. +# Once deployed, you can access deployment via two methods: +# ServerHandle API and HTTP +# +import os +from random import random + +import requests +import starlette +from starlette.requests import Request +import ray +from ray import serve + +# +# A simple example model stored in a pickled format at an accessible path +# that can be reloaded and deserialized into a model instance. Once deployed +# in Ray Serve, we can use it for prediction. The prediction is a fake condition, +# based on threshold of weight greater than 0.5. +# + + +class Model: + def __init__(self, path): + self.path = path + + def predict(self, data): + return random() + data if data > 0.5 else data + + +@serve.deployment +class Deployment: + # Take in a path to load your desired model + def __init__(self, path: str) -> None: + self.path = path + self.model = Model(path) + # Get the pid on which this deployment is running on + self.pid = os.getpid() + + # Deployments are callable. 
Here we simply return a prediction from + # our request + def __call__(self, starlette_request) -> str: + # Request came via an HTTP + if isinstance(starlette_request, starlette.requests.Request): + data = starlette_request.query_params['data'] + else: + # Request came via a ServerHandle API method call. + data = starlette_request + pred = self.model.predict(float(data)) + return f"(pid: {self.pid}); path: {self.path}; data: {float(data):.3f}; prediction: {pred:.3f}" + + +if __name__ == '__main__': + + # Start a Ray Serve instance. This will automatically start + # or connect to an existing Ray cluster. + serve.start() + + # Create two distinct deployments of the same class as + # two replicas. Associate each deployment with a unique 'name'. + # This name can be used as to fetch its respective serve handle. + # See code below for method 1. + Deployment.options(name="rep-1", num_replicas=2).deploy("/model/rep-1.pkl") + Deployment.options(name="rep-2", num_replicas=2).deploy("/model/rep-2.pkl") + + # Get the current list of deployments + print(serve.list_deployments()) + + print("ServerHandle API responses: " + "--" * 5) + + # Method 1) Access each deployment using the ServerHandle API + for _ in range(2): + for d_name in ["rep-1", "rep-2"]: + # Get handle to the each deployment and invoke its method. + # Which replica the request is dispatched to is determined + # by the Router actor. 
+ handle = serve.get_deployment(d_name).get_handle() + print(f"handle name : {d_name}") + print(f"prediction : {ray.get(handle.remote(random()))}") + print("-" * 2) + + print("HTTP responses: " + "--" * 5) + + # Method 2) Access deployment via HTTP Request + for _ in range(2): + for d_name in ["rep-1", "rep-2"]: + # Send HTTP request along with data payload + url = f"http://127.0.0.1:8000/{d_name}" + print(f"handle name : {d_name}") + print(f"prediction : {requests.get(url, params= {'data': random()}).text}") +# __serve_example_end__ diff --git a/doc/source/serve/doc_code/quick_start.py b/doc/source/serve/doc_code/quick_start.py new file mode 100644 index 000000000..bba109b5f --- /dev/null +++ b/doc/source/serve/doc_code/quick_start.py @@ -0,0 +1,43 @@ +# flake8: noqa +# fmt: off + +# __serve_example_begin__ +import requests + +from sklearn.datasets import load_iris +from sklearn.ensemble import GradientBoostingClassifier + +from ray import serve + +serve.start() + +# Train model. +iris_dataset = load_iris() +model = GradientBoostingClassifier() +model.fit(iris_dataset["data"], iris_dataset["target"]) + + +@serve.deployment(route_prefix="/iris") +class BoostingModel: + def __init__(self, model): + self.model = model + self.label_list = iris_dataset["target_names"].tolist() + + async def __call__(self, request): + payload = (await request.json())["vector"] + print(f"Received http request with data {payload}") + + prediction = self.model.predict([payload])[0] + human_name = self.label_list[prediction] + return {"result": human_name} + + +# Deploy model. +BoostingModel.deploy(model) + +# Query it! 
+sample_request_input = {"vector": [1.2, 1.0, 1.1, 0.9]} +response = requests.get( + "http://localhost:8000/iris", json=sample_request_input) +print(response.text) +# __serve_example_end__ diff --git a/python/ray/serve/BUILD b/python/ray/serve/BUILD index d018ab0df..7c9ce8dcb 100644 --- a/python/ray/serve/BUILD +++ b/python/ray/serve/BUILD @@ -397,7 +397,7 @@ py_test( name = "tutorial_sklearn_ray_overview", size = "small", main = "quick_start.py", - srcs = ["//doc/source/serve/_examples/doc_code:serve_doc_code"], + srcs = ["//doc/source/serve/doc_code:serve_doc_code"], tags = ["exclusive", "team:serve"], )