From 0246f3532e2499d603f7797ce00e5e04060499af Mon Sep 17 00:00:00 2001 From: "Jules S. Damji" Date: Mon, 14 Mar 2022 19:15:52 -0700 Subject: [PATCH] [DOC] Added a full example how to access deployments (#22401) --- doc/BUILD | 9 ++ .../_examples/doc_code/create_deployment.py | 96 +++++++++++++++++++ doc/source/serve/core-apis.rst | 44 +++++++++ doc/source/serve/doc_code/BUILD | 9 ++ .../serve/doc_code/create_deployment.py | 96 +++++++++++++++++++ doc/source/serve/doc_code/quick_start.py | 43 +++++++++ python/ray/serve/BUILD | 2 +- 7 files changed, 298 insertions(+), 1 deletion(-) create mode 100644 doc/source/serve/_examples/doc_code/create_deployment.py create mode 100644 doc/source/serve/doc_code/BUILD create mode 100644 doc/source/serve/doc_code/create_deployment.py create mode 100644 doc/source/serve/doc_code/quick_start.py diff --git a/doc/BUILD b/doc/BUILD index b02c0cfd9..d8b56df95 100644 --- a/doc/BUILD +++ b/doc/BUILD @@ -110,6 +110,7 @@ py_test( tags = ["exclusive", "pytorch", "team:ml"] ) +# Ray Serve py_test( name = "doc_code_metrics_example", size = "small", @@ -126,6 +127,14 @@ py_test( tags = ["exclusive", "post_wheel_build", "team:serve"] ) +py_test( + name = "doc_code_create_deployment_example", + size = "small", + main = "create_deployment.py", + srcs = ["//doc/source/serve/doc_code:serve_doc_code"], + tags = ["exclusive", "post_wheel_build", "team:serve"] +) + # -------------------------------------------------------------------- # Test all doc/source/tune/examples notebooks. 
# -------------------------------------------------------------------- diff --git a/doc/source/serve/_examples/doc_code/create_deployment.py b/doc/source/serve/_examples/doc_code/create_deployment.py new file mode 100644 index 000000000..b9c4d2ca0 --- /dev/null +++ b/doc/source/serve/_examples/doc_code/create_deployment.py @@ -0,0 +1,96 @@ +# flake8: noqa +# fmt: off +# +# __serve_example_begin__ +# +# This brief example shows how to create, deploy, and expose access to +# deployment models, using the simple Ray Serve deployment APIs. +# Once deployed, you can access deployment via two methods: +# ServerHandle API and HTTP +# +import os +from random import random + +import requests +import starlette +from starlette.requests import Request +import ray +from ray import serve + +# +# A simple example model stored in a pickled format at an accessible path +# that can be reloaded and deserialized into a model instance. Once deployed +# in Ray Serve, we can use it for prediction. The prediction is a fake condition, +# based on threshold of weight greater than 0.5. +# + + +class Model: + def __init__(self, path): + self.path = path + + def predict(self, data): + return random() + data if data > 0.5 else data + + +@serve.deployment +class Deployment: + # Take in a path to load your desired model + def __init__(self, path: str) -> None: + self.path = path + self.model = Model(path) + # Get the pid on which this deployment is running on + self.pid = os.getpid() + + # Deployments are callable. Here we simply return a prediction from + # our request + def __call__(self, starlette_request) -> str: + # Request came via an HTTP + if isinstance(starlette_request, starlette.requests.Request): + data = starlette_request.query_params['data'] + else: + # Request came via a ServerHandle API method call. 
+ data = starlette_request + pred = self.model.predict(float(data)) + return f"(pid: {self.pid}); path: {self.path}; data: {float(data):.3f}; prediction: {pred:.3f}" + + +if __name__ == '__main__': + + # Start a Ray Serve instance. This will automatically start + # or connect to an existing Ray cluster. + serve.start() + + # Create two distinct deployments of the same class as + # two replicas. Associate each deployment with a unique 'name'. + # This name can be used as to fetch its respective serve handle. + # See code below for method 1. + Deployment.options(name="rep-1", num_replicas=2).deploy("/model/rep-1.pkl") + Deployment.options(name="rep-2", num_replicas=2).deploy("/model/rep-2.pkl") + + # Get the current list of deployments + print(serve.list_deployments()) + + print("ServerHandle API responses: " + "--" * 5) + + # Method 1) Access each deployment using the ServerHandle API + for _ in range(2): + for d_name in ["rep-1", "rep-2"]: + # Get handle to the each deployment and invoke its method. + # Which replica the request is dispatched to is determined + # by the Router actor. + handle = serve.get_deployment(d_name).get_handle() + print(f"handle name : {d_name}") + print(f"prediction : {ray.get(handle.remote(random()))}") + print("-" * 2) + + print("HTTP responses: " + "--" * 5) + + # Method 2) Access deployment via HTTP Request + for _ in range(2): + for d_name in ["rep-1", "rep-2"]: + # Send HTTP request along with data payload + url = f"http://127.0.0.1:8000/{d_name}" + print(f"handle name : {d_name}") + print(f"prediction : {requests.get(url, params= {'data': random()}).text}") +# __serve_example_end__ diff --git a/doc/source/serve/core-apis.rst b/doc/source/serve/core-apis.rst index bfc1c7fff..c049a1216 100644 --- a/doc/source/serve/core-apis.rst +++ b/doc/source/serve/core-apis.rst @@ -99,6 +99,50 @@ We can also query the deployment using the :mod:`ServeHandle ` +interface. 
This method allows you to access deployments within a Python script or code, making it convenient for a +Python developer. And the second is by using HTTP requests, allowing access to deployments via a web client application. + +Let's look at a simple end-to-end example using both ways to expose and access deployments. Your output may +vary due to the random nature of how the prediction is computed; however, the example illustrates two things: +1) how to expose and use deployments and 2) how to use replicas, to which requests are sent. Note that each pid +is a separate replica associated with each deployment name, ``rep-1`` and ``rep-2`` respectively. + +.. literalinclude:: _examples/doc_code/create_deployment.py + :language: python + :start-after: __serve_example_begin__ + :end-before: __serve_example_end__ + +.. code-block:: python + + + # Output: + # {'rep-1': Deployment(name=rep-1,version=None,route_prefix=/rep-1), + # 'rep-2': Deployment(name=rep-2,version=None,route_prefix=/rep-2)} + # + # ServerHandle API responses: ---------- + # handle name : rep-1 + # prediction : (pid: 62636); path: /model/rep-1.pkl; data: 0.600; prediction: 1.292 + # -- + # handle name : rep-2 + # prediction : (pid: 62635); path: /model/rep-2.pkl; data: 0.075; prediction: 0.075 + # -- + # handle name : rep-1 + # prediction : (pid: 62634); path: /model/rep-1.pkl; data: 0.186; prediction: 0.186 + # -- + # handle name : rep-2 + # prediction : (pid: 62637); path: /model/rep-2.pkl; data: 0.751; prediction: 1.444 + # -- + # HTTP responses: ---------- + # handle name : rep-1 + # prediction : (pid: 62636); path: /model/rep-1.pkl; data: 0.582; prediction: 1.481 + # handle name : rep-2 + # prediction : (pid: 62637); path: /model/rep-2.pkl; data: 0.778; prediction: 1.678 + # handle name : rep-1 + # prediction : (pid: 62634); path: /model/rep-1.pkl; data: 0.139; prediction: 0.139 + # handle name : rep-2 + # prediction : (pid: 62635); path: /model/rep-2.pkl; data: 0.569; prediction: 1.262 + Updating
a Deployment ===================== diff --git a/doc/source/serve/doc_code/BUILD b/doc/source/serve/doc_code/BUILD new file mode 100644 index 000000000..e08d72a7b --- /dev/null +++ b/doc/source/serve/doc_code/BUILD @@ -0,0 +1,9 @@ +py_library( + name = "serve_doc_code", + srcs = glob(["**/*.py"]), + visibility = [ + "//doc:__subpackages__", + "//python/ray/serve:__subpackages__", + "//python/ray/serve:__pkg__" + ], +) diff --git a/doc/source/serve/doc_code/create_deployment.py b/doc/source/serve/doc_code/create_deployment.py new file mode 100644 index 000000000..b9c4d2ca0 --- /dev/null +++ b/doc/source/serve/doc_code/create_deployment.py @@ -0,0 +1,96 @@ +# flake8: noqa +# fmt: off +# +# __serve_example_begin__ +# +# This brief example shows how to create, deploy, and expose access to +# deployment models, using the simple Ray Serve deployment APIs. +# Once deployed, you can access deployment via two methods: +# ServerHandle API and HTTP +# +import os +from random import random + +import requests +import starlette +from starlette.requests import Request +import ray +from ray import serve + +# +# A simple example model stored in a pickled format at an accessible path +# that can be reloaded and deserialized into a model instance. Once deployed +# in Ray Serve, we can use it for prediction. The prediction is a fake condition, +# based on threshold of weight greater than 0.5. +# + + +class Model: + def __init__(self, path): + self.path = path + + def predict(self, data): + return random() + data if data > 0.5 else data + + +@serve.deployment +class Deployment: + # Take in a path to load your desired model + def __init__(self, path: str) -> None: + self.path = path + self.model = Model(path) + # Get the pid on which this deployment is running on + self.pid = os.getpid() + + # Deployments are callable. 
Here we simply return a prediction from + # our request + def __call__(self, starlette_request) -> str: + # Request came via an HTTP + if isinstance(starlette_request, starlette.requests.Request): + data = starlette_request.query_params['data'] + else: + # Request came via a ServerHandle API method call. + data = starlette_request + pred = self.model.predict(float(data)) + return f"(pid: {self.pid}); path: {self.path}; data: {float(data):.3f}; prediction: {pred:.3f}" + + +if __name__ == '__main__': + + # Start a Ray Serve instance. This will automatically start + # or connect to an existing Ray cluster. + serve.start() + + # Create two distinct deployments of the same class as + # two replicas. Associate each deployment with a unique 'name'. + # This name can be used as to fetch its respective serve handle. + # See code below for method 1. + Deployment.options(name="rep-1", num_replicas=2).deploy("/model/rep-1.pkl") + Deployment.options(name="rep-2", num_replicas=2).deploy("/model/rep-2.pkl") + + # Get the current list of deployments + print(serve.list_deployments()) + + print("ServerHandle API responses: " + "--" * 5) + + # Method 1) Access each deployment using the ServerHandle API + for _ in range(2): + for d_name in ["rep-1", "rep-2"]: + # Get handle to the each deployment and invoke its method. + # Which replica the request is dispatched to is determined + # by the Router actor. 
+ handle = serve.get_deployment(d_name).get_handle() + print(f"handle name : {d_name}") + print(f"prediction : {ray.get(handle.remote(random()))}") + print("-" * 2) + + print("HTTP responses: " + "--" * 5) + + # Method 2) Access deployment via HTTP Request + for _ in range(2): + for d_name in ["rep-1", "rep-2"]: + # Send HTTP request along with data payload + url = f"http://127.0.0.1:8000/{d_name}" + print(f"handle name : {d_name}") + print(f"prediction : {requests.get(url, params= {'data': random()}).text}") +# __serve_example_end__ diff --git a/doc/source/serve/doc_code/quick_start.py b/doc/source/serve/doc_code/quick_start.py new file mode 100644 index 000000000..bba109b5f --- /dev/null +++ b/doc/source/serve/doc_code/quick_start.py @@ -0,0 +1,43 @@ +# flake8: noqa +# fmt: off + +# __serve_example_begin__ +import requests + +from sklearn.datasets import load_iris +from sklearn.ensemble import GradientBoostingClassifier + +from ray import serve + +serve.start() + +# Train model. +iris_dataset = load_iris() +model = GradientBoostingClassifier() +model.fit(iris_dataset["data"], iris_dataset["target"]) + + +@serve.deployment(route_prefix="/iris") +class BoostingModel: + def __init__(self, model): + self.model = model + self.label_list = iris_dataset["target_names"].tolist() + + async def __call__(self, request): + payload = (await request.json())["vector"] + print(f"Received http request with data {payload}") + + prediction = self.model.predict([payload])[0] + human_name = self.label_list[prediction] + return {"result": human_name} + + +# Deploy model. +BoostingModel.deploy(model) + +# Query it! 
+sample_request_input = {"vector": [1.2, 1.0, 1.1, 0.9]} +response = requests.get( + "http://localhost:8000/iris", json=sample_request_input) +print(response.text) +# __serve_example_end__ diff --git a/python/ray/serve/BUILD b/python/ray/serve/BUILD index d018ab0df..7c9ce8dcb 100644 --- a/python/ray/serve/BUILD +++ b/python/ray/serve/BUILD @@ -397,7 +397,7 @@ py_test( name = "tutorial_sklearn_ray_overview", size = "small", main = "quick_start.py", - srcs = ["//doc/source/serve/_examples/doc_code:serve_doc_code"], + srcs = ["//doc/source/serve/doc_code:serve_doc_code"], tags = ["exclusive", "team:serve"], )