[Serve] Cleanup examples and tests (#16042)

2025-03-06 02:21:39 -05:00 · 2021-05-25 15:32:36 -07:00 · 2021-05-25 15:32:36 -07:00 · 2aee4ac40d
commit 2aee4ac40d
parent ec8b591f32
11 changed files with 11 additions and 468 deletions
--- a/ci/travis/ci.sh
+++ b/ci/travis/ci.sh
@ -143,6 +143,7 @@ test_python() {
      -python/ray/serve:test_router # timeout
      -python/ray/serve:test_handle # "fatal error" (?) https://github.com/ray-project/ray/pull/13695
      -python/ray/serve:test_backend_worker # memory error
      -python/ray/serve:test_controller_crashes # timeout
      -python/ray/tests:test_actor_advanced # timeout
      -python/ray/tests:test_actor_failures # flaky
      -python/ray/tests:test_advanced_2
--- a/python/ray/serve/BUILD
+++ b/python/ray/serve/BUILD
@ -5,9 +5,7 @@ py_library(
    srcs = glob(["**/*.py"], exclude=["tests/*.py"]),
 )
-serve_tests_srcs = glob(["tests/*.py"],
+serve_tests_srcs = glob(["tests/*.py"])
     exclude=["tests/test_serve.py",
             ])
 py_test(
    name = "test_api",
@ -225,30 +223,22 @@ py_test(
 py_test(
    name = "test_fastapi",
-    size = "small",
+    size = "medium",
    srcs = serve_tests_srcs,
    tags = ["exclusive"],
    deps = [":serve_lib"],
 )
 # Runs test_api and test_failure with injected failures in the controller.
 # TODO(simon): Tests are disabled until #11683 is fixed.
 # py_test(
 #     name = "test_controller_crashes",
 #     size = "large",
 #     srcs = glob(["tests/test_controller_crashes.py",
 #                 "tests/test_api.py",
 #                 "tests/test_failure.py",
 #                 "**/conftest.py"],
 #                 exclude=["tests/test_serve.py"]),
 # )
 py_test(
-    name = "echo_full",
+   name = "test_controller_crashes",
-    size = "small",
+   size = "medium",
-    srcs = glob(["examples/*.py"]),
+   srcs = glob(["tests/test_controller_crashes.py",
-    tags = ["exclusive"],
+               "tests/test_api.py",
-    deps = [":serve_lib"]
+               "tests/test_failure.py",
               "**/conftest.py"]),
   tags = ["exclusive"],
   deps = [":serve_lib"],
 )
 # Make sure the example showing in doc is tested
--- a/python/ray/serve/examples/echo.py
+++ b/python/ray/serve/examples/echo.py
@ -1,25 +0,0 @@
 """
 Example service that prints out http context.
 """
 import time
 import requests
 from ray import serve
 def echo(starlette_request):
    return ["hello " + starlette_request.query_params.get("name", "serve!")]
 serve.start()
 serve.create_backend("echo:v1", echo)
 serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")
 while True:
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(resp)
    print("...Sleeping for 2 seconds...")
    time.sleep(2)
--- a/python/ray/serve/examples/echo_actor.py
+++ b/python/ray/serve/examples/echo_actor.py
@ -1,58 +0,0 @@
 """
 Example actor that adds an increment to a number. This number can
 come from either web (parsing Starlette request) or python call.
 This actor can be called from HTTP as well as from Python.
 """
 import json
 import time
 from pygments import formatters, highlight, lexers
 import requests
 import ray
 from ray import serve
 def pformat_color_json(d):
    """Use pygments to pretty format and colorize dictionary"""
    formatted_json = json.dumps(d, sort_keys=True, indent=4)
    colorful_json = highlight(formatted_json, lexers.JsonLexer(),
                              formatters.TerminalFormatter())
    return colorful_json
 class MagicCounter:
    def __init__(self, increment):
        self.increment = increment
    def __call__(self, starlette_request, base_number=None):
        if serve.context.web:
            base_number = int(
                starlette_request.query_params.get("base_number", "0"))
        return base_number + self.increment
 serve.start()
 serve.create_backend("counter:v1", MagicCounter, 42)  # increment=42
 serve.create_endpoint("magic_counter", backend="counter:v1", route="/counter")
 print("Sending ten queries via HTTP")
 for i in range(10):
    url = "http://127.0.0.1:8000/counter?base_number={}".format(i)
    print("> Pinging {}".format(url))
    resp = requests.get(url).json()
    print(pformat_color_json(resp))
    time.sleep(0.2)
 print("Sending ten queries via Python")
 handle = serve.get_handle("magic_counter")
 for i in range(10):
    print("> Pinging handle.remote(base_number={})".format(i))
    result = ray.get(handle.remote(base_number=i))
    print("< Result {}".format(result))
--- a/python/ray/serve/examples/echo_actor_batch.py
+++ b/python/ray/serve/examples/echo_actor_batch.py
@ -1,66 +0,0 @@
 """
 Example actor that adds an increment to a number. This number can
 come from either web (parsing Starlette request) or python call.
 The queries incoming to this actor are batched.
 This actor can be called from HTTP as well as from Python.
 """
 import json
 import time
 from pygments import formatters, highlight, lexers
 import requests
 import ray
 from ray import serve
 def pformat_color_json(d):
    """Use pygments to pretty format and colorize dictionary"""
    formatted_json = json.dumps(d, sort_keys=True, indent=4)
    colorful_json = highlight(formatted_json, lexers.JsonLexer(),
                              formatters.TerminalFormatter())
    return colorful_json
 class MagicCounter:
    def __init__(self, increment):
        self.increment = increment
    @serve.batch(max_batch_size=5)
    async def handle_batch(self, base_number):
        result = []
        for b in base_number:
            ans = b + self.increment
            result.append(ans)
        return result
    async def __call__(self, request, base_number=None):
        # batch_size = serve.context.batch_size
        if serve.context.web:
            base_number = int(request.query_params.get("base_number", "0"))
        return await self.handle_batch(base_number)
 serve.start()
 serve.create_backend("counter:v1", MagicCounter, 42)  # increment=42
 serve.create_endpoint("magic_counter", backend="counter:v1", route="/counter")
 print("Sending ten queries via HTTP")
 for i in range(10):
    url = "http://127.0.0.1:8000/counter?base_number={}".format(i)
    print("> Pinging {}".format(url))
    resp = requests.get(url).json()
    print(pformat_color_json(resp))
    time.sleep(0.2)
 print("Sending ten queries via Python")
 handle = serve.get_handle("magic_counter")
 for i in range(10):
    print("> Pinging handle.remote(base_number={})".format(i))
    result = ray.get(handle.remote(base_number=i))
    print("< Result {}".format(result))
--- a/python/ray/serve/examples/echo_batching.py
+++ b/python/ray/serve/examples/echo_batching.py
@ -1,47 +0,0 @@
 """
 This example has backend which has batching functionality enabled.
 """
 import ray
 from ray import serve
 class MagicCounter:
    def __init__(self, increment):
        self.increment = increment
    @serve.batch(max_batch_size=5)
    async def handle_batch(self, base_numbers):
        # Must preserve the batch size.
        result = []
        for base_num in base_numbers:
            ret_str = "Number: {} Batch size: {}".format(
                base_num, len(base_numbers))
            result.append(ret_str)
        return result
    async def __call__(self, starlette_request, base_number=None):
        return await self.handle_batch(base_number)
 serve.start()
 serve.create_backend("counter:v1", MagicCounter, 42)  # increment=42
 serve.create_endpoint("magic_counter", backend="counter:v1", route="/counter")
 handle = serve.get_handle("magic_counter")
 future_list = []
 # fire 30 requests
 for r in range(30):
    print("> [REMOTE] Pinging handle.remote(base_number={})".format(r))
    f = handle.remote(base_number=r)
    future_list.append(f)
 # get results of queries as they complete
 left_futures = future_list
 while left_futures:
    completed_futures, remaining_futures = ray.wait(left_futures, timeout=0.05)
    if len(completed_futures) > 0:
        result = ray.get(completed_futures[0])
        print("< " + result)
    left_futures = remaining_futures
--- a/python/ray/serve/examples/echo_error.py
+++ b/python/ray/serve/examples/echo_error.py
@ -1,54 +0,0 @@
 """
 Example of error handling mechanism in ray serve.
 We are going to define a buggy function that raise some exception:
 >>> def echo(_):
        raise Exception("oh no")
 The expected behavior is:
 - HTTP server should respond with "internal error" in the response JSON
 - ray.get(handle.remote()) should raise RayTaskError with traceback.
 This shows that error is hidden from HTTP side but always visible when calling
 from Python.
 """
 import json
 import time
 from pygments import formatters, highlight, lexers
 import requests
 import ray
 from ray import serve
 def pformat_color_json(d):
    """Use pygments to pretty format and colorize dictionary"""
    formatted_json = json.dumps(d, sort_keys=True, indent=4)
    colorful_json = highlight(formatted_json, lexers.JsonLexer(),
                              formatters.TerminalFormatter())
    return colorful_json
 def echo(_):
    raise Exception("Something went wrong...")
 serve.start()
 serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")
 for _ in range(2):
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))
    print("...Sleeping for 2 seconds...")
    time.sleep(2)
 handle = serve.get_handle("my_endpoint")
 print("Invoke from python will raise exception with traceback:")
 ray.get(handle.remote())
--- a/python/ray/serve/examples/echo_full.py
+++ b/python/ray/serve/examples/echo_full.py
@ -1,52 +0,0 @@
 import time
 import requests
 import ray
 import ray.serve as serve
 # initialize ray serve system.
 ray.init(num_cpus=10)
 serve.start()
 # a backend can be a function or class.
 # it can be made to be invoked from web as well as python.
 def echo_v1(starlette_request):
    response = starlette_request.query_params.get("response", "web")
    return response
 serve.create_backend("echo:v1", echo_v1)
 # An endpoint is associated with an HTTP path and traffic to the endpoint
 # will be serviced by the echo:v1 backend.
 serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")
 print(requests.get("http://127.0.0.1:8000/echo", timeout=0.5).text)
 # The service will be reachable from http
 print(ray.get(serve.get_handle("my_endpoint").remote(response="hello")))
 # as well as within the ray system.
 # We can also add a new backend and split the traffic.
 def echo_v2(starlette_request):
    # magic, only from web.
    return "something new"
 serve.create_backend("echo:v2", echo_v2)
 # The two backend will now split the traffic 50%-50%.
 serve.set_traffic("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})
 # Observe requests are now split between two backends.
 for _ in range(10):
    print(requests.get("http://127.0.0.1:8000/echo").text)
    time.sleep(0.2)
 # You can also change number of replicas for each backend independently.
 serve.update_backend_config("echo:v1", {"num_replicas": 2})
 serve.update_backend_config("echo:v2", {"num_replicas": 2})
--- a/python/ray/serve/examples/echo_pipeline.py
+++ b/python/ray/serve/examples/echo_pipeline.py
@ -1,79 +0,0 @@
 """
 Ray serve pipeline example
 """
 import ray
 import ray.serve as serve
 import time
 # Initialize ray serve instance.
 serve.start()
 # A backend can be a function or class.
 # It can be made to be invoked via HTTP as well as python.
 def echo_v1(_, response="hello from python!"):
    return f"echo_v1({response})"
 serve.create_backend("echo_v1", echo_v1)
 serve.create_endpoint("echo_v1", backend="echo_v1", route="/echo_v1")
 def echo_v2(_, relay=""):
    return f"echo_v2({relay})"
 serve.create_backend("echo_v2", echo_v2)
 serve.create_endpoint("echo_v2", backend="echo_v2", route="/echo_v2")
 def echo_v3(_, relay=""):
    return f"echo_v3({relay})"
 serve.create_backend("echo_v3", echo_v3)
 serve.create_endpoint("echo_v3", backend="echo_v3", route="/echo_v3")
 def echo_v4(_, relay1="", relay2=""):
    return f"echo_v4({relay1} , {relay2})"
 serve.create_backend("echo_v4", echo_v4)
 serve.create_endpoint("echo_v4", backend="echo_v4", route="/echo_v4")
 """
 The pipeline created is as follows -
            "my_endpoint1"
                  /\
                 /  \
                /    \
               /      \
              /        \
             /          \
  "my_endpoint2"     "my_endpoint3"
            \            /
             \          /
              \        /
               \      /
                \    /
                 \  /
                  \/
            "my_endpoint4"
 """
 # get the handle of the endpoints
 handle1 = serve.get_handle("echo_v1")
 handle2 = serve.get_handle("echo_v2")
 handle3 = serve.get_handle("echo_v3")
 handle4 = serve.get_handle("echo_v4")
 start = time.time()
 print("Start firing to the pipeline: {} s".format(time.time()))
 handle1_oid = handle1.remote(response="hello")
 handle4_oid = handle4.remote(
    relay1=handle2.remote(relay=handle1_oid),
    relay2=handle3.remote(relay=handle1_oid))
 print("Firing ended now waiting for the result,"
      "time taken: {} s".format(time.time() - start))
 result = ray.get(handle4_oid)
 print("Result: {}, time taken: {} s".format(result, time.time() - start))
--- a/python/ray/serve/examples/echo_split.py
+++ b/python/ray/serve/examples/echo_split.py
@ -1,52 +0,0 @@
 """
 Example of traffic splitting. We will first use echo:v1. Then v1 and v2
 will split the incoming traffic evenly.
 """
 import json
 import time
 from pygments import formatters, highlight, lexers
 import requests
 from ray import serve
 def pformat_color_json(d):
    """Use pygments to pretty format and colorize dictionary"""
    formatted_json = json.dumps(d, sort_keys=True, indent=4)
    colorful_json = highlight(formatted_json, lexers.JsonLexer(),
                              formatters.TerminalFormatter())
    return colorful_json
 def echo_v1(_):
    return "v1"
 def echo_v2(_):
    return "v2"
 serve.start()
 serve.create_backend("echo:v1", echo_v1)
 serve.create_endpoint("my_endpoint", backend="echo:v1", route="/echo")
 for _ in range(3):
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))
    print("...Sleeping for 2 seconds...")
    time.sleep(2)
 serve.create_backend("echo:v2", echo_v2)
 serve.set_traffic("my_endpoint", {"echo:v1": 0.5, "echo:v2": 0.5})
 while True:
    resp = requests.get("http://127.0.0.1:8000/echo").json()
    print(pformat_color_json(resp))
    print("...Sleeping for 2 seconds...")
    time.sleep(2)
--- a/python/ray/serve/tests/test_serve.py
+++ b/python/ray/serve/tests/test_serve.py
@ -1,15 +0,0 @@
 import pytest
 from pathlib import Path
 import sys
 if __name__ == "__main__":
    curr_dir = Path(__file__).parent
    test_paths = curr_dir.rglob("test_*.py")
    sorted_path = sorted(map(lambda path: str(path.absolute()), test_paths))
    serve_tests_files = list(sorted_path)
    print("Testing the following files")
    for test_file in serve_tests_files:
        print("->", test_file.split("/")[-1])
    sys.exit(pytest.main(["-v", "-s"] + serve_tests_files))