[Serve] Fix Serve Release Tests (#12777)

Simon Mo 2020-12-11 11:53:47 -08:00 committed by GitHub
parent 4ad4463be6
commit 3d8c1cbae6
8 changed files with 80 additions and 99 deletions

View file

@@ -1,7 +1,7 @@
cluster_name: default
min_workers: 22
max_workers: 22
initial_workers: 22
min_workers: 5
max_workers: 5
initial_workers: 5
autoscaling_mode: default
docker:
image: 'anyscale/ray-ml:latest'
@@ -28,6 +28,7 @@ initialization_commands: []
setup_commands:
- apt-get install build-essential libssl-dev git -y
- 'rm -r wrk || true && git clone https://github.com/wg/wrk.git wrk && cd wrk && make -j && cp wrk /usr/local/bin'
- ray install-nightly
head_setup_commands: []
worker_setup_commands: []
head_start_ray_commands:

View file

@@ -86,13 +86,14 @@ async def main():
client.create_backend("backend", backend)
client.create_endpoint("endpoint", backend="backend", route="/api")
for intermediate_handles in [False, True]:
if (intermediate_handles):
if intermediate_handles:
client.create_endpoint(
"backend", backend="backend", route="/backend")
class forwardActor:
def __init__(self):
client = serve.connect()
self.handle = client.get_handle("backend")
def __call__(self, _):

View file

@@ -36,73 +36,76 @@ from ray import serve
from ray.serve import BackendConfig
from ray.serve.utils import logger
from ray.util.placement_group import (placement_group, remove_placement_group)
from ray.util.placement_group import placement_group, remove_placement_group
ray.shutdown()
ray.init(address="auto")
client = serve.start()
# These numbers need to correspond with the autoscaler config file.
# The number of remote nodes in the autoscaler should upper bound
# these because sometimes nodes fail to update.
num_workers = 20
expected_num_nodes = num_workers + 1
cpus_per_node = 4
num_remote_cpus = expected_num_nodes * cpus_per_node
# We ask for more workers but only need to run on a smaller subset.
# This should account for worker nodes that failed to launch.
expected_num_nodes = 6
num_replicas = 11
# wrk HTTP load testing config
num_connections = 20
num_threads = 2
time_to_run = "20s"
# Wait until the expected number of nodes have joined the cluster.
while True:
num_nodes = len(ray.nodes())
num_nodes = len(list(filter(lambda node: node["Alive"], ray.nodes())))
logger.info("Waiting for nodes {}/{}".format(num_nodes,
expected_num_nodes))
if num_nodes >= expected_num_nodes:
break
time.sleep(5)
logger.info("Nodes have all joined. There are %s resources.",
ray.cluster_resources())
client = serve.start()
def hey(_):
time.sleep(0.01) # Sleep for 10ms
return b"hey"
num_connections = int(num_remote_cpus * 0.75)
num_threads = 2
time_to_run = "10s"
pg = placement_group(
[{
"CPU": 1
} for _ in range(expected_num_nodes)], strategy="STRICT_SPREAD")
ray.get(pg.ready())
# The number of replicas is the number of cores remaining after accounting
# for the one HTTP proxy actor on each node, the "hey" requester task on each
# node, and the serve controller.
# num_replicas = expected_num_nodes * (cpus_per_node - 2) - 1
num_replicas = ray.available_resources()["CPU"]
logger.info("Starting %i replicas", num_replicas)
client.create_backend(
"hey", hey, config=BackendConfig(num_replicas=num_replicas))
client.create_endpoint("hey", backend="hey", route="/hey")
@ray.remote
@ray.remote(num_cpus=0)
def run_wrk():
logger.info("Warming up for ~3 seconds")
for _ in range(5):
resp = requests.get("http://127.0.0.1:8000/hey").text
logger.info("Received response \'" + resp + "\'")
time.sleep(0.5)
logger.info("Warming up")
for _ in range(10):
try:
resp = requests.get("http://127.0.0.1:8000/hey").text
logger.info("Received response '" + resp + "'")
time.sleep(0.5)
except Exception as e:
logger.info(f"Got exception {e}")
result = subprocess.run(
[
"wrk", "-c",
str(num_connections), "-t",
str(num_threads), "-d", time_to_run, "http://127.0.0.1:8000/hey"
"wrk",
"-c",
str(num_connections),
"-t",
str(num_threads),
"-d",
time_to_run,
"http://127.0.0.1:8000/hey",
],
stdout=subprocess.PIPE)
stdout=subprocess.PIPE,
)
return result.stdout.decode()
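A note on the load-generation pieces above: wrk's -c, -t, and -d flags set the number of open connections, the number of worker threads, and the test duration respectively (-s, by contrast, expects a Lua script), and decorating run_wrk with num_cpus=0 presumably keeps the load generator from occupying a CPU slot that the replica count is derived from. A minimal standalone sketch of the same invocation, with hypothetical values and assuming the wrk binary is on PATH:

import subprocess

num_connections = 20  # -c: concurrent HTTP connections to keep open
num_threads = 2       # -t: threads generating the load
duration = "20s"      # -d: how long to run the benchmark

result = subprocess.run(
    [
        "wrk",
        "-c", str(num_connections),
        "-t", str(num_threads),
        "-d", duration,
        "http://127.0.0.1:8000/hey",
    ],
    stdout=subprocess.PIPE,
)
print(result.stdout.decode())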

View file

@@ -23,6 +23,7 @@ initialization_commands: []
setup_commands:
- apt-get install build-essential libssl-dev git -y
- 'rm -r wrk || true && git clone https://github.com/wg/wrk.git wrk && cd wrk && make -j && cp wrk /usr/local/bin'
- ray install-nightly
head_setup_commands: []
worker_setup_commands: []
head_start_ray_commands:

View file

@@ -1,48 +1,17 @@
cluster_name: default
min_workers: 0
max_workers: 0
target_utilization_fraction: 0.8
idle_timeout_minutes: 5
cluster_name: ray-release-long-running-tests
docker:
image: anyscale/ray:latest
container_name: ray_container
pull_before_run: False
# Cloud-provider specific configuration.
provider:
type: aws
region: us-west-2
availability_zone: us-west-2a
availability_zone: us-west-2a, us-west-2b, us-west-2c
auth:
ssh_user: ubuntu
head_node:
InstanceType: m5.2xlarge
ImageId: ami-0888a3b5189309429 # DLAMI 7/1/19
BlockDeviceMappings:
- DeviceName: /dev/sda1
Ebs:
VolumeSize: 150
worker_nodes:
InstanceType: m5.large
ImageId: ami-0888a3b5189309429 # DLAMI 7/1/19
BlockDeviceMappings:
- DeviceName: /dev/sda1
Ebs:
VolumeSize: 150
# Run workers on spot by default. Comment this out to use on-demand.
InstanceMarketOptions:
MarketType: spot
# List of shell commands to run to set up nodes.
setup_commands: []
# Custom commands that will be run on the head node after common setup.
head_setup_commands: []
# Custom commands that will be run on worker nodes after common setup.
worker_setup_commands: []
# Command to start ray on the head node. You don't need to change this.
head_start_ray_commands: []
# Command to start ray on worker nodes. You don't need to change this.
worker_start_ray_commands: []
InstanceType: m5.xlarge

View file

@@ -1,6 +1,6 @@
#!/usr/bin/env bash
ray_version=""
ray_version=""
commit=""
ray_branch=""
workload=""
@@ -48,20 +48,20 @@ echo "commit: $commit"
echo "branch: $ray_branch"
echo "workload: $workload"
wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp36-cp36m-manylinux2014_x86_64.whl"
wheel="https://s3-us-west-2.amazonaws.com/ray-wheels/$ray_branch/$commit/ray-$ray_version-cp37-cp37m-manylinux2014_x86_64.whl"
echo set-window-option -g mouse on > ~/.tmux.conf
echo 'termcapinfo xterm* ti@:te@' > ~/.screenrc
# Serve load testing tool
rm -r wrk || true && git clone https://github.com/wg/wrk.git wrk && cd wrk && make -j && sudo cp wrk /usr/local/bin
pip install -U pip
unset RAY_ADDRESS
source activate tensorflow_p36
conda remove -y --force wrapt || true
cur_dir=$(pwd)
cd /tmp && rm -rf wrk && git clone https://github.com/wg/wrk.git wrk && cd wrk && make -j && cp wrk /usr/local/bin
cd "$cur_dir" || exit
pip install --upgrade pip
pip install -U tensorflow==1.14
pip install -q -U "$wheel" Click
pip install -q -U "$wheel"
pip install -q "ray[all]" "gym[atari]"
cd ..
ray stop && sleep 2
unset RAY_ADDRESS
python "./workloads/$workload.py"

View file

@@ -11,7 +11,7 @@ from ray.cluster_utils import Cluster
num_redis_shards = 1
redis_max_memory = 10**8
object_store_memory = 10**8
num_nodes = 5
num_nodes = 1
cluster = Cluster()
for i in range(num_nodes):
cluster.add_node(
@@ -22,21 +22,20 @@ for i in range(num_nodes):
resources={str(i): 2},
object_store_memory=object_store_memory,
redis_max_memory=redis_max_memory,
dashboard_host="0.0.0.0")
dashboard_host="0.0.0.0",
)
ray.init(address=cluster.address, dashboard_host="0.0.0.0")
client = serve.start()
@serve.accept_batch
def echo(_):
def echo(requests_batch):
time.sleep(0.01) # Sleep for 10ms
ray.show_in_dashboard(
str(serve.context.batch_size), key="Current batch size")
return ["hi {}".format(i) for i in range(serve.context.batch_size)]
return ["hi" for _ in range(len(requests_batch))]
config = {"num_replicas": 30, "max_batch_size": 16}
config = {"num_replicas": 7, "max_batch_size": 16}
client.create_backend("echo:v1", echo, config=config)
client.create_endpoint("echo", backend="echo:v1", route="/echo")
@@ -53,12 +52,18 @@ time_to_run = "60m"
while True:
proc = subprocess.Popen(
[
"wrk", "-c",
str(connections), "-t",
str(num_threads), "-s", time_to_run, "http://127.0.0.1:8000/echo"
"wrk",
"-c",
str(connections),
"-t",
str(num_threads),
"-d",
time_to_run,
"http://127.0.0.1:8000/echo",
],
stdout=PIPE,
stderr=PIPE)
stderr=PIPE,
)
print("started load testing")
proc.wait()
out, err = proc.communicate()

View file

@@ -11,19 +11,20 @@ from ray.cluster_utils import Cluster
num_redis_shards = 1
redis_max_memory = 10**8
object_store_memory = 10**8
num_nodes = 5
cpus_per_node = 2
num_nodes = 1
cpus_per_node = 10
cluster = Cluster()
for i in range(num_nodes):
cluster.add_node(
redis_port=6379 if i == 0 else None,
num_redis_shards=num_redis_shards if i == 0 else None,
num_cpus=2,
num_cpus=16,
num_gpus=0,
resources={str(i): 2},
object_store_memory=object_store_memory,
redis_max_memory=redis_max_memory,
dashboard_host="0.0.0.0")
dashboard_host="0.0.0.0",
)
ray.init(
address=cluster.address, dashboard_host="0.0.0.0", log_to_driver=False)