diff --git a/doc/source/serve/doc_code/counter_repro.py b/doc/source/serve/doc_code/counter_repro.py new file mode 100644 index 000000000..e98eb4560 --- /dev/null +++ b/doc/source/serve/doc_code/counter_repro.py @@ -0,0 +1,18 @@ +from ray import serve + +import logging + +logger = logging.getLogger("ray.serve") + + +@serve.deployment +class Counter: + def __init__(self): + self.count = 0 + + async def __call__(self, request): + self.count += 1 + return self.count + + +counter = Counter.bind() diff --git a/doc/source/serve/doc_code/monitoring.py b/doc/source/serve/doc_code/monitoring.py new file mode 100644 index 000000000..2334ff60f --- /dev/null +++ b/doc/source/serve/doc_code/monitoring.py @@ -0,0 +1,24 @@ +# __start_monitoring__ +# File name: monitoring.py + +from ray import serve + +import logging +import requests + +logger = logging.getLogger("ray.serve") + + +@serve.deployment +class SayHello: + async def __call__(self, request): + logger.info("Hello world!") + return "hi" + + +say_hello = SayHello.bind() + +serve.run(say_hello) + +requests.get("http://localhost:8000/") +# __end_monitoring__ diff --git a/doc/source/serve/monitoring.md b/doc/source/serve/monitoring.md index 48837a4d8..77bc6d041 100644 --- a/doc/source/serve/monitoring.md +++ b/doc/source/serve/monitoring.md @@ -1,23 +1,25 @@ (serve-monitoring)= -# Debugging & Monitoring +# Monitoring Ray Serve This section should help you understand how to debug and monitor your Serve application. +There are three main ways to do this: +Using the Ray Dashboard, using Ray logging, and using built-in Ray Serve metrics. ## Ray Dashboard A high-level way to monitor your Ray Serve application is via the Ray Dashboard. See the [Ray Dashboard documentation](ray-dashboard) for a detailed overview, including instructions on how to view the dashboard. -Below is an example of what the Ray Dashboard might look like for a Serve deployment: +Below is an example of what the "Actors" tab of the Ray Dashboard might look like for a running Serve application: -```{image} https://raw.githubusercontent.com/ray-project/Images/master/docs/dashboard/serve-dashboard.png +```{image} https://raw.githubusercontent.com/ray-project/Images/master/docs/dashboard/serve-dashboard-2-replicas.png :align: center ``` Here you can see the Serve controller actor, an HTTP proxy actor, and all of the replicas for each Serve deployment. To learn about the function of the controller and proxy actors, see the [Serve Architecture page](serve-architecture). -In this example pictured above, we have a single-node cluster with a deployment named Counter with `num_replicas=2`. +In this example pictured above, we have a single-node cluster with a deployment named `Translator` with `num_replicas=2`. ## Logging @@ -25,11 +27,11 @@ In this example pictured above, we have a single-node cluster with a deployment For an overview of logging in Ray, see [Ray Logging](ray-logging). ::: -Ray Serve uses Python's standard `logging` facility with the `"ray.serve"` named logger. +Ray Serve uses Python's standard `logging` facility with the logger named `"ray.serve"`. By default, logs are emitted from actors both to `stderr` and on disk on each node at `/tmp/ray/session_latest/logs/serve/`. This includes both system-level logs from the Serve controller and HTTP proxy as well as access logs and custom user logs produced from within deployment replicas. -In development, logs are streamed to the driver Ray program (the program that calls `.deploy()` or `serve.run`, or the `serve run` CLI command) that deployed the deployments, so it's most convenient to keep the driver running for debugging. +In development, logs are streamed to the driver Ray program (the Python script that calls `serve.run()` or the `serve run` CLI command), so it's most convenient to keep the driver running for debugging. For example, let's run a basic Serve application and view the logs that are emitted. You can run this in an interactive shell like IPython to follow along. @@ -51,47 +53,40 @@ This produces a few INFO-level log messages about startup from the Serve control Next, let's create a simple deployment that logs a custom log message when it's queried: -```python -import logging - -logger = logging.getLogger("ray.serve") - -@serve.deployment(route_prefix="/") -class SayHello: - def __call__(self, *args): - logger.info("Hello world!") - return "hi" - -SayHello.deploy() +```{literalinclude} ../serve/doc_code/monitoring.py +:start-after: __start_monitoring__ +:end-before: __end_monitoring__ +:language: python +:linenos: true ``` Running this code block, we first get some log messages from the controller saying that a new replica of the deployment is being created: ```bash -(ServeController pid=67312) INFO 2022-04-02 09:16:13,323 controller 67312 deployment_state.py:1198 - Adding 1 replicas to deployment 'SayHello'. +$ python monitoring.py +2022-08-02 16:16:21,498 INFO scripts.py:294 -- Deploying from import path: "monitoring:say_hello". +2022-08-02 16:16:24,141 INFO worker.py:1481 -- Started a local Ray instance. View the dashboard at http://127.0.0.1:8265. +(ServeController pid=71139) INFO 2022-08-02 16:16:28,542 controller 71139 http_state.py:123 - Starting HTTP proxy with name 'SERVE_CONTROLLER_ACTOR:SERVE_PROXY_ACTOR-f68e29b2048526b2d6ef5ba69e2b35d44b44d12ff3c85fc6132e9381' on node 'f68e29b2048526b2d6ef5ba69e2b35d44b44d12ff3c85fc6132e9381' listening on '127.0.0.1:8000' +(HTTPProxyActor pid=71150) INFO: Started server process [71150] +(ServeController pid=71139) INFO 2022-08-02 16:16:29,881 controller 71139 deployment_state.py:1232 - Adding 1 replicas to deployment 'SayHello'. +2022-08-02 16:16:31,889 SUCC scripts.py:315 -- Deployed successfully. ``` Then when we query the deployment, we get both a default access log as well as our custom `"Hello world!"` message. + +```python +import requests +requests.get("http://localhost:8000/") +``` + +``` +(HTTPProxyActor pid=71150) INFO 2022-08-02 16:17:02,502 http_proxy 127.0.0.1 http_proxy.py:315 - GET / 200 3.5ms +(ServeReplica:SayHello pid=71162) INFO 2022-08-02 16:17:02,501 SayHello SayHello#kKGBoa monitoring.py:13 - Hello world! +(ServeReplica:SayHello pid=71162) INFO 2022-08-02 16:17:02,501 SayHello SayHello#kKGBoa replica.py:482 - HANDLE __call__ OK 0.3ms +``` + Note that these log lines are tagged with the deployment name followed by a unique identifier for the specific replica. -These can be parsed by a logging stack such as ELK or Loki to enable searching logs by deployment and replica. - -```bash -handle = SayHello.get_handle() -ray.get(handle.remote()) -(SayHello pid=67352) INFO 2022-04-02 09:20:08,975 SayHello SayHello#LBINMh :8 - Hello world! -(SayHello pid=67352) INFO 2022-04-02 09:20:08,975 SayHello SayHello#LBINMh replica.py:466 - HANDLE __call__ OK 0.3ms -``` - -Querying the deployment over HTTP produces a similar access log message from the HTTP proxy: - -```bash -curl -X GET http://localhost:8000/ -(HTTPProxyActor pid=67315) INFO 2022-04-02 09:20:08,976 http_proxy 127.0.0.1 http_proxy.py:310 - GET / 200 2.6ms -(SayHello pid=67352) INFO 2022-04-02 09:20:08,975 SayHello SayHello#LBINMh :8 - Hello world! -(SayHello pid=67352) INFO 2022-04-02 09:20:08,975 SayHello SayHello#LBINMh replica.py:466 - HANDLE __call__ OK 0.3ms -``` - -You can also be able to view all of these log messages in the files in `/tmp/ray/session_latest/logs/serve/`. +These can be parsed by a logging stack such as ELK or Loki to enable searching logs by deployment and replica. These logs are stored at `/tmp/ray/session_latest/logs/serve/`. To silence the replica-level logs or otherwise configure logging, configure the `"ray.serve"` logger *from inside the deployment constructor:* @@ -165,14 +160,7 @@ Start Promtail and pass in the path to the config file we saved earlier: As above, you may need to replace `./promtail-darwin-amd64` with the appropriate filename and path. -Now we are ready to start our Ray Serve deployment. Start a long-running Ray cluster and Ray Serve instance in your terminal: - -```shell -ray start --head -serve start -``` - -Now run the following Python script to deploy a basic Serve deployment with a Serve deployment logger: +Run the following Python script to deploy a basic Serve deployment with a Serve deployment logger and make some requests: ```{literalinclude} ../../../python/ray/serve/examples/doc/deployment_logger.py ``` @@ -236,20 +224,28 @@ The following metrics are exposed by Ray Serve: - The number of non-200 HTTP responses returned by each deployment. ``` -To see this in action, run `ray start --head --metrics-export-port=8080` in your terminal, and then run the following script: +To see this in action, first run the following command to start Ray and set up the metrics export port: + +```bash +ray start --head --metrics-export-port=8080 +``` + +Then run the following script: ```{literalinclude} ../../../python/ray/serve/examples/doc/snippet_metrics.py ``` -In your web browser, navigate to `localhost:8080`. +The requests will loop and can be canceled with `Ctrl-C`. + +While this is running, in your web browser navigate to `localhost:8080`. In the output there, you can search for `serve_` to locate the metrics above. The metrics are updated once every ten seconds, and you will need to refresh the page to see the new values. -For example, after running the script for some time and refreshing `localhost:8080` you might see something that looks like: +For example, after running the script for some time and refreshing `localhost:8080` you should be able to find metrics similar to the following: ``` -ray_serve_deployment_processing_latency_ms_count{...,deployment="f",...} 99.0 -ray_serve_deployment_processing_latency_ms_sum{...,deployment="f",...} 99279.30498123169 +ray_serve_deployment_processing_latency_ms_count{..., replica="sleeper#jtzqhX"} 48.0 +ray_serve_deployment_processing_latency_ms_sum{..., replica="sleeper#jtzqhX"} 48160.6719493866 ``` which indicates that the average processing latency is just over one second, as expected. @@ -262,5 +258,13 @@ Here's an example: :start-after: __custom_metrics_deployment_start__ ``` +And the emitted logs: + +``` +# HELP ray_my_counter The number of odd-numbered requests to this deployment. +# TYPE ray_my_counter gauge +ray_my_counter{..., deployment="MyDeployment"} 5.0 +``` + See the [Ray Metrics documentation](ray-metrics) for more details, including instructions for scraping these metrics using Prometheus. diff --git a/doc/source/serve/user-guide.md b/doc/source/serve/user-guide.md index 2b8119855..872f9e17a 100644 --- a/doc/source/serve/user-guide.md +++ b/doc/source/serve/user-guide.md @@ -13,6 +13,6 @@ you will learn - [Serving ML Models](ml-models) - [Using Deployment Graph](deployment-graph) - [Deploying Ray Serve](deploying-serve) -- [Debugging/Monitoring](monitoring) +- [Monitoring Ray Serve](monitoring) - [Performance Tuning](performance) - [Autoscaling](autoscaling) diff --git a/python/ray/serve/examples/doc/deployment_logger.py b/python/ray/serve/examples/doc/deployment_logger.py index 348f0e6c7..f08f1004d 100644 --- a/python/ray/serve/examples/doc/deployment_logger.py +++ b/python/ray/serve/examples/doc/deployment_logger.py @@ -1,10 +1,7 @@ import logging -import ray from ray import serve import requests -ray.init(address="auto") - logger = logging.getLogger("ray.serve") @@ -19,7 +16,8 @@ class Counter: return {"count": self.count} -Counter.deploy() +counter = Counter.bind() +serve.run(counter) for i in range(10): - requests.get("http://127.0.0.1:8000/Counter") + requests.get("http://127.0.0.1:8000/") diff --git a/python/ray/serve/examples/doc/snippet_custom_metric.py b/python/ray/serve/examples/doc/snippet_custom_metric.py index 537f81957..4c5931440 100644 --- a/python/ray/serve/examples/doc/snippet_custom_metric.py +++ b/python/ray/serve/examples/doc/snippet_custom_metric.py @@ -1,34 +1,33 @@ -import ray +# __custom_metrics_deployment_start__ from ray import serve from ray.util import metrics import time - -ray.init(address="auto") -serve.start() +import requests -# __custom_metrics_deployment_start__ @serve.deployment class MyDeployment: def __init__(self): + self.num_requests = 0 self.my_counter = metrics.Counter( "my_counter", - description=("The number of excellent reqs to this deployment."), + description=("The number of odd-numbered requests to this deployment."), tag_keys=("deployment",), ) - self.my_counter.set_default_tags({"deployment": serve.get_current_deployment()}) + self.my_counter.set_default_tags({"deployment": "MyDeployment"}) - def call(self, excellent=False): - if excellent: + def __call__(self): + self.num_requests += 1 + if self.num_requests % 2 == 1: self.my_counter.inc() -# __custom_metrics_deployment_end__ +my_deployment = MyDeployment.bind() +serve.run(my_deployment) -MyDeployment.deploy() - -handle = MyDeployment.get_handle() while True: - ray.get(handle.call.remote(excellent=True)) + requests.get("http://localhost:8000/") time.sleep(1) + +# __custom_metrics_deployment_end__ diff --git a/python/ray/serve/examples/doc/snippet_metrics.py b/python/ray/serve/examples/doc/snippet_metrics.py index 7fa5691bf..067b260b0 100644 --- a/python/ray/serve/examples/doc/snippet_metrics.py +++ b/python/ray/serve/examples/doc/snippet_metrics.py @@ -1,19 +1,17 @@ -import ray from ray import serve import time - -ray.init(address="auto") -serve.start() +import requests @serve.deployment -def f(): +def sleeper(): time.sleep(1) -f.deploy() +s = sleeper.bind() + +serve.run(s) -handle = f.get_handle() while True: - ray.get(handle.remote()) + requests.get("http://localhost:8000/")