Mirror of https://github.com/vale981/ray, synced 2025-03-04 17:41:43 -05:00
[Doc] [Serve] Refresh code for "monitoring" for 2.0 (#27400)
parent b1d838446c
commit 1714d0266b
7 changed files with 120 additions and 79 deletions
doc/source/serve/doc_code/counter_repro.py (new file, 18 lines)

@@ -0,0 +1,18 @@
+from ray import serve
+
+import logging
+
+logger = logging.getLogger("ray.serve")
+
+
+@serve.deployment
+class Counter:
+    def __init__(self):
+        self.count = 0
+
+    async def __call__(self, request):
+        self.count += 1
+        return self.count
+
+
+counter = Counter.bind()
doc/source/serve/doc_code/monitoring.py (new file, 24 lines)

@@ -0,0 +1,24 @@
+# __start_monitoring__
+# File name: monitoring.py
+
+from ray import serve
+
+import logging
+import requests
+
+logger = logging.getLogger("ray.serve")
+
+
+@serve.deployment
+class SayHello:
+    async def __call__(self, request):
+        logger.info("Hello world!")
+        return "hi"
+
+
+say_hello = SayHello.bind()
+
+serve.run(say_hello)
+
+requests.get("http://localhost:8000/")
+# __end_monitoring__
doc/source/serve/monitoring.md

@@ -1,23 +1,25 @@
 (serve-monitoring)=
 
-# Debugging & Monitoring
+# Monitoring Ray Serve
 
 This section should help you understand how to debug and monitor your Serve application.
 There are three main ways to do this:
 using the Ray Dashboard, using Ray logging, and using built-in Ray Serve metrics.
 
 ## Ray Dashboard
 
 A high-level way to monitor your Ray Serve application is via the Ray Dashboard.
 See the [Ray Dashboard documentation](ray-dashboard) for a detailed overview, including instructions on how to view the dashboard.
 
-Below is an example of what the Ray Dashboard might look like for a Serve deployment:
+Below is an example of what the "Actors" tab of the Ray Dashboard might look like for a running Serve application:
 
-```{image} https://raw.githubusercontent.com/ray-project/Images/master/docs/dashboard/serve-dashboard.png
+```{image} https://raw.githubusercontent.com/ray-project/Images/master/docs/dashboard/serve-dashboard-2-replicas.png
 :align: center
 ```
 
 Here you can see the Serve controller actor, an HTTP proxy actor, and all of the replicas for each Serve deployment.
 To learn about the function of the controller and proxy actors, see the [Serve Architecture page](serve-architecture).
-In this example pictured above, we have a single-node cluster with a deployment named Counter with `num_replicas=2`.
+In the example pictured above, we have a single-node cluster with a deployment named `Translator` with `num_replicas=2`.
 
 ## Logging
@@ -25,11 +27,11 @@ In the example pictured above, we have a single-node cluster with a deployment
 For an overview of logging in Ray, see [Ray Logging](ray-logging).
 :::
 
-Ray Serve uses Python's standard `logging` facility with the `"ray.serve"` named logger.
+Ray Serve uses Python's standard `logging` facility with the logger named `"ray.serve"`.
 By default, logs are emitted from actors both to `stderr` and on disk on each node at `/tmp/ray/session_latest/logs/serve/`.
 This includes both system-level logs from the Serve controller and HTTP proxy as well as access logs and custom user logs produced from within deployment replicas.
 
-In development, logs are streamed to the driver Ray program (the program that calls `.deploy()` or `serve.run`, or the `serve run` CLI command) that deployed the deployments, so it's most convenient to keep the driver running for debugging.
+In development, logs are streamed to the driver Ray program (the Python script that calls `serve.run()` or the `serve run` CLI command), so it's most convenient to keep the driver running for debugging.
 For example, let's run a basic Serve application and view the logs that are emitted.
 You can run this in an interactive shell like IPython to follow along.
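As context for the on-disk path mentioned above, here is a minimal sketch (not part of this commit) for locating those files from Python; it assumes a Ray session is running on the local node so that `/tmp/ray/session_latest` is a valid symlink:

```python
# Sketch: list the Serve log files written to disk, as described above.
# Assumes a local Ray session, so /tmp/ray/session_latest exists on this node.
from pathlib import Path

serve_log_dir = Path("/tmp/ray/session_latest/logs/serve")
for log_file in sorted(serve_log_dir.iterdir()):
    # Roughly one file per Serve actor: controller, HTTP proxy, and replicas.
    print(log_file.name)
```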
@@ -51,47 +53,40 @@ This produces a few INFO-level log messages about startup from the Serve controller
 
 Next, let's create a simple deployment that logs a custom log message when it's queried:
 
-```python
-import logging
-
-logger = logging.getLogger("ray.serve")
-
-
-@serve.deployment(route_prefix="/")
-class SayHello:
-    def __call__(self, *args):
-        logger.info("Hello world!")
-        return "hi"
-
-
-SayHello.deploy()
+```{literalinclude} ../serve/doc_code/monitoring.py
+:start-after: __start_monitoring__
+:end-before: __end_monitoring__
+:language: python
+:linenos: true
 ```
 
 Running this code block, we first get some log messages from the controller saying that a new replica of the deployment is being created:
 
 ```bash
-(ServeController pid=67312) INFO 2022-04-02 09:16:13,323 controller 67312 deployment_state.py:1198 - Adding 1 replicas to deployment 'SayHello'.
+$ python monitoring.py
+2022-08-02 16:16:21,498 INFO scripts.py:294 -- Deploying from import path: "monitoring:say_hello".
+2022-08-02 16:16:24,141 INFO worker.py:1481 -- Started a local Ray instance. View the dashboard at http://127.0.0.1:8265.
+(ServeController pid=71139) INFO 2022-08-02 16:16:28,542 controller 71139 http_state.py:123 - Starting HTTP proxy with name 'SERVE_CONTROLLER_ACTOR:SERVE_PROXY_ACTOR-f68e29b2048526b2d6ef5ba69e2b35d44b44d12ff3c85fc6132e9381' on node 'f68e29b2048526b2d6ef5ba69e2b35d44b44d12ff3c85fc6132e9381' listening on '127.0.0.1:8000'
+(HTTPProxyActor pid=71150) INFO: Started server process [71150]
+(ServeController pid=71139) INFO 2022-08-02 16:16:29,881 controller 71139 deployment_state.py:1232 - Adding 1 replicas to deployment 'SayHello'.
+2022-08-02 16:16:31,889 SUCC scripts.py:315 -- Deployed successfully.
 ```
 
 Then when we query the deployment, we get both a default access log as well as our custom `"Hello world!"` message.
 
+```python
+import requests
+requests.get("http://localhost:8000/")
+```
+
+```
+(HTTPProxyActor pid=71150) INFO 2022-08-02 16:17:02,502 http_proxy 127.0.0.1 http_proxy.py:315 - GET / 200 3.5ms
+(ServeReplica:SayHello pid=71162) INFO 2022-08-02 16:17:02,501 SayHello SayHello#kKGBoa monitoring.py:13 - Hello world!
+(ServeReplica:SayHello pid=71162) INFO 2022-08-02 16:17:02,501 SayHello SayHello#kKGBoa replica.py:482 - HANDLE __call__ OK 0.3ms
+```
+
 Note that these log lines are tagged with the deployment name followed by a unique identifier for the specific replica.
-These can be parsed by a logging stack such as ELK or Loki to enable searching logs by deployment and replica.
-
-```bash
-handle = SayHello.get_handle()
-ray.get(handle.remote())
-(SayHello pid=67352) INFO 2022-04-02 09:20:08,975 SayHello SayHello#LBINMh <ipython-input-4-1e8854e5c9ba>:8 - Hello world!
-(SayHello pid=67352) INFO 2022-04-02 09:20:08,975 SayHello SayHello#LBINMh replica.py:466 - HANDLE __call__ OK 0.3ms
-```
-
-Querying the deployment over HTTP produces a similar access log message from the HTTP proxy:
-
-```bash
-curl -X GET http://localhost:8000/
-(HTTPProxyActor pid=67315) INFO 2022-04-02 09:20:08,976 http_proxy 127.0.0.1 http_proxy.py:310 - GET / 200 2.6ms
-(SayHello pid=67352) INFO 2022-04-02 09:20:08,975 SayHello SayHello#LBINMh <ipython-input-4-1e8854e5c9ba>:8 - Hello world!
-(SayHello pid=67352) INFO 2022-04-02 09:20:08,975 SayHello SayHello#LBINMh replica.py:466 - HANDLE __call__ OK 0.3ms
-```
-
-You can also view all of these log messages in the files in `/tmp/ray/session_latest/logs/serve/`.
+These can be parsed by a logging stack such as ELK or Loki to enable searching logs by deployment and replica. These logs are stored at `/tmp/ray/session_latest/logs/serve/`.
 
 To silence the replica-level logs or otherwise configure logging, configure the `"ray.serve"` logger *from inside the deployment constructor*:
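For reference, a minimal sketch of the pattern the last line above describes, configuring the `"ray.serve"` logger inside the deployment constructor (the `QuietDeployment` name and the `WARNING` level here are illustrative, not from this commit):

```python
import logging

from ray import serve

logger = logging.getLogger("ray.serve")


@serve.deployment
class QuietDeployment:
    def __init__(self):
        # Configuring the "ray.serve" logger here means every replica applies
        # the setting when its actor starts up.
        logging.getLogger("ray.serve").setLevel(logging.WARNING)

    async def __call__(self, request):
        # INFO-level messages from this replica are now suppressed.
        logger.info("This message is not emitted.")
        return "ok"
```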
@@ -165,14 +160,7 @@ Start Promtail and pass in the path to the config file we saved earlier:
 
 As above, you may need to replace `./promtail-darwin-amd64` with the appropriate filename and path.
 
-Now we are ready to start our Ray Serve deployment. Start a long-running Ray cluster and Ray Serve instance in your terminal:
-
-```shell
-ray start --head
-serve start
-```
-
-Now run the following Python script to deploy a basic Serve deployment with a Serve deployment logger:
+Run the following Python script to deploy a basic Serve deployment with a Serve deployment logger and make some requests:
 
 ```{literalinclude} ../../../python/ray/serve/examples/doc/deployment_logger.py
 ```
@@ -236,20 +224,28 @@ The following metrics are exposed by Ray Serve:
 - The number of non-200 HTTP responses returned by each deployment.
 ```
 
-To see this in action, run `ray start --head --metrics-export-port=8080` in your terminal, and then run the following script:
+To see this in action, first run the following command to start Ray and set up the metrics export port:
+
+```bash
+ray start --head --metrics-export-port=8080
+```
+
+Then run the following script:
 
 ```{literalinclude} ../../../python/ray/serve/examples/doc/snippet_metrics.py
 ```
 
-In your web browser, navigate to `localhost:8080`.
+The requests will loop and can be canceled with `Ctrl-C`.
+
+While this is running, in your web browser navigate to `localhost:8080`.
 In the output there, you can search for `serve_` to locate the metrics above.
 The metrics are updated once every ten seconds, and you will need to refresh the page to see the new values.
 
-For example, after running the script for some time and refreshing `localhost:8080` you might see something that looks like:
+For example, after running the script for some time and refreshing `localhost:8080` you should be able to find metrics similar to the following:
 
 ```
-ray_serve_deployment_processing_latency_ms_count{...,deployment="f",...} 99.0
-ray_serve_deployment_processing_latency_ms_sum{...,deployment="f",...} 99279.30498123169
+ray_serve_deployment_processing_latency_ms_count{..., replica="sleeper#jtzqhX"} 48.0
+ray_serve_deployment_processing_latency_ms_sum{..., replica="sleeper#jtzqhX"} 48160.6719493866
 ```
 
 which indicates that the average processing latency is just over one second, as expected.
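As a sanity check on the numbers above: 48160.67 / 48 ≈ 1003 ms (and likewise 99279.3 / 99 ≈ 1003 ms in the removed lines), which is indeed just over one second per request. A small sketch (not part of this commit) for inspecting the exported metrics without a browser, assuming the `--metrics-export-port=8080` setting from above:

```python
# Sketch: fetch the Prometheus-format text from the metrics port and print
# only the Serve metrics, mirroring the "search for serve_" step above.
import requests

metrics_text = requests.get("http://localhost:8080").text
for line in metrics_text.splitlines():
    if "serve_" in line:
        print(line)
```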
@@ -262,5 +258,13 @@ Here's an example:
 :start-after: __custom_metrics_deployment_start__
 ```
 
+And the emitted metrics:
+
+```
+# HELP ray_my_counter The number of odd-numbered requests to this deployment.
+# TYPE ray_my_counter gauge
+ray_my_counter{..., deployment="MyDeployment"} 5.0
+```
+
 See the
 [Ray Metrics documentation](ray-metrics) for more details, including instructions for scraping these metrics using Prometheus.
@@ -13,6 +13,6 @@ you will learn
 - [Serving ML Models](ml-models)
 - [Using Deployment Graph](deployment-graph)
 - [Deploying Ray Serve](deploying-serve)
-- [Debugging/Monitoring](monitoring)
+- [Monitoring Ray Serve](monitoring)
 - [Performance Tuning](performance)
 - [Autoscaling](autoscaling)
python/ray/serve/examples/doc/deployment_logger.py

@@ -1,10 +1,7 @@
 import logging
-import ray
 from ray import serve
 import requests
 
-ray.init(address="auto")
-
 logger = logging.getLogger("ray.serve")
 
 

@@ -19,7 +16,8 @@ class Counter:
         return {"count": self.count}
 
 
-Counter.deploy()
+counter = Counter.bind()
+serve.run(counter)
 
 for i in range(10):
-    requests.get("http://127.0.0.1:8000/Counter")
+    requests.get("http://127.0.0.1:8000/")
@@ -1,34 +1,33 @@
-import ray
+# __custom_metrics_deployment_start__
 from ray import serve
 from ray.util import metrics
 
 import time
-
-ray.init(address="auto")
-serve.start()
+import requests
 
 
-# __custom_metrics_deployment_start__
 @serve.deployment
 class MyDeployment:
     def __init__(self):
+        self.num_requests = 0
         self.my_counter = metrics.Counter(
             "my_counter",
-            description=("The number of excellent reqs to this deployment."),
+            description=("The number of odd-numbered requests to this deployment."),
             tag_keys=("deployment",),
         )
-        self.my_counter.set_default_tags({"deployment": serve.get_current_deployment()})
+        self.my_counter.set_default_tags({"deployment": "MyDeployment"})
 
-    def call(self, excellent=False):
-        if excellent:
+    def __call__(self):
+        self.num_requests += 1
+        if self.num_requests % 2 == 1:
             self.my_counter.inc()
 
 
-# __custom_metrics_deployment_end__
+my_deployment = MyDeployment.bind()
+serve.run(my_deployment)
 
-MyDeployment.deploy()
-
-handle = MyDeployment.get_handle()
 while True:
-    ray.get(handle.call.remote(excellent=True))
+    requests.get("http://localhost:8000/")
     time.sleep(1)
+# __custom_metrics_deployment_end__
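The diff above exercises the `ray.util.metrics.Counter` API. As a compact illustration of the tagging behavior it relies on (the `example_counter` name and `route` tag are illustrative, not from this commit), tags declared via `tag_keys` can be filled either by defaults or per call:

```python
import ray
from ray.util import metrics

ray.init()

request_counter = metrics.Counter(
    "example_counter",
    description="Requests seen, broken down by route.",
    tag_keys=("route",),
)
# Either set default tag values once...
request_counter.set_default_tags({"route": "/"})
request_counter.inc()
# ...or override them on a per-increment basis.
request_counter.inc(tags={"route": "/health"})
```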
python/ray/serve/examples/doc/snippet_metrics.py

@@ -1,19 +1,17 @@
-import ray
 from ray import serve
 
 import time
-
-ray.init(address="auto")
-serve.start()
+import requests
 
 
 @serve.deployment
-def f():
+def sleeper():
     time.sleep(1)
 
 
-f.deploy()
+s = sleeper.bind()
 
-handle = f.get_handle()
+serve.run(s)
+
 while True:
-    ray.get(handle.remote())
+    requests.get("http://localhost:8000/")