mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
[metrics] Standardize metrics.Count API to prometheus counter (#14498)
This commit is contained in:
parent
505d2b6abe
commit
59221b2f31
7 changed files with 62 additions and 19 deletions
|
@ -190,7 +190,7 @@ class RayServeReplica:
|
|||
|
||||
self.num_ongoing_requests = 0
|
||||
|
||||
self.request_counter = metrics.Count(
|
||||
self.request_counter = metrics.Counter(
|
||||
"serve_backend_request_counter",
|
||||
description=("The number of queries that have been "
|
||||
"processed in this replica."),
|
||||
|
@ -321,7 +321,7 @@ class RayServeReplica:
|
|||
try:
|
||||
result = await method_to_call(arg)
|
||||
result = await self.ensure_serializable_response(result)
|
||||
self.request_counter.record(1)
|
||||
self.request_counter.inc()
|
||||
except Exception as e:
|
||||
import os
|
||||
if "RAY_PDB" in os.environ:
|
||||
|
@ -355,7 +355,7 @@ class RayServeReplica:
|
|||
"Please only send the same type of requests in batching "
|
||||
"mode.")
|
||||
|
||||
self.request_counter.record(batch_size)
|
||||
self.request_counter.inc(batch_size)
|
||||
|
||||
call_method = sync_to_async(call_methods.pop())
|
||||
result_list = await call_method(args)
|
||||
|
|
|
@ -10,7 +10,7 @@ client = serve.start()
|
|||
|
||||
class MyBackendClass:
|
||||
def __init__(self):
|
||||
self.my_counter = metrics.Count(
|
||||
self.my_counter = metrics.Counter(
|
||||
"my_counter",
|
||||
description=("The number of excellent requests to this backend."),
|
||||
tag_keys=("backend", ))
|
||||
|
@ -20,7 +20,7 @@ class MyBackendClass:
|
|||
|
||||
def __call__(self, request):
|
||||
if "excellent" in request.query_params:
|
||||
self.my_counter.record(1)
|
||||
self.my_counter.inc()
|
||||
|
||||
|
||||
client.create_backend("my_backend", MyBackendClass)
|
||||
|
|
|
@ -61,7 +61,7 @@ class RayServeHandle:
|
|||
self.handle_options = handle_options or HandleOptions()
|
||||
self.handle_tag = f"{self.endpoint_name}#{get_random_letters()}"
|
||||
|
||||
self.request_counter = metrics.Count(
|
||||
self.request_counter = metrics.Counter(
|
||||
"serve_handle_request_counter",
|
||||
description=("The number of handle.remote() calls that have been "
|
||||
"made on this handle."),
|
||||
|
@ -115,7 +115,7 @@ class RayServeHandle:
|
|||
``**kwargs``: All keyword arguments will be available in
|
||||
``request.query_params``.
|
||||
"""
|
||||
self.request_counter.record(1)
|
||||
self.request_counter.inc()
|
||||
return await self.router._remote(
|
||||
self.endpoint_name, self.handle_options, request_data, kwargs)
|
||||
|
||||
|
@ -148,7 +148,7 @@ class RayServeSyncHandle(RayServeHandle):
|
|||
``**kwargs``: All keyword arguments will be available in
|
||||
``request.args``.
|
||||
"""
|
||||
self.request_counter.record(1)
|
||||
self.request_counter.inc()
|
||||
coro = self.router._remote(self.endpoint_name, self.handle_options,
|
||||
request_data, kwargs)
|
||||
future: concurrent.futures.Future = asyncio.run_coroutine_threadsafe(
|
||||
|
|
|
@ -106,7 +106,7 @@ class HTTPProxy:
|
|||
LongPollKey.ROUTE_TABLE: self._update_route_table,
|
||||
})
|
||||
|
||||
self.request_counter = metrics.Count(
|
||||
self.request_counter = metrics.Counter(
|
||||
"serve_num_http_requests",
|
||||
description="The number of HTTP requests processed.",
|
||||
tag_keys=("route", ))
|
||||
|
@ -156,7 +156,7 @@ class HTTPProxy:
|
|||
assert scope["type"] == "http"
|
||||
current_path = scope["path"]
|
||||
|
||||
self.request_counter.record(1, tags={"route": current_path})
|
||||
self.request_counter.inc(tags={"route": current_path})
|
||||
|
||||
await self.router(scope, receive, send)
|
||||
|
||||
|
|
|
@ -191,7 +191,7 @@ class Router:
|
|||
self._pending_endpoints: Dict[str, asyncio.Future] = dict()
|
||||
|
||||
# -- Metrics Registration -- #
|
||||
self.num_router_requests = metrics.Count(
|
||||
self.num_router_requests = metrics.Counter(
|
||||
"serve_num_router_requests",
|
||||
description="The number of requests processed by the router.",
|
||||
tag_keys=("endpoint", ))
|
||||
|
@ -286,7 +286,7 @@ class Router:
|
|||
(await self._get_or_create_replica_set(backend)
|
||||
.assign_replica(query))
|
||||
|
||||
self.num_router_requests.record(1, tags={"endpoint": endpoint})
|
||||
self.num_router_requests.inc(tags={"endpoint": endpoint})
|
||||
|
||||
return result_ref
|
||||
|
||||
|
|
|
@ -30,15 +30,15 @@ def _setup_cluster_for_test(ray_start_cluster):
|
|||
|
||||
# Generate a metric in the driver.
|
||||
counter = Count("test_driver_counter", description="desc")
|
||||
counter.record(1)
|
||||
counter.inc()
|
||||
|
||||
# Generate some metrics from actor & tasks.
|
||||
@ray.remote
|
||||
def f():
|
||||
counter = Count("test_counter", description="desc")
|
||||
counter.record(1)
|
||||
counter.inc()
|
||||
counter = ray.get(ray.put(counter)) # Test serialization.
|
||||
counter.record(1)
|
||||
counter.inc()
|
||||
ray.get(worker_should_exit.wait.remote())
|
||||
|
||||
@ray.remote
|
||||
|
@ -211,6 +211,11 @@ def test_basic_custom_metrics(metric_mock):
|
|||
# Make sure each of metric works as expected.
|
||||
# -- Count --
|
||||
count = Count("count", tag_keys=("a", ))
|
||||
with pytest.raises(TypeError):
|
||||
count.inc("hi")
|
||||
with pytest.raises(ValueError):
|
||||
count.inc(0)
|
||||
count.inc(-1)
|
||||
count._metric = metric_mock
|
||||
count.record(1, {"a": "1"})
|
||||
metric_mock.record.assert_called_with(1, tags={"a": "1"})
|
||||
|
|
|
@ -69,7 +69,7 @@ class Metric:
|
|||
self._default_tags = default_tags
|
||||
return self
|
||||
|
||||
def record(self, value: float, tags: dict = None) -> None:
|
||||
def record(self, value: float, tags: Dict[str, str] = None) -> None:
|
||||
"""Record the metric point of the metric.
|
||||
|
||||
Tags passed in will take precedence over the metric's default tags.
|
||||
|
@ -125,10 +125,11 @@ class Metric:
|
|||
}
|
||||
|
||||
|
||||
class Count(Metric):
|
||||
"""The count of the number of metric points.
|
||||
class Counter(Metric):
|
||||
"""A cumulative metric that is monotonically increasing.
|
||||
|
||||
This is corresponding to Prometheus' Count metric.
|
||||
This corresponds to Prometheus' counter metric:
|
||||
https://prometheus.io/docs/concepts/metric_types/#counter
|
||||
|
||||
Args:
|
||||
name(str): Name of the metric.
|
||||
|
@ -149,6 +150,43 @@ class Count(Metric):
|
|||
serialized_data = (self._name, self._description, self._tag_keys)
|
||||
return deserializer, serialized_data
|
||||
|
||||
def inc(self, value: float = 1.0, tags: Dict[str, str] = None):
|
||||
"""Increment the counter by `value` (defaults to 1).
|
||||
|
||||
Args:
|
||||
value(int, float): Value to increment the counter by (default=1).
|
||||
tags(Dict[str, str]): Tags to set or override for this counter.
|
||||
"""
|
||||
if not isinstance(value, (int, float)):
|
||||
raise TypeError(f"value must be int or float, got {type(value)}.")
|
||||
if value <= 0:
|
||||
raise ValueError(f"value must be >0, got {value}")
|
||||
|
||||
self.record(value, tags)
|
||||
|
||||
|
||||
class Count(Counter):
|
||||
"""The count of the number of metric points.
|
||||
|
||||
This corresponds to Prometheus' 'Count' metric.
|
||||
|
||||
This class is DEPRECATED, please use ray.util.metrics.Counter instead.
|
||||
|
||||
Args:
|
||||
name(str): Name of the metric.
|
||||
description(str): Description of the metric.
|
||||
tag_keys(tuple): Tag keys of the metric.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
name: str,
|
||||
description: str = "",
|
||||
tag_keys: Optional[Tuple[str]] = None):
|
||||
logger.warning(
|
||||
"`metrics.Count` has been renamed to `metrics.Counter`. "
|
||||
"`metrics.Count` will be removed in a future release.")
|
||||
super().__init__(name, description, tag_keys)
|
||||
|
||||
|
||||
class Histogram(Metric):
|
||||
"""Histogram distribution of metric points.
|
||||
|
|
Loading…
Add table
Reference in a new issue