Fix dashboard with prometheus-client 0.14 (#23766)

Why are these changes needed?

The dashboard wasn't working (blank screen). See the linked issue for details. The cause is this exception in /tmp/ray/session_latest/logs/dashboard_agent.log:

Traceback (most recent call last):
  File "/usr/local/lib/python3.9/site-packages/ray/dashboard/agent.py", line 391, in <module>
    loop.run_until_complete(agent.run())
  File "/usr/local/lib/python3.9/asyncio/base_events.py", line 647, in run_until_complete
    return future.result()
  File "/usr/local/lib/python3.9/site-packages/ray/dashboard/agent.py", line 178, in run
    modules = self._load_modules()
  File "/usr/local/lib/python3.9/site-packages/ray/dashboard/agent.py", line 120, in _load_modules
    c = cls(self)
  File "/usr/local/lib/python3.9/site-packages/ray/dashboard/modules/reporter/reporter_agent.py", line 161, in __init__
    self._metrics_agent = MetricsAgent(
  File "/usr/local/lib/python3.9/site-packages/ray/_private/metrics_agent.py", line 75, in __init__
    prometheus_exporter.new_stats_exporter(
  File "/usr/local/lib/python3.9/site-packages/ray/_private/prometheus_exporter.py", line 332, in new_stats_exporter
    exporter = PrometheusStatsExporter(
  File "/usr/local/lib/python3.9/site-packages/ray/_private/prometheus_exporter.py", line 265, in __init__
    self.serve_http()
  File "/usr/local/lib/python3.9/site-packages/ray/_private/prometheus_exporter.py", line 319, in serve_http
    start_http_server(
  File "/usr/local/lib/python3.9/site-packages/prometheus_client/exposition.py", line 167, in start_wsgi_server
    TmpServer.address_family, addr = _get_best_family(addr, port)
  File "/usr/local/lib/python3.9/site-packages/prometheus_client/exposition.py", line 156, in _get_best_family
    infos = socket.getaddrinfo(address, port)
  File "/usr/local/lib/python3.9/socket.py", line 954, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno -2] Name or service not known
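A recent change in prometheus-client passes the address given to start_http_server on to socket.getaddrinfo, which raises the error above when the address is an empty string. An empty string can therefore no longer be passed, but we can get the same effect by not passing an address at all (omitting the addr argument), so that the library's default is used.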

Related issue number
Closes #23765
This commit is contained in:
jonathan-conder-sm 2022-08-02 05:25:38 +12:00 committed by GitHub
parent 410fe1b5ec
commit 1d5fef2004
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 5 additions and 2 deletions

View file

@ -298,7 +298,9 @@ class PrometheusStatsExporter(base_exporter.StatsExporter):
def serve_http(self):
"""serve_http serves the Prometheus endpoint."""
start_http_server(port=self.options.port, addr=str(self.options.address))
address = str(self.options.address)
kwargs = {"addr": address} if address else {}
start_http_server(port=self.options.port, **kwargs)
def new_stats_exporter(option):

View file

@ -196,10 +196,11 @@ class Monitor:
AUTOSCALER_METRIC_PORT
)
)
kwargs = {"addr": "127.0.0.1"} if head_node_ip == "127.0.0.1" else {}
prometheus_client.start_http_server(
port=AUTOSCALER_METRIC_PORT,
addr="127.0.0.1" if head_node_ip == "127.0.0.1" else "",
registry=self.prom_metrics.registry,
**kwargs,
)
except Exception:
logger.exception(