mirror of
https://github.com/vale981/ray
synced 2025-03-10 05:16:49 -04:00

* Improve reporter module
* Add test_node_physical_stats to test_reporter.py
* Add test_class_method_route_table to test_dashboard.py
* Add stats_collector module for dashboard
* Subscribe actor table data
* Add log module for dashboard
* Only enable test module in some test cases
* CI run all dashboard tests
* Reduce test timeout to 10s
* Use fstring
* Remove unused code
* Remove blank line
* Fix dashboard tests
* Fix asyncio.create_task not available in py36; Fix lint
* Add format_web_url to ray.test_utils
* Update dashboard/modules/reporter/reporter_head.py
  Co-authored-by: Max Fitton <mfitton@berkeley.edu>
* Add DictChangeItem type for Dict change
* Refine logger.exception
* Refine GET /api/launch_profiling
* Remove disable_test_module fixture
* Fix test_basic may fail

Co-authored-by: 刘宝 <po.lb@antfin.com>
Co-authored-by: Max Fitton <mfitton@berkeley.edu>
102 lines
3.1 KiB
Python
102 lines
3.1 KiB
Python
import os
|
|
import sys
|
|
import logging
|
|
import requests
|
|
import time
|
|
|
|
import pytest
|
|
import ray
|
|
from ray.new_dashboard.tests.conftest import * # noqa
|
|
from ray.test_utils import (
|
|
format_web_url,
|
|
RayTestTimeoutException,
|
|
wait_until_server_available,
|
|
wait_for_condition,
|
|
)
|
|
|
|
# Set at import time, i.e. before any test in this module calls ray.init().
# NOTE(review): presumably this opts Ray in to the new dashboard
# implementation -- confirm against where RAY_USE_NEW_DASHBOARD is read.
os.environ["RAY_USE_NEW_DASHBOARD"] = "1"

# Module-level logger used by the tests below for diagnostic output.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def test_profiling(shutdown_only):
    """Request profiling of a live actor through ``/api/launch_profiling``.

    Starts Ray with the dashboard enabled, creates one actor, and polls the
    dashboard endpoint with the actor's PID until the response reports a
    truthy ``"result"``. Raises ``RayTestTimeoutException`` once more than
    10 seconds have elapsed without a successful response.

    Args:
        shutdown_only: pytest fixture (not used directly in the body;
            presumably tears Ray down after the test -- see conftest).
    """
    cluster_info = ray.init(include_dashboard=True, num_cpus=6)

    @ray.remote(num_cpus=2)
    class Actor:
        def getpid(self):
            return os.getpid()

    # Keep the handle bound for the whole test so the actor stays referenced.
    actor_handle = Actor.remote()
    actor_pid = ray.get(actor_handle.getpid.remote())

    raw_url = cluster_info["webui_url"]
    assert (wait_until_server_available(raw_url) is True)
    webui_url = format_web_url(raw_url)

    began = time.time()
    reply = None
    while True:
        # Sometimes some startup time is required, so keep retrying until
        # the 10 second budget is exhausted.
        elapsed = time.time() - began
        if elapsed > 10:
            raise RayTestTimeoutException(
                "Timed out while collecting profiling stats, "
                f"launch_profiling: {reply}")

        query = {
            "ip": ray.nodes()[0]["NodeManagerAddress"],
            "pid": actor_pid,
            "duration": 5
        }
        response = requests.get(
            webui_url + "/api/launch_profiling", params=query)
        reply = response.json()

        if not reply["result"]:
            # Not ready yet: back off briefly before the next attempt.
            time.sleep(1)
            continue

        profiling_info = reply["data"]["profilingInfo"]
        break

    logger.info(profiling_info)
|
|
|
|
|
|
def test_node_physical_stats(enable_test_module, shutdown_only):
    """Check that the dashboard's node physical stats list the actor workers.

    Starts Ray with the dashboard enabled, creates six actors, and polls
    ``/test/dump?key=node_physical_stats`` until the reported stats contain
    exactly one node whose ``ray::Actor`` worker PIDs equal the actors' PIDs
    and whose own cmdline mentions ``raylet``. Polling is delegated to
    ``wait_for_condition`` with a 10 second timeout.

    Args:
        enable_test_module: pytest fixture (not used directly in the body;
            presumably enables the ``/test/dump`` route -- see conftest).
        shutdown_only: pytest fixture (not used directly in the body;
            presumably tears Ray down after the test -- see conftest).
    """
    cluster_info = ray.init(include_dashboard=True, num_cpus=6)

    # The class must stay named ``Actor``: the check below matches worker
    # cmdlines against the literal "ray::Actor".
    @ray.remote(num_cpus=1)
    class Actor:
        def getpid(self):
            return os.getpid()

    # Keep the handles bound for the whole test so the actors stay referenced.
    actors = [Actor.remote() for _ in range(6)]
    pid_refs = [handle.getpid.remote() for handle in actors]
    expected_pids = set(ray.get(pid_refs))

    raw_url = cluster_info["webui_url"]
    assert (wait_until_server_available(raw_url) is True)
    webui_url = format_web_url(raw_url)
    dump_url = webui_url + "/test/dump?key=node_physical_stats"

    def _stats_match_actors():
        """Return True once the dumped stats match; False on any failure."""
        try:
            response = requests.get(dump_url)
            response.raise_for_status()
            payload = response.json()
            assert payload["result"] is True

            stats_by_node = payload["data"]["nodePhysicalStats"]
            assert len(stats_by_node) == 1
            node_stats = stats_by_node[cluster_info["raylet_ip_address"]]

            # Check Actor workers
            reported_pids = {
                worker["pid"]
                for worker in node_stats["workers"]
                if "ray::Actor" in worker["cmdline"][0]
            }
            assert reported_pids == expected_pids

            # Check raylet cmdline
            assert "raylet" in node_stats["cmdline"][0]
        except Exception as ex:
            # Any failure (HTTP error, missing key, failed assertion) just
            # means "not ready yet"; log it and let the caller retry.
            logger.info(ex)
            return False
        return True

    wait_for_condition(_stats_match_actors, timeout=10)
|
|
|
|
|
|
if __name__ == "__main__":
    # Running this file directly hands it to pytest in verbose mode and
    # uses pytest's return value as the process exit status.
    exit_status = pytest.main(["-v", __file__])
    sys.exit(exit_status)
|