ray/release/alerts/long_running_tests.py
SangBin Cho b1308b1c8c
[Test Infra] Unrevert team col (#21700)
This fixes the previous problems from team column revert.

This has 2 additional changes:

alert handler receives the team argument, which was the root cause of breakage; https://github.com/ray-project/ray/pull/21289

Previously, tests without a team column raised an exception; I have weakened that check so it only logs a warning. I will eventually change it back to raising an exception, but for a smoother transition we will log a warning instead for a short time.
2022-01-19 13:29:53 -08:00

40 lines
1.3 KiB
Python

import datetime
from typing import Dict, Optional
def handle_result(created_on: datetime.datetime, category: str,
                  test_suite: str, test_name: str, status: str, results: Dict,
                  artifacts: Dict, last_logs: str, team: str) -> Optional[str]:
    """Alert when a long running test has stopped updating its results.

    The handler reads ``results["last_update_diff"]`` (treated as infinitely
    stale when the key is absent) and compares it against a per-test
    threshold. Only ``test_suite``, ``test_name`` and ``results`` are
    inspected; the remaining parameters are accepted but not used here.

    Args:
        created_on: Not used by this handler.
        category: Not used by this handler.
        test_suite: Must equal ``"long_running_tests"``.
        test_name: Name used to pick the staleness threshold.
        status: Not used by this handler.
        results: Mapping that may carry a ``"last_update_diff"`` entry.
        artifacts: Not used by this handler.
        last_logs: Not used by this handler.
        team: Not used by this handler.

    Returns:
        An alert message when the last update is older than the threshold
        for ``test_name``; ``None`` when it is recent enough or when
        ``test_name`` has no configured threshold.

    Raises:
        AssertionError: If ``test_suite`` is not ``"long_running_tests"``.
    """
    assert test_suite == "long_running_tests"

    # A missing entry counts as "never updated", which always alerts for
    # tests that have a threshold.
    staleness = results.get("last_update_diff", float("inf"))

    # (test names, maximum tolerated staleness) pairs, checked in order.
    thresholds = (
        # Core tests
        (
            (
                "actor_deaths",
                "many_actor_tasks",
                "many_drivers",
                "many_tasks",
                "many_tasks_serialized_ids",
                "node_failures",
                "object_spilling_shuffle",
            ),
            120,
        ),
        # Tune/RLLib style tests
        (("apex", "impala", "many_ppo", "pbt"), 360),
        # Serve tests have workload logs every five minutes.
        # Leave up to 60 seconds overhead.
        (("serve", "serve_failure"), 360),
    )

    limit = None
    for names, max_staleness in thresholds:
        if test_name in names:
            limit = max_staleness
            break

    if limit is None:
        # Tests without a configured threshold never alert.
        return None

    if staleness > limit:
        return ("Last update to results json was too long ago "
                f"({staleness:.2f} > {limit})")
    return None