ray/release/ray_release/alerts/tune_tests.py
Kai Fricke 331b71ea8d
[ci/release] Refactor release test e2e into package (#22351)
Adds a unit-tested and restructured ray_release package for running release tests.

Relevant changes in behavior:

By default, Buildkite will wait for the wheels of the current commit to be available. Alternatively, users can specify a) a different commit hash, b) a wheels URL (which we will also wait for to be available), or c) a branch (or user/branch combination), in which case the latest available wheels will be used (e.g. if master is passed, behavior matches the old default behavior).

The main subpackages are:

    Cluster manager: Creates cluster envs/computes, starts cluster, terminates cluster
    Command runner: Runs commands, e.g. as client command or sdk command
    File manager: Uploads/downloads files to/from session
    Reporter: Reports results (e.g. to database)

Much of the code base is unit tested, but there are probably some pieces missing.

Example build (waited for wheels to be built): https://buildkite.com/ray-project/kf-dev/builds/51#_
Wheel build: https://buildkite.com/ray-project/ray-builders-branch/builds/6023
2022-02-16 17:35:02 +00:00

68 lines
2.1 KiB
Python

from typing import Optional
from ray_release.config import Test
from ray_release.result import Result
def handle_result(
    test: Test,
    result: Result,
) -> Optional[str]:
    """Check a Tune release test result against its per-test thresholds.

    Args:
        test: Test configuration. The test is identified by
            ``test["legacy"]["test_name"]``.
        result: Outcome of the run. ``result.status`` and the
            ``result.results`` mapping are read (keys ``time_taken``,
            ``trial_states``, ``smoke_test`` and ``last_update_diff``).
            A missing (``None``) or empty mapping is treated as "no metrics
            reported", so the pessimistic defaults below apply.

    Returns:
        An alert message describing every failed check, or ``None`` if all
        checks passed. Test names without registered thresholds always
        yield ``None``, regardless of ``result.status``.

    Raises:
        KeyError: If ``test`` has no ``["legacy"]["test_name"]`` entry.
    """
    test_name = test["legacy"]["test_name"]

    # Treat a missing results payload like an empty one so that the
    # defaults (infinite time, zero terminated trials) kick in.
    results = result.results or {}
    time_taken = results.get("time_taken", float("inf"))
    num_terminated = results.get("trial_states", {}).get("TERMINATED", 0)
    was_smoke_test = results.get("smoke_test", False)

    msg = ""
    if result.status != "finished":
        if result.status == "timeout":
            # The trailing space separates this fragment from any that
            # follow, matching every other fragment built in this function.
            msg += "Test timed out. "
        else:
            msg += "Test script failed. "

    if test_name == "long_running_large_checkpoints":
        # This test is judged solely by how recently its results json was
        # updated; the status fragment collected above is not reported.
        last_update_diff = results.get("last_update_diff", float("inf"))
        target_update_diff = 360

        if last_update_diff > target_update_diff:
            return (
                f"Last update to results json was too long ago "
                f"({last_update_diff:.2f} > {target_update_diff})"
            )
        return None

    # test name -> (minimum TERMINATED trials, maximum time_taken).
    # time_taken is presumably in seconds -- TODO confirm with the test scripts.
    thresholds = {
        "bookkeeping_overhead": (10000, 800),
        "durable_trainable": (16, 600),
        "network_overhead": (20, 400) if was_smoke_test else (100, 900),
        "result_throughput_cluster": (1000, 120),
        "result_throughput_single_node": (96, 120),
        "xgboost_sweep": (31, 3600),
    }
    if test_name not in thresholds:
        # No thresholds registered for this test: nothing to alert on.
        return None
    target_terminated, target_time = thresholds[test_name]

    if num_terminated < target_terminated:
        msg += (
            f"Some trials failed "
            f"(num_terminated={num_terminated} < {target_terminated}). "
        )

    if time_taken > target_time:
        msg += (
            f"Took too long to complete "
            f"(time_taken={time_taken:.2f} > {target_time}). "
        )

    return msg or None