mirror of
https://github.com/vale981/ray
synced 2025-03-06 02:21:39 -05:00

Currently release tests fail because they exceed the (rather arbitrary) timeout by 1-2 seconds.
68 lines
2.1 KiB
Python
68 lines
2.1 KiB
Python
from typing import Optional
|
|
|
|
from ray_release.config import Test
|
|
from ray_release.result import Result
|
|
|
|
|
|
def handle_result(
    test: Test,
    result: Result,
) -> Optional[str]:
    """Check a release test result against its per-test thresholds.

    The test is identified by ``test["legacy"]["test_name"]``:

    * ``long_running_large_checkpoints`` is judged only by how recently the
      results json was updated (``last_update_diff`` must not exceed 360);
      the run status is not considered for this test.
    * The tests listed in the threshold table below must reach a minimum
      number of ``TERMINATED`` trials and finish within a maximum
      ``time_taken``. A non-"finished" status is reported as well.
    * Any other test name is not checked here and yields ``None``, even if
      the run status is not "finished".

    Args:
        test: Test configuration; only ``test["legacy"]["test_name"]`` is read.
        result: Run result; ``result.status`` and ``result.results`` are read.

    Returns:
        A message describing every violated expectation, or ``None`` when
        nothing is wrong (or the test is not covered by this handler).
    """
    test_name = test["legacy"]["test_name"]

    # NOTE(review): falls back to an empty dict so a result without any
    # reported metrics is judged by the defaults below instead of raising
    # AttributeError. Assumes `results` may be None/empty - confirm against
    # the Result definition.
    results = result.results or {}

    if test_name == "long_running_large_checkpoints":
        last_update_diff = results.get("last_update_diff", float("inf"))
        target_update_diff = 360

        if last_update_diff > target_update_diff:
            return (
                f"Last update to results json was too long ago "
                f"({last_update_diff:.2f} > {target_update_diff})"
            )
        return None

    was_smoke_test = results.get("smoke_test", False)

    # test name -> (minimum TERMINATED trials, maximum time_taken)
    thresholds = {
        "bookkeeping_overhead": (10000, 800),
        "durable_trainable": (16, 650),
        "network_overhead": (20, 400) if was_smoke_test else (100, 900),
        "result_throughput_cluster": (1000, 130),
        "result_throughput_single_node": (96, 120),
        "xgboost_sweep": (31, 3600),
    }
    if test_name not in thresholds:
        return None
    target_terminated, target_time = thresholds[test_name]

    # Missing metrics count as the worst case: no terminated trials and an
    # infinite runtime, so both checks below fail.
    time_taken = results.get("time_taken", float("inf"))
    num_terminated = results.get("trial_states", {}).get("TERMINATED", 0)

    # Every fragment ends with ". " so that concatenated fragments stay
    # separated from each other.
    problems = []
    if result.status == "timeout":
        problems.append("Test timed out. ")
    elif result.status != "finished":
        problems.append("Test script failed. ")

    if num_terminated < target_terminated:
        problems.append(
            f"Some trials failed "
            f"(num_terminated={num_terminated} < {target_terminated}). "
        )
    if time_taken > target_time:
        problems.append(
            f"Took too long to complete "
            f"(time_taken={time_taken:.2f} > {target_time}). "
        )

    return "".join(problems) or None
|