mirror of
https://github.com/vale981/ray
synced 2025-03-12 14:16:39 -04:00

Adds a unit-tested and restructured ray_release package for running release tests. Relevant changes in behavior: By default, Buildkite will wait for the wheels of the current commit to be available. Alternatively, users can a) specify a different commit hash, b) specify a wheels URL (which we will also wait for to be available), or c) specify a branch (or user/branch combination), in which case the latest available wheels will be used (e.g. if master is passed, behavior matches the old default behavior). The main subpackages are: Cluster manager: creates cluster envs/computes, starts cluster, terminates cluster; Command runner: runs commands, e.g. as client command or SDK command; File manager: uploads/downloads files to/from session; Reporter: reports results (e.g. to database). Much of the code base is unit tested, but there are probably some pieces missing. Example build (waited for wheels to be built): https://buildkite.com/ray-project/kf-dev/builds/51#_ Wheel build: https://buildkite.com/ray-project/ray-builders-branch/builds/6023
73 lines
2.1 KiB
Python
73 lines
2.1 KiB
Python
import os
|
|
import sys
|
|
from typing import Dict
|
|
|
|
import click
|
|
import yaml
|
|
|
|
|
|
def replace_prepare(dt: Dict):
    """Rewrite a legacy ``prepare`` wait command into a ``wait_for_nodes`` entry.

    If ``dt["prepare"]`` is a string containing ``wait_cluster``, it is
    expected to consist of exactly four whitespace-separated tokens, the last
    two being the node count and the timeout. The ``prepare`` key is then
    removed and ``dt["wait_for_nodes"]`` is set to
    ``{"num_nodes": <int>, "timeout": <int>}``. Any other ``dt`` is left
    untouched.

    Args:
        dt: Run configuration dictionary; modified in place.

    Raises:
        ValueError: If the command does not have exactly four tokens or the
            node count / timeout are not integers. ``dt`` is not modified in
            that case.
    """
    prepare = dt.get("prepare")
    if not prepare or "wait_cluster" not in prepare:
        return

    # Split on arbitrary whitespace so that repeated spaces, tabs or a
    # trailing blank do not change the number of tokens.
    tokens = prepare.split()
    if len(tokens) != 4:
        raise ValueError(
            f"Expected 4 tokens in wait_cluster prepare command, "
            f"got {len(tokens)}: {prepare!r}"
        )
    _, _, nodes, timeout = tokens

    # Parse before mutating so a malformed command leaves ``dt`` intact.
    wait_for_nodes = {"num_nodes": int(nodes), "timeout": int(timeout)}

    del dt["prepare"]
    dt["wait_for_nodes"] = wait_for_nodes
|
|
|
|
|
|
def _convert_legacy_test(
    old: Dict, legacy_config: str, prefix: str, group: str, alert: str
) -> Dict:
    """Build one new-style test entry from one legacy test entry.

    Args:
        old: Legacy test dictionary. Its ``run`` sub-dictionary is modified
            in place and reused as the ``run`` section of the result.
        legacy_config: Path of the legacy config file; its parent directory
            name becomes ``working_dir`` and its file name (without
            extension) becomes the legacy test suite name.
        prefix: Prepended (with ``_``) to the legacy test name.
        group: Value stored under ``group``.
        alert: Value stored under ``alert``.

    Returns:
        The converted test dictionary, with keys in output order.
    """
    # Strip whatever extension the file has rather than assuming a
    # five-character ".yaml" suffix.
    suite_name = os.path.splitext(os.path.basename(legacy_config))[0]

    test = {
        "name": f"{prefix}_{old['name']}",
        "group": group,
        "working_dir": os.path.basename(os.path.dirname(legacy_config)),
        "legacy": {
            "test_name": old["name"],
            "test_suite": suite_name,
        },
        # "FILLOUT" is emitted as a placeholder value -- presumably to be
        # replaced by hand after conversion.
        "frequency": "FILLOUT",
        "team": old.get("team", "FILLOUT"),
        "cluster": {
            "cluster_env": old["cluster"]["app_config"],
            "cluster_compute": old["cluster"]["compute_template"],
        },
    }

    if "driver_setup" in old:
        # Carry over the legacy value instead of the literal key name.
        test["driver_setup"] = old["driver_setup"]

    run = old["run"]
    # These two legacy keys do not stay in the run section: one selects the
    # run type below, the other moves to the cluster section.
    use_connect = run.pop("use_connect", False)
    autosuspend = run.pop("autosuspend_mins", None)
    if autosuspend:
        test["cluster"]["autosuspend_mins"] = int(autosuspend)

    test["run"] = run
    replace_prepare(run)

    if "smoke_test" in old:
        test["smoke_test"] = old["smoke_test"]
        if "run" in test["smoke_test"]:
            replace_prepare(test["smoke_test"]["run"])

    if use_connect:
        run["type"] = "client"
    else:
        run["type"] = "sdk_command"
        run["file_manager"] = "job"

    test["alert"] = alert
    return test


@click.command()
@click.argument("legacy_config", type=str)
@click.argument("prefix", type=str)
@click.argument("group", type=str)
@click.argument("alert", type=str)
def main(legacy_config: str, prefix: str, group: str, alert: str):
    """Convert a legacy release test config and print the result as YAML.

    Reads the list of legacy test definitions from LEGACY_CONFIG, converts
    each entry, and dumps the resulting list to stdout with key order
    preserved.

    Args:
        legacy_config: Path to the legacy YAML config file.
        prefix: Prefix for the generated test names.
        group: Group assigned to every generated test.
        alert: Alert setting assigned to every generated test.
    """
    with open(legacy_config, "rt") as fp:
        # An empty file loads as None; treat it as "no tests".
        config = yaml.safe_load(fp) or []

    tests = [
        _convert_legacy_test(old, legacy_config, prefix, group, alert)
        for old in config
    ]

    yaml.dump(tests, sys.stdout, sort_keys=False)
    sys.stdout.flush()
|
|
|
|
|
|
# Run the click command only when this file is executed as a script.
if __name__ == "__main__":
    main()
|