ray/release/ray_release/buildkite/step.py
Kai Fricke 7425fa6212
[ci/release] Add support for concurrency groups (#22728)
This PR adds concurrency groups to Buildkite release test runs with the new release test package. Five concurrency groups are defined (large-gpu, small-gpu, large, medium, small). If not specified manually, concurrency groups are inferred from the cluster resources used.

Example pipeline: https://buildkite.com/ray-project/release-tests-branch/builds/55#09109eac-d22e-43bc-889e-078cfb037373 (click on Artifacts --> pipeline.json)
2022-03-02 16:35:54 +01:00

84 lines
2.6 KiB
Python

import copy
from typing import Optional, Dict
from ray_release.buildkite.concurrency import CONCURRENY_GROUPS, get_concurrency_group
from ray_release.config import Test, get_test_env_var
from ray_release.exception import ReleaseTestConfigError
# Baseline Buildkite step definition. ``get_step`` deep-copies this dict and
# then fills in the per-test fields (command, label, concurrency settings,
# priority), so the template itself is never mutated.
DEFAULT_STEP_TEMPLATE = {
    # Default environment for the step; caller-supplied variables are merged
    # on top of these in ``get_step``.
    "env": {
        "ANYSCALE_CLOUD_ID": "cld_4F7k8814aZzGG8TNUGPKnc",
        "ANYSCALE_PROJECT": "prj_2xR6uT6t7jJuu1aCwWMsle",
        "RELEASE_AWS_BUCKET": "ray-release-automation-results",
        "RELEASE_AWS_LOCATION": "dev",
        "RELEASE_AWS_DB_NAME": "ray_ci",
        "RELEASE_AWS_DB_TABLE": "release_test_result",
        "AWS_REGION": "us-west-2",
    },
    "agents": {"queue": "runner_queue_branch"},
    "plugins": [
        {
            # Docker plugin configuration, pinned to v3.9.0.
            "docker#v3.9.0": {
                "image": "rayproject/ray",
                "propagate-environment": True,
                # Each entry maps a path to the identical path on the other
                # side of the mount.
                "volumes": [
                    "/var/lib/buildkite/builds:/var/lib/buildkite/builds",
                    "/usr/local/bin/buildkite-agent:/usr/local/bin/buildkite-agent",
                    "/tmp/ray_release_test_artifacts:/tmp/ray_release_test_artifacts",
                ],
                "environment": ["BUILDKITE_BUILD_PATH=/var/lib/buildkite/builds"],
            }
        }
    ],
    # Same directory that is mounted as a volume above.
    "artifact_paths": ["/tmp/ray_release_test_artifacts/**/*"],
    # Default priority; ``get_step`` overwrites it with its ``priority_val``.
    "priority": 0,
}
def get_step(
    test: Test,
    smoke_test: bool = False,
    ray_wheels: Optional[str] = None,
    env: Optional[Dict] = None,
    priority_val: int = 0,
):
    """Build the Buildkite step dict for a single release test.

    Starts from a deep copy of ``DEFAULT_STEP_TEMPLATE`` and fills in the
    command, environment overrides, concurrency settings, priority and label.

    Args:
        test: Test configuration. ``test["name"]`` is required; an optional
            ``"concurrency_group"`` entry selects the group explicitly.
        smoke_test: If true, ``--smoke-test`` is appended to the command and
            the label is tagged with ``[smoke test]``.
        ray_wheels: If truthy, forwarded to the command via ``--ray-wheels``.
        env: Extra environment variables merged over the template's ``env``.
        priority_val: Value stored under the step's ``"priority"`` key.

    Returns:
        The populated step dict.

    Raises:
        ReleaseTestConfigError: If the test names a concurrency group that is
            not a key of ``CONCURRENY_GROUPS``.
    """
    step = copy.deepcopy(DEFAULT_STEP_TEMPLATE)

    # Assemble the runner invocation; optional flags are space-separated.
    test_name = test["name"]
    command_parts = [f'./release/run_release_test.sh "{test_name}" --report']
    if smoke_test:
        command_parts.append("--smoke-test")
    if ray_wheels:
        command_parts.append(f"--ray-wheels {ray_wheels}")
    step["command"] = " ".join(command_parts)

    # Caller-supplied variables take precedence over the template defaults.
    if env:
        step["env"].update(env)

    # The label suffix is the short (7 character) commit when one is set,
    # otherwise whatever RAY_BRANCH resolves to.
    commit = get_test_env_var("RAY_COMMIT")
    branch = get_test_env_var("RAY_BRANCH")
    if commit:
        revision = commit[:7]
    else:
        revision = branch

    # An explicitly configured group must be a known one; without an explicit
    # group, both the group and its limit come from get_concurrency_group().
    group_name = test.get("concurrency_group", None)
    if not group_name:
        group_name, group_limit = get_concurrency_group(test)
    elif group_name in CONCURRENY_GROUPS:
        group_limit = CONCURRENY_GROUPS[group_name]
    else:
        raise ReleaseTestConfigError(f"Unknown concurrency group: {group_name}")

    step["concurrency_group"] = group_name
    step["concurrency"] = group_limit

    step["priority"] = priority_val

    label_text = test["name"]
    if smoke_test:
        label_text += " [smoke test] "
    label_text += f" ({revision})"
    step["label"] = label_text

    return step