diff --git a/release/release_logs/fetch_release_logs.py b/release/release_logs/fetch_release_logs.py
new file mode 100644
index 000000000..a4c596920
--- /dev/null
+++ b/release/release_logs/fetch_release_logs.py
@@ -0,0 +1,212 @@
+"""
+This script automatically fetches the latest release logs from the
+OSS release testing pipeline on Buildkite.
+
+Specifically, it loops through all release test pipeline builds for the
+specified Ray version and fetches the latest available results for the
+respective tests. It then writes these to the `ray/release/release_logs`
+directory.
+
+To use this script, either set the BUILDKITE_TOKEN environment variable to a
+valid Buildkite API token with read access, or authenticate in AWS with the
+OSS CI account.
+
+Usage:
+
+    python fetch_release_logs.py <version>
+
+Example:
+
+    python fetch_release_logs.py 1.13.0rc0
+
+Results in:
+
+    Fetched microbenchmark.json for commit 025e4b01822214e03907db0b09f3af17203a6671
+    ...
+    Writing 1.13.0rc0/microbenchmark.json
+    ...
+
+"""
+import json
+import os
+import re
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+import boto3
+import click
+from pybuildkite.buildkite import Buildkite
+
+BUILDKITE_ORGANIZATION = "ray-project"
+BUILDKITE_PIPELINE = "release-tests-branch"
+
+# Format: job name regex --> filename to save results to
+RESULTS_TO_FETCH = {
+    r"^microbenchmark \(.+\)$": "microbenchmark.json",
+    r"^many_actors \(.+\)$": "benchmarks/many_actors.json",
+    r"^many_nodes \(.+\)$": "benchmarks/many_nodes.json",
+    r"^many_pgs \(.+\)$": "benchmarks/many_pgs.json",
+    r"^many_tasks \(.+\)$": "benchmarks/many_tasks.json",
+    r"^object_store \(.+\)$": "scalability/object_store.json",
+    r"^single_node \(.+\)$": "scalability/single_node.json",
+    r"^stress_test_dead_actors \(.+\)$": "stress_tests/stress_test_dead_actors.json",
+    r"^stress_test_many_tasks \(.+\)$": "stress_tests/stress_test_many_tasks.json",
+    r"^stress_test_placement_group \(.+\)$": (
+        "stress_tests/stress_test_placement_group.json"
+    ),
+}
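+# For example, a job named "microbenchmark (abc123)" (the parenthesized
+# suffix is illustrative) would match the first pattern above, and its
+# result.json artifact would be saved to microbenchmark.json.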
+
+
+@dataclass
+class Build:
+    id: str
+    number: int
+    commit: str
+    job_dict_list: List[Dict]
+
+    pipeline: str = BUILDKITE_PIPELINE
+    organization: str = BUILDKITE_ORGANIZATION
+
+
+@dataclass
+class Job:
+    build: Build
+    id: str
+    name: Optional[str]
+
+
+@dataclass
+class Artifact:
+    job: Job
+    id: str
+
+
+def get_buildkite_api() -> Buildkite:
+    bk = Buildkite()
+    buildkite_token = maybe_fetch_buildkite_token()
+    bk.set_access_token(buildkite_token)
+    return bk
+
+
+def maybe_fetch_buildkite_token() -> str:
+    buildkite_token = os.environ.get("BUILDKITE_TOKEN", None)
+
+    if buildkite_token:
+        return buildkite_token
+
+    print("Missing BUILDKITE_TOKEN, retrieving from AWS secrets store")
+    buildkite_token = boto3.client(
+        "secretsmanager", region_name="us-west-2"
+    ).get_secret_value(
+        SecretId="arn:aws:secretsmanager:us-west-2:029272617770:secret:"
+        "buildkite/ro-token"
+    )["SecretString"]
+    os.environ["BUILDKITE_TOKEN"] = buildkite_token
+    return buildkite_token
+
+
+def get_results_from_build_collection(
+    bk: Buildkite, build_dict_list: List[Dict]
+) -> Dict[str, Dict]:
+    results_to_fetch = RESULTS_TO_FETCH.copy()
+    fetched_results = {}
+
+    # Iterate over builds newest-first so the first result found for each
+    # test is also the most recent one.
+    for build_dict in sorted(build_dict_list, key=lambda bd: -bd["number"]):
+        if not results_to_fetch:
+            break
+
+        build = Build(
+            id=build_dict["id"],
+            number=build_dict["number"],
+            commit=build_dict["commit"],
+            job_dict_list=build_dict["jobs"],
+        )
+        build_results = get_results_from_build(bk, build, results_to_fetch)
+        fetched_results.update(build_results)
+
+    return fetched_results
+
+
+def get_results_from_build(bk: Buildkite, build: Build, results_to_fetch: Dict) -> Dict:
+    fetched_results = {}
+
+    for job_dict in build.job_dict_list:
+        if not results_to_fetch:
+            break
+
+        job = Job(build=build, id=job_dict["id"], name=job_dict.get("name", None))
+
+        if not job.name:
+            continue
+
+        for job_regex, filename in list(results_to_fetch.items()):
+            if re.match(job_regex, job.name):
+                result = get_results_artifact_for_job(bk, job=job)
+
+                if not result:
+                    continue
+
+                fetched_results[filename] = result
+                results_to_fetch.pop(job_regex)
+                print(f"Fetched {filename} for commit {job.build.commit}")
+
+    return fetched_results
+
+
+def get_results_artifact_for_job(bk: Buildkite, job: Job) -> Optional[Dict]:
+    artifacts = bk.artifacts().list_artifacts_for_job(
+        organization=job.build.organization,
+        pipeline=job.build.pipeline,
+        build=job.build.number,
+        job=job.id,
+    )
+    for artifact_dict in artifacts:
+        if "result.json" in artifact_dict["filename"]:
+            artifact = Artifact(job=job, id=artifact_dict["id"])
+            return download_results_artifact(bk=bk, artifact=artifact)
+    return None
+
+
+def download_results_artifact(bk: Buildkite, artifact: Artifact) -> Dict:
+    blob = bk.artifacts().download_artifact(
+        organization=artifact.job.build.organization,
+        pipeline=artifact.job.build.pipeline,
+        build=artifact.job.build.number,
+        job=artifact.job.id,
+        artifact=artifact.id,
+    )
+    data_dict = json.loads(blob)
+    return data_dict.get("results", {})
+
+
+def write_results(log_dir: Path, fetched_results: Dict[str, Any]) -> None:
+    log_dir.mkdir(parents=True, exist_ok=True)
+
+    for filepath, content in fetched_results.items():
+        path = log_dir.joinpath(filepath)
+        path.parent.mkdir(parents=True, exist_ok=True)
+
+        print(f"Writing {path}")
+
+        with open(path, "w") as fp:
+            json.dump(content, fp, sort_keys=True, indent=4)
+            fp.write("\n")
+
+
+@click.command()
+@click.argument("version", required=True)
+def main(version: str):
+    log_dir = Path(__file__).parent.joinpath(version)
+    branch = f"releases/{version}"
+
+    bk = get_buildkite_api()
+    build_dict_list = bk.builds().list_all_for_pipeline(
+        organization=BUILDKITE_ORGANIZATION, pipeline=BUILDKITE_PIPELINE, branch=branch
+    )
+
+    fetched_results = get_results_from_build_collection(bk, build_dict_list)
+    write_results(log_dir, fetched_results)
+
+
+if __name__ == "__main__":
+    main()
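
Note: `download_results_artifact` assumes each `result.json` artifact is a
JSON object with a top-level `results` key and keeps only that sub-dict. A
minimal sketch of the expected shape (all field names other than `results`
are illustrative):

    {
        "results": {
            "time_taken": 123.4,
            "success": 1
        },
        "status": "finished"
    }

Anything outside the `results` key is dropped before the file is written.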