""" This script will automatically fetch the latest release logs from the OSS release testing pipeline on buildkite. Specifically, this will loop through all release test pipeline builds for the specified Ray version and fetch the latest available results from the respective tests. It will then write these to the directory in `ray/release/release_logs`. To use this script, either set the BUILDKITE_TOKEN environment variable to a valid Buildkite API token with read access, or authenticate in AWS with the OSS CI account. Usage: python fetch_release_logs.py Example: python fetch_release_logs 1.13.0rc0 Results in: Fetched microbenchmark.json for commit 025e4b01822214e03907db0b09f3af17203a6671 ... Writing 1.13.0rc0/microbenchmark.json ... """ import json import os import re from dataclasses import dataclass from pathlib import Path from typing import Any, Dict, List, Optional import boto3 import click from pybuildkite.buildkite import Buildkite BUILDKITE_ORGANIZATION = "ray-project" BUILDKITE_PIPELINE = "release-tests-branch" # Format: job name regex --> filename to save results to RESULTS_TO_FETCH = { r"^microbenchmark \(.+\)$": "microbenchmark.json", r"^many_actors \(.+\)$": "benchmarks/many_actors.json", r"^many_nodes \(.+\)$": "benchmarks/many_nodes.json", r"^many_pgs \(.+\)$": "benchmarks/many_pgs.json", r"^many_tasks \(.+\)$": "benchmarks/many_tasks.json", r"^object_store \(.+\)$": "scalability/object_store.json", r"^single_node \(.+\)$": "scalability/single_node.json", r"^stress_test_dead_actors \(.+\)$": "stress_tests/stress_test_dead_actors.json", r"^stress_test_many_tasks \(.+\)$": "stress_tests/stress_test_many_tasks.json", r"^stress_test_placement_group \(.+\)$": ( "stress_tests/" "stress_test_placement_group.json" ), } @dataclass class Build: id: str number: int commit: str job_dict_list: List[Dict] pipeline: str = BUILDKITE_PIPELINE organization: str = BUILDKITE_ORGANIZATION @dataclass class Job: build: Build id: str name: Optional[str] @dataclass class Artifact: job: Job id: str def get_buildkite_api() -> Buildkite: bk = Buildkite() buildkite_token = maybe_fetch_buildkite_token() bk.set_access_token(buildkite_token) return bk def maybe_fetch_buildkite_token() -> str: buildkite_token = os.environ.get("BUILDKITE_TOKEN", None) if buildkite_token: return buildkite_token print("Missing BUILDKITE_TOKEN, retrieving from AWS secrets store") buildkite_token = boto3.client( "secretsmanager", region_name="us-west-2" ).get_secret_value( SecretId="arn:aws:secretsmanager:us-west-2:029272617770:secret:" "buildkite/ro-token" )[ "SecretString" ] os.environ["BUILDKITE_TOKEN"] = buildkite_token return buildkite_token def get_results_from_build_collection( bk: Buildkite, build_dict_list: List[Dict] ) -> Dict[str, Dict]: results_to_fetch = RESULTS_TO_FETCH.copy() fetched_results = {} for build_dict in sorted(build_dict_list, key=lambda bd: -bd["number"]): if not results_to_fetch: break build = Build( id=build_dict["id"], number=build_dict["number"], commit=build_dict["commit"], job_dict_list=build_dict["jobs"], ) build_results = get_results_from_build(bk, build, results_to_fetch) fetched_results.update(build_results) return fetched_results def get_results_from_build(bk: Buildkite, build: Build, results_to_fetch: Dict) -> Dict: fetched_results = {} for job_dict in build.job_dict_list: if not results_to_fetch: break job = Job(build=build, id=job_dict["id"], name=job_dict.get("name", None)) if not job.name: continue for job_regex, filename in list(results_to_fetch.items()): if re.match(job_regex, job.name): result = get_results_artifact_for_job(bk, job=job) if not result: continue fetched_results[filename] = result results_to_fetch.pop(job_regex) print(f"Fetched {filename} for commit {job.build.commit}") return fetched_results def get_results_artifact_for_job(bk: Buildkite, job: Job) -> Optional[Dict]: artifacts = bk.artifacts().list_artifacts_for_job( organization=job.build.organization, pipeline=job.build.pipeline, build=job.build.number, job=job.id, ) for artifact in artifacts: if "result.json" in artifact["filename"]: artifact = Artifact(job=job, id=artifact["id"]) return download_results_artifact(bk=bk, artifact=artifact) return None def download_results_artifact(bk: Buildkite, artifact: Artifact) -> Dict: blob = bk.artifacts().download_artifact( organization=artifact.job.build.organization, pipeline=artifact.job.build.pipeline, build=artifact.job.build.number, job=artifact.job.id, artifact=artifact.id, ) data_dict = json.loads(blob) return data_dict.get("results", {}) def write_results(log_dir: Path, fetched_results: Dict[str, Any]) -> None: log_dir.mkdir(parents=True, exist_ok=True) for filepath, content in fetched_results.items(): path = log_dir.joinpath(filepath) path.parent.mkdir(parents=True, exist_ok=True) print(f"Writing {path}") with open(path, "w") as fp: json.dump(content, fp, sort_keys=True, indent=4) fp.write("\n") @click.command() @click.argument("version", required=True) def main(version: str): log_dir = Path(__file__).parent.joinpath(version) branch = f"releases/{version}" bk = get_buildkite_api() build_dict_list = bk.builds().list_all_for_pipeline( organization=BUILDKITE_ORGANIZATION, pipeline=BUILDKITE_PIPELINE, branch=branch ) fetched_results = get_results_from_build_collection(bk, build_dict_list) write_results(log_dir, fetched_results) if __name__ == "__main__": main()