[ci] Add short failing test summary for pytests (#24104)

It is sometimes hard to find all failing tests in Buildkite output logs - even filtering for "FAILED" is cumbersome because the output can be cluttered. This PR adds a small utility that writes a short summary log to a separate output section at the end of the Buildkite job. The only shared directory between the Buildkite host machine and the test Docker container is `/tmp/artifacts:/artifact-mount`. Thus, we write the summary file to this directory, and delete it in the `post-command` hook before it would be uploaded as an artifact.
2025-03-05 10:01:43 -05:00 · 2022-04-26 22:18:07 +01:00 · 2022-04-26 22:18:07 +01:00 · fc1cd89020
commit fc1cd89020
parent c3cea7ad5d
3 changed files with 148 additions and 1 deletions
--- a/.buildkite/hooks/post-command
+++ b/.buildkite/hooks/post-command
@ -0,0 +1,21 @@
 #!/bin/bash
 # Buildkite post-command hook: it runs on the host machine, not inside
 # the Docker container that executes the build job. Whatever a job
 # writes to `/artifact-mount` in the container is found on the host
 # under `/tmp/artifacts`.
 #
 # The text files in the test-summaries directory hold a condensed
 # summary of every failing pytest test. They are concatenated and
 # attached to the job as a Buildkite annotation so that failing tests
 # are easy to spot. Since the annotation already carries the content,
 # the files are deleted afterwards instead of being uploaded as
 # artifacts.
 set -e
 summary_dir="/tmp/artifacts/test-summaries"
 if [ -d "${summary_dir}" ] && [ -n "$(ls -A "${summary_dir}")" ]; then
    cat "${summary_dir}"/*.txt | buildkite-agent annotate --job "${BUILDKITE_JOB_ID}" --append --style error --context "${BUILDKITE_JOB_ID}"
    # Delete the summary files so they are not uploaded as artifacts.
    # The removal goes through docker to avoid permission issues.
    echo "--- Cleaning up"
    docker run --rm -v /tmp/artifacts:/artifact-mount alpine:latest /bin/sh -c 'rm -rf /artifact-mount/test-summaries' || true
 fi
--- a/ci/run/bazel_export_options
+++ b/ci/run/bazel_export_options
@ -3,5 +3,8 @@
 # Directory that collects the bazel build event logs.
 mkdir -p /tmp/bazel_event_logs
 # Each invocation gets its own event file name generated by mktemp.
 event_json_flag=--build_event_json_file=$(mktemp /tmp/bazel_event_logs/bazel_log.XXXXX)
 # Test environment variable naming where archives of failed test logs go.
 logs_archive_flag=--test_env=RAY_TEST_FAILURE_LOGS_ARCHIVE_DIR=/artifact-mount/.failed_test_logs
 # Directory for the short pytest failure summaries. conftest.py reads
 # RAY_TEST_SUMMARY_DIR on Linux and RAY_TEST_SUMMARY_DIR_HOST on any
 # other platform.
 summary_directory_flag=--test_env=RAY_TEST_SUMMARY_DIR=/artifact-mount/test-summaries
 summary_directory_host_flag=--test_env=RAY_TEST_SUMMARY_DIR_HOST=/tmp/artifacts/test-summaries
 # Buildkite variables named without a value; presumably bazel forwards
 # their current values into the test environment - TODO confirm.
 # conftest.py reads all three when building the failure summary.
 buildkite_env_vars="--test_env=BUILDKITE_COMMIT --test_env=BUILDKITE_LABEL --test_env=BUILDKITE_JOB_ID"
 # The echoed string is the script's output: the list of bazel flags.
-echo "${event_json_flag} ${logs_archive_flag}"
+echo "${event_json_flag} ${logs_archive_flag} ${summary_directory_flag} ${summary_directory_host_flag} ${buildkite_env_vars}"
--- a/python/ray/tests/conftest.py
+++ b/python/ray/tests/conftest.py
@ -32,6 +32,7 @@ from ray._private.test_utils import (
    get_and_run_node_killer,
 )
 import ray.util.client.server.server as ray_client_server
 from typing import Tuple
@pytest.fixture
@ -690,6 +691,128 @@ def pytest_runtest_makereport(item, call):
    outcome = yield
    rep = outcome.get_result()
    append_short_test_summary(rep)
    create_ray_logs_for_failed_test(rep)
 def append_short_test_summary(rep):
    """Write a short Markdown summary file for a failed test.

    One ``<test name>.txt`` file per failing test is written to the
    directory named by ``RAY_TEST_SUMMARY_DIR`` (on Linux) or
    ``RAY_TEST_SUMMARY_DIR_HOST`` (on any other platform), preceded by a
    shared ``000_header.txt``. If the test later passes (i.e. it was
    flaky), its summary file is removed again.

    Args:
        rep: The pytest test report of the current test phase. Only
            reports of the ``"call"`` phase are considered.

    Returns:
        None. The function only has file system side effects and is a
        no-op when no summary directory is configured.
    """
    if rep.when != "call":
        return

    summary_dir = os.environ.get("RAY_TEST_SUMMARY_DIR")
    if platform.system() != "Linux":
        # Non-Linux platforms use the host path variant of the directory.
        summary_dir = os.environ.get("RAY_TEST_SUMMARY_DIR_HOST")

    if not summary_dir:
        return

    # `exist_ok` keeps this safe when several test processes share the
    # directory and one of them creates it at the same time.
    os.makedirs(summary_dir, exist_ok=True)

    # pytest node IDs use "/" as path separator on every platform, so
    # replace it explicitly in addition to the OS separator.
    test_name = rep.nodeid.replace("/", "::").replace(os.sep, "::")
    if os.name == "nt":
        # ":" is not a legal character in Windows file names.
        test_name = test_name.replace(":", "$")

    header_name = "000_header.txt"
    header_file = os.path.join(summary_dir, header_name)
    summary_file = os.path.join(summary_dir, test_name + ".txt")

    if rep.passed and os.path.exists(summary_file):
        # The test succeeded after failing, thus it is flaky.
        # We do not want to annotate flaky tests just now, so remove report.
        os.remove(summary_file)

        # Remove the directory only if nothing but the header is left, so
        # that summaries of other failing tests are never deleted.
        if set(os.listdir(summary_dir)) <= {header_name}:
            shutil.rmtree(summary_dir)
        return

    # Only consider failed tests from now on
    if not rep.failed:
        return

    # No failing test information (no report, or no exception chain)
    if rep.longrepr is None or not hasattr(rep.longrepr, "chain"):
        return

    # Tracebacks may contain non-ASCII characters, so do not rely on the
    # locale's default encoding when writing.
    if not os.path.exists(header_file):
        with open(header_file, "wt", encoding="utf-8") as fp:
            test_label = os.environ.get("BUILDKITE_LABEL", "Unknown")
            job_id = os.environ.get("BUILDKITE_JOB_ID")
            fp.write(f"### Pytest failures for: [{test_label}](#{job_id})\n\n")

    # Use `wt` here to overwrite so we only have one result per test (exclude retries)
    with open(summary_file, "wt", encoding="utf-8") as fp:
        fp.write(_get_markdown_annotation(rep))
 def _get_markdown_annotation(rep) -> str:
    """Render a failed test report as a collapsible Markdown/HTML snippet.

    The snippet contains the test's node ID as heading, the error message
    as the ``<details>`` summary, a link to the test definition, the main
    traceback, and (for chained exceptions) an expandable full traceback.

    Args:
        rep: A pytest test report whose ``longrepr`` has a non-empty
            ``chain`` of ``(traceback, crash location, description)``
            entries.

    Returns:
        The Markdown text for this test.
    """
    import html

    # Main traceback is the last in the chain (where the last error is raised)
    main_tb, main_loc, _ = rep.longrepr.chain[-1]

    # pytest types the crash location of a chain entry as optional, so
    # fall back to a generic message instead of raising AttributeError.
    message = str(main_loc.message) if main_loc is not None else "Test failed"

    parts = []

    # Header: Main error message. The message is placed inside an HTML
    # element, so "<", ">" and "&" must be escaped to be displayed.
    parts.append(f"#### {rep.nodeid}\n\n")
    parts.append("<details>\n")
    parts.append(f"<summary>{html.escape(message, quote=False)}</summary>\n\n")

    # Add link to test definition. The line number in `rep.location` is
    # 0-based, whereas GitHub line anchors are 1-based.
    test_file, test_lineno, _test_node = rep.location
    if test_lineno is not None:
        test_lineno += 1
    test_path, test_url = _get_repo_github_path_and_link(
        os.path.abspath(test_file), test_lineno
    )
    parts.append(f"Link to test: [{test_path}:{test_lineno}]({test_url})\n\n")

    # Print main traceback
    parts.append("##### Traceback\n\n")
    parts.append("```\n")
    parts.append(str(main_tb))
    parts.append("\n```\n\n")

    # Print link to the line in GitHub where the error was raised
    if main_loc is not None:
        path, url = _get_repo_github_path_and_link(main_loc.path, main_loc.lineno)
        parts.append(f"[{path}:{main_loc.lineno}]({url})\n\n")

    # If this is a longer exception chain, users can expand the full traceback
    if len(rep.longrepr.chain) > 1:
        parts.append("<details><summary>Full traceback</summary>\n\n")

        # Here we just print each traceback and the link to the respective
        # lines in GitHub
        for tb, loc, _ in rep.longrepr.chain:
            parts.append("```\n")
            parts.append(str(tb))
            parts.append("\n```\n\n")
            if loc is not None:
                path, url = _get_repo_github_path_and_link(loc.path, loc.lineno)
                parts.append(f"[{path}:{loc.lineno}]({url})\n\n")

        parts.append("</details>\n")

    parts.append("</details>\n\n")
    return "".join(parts)
 def _get_repo_github_path_and_link(file: str, lineno: int) -> Tuple[str, str]:
    base_url = "https://github.com/ray-project/ray/blob/{commit}/{path}#L{lineno}"
    commit = os.environ.get("BUILDKITE_COMMIT")
    if not commit:
        return file, ""
    path = os.path.relpath(file, "/ray")
    return path, base_url.format(commit=commit, path=path, lineno=lineno)
 def create_ray_logs_for_failed_test(rep):
    """Creates artifact zip of /tmp/ray/session_latest/logs for failed tests"""
    # We temporarily restrict to Linux until we have artifact dirs
    # for Windows and Mac
    if platform.system() != "Linux":