[ci] Add short failing test summary for pytests (#24104)

It is sometimes hard to find all failing tests in Buildkite output logs - even filtering for "FAILED" is cumbersome, as the output can be overloaded. This PR adds a small utility that prints a short summary of the failing tests in a separate output section at the end of the Buildkite job. The only directory shared between the Buildkite host machine and the test Docker container is `/tmp/artifacts:/artifact-mount`. Thus, we write the summary files to this directory and delete them in the `post-command` hook, before they would be uploaded as artifacts.
2025-03-04 17:41:43 -05:00 · 2022-04-26 22:18:07 +01:00 · 2022-04-26 22:18:07 +01:00 · fc1cd89020
commit fc1cd89020
parent c3cea7ad5d
3 changed files with 148 additions and 1 deletions
--- a/.buildkite/hooks/post-command
+++ b/.buildkite/hooks/post-command
@ -0,0 +1,21 @@
+#!/bin/bash
+# Buildkite runs this hook on the host machine, whereas the build jobs
+# themselves run inside Docker containers. The directory that the jobs
+# see as `/artifact-mount` is therefore `/tmp/artifacts` on the host.
+#
+# If the jobs left anything in artifact-mount/test-summaries, all text
+# files in there are concatenated and published as a Buildkite annotation
+# for this job. Each file is a condensed summary of a failing pytest test,
+# so users can easily see which tests are failing.
+#
+# Since the summaries end up in the annotation, they are deleted again
+# afterwards instead of also being uploaded as artifacts.
+set -e
+if [ -d "/tmp/artifacts/test-summaries" ] && [ -n "$(ls -A /tmp/artifacts/test-summaries)" ]; then
+    cat /tmp/artifacts/test-summaries/*.txt \
+        | buildkite-agent annotate \
+            --job "${BUILDKITE_JOB_ID}" \
+            --append \
+            --style error \
+            --context "${BUILDKITE_JOB_ID}"
+
+    # Drop the summary files so they are not picked up as artifacts.
+    # The removal runs inside a container to avoid permission issues;
+    # a failed cleanup is tolerated.
+    echo "--- Cleaning up"
+    docker run --rm \
+        -v /tmp/artifacts:/artifact-mount \
+        alpine:latest \
+        /bin/sh -c 'rm -rf /artifact-mount/test-summaries' \
+        || true
+fi
--- a/ci/run/bazel_export_options
+++ b/ci/run/bazel_export_options
@ -3,5 +3,8 @@
 mkdir -p /tmp/bazel_event_logs
 event_json_flag=--build_event_json_file=$(mktemp /tmp/bazel_event_logs/bazel_log.XXXXX)
 logs_archive_flag=--test_env=RAY_TEST_FAILURE_LOGS_ARCHIVE_DIR=/artifact-mount/.failed_test_logs
+summary_directory_flag=--test_env=RAY_TEST_SUMMARY_DIR=/artifact-mount/test-summaries
+summary_directory_host_flag=--test_env=RAY_TEST_SUMMARY_DIR_HOST=/tmp/artifacts/test-summaries
+buildkite_env_vars="--test_env=BUILDKITE_COMMIT --test_env=BUILDKITE_LABEL --test_env=BUILDKITE_JOB_ID"

-echo "${event_json_flag} ${logs_archive_flag}"
+echo "${event_json_flag} ${logs_archive_flag} ${summary_directory_flag} ${summary_directory_host_flag} ${buildkite_env_vars}"
--- a/python/ray/tests/conftest.py
+++ b/python/ray/tests/conftest.py
@ -32,6 +32,7 @@ from ray._private.test_utils import (
    get_and_run_node_killer,
 )
 import ray.util.client.server.server as ray_client_server
+from typing import Tuple


@pytest.fixture
@ -690,6 +691,128 @@ def pytest_runtest_makereport(item, call):
    outcome = yield
    rep = outcome.get_result()

+    append_short_test_summary(rep)
+    create_ray_logs_for_failed_test(rep)
+
+
+def append_short_test_summary(rep):
+    """Writes a short summary txt for failed tests to be printed later.
+
+    One Markdown file per failing test is written to the directory named by
+    the ``RAY_TEST_SUMMARY_DIR`` environment variable
+    (``RAY_TEST_SUMMARY_DIR_HOST`` on non-Linux platforms), next to a shared
+    ``000_header.txt``. If a test that already has a summary file passes
+    later on, its file is removed again. Nothing is written when the
+    environment variable is unset or empty.
+
+    Args:
+        rep: The pytest report of one test phase. Only reports of the
+            "call" phase are considered.
+    """
+    if rep.when != "call":
+        return
+
+    summary_dir = os.environ.get("RAY_TEST_SUMMARY_DIR")
+
+    if platform.system() != "Linux":
+        summary_dir = os.environ.get("RAY_TEST_SUMMARY_DIR_HOST")
+
+    if not summary_dir:
+        return
+
+    # Create the directory without a preceding existence check: checking first
+    # and creating afterwards raises FileExistsError if another process (e.g.
+    # a test running in parallel that shares this directory) creates it in
+    # between.
+    os.makedirs(summary_dir, exist_ok=True)
+
+    test_name = rep.nodeid.replace(os.sep, "::")
+
+    header_file = os.path.join(summary_dir, "000_header.txt")
+    summary_file = os.path.join(summary_dir, test_name + ".txt")
+
+    if rep.passed and os.path.exists(summary_file):
+        # The test succeeded after failing, thus it is flaky.
+        # We do not want to annotate flaky tests just now, so remove report.
+        os.remove(summary_file)
+
+        # If there is only the header file left, remove directory
+        if len(os.listdir(summary_dir)) <= 1:
+            shutil.rmtree(summary_dir)
+        return
+
+    # Only consider failed tests from now on
+    if not rep.failed:
+        return
+
+    # No failing test information
+    if rep.longrepr is None:
+        return
+
+    # No failing test information
+    if not hasattr(rep.longrepr, "chain"):
+        return
+
+    # The files are written as UTF-8 explicitly so that tracebacks containing
+    # non-ASCII characters do not depend on the locale of the test environment.
+    if not os.path.exists(header_file):
+        with open(header_file, "wt", encoding="utf-8") as fp:
+            test_label = os.environ.get("BUILDKITE_LABEL", "Unknown")
+            job_id = os.environ.get("BUILDKITE_JOB_ID")
+
+            fp.write(f"### Pytest failures for: [{test_label}](#{job_id})\n\n")
+
+    # Use `wt` here to overwrite so we only have one result per test (exclude retries)
+    with open(summary_file, "wt", encoding="utf-8") as fp:
+        fp.write(_get_markdown_annotation(rep))
+
+
+def _get_markdown_annotation(rep) -> str:
+    """Render a failed test report as a collapsible Markdown annotation.
+
+    Args:
+        rep: Failed pytest report whose ``longrepr`` has a non-empty ``chain``
+            of ``(traceback, location, description)`` entries.
+
+    Returns:
+        Markdown with the test id as heading, followed by a ``<details>``
+        element that holds a link to the test, the main traceback and, for
+        chained exceptions, a nested ``<details>`` element with all
+        tracebacks of the chain.
+    """
+    import html
+
+    # Main traceback is the last in the chain (where the last error is raised)
+    main_tb, main_loc, _ = rep.longrepr.chain[-1]
+
+    # pytest documents the line number in `rep.location` as 0-based, whereas
+    # GitHub `#L` anchors are 1-based; shift it so the link hits the test.
+    test_file, test_lineno, _test_node = rep.location
+    if test_lineno is not None:
+        test_lineno += 1
+    test_path, test_url = _get_repo_github_path_and_link(
+        os.path.abspath(test_file), test_lineno
+    )
+
+    # Link to the line where the main error was raised
+    path, url = _get_repo_github_path_and_link(main_loc.path, main_loc.lineno)
+
+    # The message is embedded in raw HTML, where unescaped text such as
+    # `<Foo object at 0x...>` would be parsed as a tag.
+    message = html.escape(str(main_loc.message), quote=False)
+
+    parts = [
+        # Header: Main error message
+        f"#### {rep.nodeid}\n\n",
+        "<details>\n",
+        f"<summary>{message}</summary>\n\n",
+        # Link to test definition
+        f"Link to test: [{test_path}:{test_lineno}]({test_url})\n\n",
+        # Main traceback and the link to the raising line
+        "##### Traceback\n\n",
+        "```\n",
+        str(main_tb),
+        "\n```\n\n",
+        f"[{path}:{main_loc.lineno}]({url})\n\n",
+    ]
+
+    # If this is a longer exception chain, users can expand the full traceback
+    if len(rep.longrepr.chain) > 1:
+        parts.append("<details><summary>Full traceback</summary>\n\n")
+
+        # Here we just print each traceback and the link to the respective
+        # lines in GitHub
+        for tb, loc, _ in rep.longrepr.chain:
+            parts += ["```\n", str(tb), "\n```\n\n"]
+
+            # NOTE(review): a chain entry presumably can come without a
+            # location (exception without traceback); skip the link then
+            # instead of failing inside the reporting hook.
+            if loc is not None:
+                path, url = _get_repo_github_path_and_link(loc.path, loc.lineno)
+                parts.append(f"[{path}:{loc.lineno}]({url})\n\n")
+
+        parts.append("</details>\n")
+
+    parts.append("</details>\n\n")
+    return "".join(parts)
+
+
+def _get_repo_github_path_and_link(file: str, lineno: int) -> Tuple[str, str]:
+    """Return the repository-relative path of ``file`` and a GitHub link to it.
+
+    The link points at ``lineno`` in the commit named by the
+    ``BUILDKITE_COMMIT`` environment variable. If that variable is unset or
+    empty, ``file`` is returned unchanged together with an empty link.
+    The path is taken relative to ``/ray``.
+    """
+    sha = os.environ.get("BUILDKITE_COMMIT")
+
+    if sha:
+        template = "https://github.com/ray-project/ray/blob/{commit}/{path}#L{lineno}"
+        repo_path = os.path.relpath(file, "/ray")
+        link = template.format(commit=sha, path=repo_path, lineno=lineno)
+        return repo_path, link
+
+    # Without a commit there is nothing to link to
+    return file, ""
+
+
+def create_ray_logs_for_failed_test(rep):
+    """Creates artifact zip of /tmp/ray/session_latest/logs for failed tests"""
+
    # We temporarily restrict to Linux until we have artifact dirs
    # for Windows and Mac
    if platform.system() != "Linux":