mirror of
https://github.com/vale981/ray
synced 2025-03-04 17:41:43 -05:00
[CI] Create zip of ray session_latest/logs
dir on test failure and upload to buildkite via /artifact-mount
(#23783)
Creates a zip of the session_latest/logs dir, named with the test name and a timestamp, upon Python test failure. Writes to the dir specified by the env var `RAY_TEST_FAILURE_LOGS_ARCHIVE_DIR`. No-op if the env var is not set. A downstream consumer (e.g. CI) can upload all created artifacts in this dir. Thereby, PR submitters can more easily debug their CI failures, especially if they can't repro locally. Limitations: - a conftest.py file importing the main ray conftest.py needs to be present in the same dir as the test. This presents a challenge for e.g. dashboard tests, which are highly scattered
This commit is contained in:
parent
1807cff9b6
commit
e6a458a31e
13 changed files with 69 additions and 3 deletions
|
@ -43,7 +43,7 @@ The following practices can avoid such pitfalls while maintaining intuitive cont
|
|||
(The sheer length of the script is a secondary concern and can be mitigated by keeping functions modular.)
|
||||
|
||||
- Avoid adding new scripts if possible. If it's necessary that you do so, call them instead of sourcing them.
|
||||
Note that thies implies new scripts should not modify the environment, or the caller will not see such changes!
|
||||
Note that this implies new scripts should not modify the environment, or the caller will not see such changes!
|
||||
|
||||
- Always add code inside a function, not at global scope. Use `local` for variables where it makes sense.
|
||||
However, be careful and know the shell rules: for example, `local x=$(false)` succeeds even under `set -e`.
|
||||
|
|
3
ci/ci.sh
3
ci/ci.sh
|
@ -191,7 +191,8 @@ test_python() {
|
|||
|
||||
# For running large Python tests on Linux and MacOS.
|
||||
test_large() {
|
||||
bazel test --config=ci "$(./ci/run/bazel_export_options)" --test_env=CONDA_EXE --test_env=CONDA_PYTHON_EXE \
|
||||
# shellcheck disable=SC2046
|
||||
bazel test --config=ci $(./ci/run/bazel_export_options) --test_env=CONDA_EXE --test_env=CONDA_PYTHON_EXE \
|
||||
--test_env=CONDA_SHLVL --test_env=CONDA_PREFIX --test_env=CONDA_DEFAULT_ENV --test_env=CONDA_PROMPT_MODIFIER \
|
||||
--test_env=CI --test_tag_filters="large_size_python_tests_shard_${BUILDKITE_PARALLEL_JOB}" \
|
||||
-- python/ray/tests/...
|
||||
|
|
|
@ -1,4 +1,7 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
mkdir -p /tmp/bazel_event_logs
|
||||
echo "--build_event_json_file=$(mktemp /tmp/bazel_event_logs/bazel_log.XXXXX)"
|
||||
event_json_flag=--build_event_json_file=$(mktemp /tmp/bazel_event_logs/bazel_log.XXXXX)
|
||||
logs_archive_flag=--test_env=RAY_TEST_FAILURE_LOGS_ARCHIVE_DIR=/artifact-mount/.failed_test_logs
|
||||
|
||||
echo "${event_json_flag} ${logs_archive_flag}"
|
||||
|
|
|
@ -9,6 +9,9 @@ from ray.data.block import BlockAccessor
|
|||
from ray.data.tests.mock_server import * # noqa
|
||||
from ray.data.datasource.file_based_datasource import BlockWritePathProvider
|
||||
|
||||
# Trigger pytest hook to automatically zip test cluster logs to archive dir on failure
|
||||
from ray.tests.conftest import pytest_runtest_makereport # noqa
|
||||
|
||||
|
||||
@pytest.fixture(scope="function")
|
||||
def aws_credentials():
|
||||
|
|
2
python/ray/experimental/dag/tests/conftest.py
Normal file
2
python/ray/experimental/dag/tests/conftest.py
Normal file
|
@ -0,0 +1,2 @@
|
|||
# Trigger pytest hook to automatically zip test cluster logs to archive dir on failure
|
||||
from ray.tests.conftest import pytest_runtest_makereport # noqa
|
2
python/ray/ml/tests/conftest.py
Normal file
2
python/ray/ml/tests/conftest.py
Normal file
|
@ -0,0 +1,2 @@
|
|||
# Trigger pytest hook to automatically zip test cluster logs to archive dir on failure
|
||||
from ray.tests.conftest import pytest_runtest_makereport # noqa
|
|
@ -8,6 +8,8 @@ import random
|
|||
import ray
|
||||
from ray import serve
|
||||
|
||||
from ray.tests.conftest import pytest_runtest_makereport # noqa
|
||||
|
||||
# https://tools.ietf.org/html/rfc6335#section-6
|
||||
MIN_DYNAMIC_PORT = 49152
|
||||
MAX_DYNAMIC_PORT = 65535
|
||||
|
|
|
@ -11,6 +11,9 @@ import json
|
|||
import time
|
||||
from pathlib import Path
|
||||
from unittest import mock
|
||||
import shutil
|
||||
import platform
|
||||
from tempfile import gettempdir
|
||||
|
||||
import ray
|
||||
import ray.ray_constants as ray_constants
|
||||
|
@ -679,3 +682,40 @@ def set_bad_runtime_env_cache_ttl_seconds(request):
|
|||
os.environ["BAD_RUNTIME_ENV_CACHE_TTL_SECONDS"] = ttl
|
||||
yield ttl
|
||||
del os.environ["BAD_RUNTIME_ENV_CACHE_TTL_SECONDS"]
|
||||
|
||||
|
||||
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
def pytest_runtest_makereport(item, call):
    """Zip the latest Ray session logs into an archive dir when a test fails.

    This is a pytest hook wrapper: it yields to let the other
    ``pytest_runtest_makereport`` implementations build the report, then
    inspects that report. When the "call" phase of a test failed, the
    contents of ``<tempdir>/ray/session_latest/logs`` are written as a zip
    file into the directory named by the ``RAY_TEST_FAILURE_LOGS_ARCHIVE_DIR``
    environment variable.

    Nothing is archived when:
      * the platform is not Linux,
      * the report is not a failed "call" phase report,
      * ``RAY_TEST_FAILURE_LOGS_ARCHIVE_DIR`` is unset or empty, or
      * the session logs directory does not exist.

    Args:
        item: The pytest test item being reported on (unused here).
        call: The pytest call info for the current phase (unused here).
    """
    # execute all other hooks to obtain the report object
    outcome = yield
    rep = outcome.get_result()

    # We temporarily restrict to Linux until we have artifact dirs
    # for Windows and Mac
    if platform.system() != "Linux":
        return

    # Only archive failed tests after the "call" phase of the test
    if rep.when != "call" or not rep.failed:
        return

    # Get dir to write zipped logs to
    archive_dir = os.environ.get("RAY_TEST_FAILURE_LOGS_ARCHIVE_DIR")

    if not archive_dir:
        return

    # Tests may fail concurrently (e.g. parallel CI shards sharing one
    # archive dir), so create the directory without a separate existence
    # check that another process could invalidate in between.
    os.makedirs(archive_dir, exist_ok=True)

    # Get logs dir from the latest ray session
    tmp_dir = gettempdir()
    logs_dir = os.path.join(tmp_dir, "ray", "session_latest", "logs")

    if not os.path.exists(logs_dir):
        return

    # Write zipped logs to logs archive dir. Path separators in the node id
    # are replaced so the test name becomes a single file name component;
    # the timestamp keeps archives from repeated failures distinct.
    test_name = rep.nodeid.replace(os.sep, "::")
    output_file = os.path.join(archive_dir, f"{test_name}_{time.time():.4f}")
    shutil.make_archive(output_file, "zip", logs_dir)
|
||||
|
|
2
python/ray/train/tests/conftest.py
Normal file
2
python/ray/train/tests/conftest.py
Normal file
|
@ -0,0 +1,2 @@
|
|||
# Trigger pytest hook to automatically zip test cluster logs to archive dir on failure
|
||||
from ray.tests.conftest import pytest_runtest_makereport # noqa
|
|
@ -23,6 +23,9 @@ from ray.train.constants import (
|
|||
from ray.train.worker_group import WorkerGroup
|
||||
from ray.util.placement_group import get_current_placement_group
|
||||
|
||||
# Trigger pytest hook to automatically zip test cluster logs to archive dir on failure
|
||||
from ray.tests.conftest import pytest_runtest_makereport # noqa
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def ray_start_2_cpus():
|
||||
|
|
2
python/ray/tune/tests/conftest.py
Normal file
2
python/ray/tune/tests/conftest.py
Normal file
|
@ -0,0 +1,2 @@
|
|||
# Trigger pytest hook to automatically zip test cluster logs to archive dir on failure
|
||||
from ray.tests.conftest import pytest_runtest_makereport # noqa
|
|
@ -7,6 +7,9 @@ from mock_server import start_service, stop_process
|
|||
|
||||
import tempfile
|
||||
from ray.tests.conftest import get_default_fixture_ray_kwargs
|
||||
|
||||
# Trigger pytest hook to automatically zip test cluster logs to archive dir on failure
|
||||
from ray.tests.conftest import pytest_runtest_makereport # noqa
|
||||
import os
|
||||
import uuid
|
||||
from ray.workflow.tests import utils
|
||||
|
|
|
@ -1 +1,4 @@
|
|||
from ray.tests.conftest import ray_start_regular_shared # noqa: F401
|
||||
|
||||
# Trigger pytest hook to automatically zip test cluster logs to archive dir on failure
|
||||
from ray.tests.conftest import pytest_runtest_makereport # noqa
|
||||
|
|
Loading…
Add table
Reference in a new issue