diff --git a/.bazelrc b/.bazelrc index 7800d4459..f69f90921 100644 --- a/.bazelrc +++ b/.bazelrc @@ -31,6 +31,7 @@ build --host_copt="-Wno-microsoft-unqualified-friend" # This workaround is needed due to https://github.com/bazelbuild/bazel/issues/4341 build --per_file_copt="-\\.(asm|S)$,external/com_github_grpc_grpc/.*@-DGRPC_BAZEL_BUILD" build --http_timeout_scaling=5.0 +build --verbose_failures build:iwyu --experimental_action_listener=//:iwyu_cpp # Print relative paths when possible @@ -62,7 +63,6 @@ build:tsan --copt -fno-omit-frame-pointer build:tsan --linkopt -fsanitize=thread # Memory sanitizer configuration -# If you change these make sure you also change ci/asan_tests/bazelrc build:asan --strip=never build:asan --copt -g build:asan --copt -fsanitize=address @@ -71,17 +71,27 @@ build:asan --copt -DADDRESS_SANITIZER build:asan --copt -fno-omit-frame-pointer build:asan --linkopt -fsanitize=address build:asan --linkopt -fno-sanitize=vptr -test:asan --action_env=ASAN_OPTIONS=detect_leaks=0 +test:asan --jobs=1 +test:asan --test_env=ASAN_OPTIONS="detect_leaks=0" # This LD_PRELOAD is set for Travis. You will need to change it for local debugging. 
-test:asan --action_env=LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libasan.so.2 +test:asan --test_env=LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libasan.so.2 /usr/lib/gcc/x86_64-linux-gnu/7/libasan.so" # For example, for Ubuntu 18.04 libasan can be found here: -# test:asan --action_env=LD_PRELOAD=/usr/lib/gcc/x86_64-linux-gnu/7/libasan.so +# test:asan --test_env=LD_PRELOAD="/usr/lib/gcc/x86_64-linux-gnu/7/libasan.so" +# CI configuration: +build:ci --color=yes +build:ci --curses=no +build:ci --disk_cache=~/ray-bazel-cache +build:ci --remote_cache="https://storage.googleapis.com/ray-bazel-cache" +build:ci --progress_report_interval=100 +build:ci --show_progress_rate_limit=15 +build:ci --show_task_finish +build:ci --ui_actions_shown=1024 +build:ci-travis --show_timestamps # Travis doesn't have an option to show timestamps, but GitHub Actions does +# GitHub Actions has low disk space, so prefer hardlinks there. +build:ci-github --experimental_repository_cache_hardlinks test:ci --flaky_test_attempts=3 test:ci --nocache_test_results -test:ci --progress_report_interval=100 -test:ci --show_progress_rate_limit=100 -test:ci --show_timestamps test:ci --spawn_strategy=local test:ci --test_output=errors test:ci --test_verbose_timeout_warnings diff --git a/.travis.yml b/.travis.yml index 80f0c0774..6ea7ac3b3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -33,7 +33,7 @@ matrix: - os: linux env: - - PYTHON=3.6 ENABLE_ASAN="--config=asan -j 2" + - PYTHON=3.6 BAZEL_CONFIG="asan" - PYTHONWARNINGS=ignore - RAY_DEFAULT_BUILD=1 install: @@ -42,7 +42,7 @@ matrix: - . ./ci/travis/ci.sh build script: # Run all C++ unit tests with ASAN enabled. ASAN adds too much overhead to run Python tests. - - bazel test $ENABLE_ASAN --config=ci --build_tests_only -- //:all + - bazel test --config=ci --build_tests_only -- //:all - os: osx osx_image: xcode7 @@ -176,7 +176,7 @@ matrix: before_script: - . 
./ci/travis/ci.sh build script: - - ./ci/keep_alive bazel test --config=ci --test_output=errors --build_tests_only --test_tag_filters=learning_tests_tf rllib/... + - ./ci/keep_alive bazel test --config=ci --build_tests_only --test_tag_filters=learning_tests_tf rllib/... # RLlib: Learning tests with tf=1.x (from rllib/tuned_examples/*.yaml). # Requested by Edi (MS): Test all learning capabilities with tf1.x @@ -193,7 +193,7 @@ matrix: before_script: - . ./ci/travis/ci.sh build script: - - ./ci/keep_alive bazel test --config=ci --test_output=errors --build_tests_only --test_tag_filters=learning_tests_tf rllib/... + - ./ci/keep_alive bazel test --config=ci --build_tests_only --test_tag_filters=learning_tests_tf rllib/... # RLlib: Learning tests with torch (from rllib/tuned_examples/*.yaml). - os: linux @@ -209,7 +209,7 @@ matrix: before_script: - . ./ci/travis/ci.sh build script: - - ./ci/keep_alive bazel test --config=ci --test_output=errors --build_tests_only --test_tag_filters=learning_tests_torch rllib/... + - ./ci/keep_alive bazel test --config=ci --build_tests_only --test_tag_filters=learning_tests_torch rllib/... # RLlib: Quick Agent train.py runs (compilation & running, no(!) learning). # Agent single tests (compilation, loss-funcs, etc..). 
diff --git a/build.sh b/build.sh index 23b373026..d32b49257 100755 --- a/build.sh +++ b/build.sh @@ -128,7 +128,7 @@ pushd "$BUILD_DIR" if [ "$RAY_BUILD_JAVA" == "YES" ]; then - "$BAZEL_EXECUTABLE" build ${ENABLE_ASAN-} //java:ray_java_pkg --verbose_failures + "$BAZEL_EXECUTABLE" build //java:ray_java_pkg fi if [ "$RAY_BUILD_PYTHON" == "YES" ]; then @@ -160,7 +160,7 @@ if [ "$RAY_BUILD_PYTHON" == "YES" ]; then export PYTHON3_BIN_PATH="$PYTHON_EXECUTABLE" - "$BAZEL_EXECUTABLE" build ${ENABLE_ASAN-} //:ray_pkg --verbose_failures + "$BAZEL_EXECUTABLE" build //:ray_pkg fi popd diff --git a/ci/asan_tests/Makefile b/ci/asan_tests/Makefile deleted file mode 100644 index 09f2ca389..000000000 --- a/ci/asan_tests/Makefile +++ /dev/null @@ -1,28 +0,0 @@ -SHELL=/bin/bash - -echo: - @echo "Available commands: setup, run, recompile" - -setup: - echo "Setting up the environment" - pip install -r ray-project/requirements.txt - pip install -U pytest - - echo "Installing cython example" - cd "${HOME}"/ray/doc/examples/cython; python setup.py install --user - - echo "Settting up the shell" - cp bazelrc "${HOME}"/.bazelrc # Setup cache - echo "LD_PRELOAD=/usr/lib/gcc/x86_64-linux-gnu/7/libasan.so" >> "${HOME}"/.bashrc - echo "ASAN_OPTIONS=detect_leaks=0" >> "${HOME}"/.bashrc - - echo "Compiling ray" - cd "${HOME}"/ray; git fetch; git pull origin master - cd "${HOME}"/ray/python; pip install -e . --verbose || true - -run: - ./run_asan_tests.sh - -recompile: - cd "${HOME}"/ray; git fetch; git checkout "${git_sha}" - cd "${HOME}"/ray/python; pip install -e . --verbose || true diff --git a/ci/asan_tests/bazelrc b/ci/asan_tests/bazelrc deleted file mode 100644 index b19a66b4d..000000000 --- a/ci/asan_tests/bazelrc +++ /dev/null @@ -1,30 +0,0 @@ -# This bazelrc specifies configuration for the release asan test. -# It exists because we want everything to be "default" configuration -# instead of "--config=ci" or "--config=asan". 
- -# Cache -build --remote_cache=https://storage.googleapis.com/ray-bazel-cache -build --disk_cache=/tmp/bazel-cache -build --repository_cache=/tmp/bazel-cache -build --remote_upload_local_results=false - -# Asan -build --strip=never -build --copt -g -build --copt -fsanitize=address -build --copt -fno-sanitize=vptr -build --copt -DADDRESS_SANITIZER -build --copt -fno-omit-frame-pointer -build --linkopt -fsanitize=address -build --linkopt -fno-sanitize=vptr -test --action_env=ASAN_OPTIONS=detect_leaks=0 -test --action_env=LD_PRELOAD=/usr/lib/gcc/x86_64-linux-gnu/7/libasan.so - -# Travis test config -test --flaky_test_attempts=3 -test --nocache_test_results -test --show_timestamps -test --spawn_strategy=local -test --test_output=errors -test --test_verbose_timeout_warnings -test --jobs 1 diff --git a/ci/asan_tests/ray-project/project.yaml b/ci/asan_tests/ray-project/project.yaml index 1a7dcfedc..22add2a22 100644 --- a/ci/asan_tests/ray-project/project.yaml +++ b/ci/asan_tests/ray-project/project.yaml @@ -16,12 +16,12 @@ cluster: commands: - name: Setup - command: make setup + command: ./run_asan_tests.sh setup - name: Run - command: make run + command: ./run_asan_tests.sh run - name: Recompile command: | - git_sha={{git_sha}} make recompile + git_sha={{git_sha}} ./run_asan_tests.sh recompile params: - name: git_sha # Ray version string. default: "master" diff --git a/ci/asan_tests/run_asan_tests.sh b/ci/asan_tests/run_asan_tests.sh index eba288c1d..715676195 100755 --- a/ci/asan_tests/run_asan_tests.sh +++ b/ci/asan_tests/run_asan_tests.sh @@ -1,15 +1,58 @@ -set -e -set -x +#!/usr/bin/env bash -export LD_PRELOAD=/usr/lib/gcc/x86_64-linux-gnu/7/libasan.so -export ASAN_OPTIONS=detect_leaks=0 +set -euxo pipefail -cd $HOME/ray -# async plasma test -python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/test/async_test.py +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd)" -# Ray tests -bazel test --test_tag_filters=-jenkins_only python/ray/serve/... 
-bazel test --test_tag_filters=-jenkins_only python/ray/dashboard/... -bazel test --test_tag_filters=-jenkins_only python/ray/tests/... -bazel test --test_tag_filters=-jenkins_only python/ray/tune/... +asan_install() { + (cd "${ROOT_DIR}"/../../python && pip install -e . --verbose) +} + +asan_setup() { + echo "Setting up the environment" + pip install -r "${ROOT_DIR}"/ray-project/requirements.txt + pip install -U pytest + + echo "Installing cython example" + (cd "${ROOT_DIR}"/../../doc/examples/cython && python setup.py install --user) + + echo "Settting up the shell" + echo "build --config=asan" >> ~/.bazelrc # Make every bazel build/test use the asan config + echo "LD_PRELOAD=/usr/lib/gcc/x86_64-linux-gnu/7/libasan.so" >> ~/.bashrc + echo "ASAN_OPTIONS=detect_leaks=0" >> ~/.bashrc + + echo "Compiling ray" + git fetch + git pull origin master + asan_install || true +} + +asan_run() { + ( + export LD_PRELOAD="/usr/lib/gcc/x86_64-linux-gnu/7/libasan.so" + export ASAN_OPTIONS="detect_leaks=0" + + cd "${ROOT_DIR}"/../.. + + # async plasma test + python -m pytest -v --durations=5 --timeout=300 python/ray/experimental/test/async_test.py + + # Ray tests + bazel test --test_tag_filters=-jenkins_only python/ray/serve/... + bazel test --test_tag_filters=-jenkins_only python/ray/dashboard/... + bazel test --test_tag_filters=-jenkins_only python/ray/tests/... + bazel test --test_tag_filters=-jenkins_only python/ray/tune/... + ) +} + +asan_recompile() { + git fetch + git checkout "${git_sha}" + asan_install || true +} + +if [ 0 -lt "$#" ]; then + asan_"$@" +else + echo "Available commands: setup, run, recompile" +fi diff --git a/ci/travis/ci.sh b/ci/travis/ci.sh index 37fd3a0c9..9eb785c98 100755 --- a/ci/travis/ci.sh +++ b/ci/travis/ci.sh @@ -66,7 +66,7 @@ reload_env() { export TRAVIS_PULL_REQUEST fi - if [ -z "${TRAVIS_BRANCH-}" ] && [ -n "${GITHUB_WORKFLOW-}" ]; then + if [ "${GITHUB_ACTIONS-}" = true ] && [ -z "${TRAVIS_BRANCH-}" ]; then # Define TRAVIS_BRANCH to make Travis scripts run on GitHub Actions. 
TRAVIS_BRANCH="${GITHUB_BASE_REF:-${GITHUB_REF}}" # For pull requests, the base branch name TRAVIS_BRANCH="${TRAVIS_BRANCH#refs/heads/}" # Remove refs/... prefix @@ -403,7 +403,7 @@ _check_job_triggers() { fi if ! (set +x && should_run_job ${job_names//,/ }); then - if [ -n "${GITHUB_WORKFLOW-}" ]; then + if [ "${GITHUB_ACTIONS-}" = true ]; then # If this job is to be skipped, emit 'exit' into .bashrc to quickly exit all following steps. # This isn't needed on Travis (since everything runs in one shell), but is on GitHub Actions. cat <> ~/.bashrc @@ -448,7 +448,8 @@ init() { build() { if ! need_wheels; then - bazel build ${ENABLE_ASAN-} -k "//:*" # Do a full build first to ensure everything passes + # NOTE: Do not add build flags here. Use .bazelrc and --config instead. + bazel build -k "//:*" # Full build first, since pip install will build only a subset of targets install_ray if [ "${LINT-}" = 1 ]; then # Try generating Sphinx documentation. To do this, we need to install Ray first. @@ -470,7 +471,7 @@ build() { } _main() { - if [ -n "${GITHUB_WORKFLOW-}" ]; then + if [ "${GITHUB_ACTIONS-}" = true ]; then exec 2>&1 # Merge stdout and stderr to prevent out-of-order buffering issues reload_env fi diff --git a/ci/travis/install-bazel.sh b/ci/travis/install-bazel.sh index e23eaf06a..ccd3dab04 100755 --- a/ci/travis/install-bazel.sh +++ b/ci/travis/install-bazel.sh @@ -53,7 +53,7 @@ else target="./install.sh" curl -f -s -L -R -o "${target}" "https://github.com/bazelbuild/bazel/releases/download/${version}/bazel-${version}-installer-${platform}-${achitecture}.sh" chmod +x "${target}" - if [ "${TRAVIS-}" = true ] || [ -n "${GITHUB_WORKFLOW-}" ]; then + if [ "${CI-}" = true ]; then sudo "${target}" > /dev/null # system-wide install for CI command -V bazel 1>&2 else @@ -62,36 +62,26 @@ else rm -f "${target}" fi +for bazel_cfg in ${BAZEL_CONFIG-}; do + echo "build --config=${bazel_cfg}" >> ~/.bazelrc +done if [ "${TRAVIS-}" = true ]; then - # Use bazel disk cache if this 
script is running in Travis. - cat <<EOF >> "${HOME}/.bazelrc" -build --show_timestamps # Travis doesn't have an option to show timestamps, but GitHub Actions does -# If we are in Travis, most of the compilation result will be cached. -# This means we are I/O bounded. By default, Bazel set the number of concurrent -# jobs to the the number cores on the machine, which are not efficient for -# network bounded cache downloading workload. Therefore we increase the number -# of jobs to 50 -build --jobs=50 -EOF + echo "build --config=ci-travis" >> ~/.bazelrc + + # If we are in Travis, most of the compilation result will be cached. + # This means we are I/O bound. By default, Bazel sets the number of concurrent + # jobs to the number of cores on the machine, which is not efficient for a + # network-bound cache downloading workload. Therefore we increase the number + # of jobs to 50 + # NOTE: Normally --jobs should be under 'build:ci-travis' in .bazelrc, but we put + # it under 'build' here to avoid conflicts with other --config options. + echo "build --jobs=50" >> ~/.bazelrc fi -if [ -n "${GITHUB_WORKFLOW-}" ]; then - cat <<EOF >> "${HOME}/.bazelrc" ---output_base=".bazel-out" # On GitHub Actions, staying on the same volume seems to be faster -EOF +if [ "${GITHUB_ACTIONS-}" = true ]; then + echo "build --config=ci-github" >> ~/.bazelrc fi -if [ "${TRAVIS-}" = true ] || [ -n "${GITHUB_WORKFLOW-}" ]; then - cat <<EOF >> "${HOME}/.bazelrc" -# CI output doesn't scroll, so don't use curses -build --color=yes -build --curses=no -build --disk_cache="$(test "${OSTYPE}" = msys || echo ~/ray-bazel-cache)" -# Use ray google cloud cache -build --remote_cache="https://storage.googleapis.com/ray-bazel-cache" -build --show_progress_rate_limit=15 -build --show_task_finish -build --ui_actions_shown=1024 -build --verbose_failures -EOF +if [ "${CI-}" = true ]; then + echo "build --config=ci" >> ~/.bazelrc # If we are in master build, we can write to the cache as well. 
upload=0 if [ "${TRAVIS_PULL_REQUEST-false}" = false ]; then @@ -113,16 +103,16 @@ EOF fi fi if [ 0 -ne "${upload}" ]; then - translated_path="${HOME}/bazel_cache_credential.json" + translated_path=~/bazel_cache_credential.json if [ "${OSTYPE}" = msys ]; then # On Windows, we need path translation translated_path="$(cygpath -m -- "${translated_path}")" fi - cat <<EOF >> "${HOME}/.bazelrc" + cat <<EOF >> ~/.bazelrc build --google_credentials="${translated_path}" EOF else echo "Using remote build cache in read-only mode." 1>&2 - cat <<EOF >> "${HOME}/.bazelrc" + cat <<EOF >> ~/.bazelrc build --remote_upload_local_results=false EOF fi diff --git a/ci/travis/install-dependencies.sh b/ci/travis/install-dependencies.sh index 24dc2c5dd..90a6d3d16 100755 --- a/ci/travis/install-dependencies.sh +++ b/ci/travis/install-dependencies.sh @@ -162,7 +162,7 @@ install_pip() { "${python}" -m pip install --upgrade --quiet pip # If we're in a CI environment, do some configuration - if [ "${TRAVIS-}" = true ] || [ -n "${GITHUB_WORKFLOW-}" ]; then + if [ "${CI-}" = true ]; then "${python}" -W ignore -m pip config -q --user set global.disable-pip-version-check True "${python}" -W ignore -m pip config -q --user set global.no-color True "${python}" -W ignore -m pip config -q --user set global.progress_bar off