[ci/release] Increase long running timeout, fix artifacts copy (#21905)

With the new job-based file copy, fetching results takes longer. We therefore have to increase the update-check timeouts for the long-running tests to avoid spurious release test failures.
Also fixes artifact uploading issues.
This commit is contained in:
Kai Fricke 2022-01-26 21:25:03 +00:00 committed by GitHub
parent f4e8784890
commit 3b73a62dad
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 5 deletions

View file

@ -21,15 +21,15 @@ def handle_result(created_on: datetime.datetime, category: str,
"object_spilling_shuffle",
]:
# Core tests
target_update_diff = 120
target_update_diff = 300
elif test_name in ["apex", "impala", "many_ppo", "pbt"]:
# Tune/RLLib style tests
target_update_diff = 360
target_update_diff = 480
elif test_name in ["serve", "serve_failure"]:
# Serve tests have workload logs every five minutes.
# Leave up to 60 seconds overhead.
target_update_diff = 360
# Leave up to 180 seconds overhead.
target_update_diff = 480
else:
return None

View file

@ -90,6 +90,7 @@ done
RAY_TEST_REPO=${RAY_TEST_REPO-https://github.com/ray-project/ray.git}
RAY_TEST_BRANCH=${RAY_TEST_BRANCH-master}
RELEASE_RESULTS_DIR=${RELEASE_RESULTS_DIR-/tmp/artifacts}
export RAY_REPO RAY_BRANCH RAY_VERSION RAY_WHEELS RAY_TEST_REPO RAY_TEST_BRANCH RELEASE_RESULTS_DIR
@ -119,6 +120,8 @@ while [ "$RETRY_NUM" -lt "$MAX_RETRIES" ]; do
sleep ${SLEEP_TIME}
fi
sudo rm -rf "${RELEASE_RESULTS_DIR}"/* || true
python e2e.py "$@"
EXIT_CODE=$?
REASON=$(reason "${EXIT_CODE}")
@ -140,7 +143,8 @@ while [ "$RETRY_NUM" -lt "$MAX_RETRIES" ]; do
done
sudo cp -rf /tmp/artifacts/* /tmp/ray_release_test_artifacts || true
sudo rm -rf /tmp/ray_release_test_artifacts/* || true
sudo cp -rf "${RELEASE_RESULTS_DIR}"/* /tmp/ray_release_test_artifacts/ || true
echo "----------------------------------------"
echo "e2e test finished with final exit code ${EXIT_CODE} after ${RETRY_NUM}/${MAX_RETRIES} tries"