[ci/release] Migrate horovod tests (#22951)

Migrating horovod tests to new release package.

https://buildkite.com/ray-project/release-tests-branch/builds/125
This commit is contained in:
Kai Fricke 2022-03-11 09:53:29 +00:00 committed by GitHub
parent aed17dd346
commit 5b2d58674b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 41 additions and 3 deletions

View file

@ -253,9 +253,9 @@ NIGHTLY_TESTS = {
} }
WEEKLY_TESTS = { WEEKLY_TESTS = {
"~/ray/release/horovod_tests/horovod_tests.yaml": [ # "~/ray/release/horovod_tests/horovod_tests.yaml": [
"horovod_test", # "horovod_test",
], # ],
"~/ray/release/long_running_distributed_tests" "~/ray/release/long_running_distributed_tests"
"/long_running_distributed.yaml": [ "/long_running_distributed.yaml": [
"pytorch_pbt_failure", "pytorch_pbt_failure",

View file

@ -2040,6 +2040,44 @@
alert: '' alert: ''
#########################
# Horovod tests
#########################
# Release-test entry for the Horovod Tune workload.
# NOTE(review): the nesting below was reconstructed from a rendering that had
# lost all indentation; confirm it against the release test schema.
- name: horovod_tune_test
  group: Horovod tests
  working_dir: horovod_tests

  # Identifiers of this test in the legacy test configuration
  # (presumably used to map results from the old suite; verify).
  legacy:
    test_name: horovod_test
    test_suite: horovod_tests

  frequency: weekly
  team: ml

  # Cluster definition files; presumably resolved relative to working_dir.
  cluster:
    cluster_env: app_config_master.yaml
    cluster_compute: compute_tpl.yaml

  run:
    # Presumably seconds (36000 s = 10 h); confirm the unit.
    timeout: 36000
    script: python workloads/horovod_tune_test.py
    long_running: true

    # Node-count gate with its own timeout, separate from the run timeout
    # above.
    wait_for_nodes:
      num_nodes: 3
      timeout: 600

    type: sdk_command
    file_manager: job

  # Overrides for the smoke-test variant: not scheduled, and a shorter run
  # timeout than the full test.
  smoke_test:
    frequency: disabled

    run:
      timeout: 1800

  alert: default
#########################
# Core Scalability Tests
#########################