From 5b2d58674b4e168801a792093431b893c2585785 Mon Sep 17 00:00:00 2001 From: Kai Fricke Date: Fri, 11 Mar 2022 09:53:29 +0000 Subject: [PATCH] [ci/release] Migrate horovod tests (#22951) Migrating horovod tests to new release package. https://buildkite.com/ray-project/release-tests-branch/builds/125 --- release/.buildkite/build_pipeline.py | 6 ++--- release/release_tests.yaml | 38 ++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/release/.buildkite/build_pipeline.py b/release/.buildkite/build_pipeline.py index 0db26215d..98d37a4b2 100644 --- a/release/.buildkite/build_pipeline.py +++ b/release/.buildkite/build_pipeline.py @@ -253,9 +253,9 @@ NIGHTLY_TESTS = { } WEEKLY_TESTS = { - "~/ray/release/horovod_tests/horovod_tests.yaml": [ - "horovod_test", - ], + # "~/ray/release/horovod_tests/horovod_tests.yaml": [ + # "horovod_test", + # ], "~/ray/release/long_running_distributed_tests" "/long_running_distributed.yaml": [ "pytorch_pbt_failure", diff --git a/release/release_tests.yaml b/release/release_tests.yaml index 7463f5e91..ffe27290d 100644 --- a/release/release_tests.yaml +++ b/release/release_tests.yaml @@ -2040,6 +2040,44 @@ alert: '' +######################### +# Horovod tests +######################### +- name: horovod_tune_test + group: Horovod tests + working_dir: horovod_tests + + legacy: + test_name: horovod_test + test_suite: horovod_tests + + frequency: weekly + team: ml + + cluster: + cluster_env: app_config_master.yaml + cluster_compute: compute_tpl.yaml + + run: + timeout: 36000 + script: python workloads/horovod_tune_test.py + long_running: true + wait_for_nodes: + num_nodes: 3 + timeout: 600 + + type: sdk_command + file_manager: job + + smoke_test: + frequency: disabled + + run: + timeout: 1800 + + alert: default + + ######################### # Core Scalability Tests #########################