From 9d0148dbbe418bcf4a4456e5b029ae721ee5fba3 Mon Sep 17 00:00:00 2001 From: SangBin Cho Date: Mon, 7 Mar 2022 11:24:54 +0900 Subject: [PATCH] [Test] Migrate the first test to the new infra (#22770) This migrates the simplest nightly test to the new infra. I will also explore k8s migration with this test. --- release/nightly_tests/nightly_tests.yaml | 16 ++++++------- release/nightly_tests/wait_cluster.py | 11 +-------- .../command_runner/_wait_cluster.py | 1 + release/release_tests.yaml | 23 +++++++++++++++++++ 4 files changed, 33 insertions(+), 18 deletions(-) diff --git a/release/nightly_tests/nightly_tests.yaml b/release/nightly_tests/nightly_tests.yaml index 8c20382f9..1836ae07e 100644 --- a/release/nightly_tests/nightly_tests.yaml +++ b/release/nightly_tests/nightly_tests.yaml @@ -3,15 +3,15 @@ # # Test basic single node 10GB shuffle with a small number of partitions. # This doesn't require object spilling. -- name: shuffle_10gb - team: core - cluster: - app_config: shuffle/shuffle_app_config.yaml - compute_template: shuffle/shuffle_compute_single.yaml +# - name: shuffle_10gb +# team: core +# cluster: +# app_config: shuffle/shuffle_app_config.yaml +# compute_template: shuffle/shuffle_compute_single.yaml - run: - timeout: 3000 - script: python shuffle/shuffle_test.py --num-partitions=50 --partition-size=200e6 +# run: +# timeout: 3000 +# script: python shuffle/shuffle_test.py --num-partitions=50 --partition-size=200e6 # Test single node 50GB shuffle with a large number of partitions. 
- name: shuffle_50gb diff --git a/release/nightly_tests/wait_cluster.py b/release/nightly_tests/wait_cluster.py index cbabb9a5e..f70088289 100644 --- a/release/nightly_tests/wait_cluster.py +++ b/release/nightly_tests/wait_cluster.py @@ -26,15 +26,6 @@ start = time.time() next_feedback = start max_time = start + args.max_time_s - -def num_alive_nodes(): - n = 0 - for node in ray.nodes(): - if node.get("Alive", False): - n += 1 - return n - - while not curr_nodes >= args.num_nodes: now = time.time() @@ -54,7 +45,7 @@ while not curr_nodes >= args.num_nodes: next_feedback = now + args.feedback_interval_s time.sleep(5) - curr_nodes = num_alive_nodes() + curr_nodes = len(ray.nodes()) passed = time.time() - start print( diff --git a/release/ray_release/command_runner/_wait_cluster.py b/release/ray_release/command_runner/_wait_cluster.py index c02330db2..f70088289 100644 --- a/release/ray_release/command_runner/_wait_cluster.py +++ b/release/ray_release/command_runner/_wait_cluster.py @@ -25,6 +25,7 @@ curr_nodes = 0 start = time.time() next_feedback = start max_time = start + args.max_time_s + while not curr_nodes >= args.num_nodes: now = time.time() diff --git a/release/release_tests.yaml b/release/release_tests.yaml index cc94daac2..96328970b 100644 --- a/release/release_tests.yaml +++ b/release/release_tests.yaml @@ -761,3 +761,26 @@ alert: tune_tests +######################## +# Core Nightly Tests +######################## + +- name: shuffle_10gb + group: Core nightly tests + team: core + working_dir: nightly_tests + + legacy: + test_name: shuffle_10gb + test_suite: nightly_tests + + cluster: + cluster_env: shuffle/shuffle_app_config.yaml + cluster_compute: shuffle/shuffle_compute_single.yaml + + run: + timeout: 3000 + script: python shuffle/shuffle_test.py --num-partitions=50 --partition-size=200e6 + + type: sdk_command + file_manager: sdk