[release] Fix special cases in release test package (e.g. smoke test) (#22442)

This fixes special cases (e.g. smoke tests, long-running tests) in the release test package infrastructure and prepares the migration of the Tune and XGBoost tests.
Kai Fricke 2022-02-28 21:05:01 +01:00 committed by GitHub
parent ba4f1423c7
commit 3695408a85
12 changed files with 559 additions and 50 deletions

View file

@@ -85,10 +85,10 @@ py_test(
 )
 
 py_test(
-    name = "test_repeat",
+    name = "test_run_script",
     tags = ["team:ml", "release_unit"],
     size = "small",
-    srcs = ["ray_release/tests/test_repeat.py"]
+    srcs = ["ray_release/tests/test_run_script.py"]
 )
 
 py_test(

View file

@@ -34,6 +34,13 @@ class ClusterManager(abc.ABC):
     def set_cluster_env(self, cluster_env: Dict[str, Any]):
         self.cluster_env = cluster_env
+
+        # Add flags for redisless Ray
+        self.cluster_env.setdefault("env_vars", {})
+        self.cluster_env["env_vars"]["MATCH_AUTOSCALER_AND_RAY_IMAGES"] = "1"
+        self.cluster_env["env_vars"]["RAY_bootstrap_with_gcs"] = "1"
+        self.cluster_env["env_vars"]["RAY_gcs_storage"] = "memory"
+
         self.cluster_env_name = (
             f"{self.project_name}_{self.project_id[4:8]}"
             f"__env__{self.test_name}__"

View file

@@ -214,6 +214,8 @@ def run_release_test(
         command = f"{command} --smoke-test"
         command_env["IS_SMOKE_TEST"] = "1"
 
+    is_long_running = test["run"].get("long_running", False)
+
     try:
         command_runner.run_command(
             command, env=command_env, timeout=command_timeout
@@ -221,7 +223,9 @@ def run_release_test(
     except CommandError as e:
         raise TestCommandError(e)
     except CommandTimeout as e:
-        raise TestCommandTimeout(e)
+        if not is_long_running:
+            # Only raise error if command is not long running
+            raise TestCommandTimeout(e)
 
     try:
         command_results = command_runner.fetch_results()
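
A hedged, runnable sketch of the control flow introduced here: long-running tests are expected to outlive the command timeout, so the timeout is only fatal for regular tests (the runner stub and test dict are stand-ins; exception names follow the diff):

class CommandTimeout(Exception):
    pass


class TestCommandTimeout(Exception):
    pass


def run_test_command(run_command, test: dict, timeout: int) -> None:
    is_long_running = test["run"].get("long_running", False)
    try:
        run_command(timeout=timeout)
    except CommandTimeout as e:
        if not is_long_running:
            # Only fatal for tests that should finish within the timeout.
            raise TestCommandTimeout(e)


def never_finishes(timeout):
    raise CommandTimeout(f"timed out after {timeout}s")


run_test_command(never_finishes, {"run": {"long_running": True}}, 600)  # no error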

View file

@@ -20,6 +20,9 @@ from ray_release.logger import logger
 from ray_release.wheels import find_and_wait_for_ray_wheels_url
 
+PIPELINE_ARTIFACT_PATH = "/tmp/pipeline_artifacts"
+
+
 @click.command()
 @click.option(
     "--test-collection-file",
@@ -118,6 +121,19 @@ def main(test_collection_file: Optional[str] = None):
         group_step = {"group": group, "steps": group_steps}
         steps.append(group_step)
 
+    if "BUILDKITE" in os.environ:
+        if os.path.exists(PIPELINE_ARTIFACT_PATH):
+            shutil.rmtree(PIPELINE_ARTIFACT_PATH)
+        os.makedirs(PIPELINE_ARTIFACT_PATH, exist_ok=True, mode=0o755)
+
+        with open(os.path.join(PIPELINE_ARTIFACT_PATH, "pipeline.json"), "wt") as fp:
+            json.dump(steps, fp)
+
+        settings["frequency"] = settings["frequency"].value
+        with open(os.path.join(PIPELINE_ARTIFACT_PATH, "settings.json"), "wt") as fp:
+            json.dump(settings, fp)
+
     steps_str = json.dumps(steps)
     print(steps_str)
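
On Buildkite, the generated steps and the resolved settings are now persisted as JSON artifacts; a sketch of a consumer reading them back (hypothetical helper; only the writer side is part of this commit):

import json
import os

PIPELINE_ARTIFACT_PATH = "/tmp/pipeline_artifacts"


def load_pipeline_artifacts():
    with open(os.path.join(PIPELINE_ARTIFACT_PATH, "pipeline.json")) as fp:
        steps = json.load(fp)
    with open(os.path.join(PIPELINE_ARTIFACT_PATH, "settings.json")) as fp:
        # Note: "frequency" was converted to its enum .value before dumping.
        settings = json.load(fp)
    return steps, settings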

View file

@@ -6,6 +6,17 @@ import click
 import yaml
 
 
+class FormatDumper(yaml.SafeDumper):
+    last_indent = 0
+
+    def write_line_break(self, data=None):
+        if (self.indent or 0) < self.last_indent:
+            super().write_line_break()
+
+        super().write_line_break(data)
+        self.last_indent = self.indent or 0
+
+
 def replace_prepare(dt: Dict):
     if "prepare" in dt and "wait_cluster" in dt["prepare"]:
         _, _, nodes, timeout = dt.pop("prepare").split(" ")
@@ -42,6 +53,11 @@ def main(legacy_config: str, prefix: str, group: str, alert: str):
             "cluster_compute": old["cluster"]["compute_template"],
         }
 
+        if "cloud_id" in old["cluster"]:
+            test["cluster"]["cloud_id"] = old["cluster"]["cloud_id"]
+        if "cloud_name" in old["cluster"]:
+            test["cluster"]["cloud_name"] = old["cluster"]["cloud_name"]
+
         if "driver_setup" in old:
             test["driver_setup"] = old["driver_setup"]
@@ -65,7 +81,7 @@ def main(legacy_config: str, prefix: str, group: str, alert: str):
         tests.append(test)
 
-    yaml.dump(tests, sys.stdout, sort_keys=False)
+    yaml.dump(tests, sys.stdout, Dumper=FormatDumper, sort_keys=False)
     sys.stdout.flush()
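
FormatDumper emits an extra line break whenever the indentation level drops, so top-level list items in the generated YAML come out separated by blank lines. A self-contained demo of the effect (dumper copied from the diff; the sample data is made up):

import sys
import yaml


class FormatDumper(yaml.SafeDumper):
    last_indent = 0

    def write_line_break(self, data=None):
        # Extra break when dedenting, i.e. when a nested block ends.
        if (self.indent or 0) < self.last_indent:
            super().write_line_break()
        super().write_line_break(data)
        self.last_indent = self.indent or 0


tests = [
    {"name": "a", "run": {"timeout": 600}},
    {"name": "b", "run": {"timeout": 900}},
]
# Entries are emitted with separating blank lines instead of packed together.
yaml.dump(tests, sys.stdout, Dumper=FormatDumper, sort_keys=False)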

View file

@@ -124,6 +124,7 @@ def main(
         result=result,
         ray_wheels_url=ray_wheels_url,
         reporters=reporters,
+        smoke_test=smoke_test,
         cluster_id=cluster_id,
         cluster_env_id=cluster_env_id,
         no_terminate=no_terminate,

View file

@@ -0,0 +1,14 @@
+import json
+import sys
+
+
+def main():
+    argv_file = sys.argv[1]
+
+    with open(argv_file, "wt") as fp:
+        json.dump(sys.argv, fp)
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
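
The new helper simply records its own argv as JSON so tests can assert which parameters a wrapper forwarded; a usage sketch (paths assumed relative to the release/ directory, mirroring testParameters below):

import json
import subprocess
import sys
import tempfile

argv_file = tempfile.mktemp()
subprocess.check_call(
    [sys.executable, "ray_release/tests/_test_catch_args.py", argv_file, "--smoke-test"]
)
with open(argv_file) as fp:
    argv = json.load(fp)
assert "--smoke-test" in argv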

View file

@@ -11,6 +11,7 @@ from ray_release.buildkite.settings import (
     Frequency,
     update_settings_from_buildkite,
 )
+from ray_release.buildkite.step import get_step
 from ray_release.config import Test
 from ray_release.exception import ReleaseTestConfigError
 from ray_release.wheels import (
@@ -221,3 +222,19 @@ class BuildkiteSettingsTest(unittest.TestCase):
             [t["name"] for t, _ in grouped["x"]], ["x1", "x2", "x3"]
         )
         self.assertEqual(len(grouped["y"]), 1)
+
+    def testGetStep(self):
+        test = Test(
+            {
+                "name": "test",
+                "frequency": "nightly",
+                "run": {"script": "test_script.py"},
+                "smoke_test": {"frequency": "multi"},
+            }
+        )
+
+        step = get_step(test, smoke_test=False)
+        self.assertNotIn("--smoke-test", step["command"])
+
+        step = get_step(test, smoke_test=True)
+        self.assertIn("--smoke-test", step["command"])

View file

@@ -1,6 +1,7 @@
 import os
 import shutil
 import tempfile
+import time
 import unittest
 from typing import Type, Callable
 from unittest.mock import patch
@@ -213,7 +214,10 @@ class GlueTest(unittest.TestCase):
         if until == "test_command":
             return
 
-        self.command_runner_return["fetch_results"] = {"time_taken": 50}
+        self.command_runner_return["fetch_results"] = {
+            "time_taken": 50,
+            "last_update": time.time() - 60,
+        }
 
         if until == "fetch_results":
             return
@@ -495,6 +499,26 @@ class GlueTest(unittest.TestCase):
         # Ensure cluster was terminated
         self.assertGreaterEqual(self.sdk.call_counter["terminate_cluster"], 1)
 
+    def testTestCommandTimeoutLongRunning(self):
+        result = Result()
+
+        self._succeed_until("fetch_results")
+
+        # Test command times out
+        self.command_runner_return["run_command"] = _fail_on_call(CommandTimeout)
+        with self.assertRaises(TestCommandTimeout):
+            self._run(result)
+        self.assertEqual(result.return_code, ExitCode.COMMAND_TIMEOUT.value)
+
+        # But now set test to long running
+        self.test["run"]["long_running"] = True
+        self._run(result)  # Will not fail this time
+
+        self.assertGreaterEqual(result.results["last_update_diff"], 60.0)
+
+        # Ensure cluster was terminated
+        self.assertGreaterEqual(self.sdk.call_counter["terminate_cluster"], 1)
+
     def testFetchResultFails(self):
         result = Result()
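
The mocked fetch_results above now reports a last_update timestamp 60 seconds in the past; presumably glue.py derives result.results["last_update_diff"] roughly like this (a hedged sketch, not code from this diff):

import time


def last_update_diff(fetched: dict) -> float:
    # Seconds since the test last reported progress.
    return time.time() - fetched["last_update"]


assert last_update_diff({"last_update": time.time() - 60}) >= 60.0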

View file

@@ -1,3 +1,4 @@
+import json
 import os
 import shutil
 import subprocess
@@ -7,7 +8,7 @@ import unittest
 from ray_release.result import ExitCode
 
-class WheelsFinderTest(unittest.TestCase):
+class RunScriptTest(unittest.TestCase):
     def setUp(self) -> None:
         self.tempdir = tempfile.mkdtemp()
         self.state_file = os.path.join(self.tempdir, "state.txt")
@@ -18,9 +19,7 @@ class WheelsFinderTest(unittest.TestCase):
         os.environ["NO_INSTALL"] = "1"
         os.environ["NO_CLONE"] = "1"
         os.environ["NO_ARTIFACTS"] = "1"
-        os.environ["RAY_TEST_SCRIPT"] = (
-            "ray_release/tests/" "_test_run_release_test_sh.py"
-        )
+        os.environ["RAY_TEST_SCRIPT"] = "ray_release/tests/_test_run_release_test_sh.py"
         os.environ["OVERRIDE_SLEEP_TIME"] = "0"
 
     def tearDown(self) -> None:
@@ -86,3 +85,19 @@ class WheelsFinderTest(unittest.TestCase):
             ExitCode.COMMAND_ALERT.value,
         )
         self.assertEquals(self._read_state(), 2)
+
+    def testParameters(self):
+        os.environ["RAY_TEST_SCRIPT"] = "ray_release/tests/_test_catch_args.py"
+        argv_file = tempfile.mktemp()
+
+        subprocess.check_call(
+            f"{self.test_script} {argv_file} --smoke-test",
+            shell=True,
+        )
+
+        with open(argv_file, "rt") as fp:
+            data = json.load(fp)
+
+        os.unlink(argv_file)
+
+        self.assertIn("--smoke-test", data)

View file

@@ -17,7 +17,7 @@
 #
 # # How often to run the tests.
 # # One of [disabled, any, multi, nightly, weekly].
-# frequency: weekly
+# frequency: disabled # weekly
 # # Owning team. This field will be persisted to the database
 # team: ml
 #
@@ -49,7 +49,7 @@
 #
 # # File manager to use to transfer files to and from the cluster.
 # # Can be any of [sdk, client, job].
-# file_manager: job
+# file_manager: sdk
 #
 # # If you want to wait for nodes to be ready, you can specify this here:
 # wait_for_nodes:
@@ -77,7 +77,7 @@
 # smoke_test:
 #   # Smoke tests can have different frequencies. A smoke test is only triggered
 #   # when the regular test is not matched.
-#   frequency: nightly
+#   frequency: disabled # nightly
 #   # Here we adjust the run timeout down and run on less nodes. The test script
 #   # remains the same.
 #   run:
@@ -130,7 +130,7 @@
     test_name: train_moderate
     test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -146,7 +146,7 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
@@ -158,7 +158,7 @@
     test_name: train_gpu
    test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -174,7 +174,7 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
@@ -186,7 +186,7 @@
     test_name: distributed_api_test
     test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -201,7 +201,7 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
@@ -213,7 +213,7 @@
     test_name: ft_small_elastic
     test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -229,7 +229,7 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
@@ -241,7 +241,7 @@
     test_name: ft_small_non_elastic
     test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -257,7 +257,7 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
@@ -269,7 +269,7 @@
     test_name: tune_small
     test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -285,7 +285,7 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
@@ -297,7 +297,7 @@
     test_name: tune_32x4
     test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -313,7 +313,7 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
@@ -325,7 +325,7 @@
     test_name: tune_4x32
     test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -341,6 +341,423 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
+
+
+#######################
+# Tune cloud tests
+#######################
+
+- name: tune_cloud_aws_no_sync_down
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+  legacy:
+    test_name: aws_no_sync_down
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_aws_4x2.yaml
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py no_sync_down
+    wait_for_nodes:
+      num_nodes: 4
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_cloud_aws_ssh_sync
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+  legacy:
+    test_name: aws_ssh_sync
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_aws_4x2.yaml
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py ssh_sync
+    wait_for_nodes:
+      num_nodes: 4
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_cloud_aws_durable_upload
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+  legacy:
+    test_name: aws_durable_upload
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_aws_4x2.yaml
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py durable_upload --bucket s3://data-test-ilr/durable_upload
+    wait_for_nodes:
+      num_nodes: 4
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_cloud_aws_durable_upload_rllib_str
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+  legacy:
+    test_name: aws_durable_upload_rllib_str
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+  cluster:
+    cluster_env: app_config_ml.yaml
+    cluster_compute: tpl_aws_4x2.yaml
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py durable_upload --trainable rllib_str
+      --bucket s3://data-test-ilr/durable_upload_rllib_str
+    wait_for_nodes:
+      num_nodes: 4
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_cloud_aws_durable_upload_rllib_trainer
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+  legacy:
+    test_name: aws_durable_upload_rllib_trainer
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+  cluster:
+    cluster_env: app_config_ml.yaml
+    cluster_compute: tpl_aws_4x2.yaml
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py durable_upload --trainable rllib_trainer
+      --bucket s3://data-test-ilr/durable_upload_rllib_trainer
+    wait_for_nodes:
+      num_nodes: 4
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_cloud_gcp_k8s_no_sync_down
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+  legacy:
+    test_name: gcp_k8s_no_sync_down
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_gcp_k8s_4x8.yaml
+    cloud_id: cld_k8WcxPgjUtSE8RVmfZpTLuKM # anyscale_k8s_gcp_cloud
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py no_sync_down --cpus-per-trial 8
+    type: client
+
+  alert: tune_tests
+
+- name: tune_cloud_gcp_k8s_ssh_sync
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+  legacy:
+    test_name: gcp_k8s_ssh_sync
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_gcp_k8s_4x8.yaml
+    cloud_id: cld_k8WcxPgjUtSE8RVmfZpTLuKM # anyscale_k8s_gcp_cloud
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py ssh_sync --cpus-per-trial 8
+    type: client
+
+  alert: tune_tests
+
+- name: tune_cloud_gcp_k8s_durable_upload
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+  legacy:
+    test_name: gcp_k8s_durable_upload
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_gcp_k8s_4x8.yaml
+    cloud_id: cld_k8WcxPgjUtSE8RVmfZpTLuKM # anyscale_k8s_gcp_cloud
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py durable_upload --cpus-per-trial 8 --bucket gs://jun-riot-test/durable_upload
+    type: client
+
+  alert: tune_tests
+
+
+########################
+# Tune scalability tests
+########################
+
+- name: tune_scalability_bookkeeping_overhead
+  group: Tune scalability tests
+  working_dir: tune_tests/scalability_tests
+  legacy:
+    test_name: bookkeeping_overhead
+    test_suite: tune_tests
+
+  frequency: disabled # nightly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_1x16.yaml
+
+  run:
+    timeout: 1200
+    script: python workloads/test_bookkeeping_overhead.py
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_scalability_durable_trainable
+  group: Tune scalability tests
+  working_dir: tune_tests/scalability_tests
+  legacy:
+    test_name: durable_trainable
+    test_suite: tune_tests
+
+  frequency: disabled # nightly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_16x2.yaml
+
+  run:
+    timeout: 900
+    script: python workloads/test_durable_trainable.py --bucket data-test-ilr
+    wait_for_nodes:
+      num_nodes: 16
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_scalability_long_running_large_checkpoints
+  group: Tune scalability tests
+  working_dir: tune_tests/scalability_tests
+  legacy:
+    test_name: long_running_large_checkpoints
+    test_suite: tune_tests
+
+  frequency: disabled # weekly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_1x32_hd.yaml
+
+  run:
+    timeout: 86400
+    script: python workloads/test_long_running_large_checkpoints.py
+    long_running: true
+    type: sdk_command
+    file_manager: sdk
+
+  smoke_test:
+    frequency: disabled # nightly
+    run:
+      timeout: 3600
+
+  alert: tune_tests
+
+- name: tune_scalability_network_overhead
+  group: Tune scalability tests
+  working_dir: tune_tests/scalability_tests
+  legacy:
+    test_name: network_overhead
+    test_suite: tune_tests
+
+  frequency: disabled # weekly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_100x2.yaml
+
+  run:
+    timeout: 900
+    prepare_timeout: 1200
+    script: python workloads/test_network_overhead.py
+    wait_for_nodes:
+      num_nodes: 100
+      timeout: 1200
+
+    type: sdk_command
+    file_manager: sdk
+
+  smoke_test:
+    frequency: disabled # nightly
+    cluster:
+      compute_template: tpl_20x2.yaml
+
+    run:
+      timeout: 400
+      prepare_timeout: 600
+      wait_for_nodes:
+        num_nodes: 20
+        timeout: 600
+
+  alert: tune_tests
+
+- name: tune_scalability_result_throughput_cluster
+  group: Tune scalability tests
+  working_dir: tune_tests/scalability_tests
+  legacy:
+    test_name: result_throughput_cluster
+    test_suite: tune_tests
+
+  frequency: disabled # nightly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_16x64.yaml
+
+  run:
+    timeout: 600
+    script: python workloads/test_result_throughput_cluster.py
+    wait_for_nodes:
+      num_nodes: 16
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_scalability_result_throughput_single_node
+  group: Tune scalability tests
+  working_dir: tune_tests/scalability_tests
+  legacy:
+    test_name: result_throughput_single_node
+    test_suite: tune_tests
+
+  frequency: disabled # nightly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_1x96.yaml
+
+  run:
+    timeout: 600
+    script: python workloads/test_result_throughput_single_node.py
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_scalability_xgboost_sweep
+  group: Tune scalability tests
+  working_dir: tune_tests/scalability_tests
+  legacy:
+    test_name: xgboost_sweep
+    test_suite: tune_tests
+
+  frequency: disabled # weekly
+  team: ml
+  cluster:
+    cluster_env: app_config_data.yaml
+    cluster_compute: tpl_16x64.yaml
+
+  run:
+    timeout: 3600
+    script: python workloads/test_xgboost_sweep.py
+    wait_for_nodes:
+      num_nodes: 16
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests

View file

@@ -26,28 +26,6 @@ reason() {
   echo "${REASON}"
 }
 
-while [[ $# -gt 0 ]]
-do
-key="$1"
-case $key in
-    --ray-test-repo)
-    shift
-    RAY_TEST_REPO=$1
-    ;;
-    --ray-test-branch)
-    shift
-    RAY_TEST_BRANCH=$1
-    ;;
-    --release-results-dir)
-    shift
-    RELEASE_RESULTS_DIR=$1
-    ;;
-    *)
-    break
-esac
-shift
-done
-
 RAY_TEST_SCRIPT=${RAY_TEST_SCRIPT-ray_release/scripts/run_release_test.py}
 RAY_TEST_REPO=${RAY_TEST_REPO-https://github.com/ray-project/ray.git}
 RAY_TEST_BRANCH=${RAY_TEST_BRANCH-master}
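
With the flag parsing removed, the wrapper is configured purely via environment variables, falling back to the defaults above; a hedged sketch of an equivalent invocation from Python (the positional test arguments are illustrative):

import os
import subprocess

env = dict(os.environ)
env["RAY_TEST_REPO"] = "https://github.com/ray-project/ray.git"
env["RAY_TEST_BRANCH"] = "master"
# Remaining arguments are passed through to run_release_test.py untouched.
subprocess.check_call(["./run_release_test.sh", "my_test", "--smoke-test"], env=env)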