diff --git a/release/BUILD b/release/BUILD
index f8a169de1..c5988d982 100644
--- a/release/BUILD
+++ b/release/BUILD
@@ -85,10 +85,10 @@ py_test(
 )
 
 py_test(
-    name = "test_repeat",
+    name = "test_run_script",
     tags = ["team:ml", "release_unit"],
     size = "small",
-    srcs = ["ray_release/tests/test_repeat.py"]
+    srcs = ["ray_release/tests/test_run_script.py"]
 )
 
 py_test(
diff --git a/release/ray_release/cluster_manager/cluster_manager.py b/release/ray_release/cluster_manager/cluster_manager.py
index d920cf2cd..7d826b8b1 100644
--- a/release/ray_release/cluster_manager/cluster_manager.py
+++ b/release/ray_release/cluster_manager/cluster_manager.py
@@ -34,6 +34,13 @@ class ClusterManager(abc.ABC):
     def set_cluster_env(self, cluster_env: Dict[str, Any]):
         self.cluster_env = cluster_env
+
+        # Add flags for redisless Ray
+        self.cluster_env.setdefault("env_vars", {})
+        self.cluster_env["env_vars"]["MATCH_AUTOSCALER_AND_RAY_IMAGES"] = "1"
+        self.cluster_env["env_vars"]["RAY_bootstrap_with_gcs"] = "1"
+        self.cluster_env["env_vars"]["RAY_gcs_storage"] = "memory"
+
         self.cluster_env_name = (
             f"{self.project_name}_{self.project_id[4:8]}"
             f"__env__{self.test_name}__"
diff --git a/release/ray_release/glue.py b/release/ray_release/glue.py
index 2ceb3537b..9ff3cbb79 100644
--- a/release/ray_release/glue.py
+++ b/release/ray_release/glue.py
@@ -214,6 +214,8 @@ def run_release_test(
         command = f"{command} --smoke-test"
         command_env["IS_SMOKE_TEST"] = "1"
 
+    is_long_running = test["run"].get("long_running", False)
+
     try:
         command_runner.run_command(
             command, env=command_env, timeout=command_timeout
@@ -221,7 +223,9 @@
     except CommandError as e:
         raise TestCommandError(e)
     except CommandTimeout as e:
-        raise TestCommandTimeout(e)
+        if not is_long_running:
+            # Only raise error if command is not long running
+            raise TestCommandTimeout(e)
 
     try:
         command_results = command_runner.fetch_results()
diff --git a/release/ray_release/scripts/build_pipeline.py b/release/ray_release/scripts/build_pipeline.py
index 92410ed47..200050214 100644
--- a/release/ray_release/scripts/build_pipeline.py
+++ b/release/ray_release/scripts/build_pipeline.py
@@ -20,6 +20,9 @@
 from ray_release.logger import logger
 from ray_release.wheels import find_and_wait_for_ray_wheels_url
 
+PIPELINE_ARTIFACT_PATH = "/tmp/pipeline_artifacts"
+
+
 @click.command()
 @click.option(
     "--test-collection-file",
@@ -118,6 +121,19 @@ def main(test_collection_file: Optional[str] = None):
         group_step = {"group": group, "steps": group_steps}
         steps.append(group_step)
 
+    if "BUILDKITE" in os.environ:
+        if os.path.exists(PIPELINE_ARTIFACT_PATH):
+            shutil.rmtree(PIPELINE_ARTIFACT_PATH)
+
+        os.makedirs(PIPELINE_ARTIFACT_PATH, exist_ok=True, mode=0o755)
+
+        with open(os.path.join(PIPELINE_ARTIFACT_PATH, "pipeline.json"), "wt") as fp:
+            json.dump(steps, fp)
+
+        settings["frequency"] = settings["frequency"].value
+        with open(os.path.join(PIPELINE_ARTIFACT_PATH, "settings.json"), "wt") as fp:
+            json.dump(settings, fp)
+
     steps_str = json.dumps(steps)
     print(steps_str)
diff --git a/release/ray_release/scripts/convert_legacy_config.py b/release/ray_release/scripts/convert_legacy_config.py
index 4eda97dd7..eb115958d 100644
--- a/release/ray_release/scripts/convert_legacy_config.py
+++ b/release/ray_release/scripts/convert_legacy_config.py
@@ -6,6 +6,17 @@
 import click
 import yaml
 
+
+class FormatDumper(yaml.SafeDumper):
+    last_indent = 0
+
+    def write_line_break(self, data=None):
+        if (self.indent or 0) < self.last_indent:
+            super().write_line_break()
+
+        super().write_line_break(data)
+        self.last_indent = self.indent or 0
+
 
 def replace_prepare(dt: Dict):
     if "prepare" in dt and "wait_cluster" in dt["prepare"]:
         _, _, nodes, timeout = dt.pop("prepare").split(" ")
@@ -42,6 +53,11 @@ def main(legacy_config: str, prefix: str, group: str, alert: str):
             "cluster_compute": old["cluster"]["compute_template"],
         }
 
+        if "cloud_id" in old["cluster"]:
+            test["cluster"]["cloud_id"] = old["cluster"]["cloud_id"]
+        if "cloud_name" in old["cluster"]:
+            test["cluster"]["cloud_name"] = old["cluster"]["cloud_name"]
+
         if "driver_setup" in old:
             test["driver_setup"] = "driver_setup"
 
@@ -65,7 +81,7 @@
         tests.append(test)
 
-    yaml.dump(tests, sys.stdout, sort_keys=False)
+    yaml.dump(tests, sys.stdout, Dumper=FormatDumper, sort_keys=False)
     sys.stdout.flush()
diff --git a/release/ray_release/scripts/run_release_test.py b/release/ray_release/scripts/run_release_test.py
index 6729c6a66..e070d57ff 100644
--- a/release/ray_release/scripts/run_release_test.py
+++ b/release/ray_release/scripts/run_release_test.py
@@ -124,6 +124,7 @@ def main(
         result=result,
         ray_wheels_url=ray_wheels_url,
         reporters=reporters,
+        smoke_test=smoke_test,
         cluster_id=cluster_id,
         cluster_env_id=cluster_env_id,
         no_terminate=no_terminate,
diff --git a/release/ray_release/tests/_test_catch_args.py b/release/ray_release/tests/_test_catch_args.py
new file mode 100644
index 000000000..736a01e64
--- /dev/null
+++ b/release/ray_release/tests/_test_catch_args.py
@@ -0,0 +1,14 @@
+import json
+import sys
+
+
+def main():
+    argv_file = sys.argv[1]
+    with open(argv_file, "wt") as fp:
+        json.dump(sys.argv, fp)
+
+    sys.exit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/release/ray_release/tests/test_buildkite.py b/release/ray_release/tests/test_buildkite.py
index 6c806c7f2..074c19552 100644
--- a/release/ray_release/tests/test_buildkite.py
+++ b/release/ray_release/tests/test_buildkite.py
@@ -11,6 +11,7 @@ from ray_release.buildkite.settings import (
     Frequency,
     update_settings_from_buildkite,
 )
+from ray_release.buildkite.step import get_step
 from ray_release.config import Test
 from ray_release.exception import ReleaseTestConfigError
 from ray_release.wheels import (
@@ -221,3 +222,19 @@
             [t["name"] for t, _ in grouped["x"]], ["x1", "x2", "x3"]
         )
         self.assertEqual(len(grouped["y"]), 1)
+
+    def testGetStep(self):
+        test = Test(
+            {
+                "name": "test",
+                "frequency": "nightly",
+                "run": {"script": "test_script.py"},
+                "smoke_test": {"frequency": "multi"},
+            }
+        )
+
+        step = get_step(test, smoke_test=False)
+        self.assertNotIn("--smoke-test", step["command"])
+
+        step = get_step(test, smoke_test=True)
+        self.assertIn("--smoke-test", step["command"])
diff --git a/release/ray_release/tests/test_glue.py b/release/ray_release/tests/test_glue.py
index 05e190026..a9ab3a85e 100644
--- a/release/ray_release/tests/test_glue.py
+++ b/release/ray_release/tests/test_glue.py
@@ -1,6 +1,7 @@
 import os
 import shutil
 import tempfile
+import time
 import unittest
 from typing import Type, Callable
 from unittest.mock import patch
@@ -213,7 +214,10 @@
         if until == "test_command":
             return
 
-        self.command_runner_return["fetch_results"] = {"time_taken": 50}
+        self.command_runner_return["fetch_results"] = {
+            "time_taken": 50,
+            "last_update": time.time() - 60,
+        }
 
         if until == "fetch_results":
             return
@@ -495,6 +499,26 @@
         # Ensure cluster was terminated
         self.assertGreaterEqual(self.sdk.call_counter["terminate_cluster"], 1)
 
+    def testTestCommandTimeoutLongRunning(self):
+        result = Result()
+
+        self._succeed_until("fetch_results")
+
+        # Test command times out
+        self.command_runner_return["run_command"] = _fail_on_call(CommandTimeout)
+        with self.assertRaises(TestCommandTimeout):
+            self._run(result)
+        self.assertEqual(result.return_code, ExitCode.COMMAND_TIMEOUT.value)
+
+        # But now set test to long running
+        self.test["run"]["long_running"] = True
+        self._run(result)  # Will not fail this time
+
+        self.assertGreaterEqual(result.results["last_update_diff"], 60.0)
+
+        # Ensure cluster was terminated
+        self.assertGreaterEqual(self.sdk.call_counter["terminate_cluster"], 1)
+
     def testFetchResultFails(self):
         result = Result()
diff --git a/release/ray_release/tests/test_repeat.py b/release/ray_release/tests/test_run_script.py
similarity index 81%
rename from release/ray_release/tests/test_repeat.py
rename to release/ray_release/tests/test_run_script.py
index fca41669e..1aa944f7e 100644
--- a/release/ray_release/tests/test_repeat.py
+++ b/release/ray_release/tests/test_run_script.py
@@ -1,3 +1,4 @@
+import json
 import os
 import shutil
 import subprocess
@@ -7,7 +8,7 @@ import unittest
 from ray_release.result import ExitCode
 
 
-class WheelsFinderTest(unittest.TestCase):
+class RunScriptTest(unittest.TestCase):
     def setUp(self) -> None:
         self.tempdir = tempfile.mkdtemp()
         self.state_file = os.path.join(self.tempdir, "state.txt")
@@ -18,9 +19,7 @@
         os.environ["NO_INSTALL"] = "1"
         os.environ["NO_CLONE"] = "1"
         os.environ["NO_ARTIFACTS"] = "1"
-        os.environ["RAY_TEST_SCRIPT"] = (
-            "ray_release/tests/" "_test_run_release_test_sh.py"
-        )
+        os.environ["RAY_TEST_SCRIPT"] = "ray_release/tests/_test_run_release_test_sh.py"
         os.environ["OVERRIDE_SLEEP_TIME"] = "0"
 
     def tearDown(self) -> None:
@@ -86,3 +85,19 @@
             ExitCode.COMMAND_ALERT.value,
         )
         self.assertEquals(self._read_state(), 2)
+
+    def testParameters(self):
+        os.environ["RAY_TEST_SCRIPT"] = "ray_release/tests/_test_catch_args.py"
+        argv_file = tempfile.mktemp()
+
+        subprocess.check_call(
+            f"{self.test_script} " f"{argv_file} " f"--smoke-test",
+            shell=True,
+        )
+
+        with open(argv_file, "rt") as fp:
+            data = json.load(fp)
+
+        os.unlink(argv_file)
+
+        self.assertIn("--smoke-test", data)
diff --git a/release/release_tests.yaml b/release/release_tests.yaml
index 75b75dcd9..ea3a09c00 100644
--- a/release/release_tests.yaml
+++ b/release/release_tests.yaml
@@ -17,7 +17,7 @@
 #
 #   # How often to run the tests.
 #   # One of [disabled, any, multi, nightly, weekly].
-#   frequency: weekly
+#   frequency: disabled # weekly
 #
 #   # Owning team. This field will be persisted to the database
 #   team: ml
 #
@@ -49,7 +49,7 @@
 #
 #     # File manager to use to transfer files to and from the cluster.
 #     # Can be any of [sdk, client, job].
-#     file_manager: job
+#     file_manager: sdk
 #
 #     # If you want to wait for nodes to be ready, you can specify this here:
 #     wait_for_nodes:
@@ -77,7 +77,7 @@
 #   smoke_test:
 #     # Smoke tests can have different frequencies. A smoke test is only triggered
 #     # when the regular test is not matched.
-#     frequency: nightly
+#     frequency: disabled # nightly
 #
 #     # Here we adjust the run timeout down and run on less nodes. The test script
 #     # remains the same.
 #     run:
@@ -130,7 +130,7 @@
     test_name: train_moderate
     test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -146,7 +146,7 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
 
@@ -158,7 +158,7 @@
     test_name: train_gpu
    test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -174,7 +174,7 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
 
@@ -186,7 +186,7 @@
     test_name: distributed_api_test
     test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -201,7 +201,7 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
 
@@ -213,7 +213,7 @@
     test_name: ft_small_elastic
     test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -229,7 +229,7 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
 
@@ -241,7 +241,7 @@
     test_name: ft_small_non_elastic
     test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
  cluster:
@@ -257,7 +257,7 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
 
@@ -269,7 +269,7 @@
     test_name: tune_small
     test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -285,7 +285,7 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
 
@@ -297,7 +297,7 @@
     test_name: tune_32x4
     test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -313,7 +313,7 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
 
@@ -325,7 +325,7 @@
     test_name: tune_4x32
     test_suite: xgboost_tests
 
-  frequency: nightly
+  frequency: disabled # nightly
   team: ml
 
   cluster:
@@ -341,6 +341,423 @@
       timeout: 600
 
     type: sdk_command
-    file_manager: job
+    file_manager: sdk
 
   alert: xgboost_tests
+
+#######################
+# Tune cloud tests
+#######################
+- name: tune_cloud_aws_no_sync_down
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+
+  legacy:
+    test_name: aws_no_sync_down
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_aws_4x2.yaml
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py no_sync_down
+
+    wait_for_nodes:
+      num_nodes: 4
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_cloud_aws_ssh_sync
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+
+  legacy:
+    test_name: aws_ssh_sync
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_aws_4x2.yaml
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py ssh_sync
+
+    wait_for_nodes:
+      num_nodes: 4
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_cloud_aws_durable_upload
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+
+  legacy:
+    test_name: aws_durable_upload
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_aws_4x2.yaml
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py durable_upload --bucket s3://data-test-ilr/durable_upload
+
+    wait_for_nodes:
+      num_nodes: 4
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_cloud_aws_durable_upload_rllib_str
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+
+  legacy:
+    test_name: aws_durable_upload_rllib_str
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+
+  cluster:
+    cluster_env: app_config_ml.yaml
+    cluster_compute: tpl_aws_4x2.yaml
+
+  run:
+    timeout: 600
+
+    script: python workloads/run_cloud_test.py durable_upload --trainable rllib_str
+      --bucket s3://data-test-ilr/durable_upload_rllib_str
+
+    wait_for_nodes:
+      num_nodes: 4
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_cloud_aws_durable_upload_rllib_trainer
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+
+  legacy:
+    test_name: aws_durable_upload_rllib_trainer
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+
+  cluster:
+    cluster_env: app_config_ml.yaml
+    cluster_compute: tpl_aws_4x2.yaml
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py durable_upload --trainable rllib_trainer
+      --bucket s3://data-test-ilr/durable_upload_rllib_trainer
+
+    wait_for_nodes:
+      num_nodes: 4
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_cloud_gcp_k8s_no_sync_down
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+
+  legacy:
+    test_name: gcp_k8s_no_sync_down
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_gcp_k8s_4x8.yaml
+    cloud_id: cld_k8WcxPgjUtSE8RVmfZpTLuKM # anyscale_k8s_gcp_cloud
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py no_sync_down --cpus-per-trial 8
+    type: client
+
+  alert: tune_tests
+
+- name: tune_cloud_gcp_k8s_ssh_sync
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+
+  legacy:
+    test_name: gcp_k8s_ssh_sync
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_gcp_k8s_4x8.yaml
+    cloud_id: cld_k8WcxPgjUtSE8RVmfZpTLuKM # anyscale_k8s_gcp_cloud
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py ssh_sync --cpus-per-trial 8
+    type: client
+
+  alert: tune_tests
+
+- name: tune_cloud_gcp_k8s_durable_upload
+  group: Tune cloud tests
+  working_dir: tune_tests/cloud_tests
+
+  legacy:
+    test_name: gcp_k8s_durable_upload
+    test_suite: tune_cloud_tests
+
+  frequency: disabled # nightly
+  team: ml
+
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_gcp_k8s_4x8.yaml
+    cloud_id: cld_k8WcxPgjUtSE8RVmfZpTLuKM # anyscale_k8s_gcp_cloud
+
+  run:
+    timeout: 600
+    script: python workloads/run_cloud_test.py durable_upload --cpus-per-trial 8 --bucket gs://jun-riot-test/durable_upload
+    type: client
+
+  alert: tune_tests
+
+
+########################
+# Tune scalability tests
+########################
+
+- name: tune_scalability_bookkeeping_overhead
+  group: Tune scalability tests
+  working_dir: tune_tests/scalability_tests
+
+  legacy:
+    test_name: bookkeeping_overhead
+    test_suite: tune_tests
+
+  frequency: disabled # nightly
+  team: ml
+
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_1x16.yaml
+
+  run:
+    timeout: 1200
+    script: python workloads/test_bookkeeping_overhead.py
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_scalability_durable_trainable
+  group: Tune scalability tests
+  working_dir: tune_tests/scalability_tests
+
+  legacy:
+    test_name: durable_trainable
+    test_suite: tune_tests
+
+  frequency: disabled # nightly
+  team: ml
+
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_16x2.yaml
+
+  run:
+    timeout: 900
+    script: python workloads/test_durable_trainable.py --bucket data-test-ilr
+    wait_for_nodes:
+      num_nodes: 16
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_scalability_long_running_large_checkpoints
+  group: Tune scalability tests
+  working_dir: tune_tests/scalability_tests
+
+  legacy:
+    test_name: long_running_large_checkpoints
+    test_suite: tune_tests
+
+  frequency: disabled # weekly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_1x32_hd.yaml
+
+  run:
+    timeout: 86400
+    script: python workloads/test_long_running_large_checkpoints.py
+    long_running: true
+    type: sdk_command
+    file_manager: sdk
+
+  smoke_test:
+    frequency: disabled # nightly
+
+    run:
+      timeout: 3600
+
+  alert: tune_tests
+
+- name: tune_scalability_network_overhead
+  group: Tune scalability tests
+  working_dir: tune_tests/scalability_tests
+  legacy:
+    test_name: network_overhead
+    test_suite: tune_tests
+
+  frequency: disabled # weekly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_100x2.yaml
+
+  run:
+    timeout: 900
+    prepare_timeout: 1200
+    script: python workloads/test_network_overhead.py
+    wait_for_nodes:
+      num_nodes: 100
+      timeout: 1200
+
+    type: sdk_command
+    file_manager: sdk
+
+  smoke_test:
+    frequency: disabled # nightly
+
+    cluster:
+      compute_template: tpl_20x2.yaml
+
+    run:
+      timeout: 400
+      prepare_timeout: 600
+      wait_for_nodes:
+        num_nodes: 20
+        timeout: 600
+
+  alert: tune_tests
+
+- name: tune_scalability_result_throughput_cluster
+  group: Tune scalability tests
+  working_dir: tune_tests/scalability_tests
+
+  legacy:
+    test_name: result_throughput_cluster
+    test_suite: tune_tests
+
+  frequency: disabled # nightly
+  team: ml
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_16x64.yaml
+
+  run:
+    timeout: 600
+    script: python workloads/test_result_throughput_cluster.py
+
+    wait_for_nodes:
+      num_nodes: 16
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_scalability_result_throughput_single_node
+  group: Tune scalability tests
+  working_dir: tune_tests/scalability_tests
+
+  legacy:
+    test_name: result_throughput_single_node
+    test_suite: tune_tests
+
+  frequency: disabled # nightly
+  team: ml
+
+  cluster:
+    cluster_env: app_config.yaml
+    cluster_compute: tpl_1x96.yaml
+
+  run:
+    timeout: 600
+    script: python workloads/test_result_throughput_single_node.py
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
+- name: tune_scalability_xgboost_sweep
+  group: Tune scalability tests
+  working_dir: tune_tests/scalability_tests
+
+  legacy:
+    test_name: xgboost_sweep
+    test_suite: tune_tests
+
+  frequency: disabled # weekly
+  team: ml
+
+  cluster:
+    cluster_env: app_config_data.yaml
+    cluster_compute: tpl_16x64.yaml
+
+  run:
+    timeout: 3600
+    script: python workloads/test_xgboost_sweep.py
+
+    wait_for_nodes:
+      num_nodes: 16
+      timeout: 600
+
+    type: sdk_command
+    file_manager: sdk
+
+  alert: tune_tests
+
diff --git a/release/run_release_test.sh b/release/run_release_test.sh
index 6654f6e63..669a18c5c 100755
--- a/release/run_release_test.sh
+++ b/release/run_release_test.sh
@@ -26,28 +26,6 @@ reason() {
   echo "${REASON}"
 }
 
-while [[ $# -gt 0 ]]
-do
-key="$1"
-case $key in
-    --ray-test-repo)
-    shift
-    RAY_TEST_REPO=$1
-    ;;
-    --ray-test-branch)
-    shift
-    RAY_TEST_BRANCH=$1
-    ;;
-    --release-results-dir)
-    shift
-    RELEASE_RESULTS_DIR=$1
-    ;;
-    *)
-    break
-esac
-shift
-done
-
 RAY_TEST_SCRIPT=${RAY_TEST_SCRIPT-ray_release/scripts/run_release_test.py}
 RAY_TEST_REPO=${RAY_TEST_REPO-https://github.com/ray-project/ray.git}
 RAY_TEST_BRANCH=${RAY_TEST_BRANCH-master}