diff --git a/benchmarks/benchmark_tests.yaml b/benchmarks/benchmark_tests.yaml deleted file mode 100644 index a89e3deb9..000000000 --- a/benchmarks/benchmark_tests.yaml +++ /dev/null @@ -1,145 +0,0 @@ -- name: single_node - team: core - cluster: - app_config: app_config.yaml - compute_template: single_node.yaml - - run: - timeout: 12000 - prepare: sleep 0 - script: python single_node/test_single_node.py - -- name: object_store - team: core - cluster: - app_config: app_config.yaml - compute_template: object_store.yaml - - run: - timeout: 3600 - prepare: python distributed/wait_cluster.py --num-nodes=50 - script: python object_store/test_object_store.py - -- name: many_actors - team: core - cluster: - app_config: app_config.yaml - compute_template: distributed.yaml - - run: - timeout: 3600 # 1hr - prepare: python distributed/wait_cluster.py --num-nodes=65 - script: python distributed/test_many_actors.py - -- name: many_actors_smoke_test - team: core - cluster: - app_config: app_config.yaml - compute_template: distributed_smoke_test.yaml - - run: - timeout: 3600 # 1hr - prepare: python distributed/wait_cluster.py --num-nodes=2 - script: SMOKE_TEST=1 python distributed/test_many_actors.py - -- name: many_tasks - team: core - cluster: - app_config: app_config.yaml - compute_template: distributed.yaml - - run: - timeout: 3600 # 1hr - prepare: python distributed/wait_cluster.py --num-nodes=65 - script: python distributed/test_many_tasks.py --num-tasks=10000 - -- name: many_tasks_smoke_test - team: core - cluster: - app_config: app_config.yaml - compute_template: distributed_smoke_test.yaml - - run: - timeout: 3600 # 1hr - prepare: python distributed/wait_cluster.py --num-nodes=2 - script: python distributed/test_many_tasks.py --num-tasks=100 - -- name: many_pgs - team: core - cluster: - app_config: app_config.yaml - compute_template: distributed.yaml - - run: - timeout: 3600 # 1hr - prepare: python distributed/wait_cluster.py --num-nodes=65 - script: python distributed/test_many_pgs.py - -- name: many_pgs_smoke_test - team: core - cluster: - app_config: app_config.yaml - compute_template: distributed_smoke_test.yaml - - run: - timeout: 3600 # 1hr - prepare: python distributed/wait_cluster.py --num-nodes=2 - script: SMOKE_TEST=1 python distributed/test_many_pgs.py - -# NOTE: No smoke test since this shares a script with the many_tasks_smoke_test -- name: many_nodes - team: core - cluster: - app_config: app_config.yaml - compute_template: many_nodes.yaml - - run: - timeout: 3600 # 1hr - prepare: python distributed/wait_cluster.py --num-nodes=250 - script: python distributed/test_many_tasks.py --num-tasks=1000 - -- name: scheduling_test_many_0s_tasks_single_node - team: core - cluster: - app_config: app_config.yaml - compute_template: scheduling.yaml - - run: - timeout: 3600 - prepare: python distributed/wait_cluster.py --num-nodes=32 - script: python distributed/test_scheduling.py --total-num-task=1984000 --num-cpu-per-task=1 --task-duration-s=0 --total-num-actors=1 --num-actors-per-nodes=1 - -- name: scheduling_test_many_0s_tasks_many_nodes - team: core - cluster: - app_config: app_config.yaml - compute_template: scheduling.yaml - - run: - timeout: 3600 - prepare: python distributed/wait_cluster.py --num-nodes=32 - script: python distributed/test_scheduling.py --total-num-task=1984000 --num-cpu-per-task=1 --task-duration-s=0 --total-num-actors=32 --num-actors-per-nodes=1 - -- name: scheduling_test_many_5s_tasks_single_node - team: core - cluster: - app_config: app_config.yaml - compute_template: 
scheduling.yaml - - run: - timeout: 3600 - prepare: python distributed/wait_cluster.py --num-nodes=32 - script: python distributed/test_scheduling.py --total-num-task=1984000 --num-cpu-per-task=1 --task-duration-s=5 --total-num-actors=1 --num-actors-per-nodes=1 - stable: false - -- name: scheduling_test_many_5s_tasks_many_nodes - team: core - cluster: - app_config: app_config.yaml - compute_template: scheduling.yaml - - run: - timeout: 3600 - prepare: python distributed/wait_cluster.py --num-nodes=32 - script: python distributed/test_scheduling.py --total-num-task=1984000 --num-cpu-per-task=1 --task-duration-s=5 --total-num-actors=32 --num-actors-per-nodes=1 - stable: false diff --git a/benchmarks/distributed/wait_cluster.py b/benchmarks/distributed/wait_cluster.py deleted file mode 100644 index 12a8a1677..000000000 --- a/benchmarks/distributed/wait_cluster.py +++ /dev/null @@ -1,24 +0,0 @@ -import click -import ray -import time - - -def num_alive_nodes(): - n = 0 - for node in ray.nodes(): - if node["Alive"]: - n += 1 - return n - - -@click.command() -@click.option("--num-nodes", required=True, type=int, help="The target number of nodes") -def wait_cluster(num_nodes: int): - ray.init(address="auto") - while num_alive_nodes() != num_nodes: - print(f"Waiting for nodes: {num_alive_nodes()}/{num_nodes}") - time.sleep(5) - - -if __name__ == "__main__": - wait_cluster() diff --git a/release/.buildkite/build_pipeline.py b/release/.buildkite/build_pipeline.py deleted file mode 100644 index 4c0e09099..000000000 --- a/release/.buildkite/build_pipeline.py +++ /dev/null @@ -1,680 +0,0 @@ -import copy -import logging -import os -import re -import sys - -import yaml - -# If you update or reorganize the periodic tests, please ensure the -# relevant portions of the Ray release instructions (go/release-ray) -# (in particular, running periodic tests and collecting release logs) -# are up to date. If you need access, please contact @zhe-thoughts. - -# Env variables: - -# RAY_REPO Repo to use for finding the wheel -# RAY_BRANCH Branch to find the wheel -# RAY_VERSION Version to find the wheel -# RAY_WHEELS Direct Ray wheel URL -# RAY_TEST_REPO Repo to use for test scripts -# RAY_TEST_BRANCH Branch for test scripts -# FILTER_FILE File filter -# FILTER_TEST Test name filter -# RELEASE_TEST_SUITE Release test suite (e.g. 
manual, nightly) - - -class ReleaseTest: - def __init__( - self, - name: str, - smoke_test: bool = False, - retry: int = 0, - ): - self.name = name - self.smoke_test = smoke_test - self.retry = retry - - def __str__(self): - return self.name - - def __repr__(self): - return self.name - - def __contains__(self, item): - return self.name.__contains__(item) - - def __iter__(self): - return iter(self.name) - - def __len__(self): - return len(self.name) - - -class SmokeTest(ReleaseTest): - def __init__(self, name: str, retry: int = 0): - super(SmokeTest, self).__init__(name=name, smoke_test=True, retry=retry) - - -CORE_NIGHTLY_TESTS = { - # "~/ray/release/nightly_tests/nightly_tests.yaml": [ - # "shuffle_10gb", - # "shuffle_50gb", - # "shuffle_50gb_large_partition", - # "shuffle_100gb", - # "non_streaming_shuffle_100gb", - # "non_streaming_shuffle_50gb_large_partition", - # "non_streaming_shuffle_50gb", - # SmokeTest("dask_on_ray_large_scale_test_no_spilling"), - # SmokeTest("dask_on_ray_large_scale_test_spilling"), - # "stress_test_placement_group", - # "shuffle_1tb_1000_partition", - # "non_streaming_shuffle_1tb_1000_partition", - # "shuffle_1tb_5000_partitions", - # TODO(sang): It doesn't even work without spilling - # as it hits the scalability limit. - # "non_streaming_shuffle_1tb_5000_partitions", - # "decision_tree_autoscaling", - # "decision_tree_autoscaling_20_runs", - # "autoscaling_shuffle_1tb_1000_partitions", - # SmokeTest("stress_test_many_tasks"), - # SmokeTest("stress_test_dead_actors"), - # SmokeTest("threaded_actors_stress_test"), - # "pg_long_running_performance_test", - # ], - # "~/ray/benchmarks/benchmark_tests.yaml": [ - # "single_node", - # "object_store", - # "many_actors_smoke_test", - # "many_tasks_smoke_test", - # "many_pgs_smoke_test", - # ], - # "~/ray/release/nightly_tests/dataset/dataset_test.yaml": [ - # "inference", - # "shuffle_data_loader", - # "parquet_metadata_resolution", - # "pipelined_training_50_gb", - # "pipelined_ingestion_1500_gb", - # "datasets_preprocess_ingest", - # "datasets_ingest_400G", - # SmokeTest("datasets_ingest_train_infer"), - # ], - # "~/ray/release/nightly_tests/chaos_test.yaml": [ - # "chaos_many_actors", - # "chaos_many_tasks_no_object_store", - # "chaos_pipelined_ingestion_1500_gb_15_windows", - # ], - # "~/ray/release/microbenchmark/microbenchmark.yaml": [ - # "microbenchmark", - # ], -} - -SERVE_NIGHTLY_TESTS = { - # "~/ray/release/long_running_tests/long_running_tests.yaml": [ - # SmokeTest("serve"), - # SmokeTest("serve_failure"), - # ], - # "~/ray/release/serve_tests/serve_tests.yaml": [ - # "single_deployment_1k_noop_replica", - # "multi_deployment_1k_noop_replica", - # "autoscaling_single_deployment", - # "autoscaling_multi_deployment", - # "serve_micro_benchmark", - # # TODO(architkulkarni) Reenable after K8s migration. 
Currently failing - # # "serve_micro_benchmark_k8s", - # "serve_cluster_fault_tolerance", - # ], -} - -CORE_DAILY_TESTS = { - # "~/ray/release/nightly_tests/nightly_tests.yaml": [ - # "k8s_dask_on_ray_large_scale_test_no_spilling", - # "dask_on_ray_large_scale_test_no_spilling", - # "dask_on_ray_large_scale_test_spilling", - # "pg_autoscaling_regression_test", - # "threaded_actors_stress_test", - # "k8s_threaded_actors_stress_test", - # "stress_test_many_tasks", - # "stress_test_dead_actors", - # ], - # "~/ray/release/nightly_tests/chaos_test.yaml": [ - # "chaos_dask_on_ray_large_scale_test_no_spilling", - # "chaos_dask_on_ray_large_scale_test_spilling", - # ], -} - -CORE_SCALABILITY_TESTS_DAILY = { - # "~/ray/benchmarks/benchmark_tests.yaml": [ - # "many_actors", - # "many_tasks", - # "many_pgs", - # "many_nodes", - # ], -} - -CORE_SCHEDULING_DAILY = { - # "~/ray/benchmarks/benchmark_tests.yaml": [ - # "scheduling_test_many_0s_tasks_single_node", - # "scheduling_test_many_0s_tasks_many_nodes", - # # Reenable these two once we got right setup - # # "scheduling_test_many_5s_tasks_single_node", - # # "scheduling_test_many_5s_tasks_many_nodes", - # ], - # "~/ray/release/nightly_tests/nightly_tests.yaml": [ - # "many_nodes_actor_test", - # "dask_on_ray_10gb_sort", - # "dask_on_ray_100gb_sort", - # "dask_on_ray_1tb_sort", - # "placement_group_performance_test", - # ], -} - -NIGHTLY_TESTS = { - # "~/ray/release/horovod_tests/horovod_tests.yaml": [ - # SmokeTest("horovod_test"), - # ], # Should we enable this? - # "~/ray/release/golden_notebook_tests/golden_notebook_tests.yaml": [ - # "dask_xgboost_test", - # "modin_xgboost_test", - # "torch_tune_serve_test", - # ], - # "~/ray/release/long_running_tests/long_running_tests.yaml": [ - # SmokeTest("actor_deaths"), - # SmokeTest("apex"), - # SmokeTest("impala"), - # SmokeTest("many_actor_tasks"), - # SmokeTest("many_drivers"), - # SmokeTest("many_ppo"), - # SmokeTest("many_tasks"), - # SmokeTest("many_tasks_serialized_ids"), - # SmokeTest("node_failures"), - # SmokeTest("pbt"), - # # SmokeTest("serve"), - # # SmokeTest("serve_failure"), - # # Full long running tests (1 day runtime) - # "actor_deaths", - # "apex", - # "impala", - # "many_actor_tasks", - # "many_drivers", - # "many_ppo", - # "many_tasks", - # "many_tasks_serialized_ids", - # "node_failures", - # "pbt", - # "serve", - # "serve_failure", - # ], - # "~/ray/release/sgd_tests/sgd_tests.yaml": [ - # "sgd_gpu", - # ], - # "~/ray/release/tune_tests/cloud_tests/tune_cloud_tests.yaml": [ - # "aws_no_sync_down", - # "aws_ssh_sync", - # "aws_durable_upload", - # "aws_durable_upload_rllib_str", - # "aws_durable_upload_rllib_trainer", - # "gcp_k8s_durable_upload", - # ], - # "~/ray/release/tune_tests/scalability_tests/tune_tests.yaml": [ - # "bookkeeping_overhead", - # "durable_trainable", - # SmokeTest("long_running_large_checkpoints"), - # SmokeTest("network_overhead"), - # "result_throughput_cluster", - # "result_throughput_single_node", - # ], - # "~/ray/release/xgboost_tests/xgboost_tests.yaml": [ - # "train_small", - # "train_moderate", - # "train_gpu", - # "tune_small", - # "tune_4x32", - # "tune_32x4", - # "ft_small_elastic", - # "ft_small_non_elastic", - # "distributed_api_test", - # ], - # "~/ray/release/rllib_tests/rllib_tests.yaml": [ - # SmokeTest("learning_tests"), - # SmokeTest("stress_tests"), - # "performance_tests", - # "multi_gpu_learning_tests", - # "multi_gpu_with_lstm_learning_tests", - # "multi_gpu_with_attention_learning_tests", - # # We'll have these as per-PR tests soon. 
- # # "example_scripts_on_gpu_tests", - # ], - # "~/ray/release/runtime_env_tests/runtime_env_tests.yaml": [ - # "rte_many_tasks_actors", - # "wheel_urls", - # "rte_ray_client", - # ], -} - -WEEKLY_TESTS = { - # "~/ray/release/horovod_tests/horovod_tests.yaml": [ - # "horovod_test", - # ], - "~/ray/release/long_running_distributed_tests" - # "/long_running_distributed.yaml": [ - # "pytorch_pbt_failure", - # ], - # "~/ray/release/tune_tests/scalability_tests/tune_tests.yaml": [ - # "network_overhead", - # "long_running_large_checkpoints", - # "xgboost_sweep", - # ], - # "~/ray/release/rllib_tests/rllib_tests.yaml": [ - # "learning_tests", - # "stress_tests", - # ], -} - -# This test suite holds "user" tests to test important user workflows -# in a particular environment. -# All workloads in this test suite should: -# 1. Be run in a distributed (multi-node) fashion -# 2. Use autoscaling/scale up (no wait_cluster.py) -# 3. Use GPUs if applicable -# 4. Have the `use_connect` flag set. -USER_TESTS = { - # "~/ray/release/ml_user_tests/ml_user_tests.yaml": [ - # "train_tensorflow_mnist_test", - # "train_torch_linear_test", - # "ray_lightning_user_test_latest", - # "ray_lightning_user_test_master", - # "horovod_user_test_latest", - # "horovod_user_test_master", - # "xgboost_gpu_connect_latest", - # "xgboost_gpu_connect_master", - # "tune_rllib_connect_test", - # ] -} - -SUITES = { - "core-nightly": CORE_NIGHTLY_TESTS, - "serve-nightly": SERVE_NIGHTLY_TESTS, - "core-daily": CORE_DAILY_TESTS, - "core-scalability": CORE_SCALABILITY_TESTS_DAILY, - "nightly": {**NIGHTLY_TESTS, **USER_TESTS}, - "core-scheduling-daily": CORE_SCHEDULING_DAILY, - "weekly": WEEKLY_TESTS, -} - -DEFAULT_STEP_TEMPLATE = { - "env": { - "ANYSCALE_CLOUD_ID": "cld_4F7k8814aZzGG8TNUGPKnc", - "ANYSCALE_PROJECT": "prj_2xR6uT6t7jJuu1aCwWMsle", - "RELEASE_AWS_BUCKET": "ray-release-automation-results", - "RELEASE_AWS_LOCATION": "dev", - "RELEASE_AWS_DB_NAME": "ray_ci", - "RELEASE_AWS_DB_TABLE": "release_test_result", - "AWS_REGION": "us-west-2", - }, - "agents": {"queue": "runner_queue_branch"}, - "plugins": [ - { - "docker#v3.9.0": { - "image": "rayproject/ray", - "propagate-environment": True, - "volumes": [ - "/tmp/ray_release_test_artifacts:" "/tmp/ray_release_test_artifacts" - ], - } - } - ], - "artifact_paths": ["/tmp/ray_release_test_artifacts/**/*"], -} - - -def ask_configuration(): - RAY_BRANCH = os.environ.get("RAY_BRANCH", "master") - RAY_REPO = os.environ.get("RAY_REPO", "https://github.com/ray-project/ray.git") - RAY_VERSION = os.environ.get("RAY_VERSION", "") - RAY_WHEELS = os.environ.get("RAY_WHEELS", "") - - RAY_TEST_BRANCH = os.environ.get("RAY_TEST_BRANCH", RAY_BRANCH) - RAY_TEST_REPO = os.environ.get("RAY_TEST_REPO", RAY_REPO) - - RELEASE_TEST_SUITE = os.environ.get("RELEASE_TEST_SUITE", "nightly") - FILTER_FILE = os.environ.get("FILTER_FILE", "") - FILTER_TEST = os.environ.get("FILTER_TEST", "") - - input_ask_step = { - "input": "Input required: Please specify tests to run", - "fields": [ - { - "text": ( - "RAY_REPO: Please specify the Ray repository used " - "to find the wheel." - ), - "hint": ( - "Repository from which to fetch the latest " - "commits to find the Ray wheels. Usually you don't " - "need to change this." - ), - "default": RAY_REPO, - "key": "ray_repo", - }, - { - "text": ( - "RAY_BRANCH: Please specify the Ray branch used " - "to find the wheel." - ), - "hint": "For releases, this will be e.g. 
`releases/1.x.0`", - "default": RAY_BRANCH, - "key": "ray_branch", - }, - { - "text": ( - "RAY_VERSION: Please specify the Ray version used " - "to find the wheel." - ), - "hint": ( - "Leave empty for latest master. For releases, " - "specify the release version." - ), - "required": False, - "default": RAY_VERSION, - "key": "ray_version", - }, - { - "text": "RAY_WHEELS: Please specify the Ray wheel URL.", - "hint": ( - "ATTENTION: If you provide this, RAY_REPO, " - "RAY_BRANCH and RAY_VERSION will be ignored! " - "Please also make sure to provide the wheels URL " - "for Python 3.7 on Linux.\n" - "You can also insert a commit hash here instead " - "of a full URL.\n" - "NOTE: You can specify multiple commits or URLs " - "for easy bisection (one per line) - this will " - "run each test on each of the specified wheels." - ), - "required": False, - "default": RAY_WHEELS, - "key": "ray_wheels", - }, - { - "text": ( - "RAY_TEST_REPO: Please specify the Ray repository " - "used to find the tests you would like to run." - ), - "hint": ( - "If you're developing a new release test, this " - "will most likely be your GitHub fork." - ), - "default": RAY_TEST_REPO, - "key": "ray_test_repo", - }, - { - "text": ( - "RAY_TEST_BRANCH: Please specify the Ray branch used " - "to find the tests you would like to run." - ), - "hint": ( - "If you're developing a new release test, this " - "will most likely be a branch living on your " - "GitHub fork." - ), - "default": RAY_TEST_BRANCH, - "key": "ray_test_branch", - }, - { - "select": ( - "RELEASE_TEST_SUITE: Please specify the release " - "test suite containing the tests you would like " - "to run." - ), - "hint": ( - "Check in the `build_pipeline.py` if you're " - "unsure which suite contains your tests." - ), - "required": True, - "options": sorted(SUITES.keys()), - "default": RELEASE_TEST_SUITE, - "key": "release_test_suite", - }, - { - "text": ( - "FILTER_FILE: Please specify a filter for the " - "test files that should be included in this build." - ), - "hint": ( - "Only test files (e.g. xgboost_tests.yml) that " - "match this string will be included in the test" - ), - "default": FILTER_FILE, - "required": False, - "key": "filter_file", - }, - { - "text": ( - "FILTER_TEST: Please specify a filter for the " - "test names that should be included in this build." - ), - "hint": ( - "Only test names (e.g. 
tune_4x32) that match " - "this string will be included in the test" - ), - "default": FILTER_TEST, - "required": False, - "key": "filter_test", - }, - ], - "key": "input_ask_step", - } - - run_again_step = { - "commands": [ - f'export {v}=$(buildkite-agent meta-data get "{k}")' - for k, v in { - "ray_branch": "RAY_BRANCH", - "ray_repo": "RAY_REPO", - "ray_version": "RAY_VERSION", - "ray_wheels": "RAY_WHEELS", - "ray_test_branch": "RAY_TEST_BRANCH", - "ray_test_repo": "RAY_TEST_REPO", - "release_test_suite": "RELEASE_TEST_SUITE", - "filter_file": "FILTER_FILE", - "filter_test": "FILTER_TEST", - }.items() - ] - + [ - "export AUTOMATIC=1", - "python3 -m pip install --user pyyaml", - "rm -rf ~/ray || true", - "git clone -b $${RAY_TEST_BRANCH} $${RAY_TEST_REPO} ~/ray", - ( - "python3 ~/ray/release/.buildkite/build_pipeline.py " - "| buildkite-agent pipeline upload" - ), - ], - "label": ":pipeline: Again", - "agents": {"queue": "runner_queue_branch"}, - "depends_on": "input_ask_step", - "key": "run_again_step", - } - - return [ - input_ask_step, - run_again_step, - ] - - -def create_test_step( - ray_repo: str, - ray_branch: str, - ray_version: str, - ray_wheels: str, - ray_test_repo: str, - ray_test_branch: str, - test_file: str, - test_name: ReleaseTest, -): - custom_commit_str = "custom_wheels_url" - if ray_wheels: - # Extract commit from url - p = re.compile(r"([a-f0-9]{40})") - m = p.search(ray_wheels) - if m is not None: - custom_commit_str = m.group(1) - - ray_wheels_str = f" ({ray_wheels}) " if ray_wheels else "" - - logging.info(f"Creating step for {test_file}/{test_name}{ray_wheels_str}") - - cmd = ( - f"./release/run_e2e.sh " - f'--ray-repo "{ray_repo}" ' - f'--ray-branch "{ray_branch}" ' - f'--ray-version "{ray_version}" ' - f'--ray-wheels "{ray_wheels}" ' - f'--ray-test-repo "{ray_test_repo}" ' - f'--ray-test-branch "{ray_test_branch}" ' - ) - - args = ( - f"--category {ray_branch} " - f"--test-config {test_file} " - f"--test-name {test_name} " - f"--keep-results-dir" - ) - - if test_name.smoke_test: - logging.info("This test will run as a smoke test.") - args += " --smoke-test" - - step_conf = copy.deepcopy(DEFAULT_STEP_TEMPLATE) - - if test_name.retry: - logging.info(f"This test will be retried up to " f"{test_name.retry} times.") - step_conf["retry"] = { - "automatic": [{"exit_status": "*", "limit": test_name.retry}] - } - else: - # Default retry logic - # Warning: Exit codes are currently not correctly propagated to - # buildkite! Thus, actual retry logic is currently implemented in - # the run_e2e.sh script! 
- step_conf["retry"] = { - "automatic": [ - {"exit_status": 7, "limit": 2}, # Prepare timeout - {"exit_status": 9, "limit": 2}, # Session timeout - {"exit_status": 10, "limit": 2}, # Prepare error - ], - } - - step_conf["command"] = cmd + args - - step_conf["label"] = ( - f"{test_name} " - f"({custom_commit_str if ray_wheels_str else ray_branch}) - " - f"{ray_test_branch}/{ray_test_repo}" - ) - return step_conf - - -def build_pipeline(steps): - all_steps = [] - - RAY_BRANCH = os.environ.get("RAY_BRANCH", "master") - RAY_REPO = os.environ.get("RAY_REPO", "https://github.com/ray-project/ray.git") - RAY_VERSION = os.environ.get("RAY_VERSION", "") - RAY_WHEELS = os.environ.get("RAY_WHEELS", "") - - RAY_TEST_BRANCH = os.environ.get("RAY_TEST_BRANCH", RAY_BRANCH) - RAY_TEST_REPO = os.environ.get("RAY_TEST_REPO", RAY_REPO) - - FILTER_FILE = os.environ.get("FILTER_FILE", "") - FILTER_TEST = os.environ.get("FILTER_TEST", "") - - ray_wheels_list = [""] - if RAY_WHEELS: - ray_wheels_list = RAY_WHEELS.split("\n") - - if len(ray_wheels_list) > 1: - logging.info( - f"This will run a bisec on the following URLs/commits: " - f"{ray_wheels_list}" - ) - - logging.info( - f"Building pipeline \n" - f"Ray repo/branch to test:\n" - f" RAY_REPO = {RAY_REPO}\n" - f" RAY_BRANCH = {RAY_BRANCH}\n\n" - f" RAY_VERSION = {RAY_VERSION}\n\n" - f" RAY_WHEELS = {RAY_WHEELS}\n\n" - f"Ray repo/branch containing the test configurations and scripts:" - f" RAY_TEST_REPO = {RAY_TEST_REPO}\n" - f" RAY_TEST_BRANCH = {RAY_TEST_BRANCH}\n\n" - f"Filtering for these tests:\n" - f" FILTER_FILE = {FILTER_FILE}\n" - f" FILTER_TEST = {FILTER_TEST}\n\n" - ) - - for test_file, test_names in steps.items(): - if FILTER_FILE and FILTER_FILE not in test_file: - continue - - test_base = os.path.basename(test_file) - for test_name in test_names: - if FILTER_TEST and FILTER_TEST not in test_name: - continue - - if not isinstance(test_name, ReleaseTest): - test_name = ReleaseTest(name=test_name) - - logging.info(f"Adding test: {test_base}/{test_name}") - - for ray_wheels in ray_wheels_list: - step_conf = create_test_step( - ray_repo=RAY_REPO, - ray_branch=RAY_BRANCH, - ray_version=RAY_VERSION, - ray_wheels=ray_wheels, - ray_test_repo=RAY_TEST_REPO, - ray_test_branch=RAY_TEST_BRANCH, - test_file=test_file, - test_name=test_name, - ) - - all_steps.append(step_conf) - - return all_steps - - -def alert_pipeline(stats: bool = False): - step_conf = copy.deepcopy(DEFAULT_STEP_TEMPLATE) - - cmd = "python release/alert.py" - if stats: - cmd += " --stats" - - step_conf["commands"] = [ - "pip install -q -r release/requirements.txt", - "pip install -U boto3 botocore", - cmd, - ] - step_conf["label"] = f"Send periodic alert (stats_only = {stats})" - return [step_conf] - - -if __name__ == "__main__": - alert = os.environ.get("RELEASE_ALERT", "0") - - ask_for_config = not bool(int(os.environ.get("AUTOMATIC", "0"))) - - if alert in ["1", "stats"]: - steps = alert_pipeline(alert == "stats") - elif ask_for_config: - steps = ask_configuration() - else: - TEST_SUITE = os.environ.get("RELEASE_TEST_SUITE", "nightly") - PIPELINE_SPEC = SUITES[TEST_SUITE] - - steps = build_pipeline(PIPELINE_SPEC) - - yaml.dump({"steps": steps}, sys.stdout) diff --git a/release/alert.py b/release/alert.py deleted file mode 100644 index d0d1d433d..000000000 --- a/release/alert.py +++ /dev/null @@ -1,441 +0,0 @@ -import argparse -from collections import defaultdict, Counter -from typing import Any, List, Tuple, Mapping, Optional -import datetime -import hashlib -import json -import logging 
-import os -import requests -import sys - -import boto3 - -from e2e import GLOBAL_CONFIG - -from alerts.default import handle_result as default_handle_result -from alerts.rllib_tests import handle_result as rllib_tests_handle_result -from alerts.long_running_tests import handle_result as long_running_tests_handle_result -from alerts.tune_tests import handle_result as tune_tests_handle_result -from alerts.xgboost_tests import handle_result as xgboost_tests_handle_result - -SUITE_TO_FN = { - "long_running_tests": long_running_tests_handle_result, - "rllib_tests": rllib_tests_handle_result, - "tune_tests": tune_tests_handle_result, - "xgboost_tests": xgboost_tests_handle_result, -} - -GLOBAL_CONFIG["RELEASE_AWS_DB_STATE_TABLE"] = "alert_state" -GLOBAL_CONFIG["SLACK_WEBHOOK"] = os.environ.get("SLACK_WEBHOOK", "") -GLOBAL_CONFIG["SLACK_CHANNEL"] = os.environ.get("SLACK_CHANNEL", "#oss-test-cop") - -RESULTS_LIMIT = 120 - -logger = logging.getLogger() -logger.setLevel(logging.INFO) -handler = logging.StreamHandler(stream=sys.stdout) -formatter = logging.Formatter( - fmt="[%(levelname)s %(asctime)s] " "%(filename)s: %(lineno)d " "%(message)s" -) -handler.setFormatter(formatter) -logger.addHandler(handler) - - -def maybe_fetch_slack_webhook(): - if GLOBAL_CONFIG["SLACK_WEBHOOK"] in [None, ""]: - print("Missing SLACK_WEBHOOK, retrieving from AWS secrets store") - GLOBAL_CONFIG["SLACK_WEBHOOK"] = boto3.client( - "secretsmanager", region_name="us-west-2" - ).get_secret_value( - SecretId="arn:aws:secretsmanager:us-west-2:029272617770:secret:" - "release-automation/" - "slack-webhook-Na0CFP" - )[ - "SecretString" - ] - - -def _obj_hash(obj: Any) -> str: - json_str = json.dumps(obj, sort_keys=True, ensure_ascii=True) - sha = hashlib.sha256() - sha.update(json_str.encode()) - return sha.hexdigest() - - -def fetch_latest_alerts(rds_data_client): - schema = GLOBAL_CONFIG["RELEASE_AWS_DB_STATE_TABLE"] - - sql = f""" - SELECT DISTINCT ON (category, test_suite, test_name) - category, test_suite, test_name, last_result_hash, - last_notification_dt - FROM {schema} - ORDER BY category, test_suite, test_name, last_notification_dt DESC - LIMIT {RESULTS_LIMIT} - """ - - result = rds_data_client.execute_statement( - database=GLOBAL_CONFIG["RELEASE_AWS_DB_NAME"], - secretArn=GLOBAL_CONFIG["RELEASE_AWS_DB_SECRET_ARN"], - resourceArn=GLOBAL_CONFIG["RELEASE_AWS_DB_RESOURCE_ARN"], - schema=schema, - sql=sql, - ) - for row in result["records"]: - category, test_suite, test_name, last_result_hash, last_notification_dt = ( - r["stringValue"] if "stringValue" in r else None for r in row - ) - last_notification_dt = datetime.datetime.strptime( - last_notification_dt, "%Y-%m-%d %H:%M:%S" - ) - yield category, test_suite, test_name, last_result_hash, last_notification_dt - - -def fetch_latest_results( - rds_data_client, fetch_since: Optional[datetime.datetime] = None -): - schema = GLOBAL_CONFIG["RELEASE_AWS_DB_TABLE"] - - sql = f""" - SELECT DISTINCT ON (category, test_suite, test_name) - created_on, category, test_suite, test_name, status, results, - artifacts, last_logs - FROM {schema} """ - - parameters = [] - if fetch_since is not None: - sql += "WHERE created_on >= :created_on " - parameters = [ - { - "name": "created_on", - "typeHint": "TIMESTAMP", - "value": {"stringValue": fetch_since.strftime("%Y-%m-%d %H:%M:%S")}, - }, - ] - - sql += "ORDER BY category, test_suite, test_name, created_on DESC " - sql += f"LIMIT {RESULTS_LIMIT}" - - result = rds_data_client.execute_statement( - 
database=GLOBAL_CONFIG["RELEASE_AWS_DB_NAME"], - secretArn=GLOBAL_CONFIG["RELEASE_AWS_DB_SECRET_ARN"], - resourceArn=GLOBAL_CONFIG["RELEASE_AWS_DB_RESOURCE_ARN"], - schema=schema, - sql=sql, - parameters=parameters, - ) - for row in result["records"]: - ( - created_on, - category, - test_suite, - test_name, - status, - results, - artifacts, - last_logs, - ) = (r["stringValue"] if "stringValue" in r else None for r in row) - - # Calculate hash before converting strings to objects - result_obj = ( - created_on, - category, - test_suite, - test_name, - status, - results, - artifacts, - last_logs, - ) - result_json = json.dumps(result_obj) - result_hash = _obj_hash(result_json) - - # Convert some strings to python objects - created_on = datetime.datetime.strptime(created_on, "%Y-%m-%d %H:%M:%S") - results = json.loads(results) - artifacts = json.loads(artifacts) - - yield result_hash, created_on, category, test_suite, test_name, status, results, artifacts, last_logs # noqa: E501 - - -def mark_as_handled( - rds_data_client, - update: bool, - category: str, - test_suite: str, - test_name: str, - result_hash: str, - last_notification_dt: datetime.datetime, -): - schema = GLOBAL_CONFIG["RELEASE_AWS_DB_STATE_TABLE"] - - if not update: - sql = f""" - INSERT INTO {schema} - (category, test_suite, test_name, - last_result_hash, last_notification_dt) - VALUES (:category, :test_suite, :test_name, - :last_result_hash, :last_notification_dt) - """ - else: - sql = f""" - UPDATE {schema} - SET last_result_hash=:last_result_hash, - last_notification_dt=:last_notification_dt - WHERE category=:category AND test_suite=:test_suite - AND test_name=:test_name - """ - - rds_data_client.execute_statement( - database=GLOBAL_CONFIG["RELEASE_AWS_DB_NAME"], - parameters=[ - {"name": "category", "value": {"stringValue": category}}, - {"name": "test_suite", "value": {"stringValue": test_suite or ""}}, - {"name": "test_name", "value": {"stringValue": test_name}}, - {"name": "last_result_hash", "value": {"stringValue": result_hash}}, - { - "name": "last_notification_dt", - "typeHint": "TIMESTAMP", - "value": { - "stringValue": last_notification_dt.strftime("%Y-%m-%d %H:%M:%S") - }, - }, - ], - secretArn=GLOBAL_CONFIG["RELEASE_AWS_DB_SECRET_ARN"], - resourceArn=GLOBAL_CONFIG["RELEASE_AWS_DB_RESOURCE_ARN"], - schema=schema, - sql=sql, - ) - - -def post_alerts_to_slack( - channel: str, alerts: List[Tuple[str, str, str, str]], non_alerts: Mapping[str, int] -): - if len(alerts) == 0: - logger.info("No alerts to post to slack.") - return - - markdown_lines = [ - f"* {len(alerts)} new release test failures found!*", - "", - ] - - category_alerts = defaultdict(list) - for (category, test_suite, test_name, alert) in alerts: - category_alerts[category].append( - f" *{test_suite}/{test_name}* failed: {alert}" - ) - - for category, alert_list in category_alerts.items(): - markdown_lines.append(f"Branch: *{category}*") - markdown_lines.extend(alert_list) - markdown_lines.append("") - - total_non_alerts = sum(n for n in non_alerts.values()) - non_alert_detail = [f"{n} on {c}" for c, n in non_alerts.items()] - - markdown_lines += [ - f"Additionally, {total_non_alerts} tests passed successfully " - f"({', '.join(non_alert_detail)})." 
- ] - - slack_url = GLOBAL_CONFIG["SLACK_WEBHOOK"] - - resp = requests.post( - slack_url, - json={ - "text": "\n".join(markdown_lines), - "channel": channel, - "username": "Fail Bot", - "icon_emoji": ":red_circle:", - }, - ) - print(resp.status_code) - print(resp.text) - - -def post_statistics_to_slack( - channel: str, alerts: List[Tuple[str, str, str, str]], non_alerts: Mapping[str, int] -): - total_alerts = len(alerts) - - category_alerts = defaultdict(list) - for (category, test_suite, test_name, alert) in alerts: - category_alerts[category].append(f"`{test_suite}/{test_name}`") - - alert_detail = [f"{len(a)} on {c}" for c, a in category_alerts.items()] - - total_non_alerts = sum(n for n in non_alerts.values()) - non_alert_detail = [f"{n} on {c}" for c, n in non_alerts.items()] - - markdown_lines = [ - "*Periodic release test report*", - "", - f"In the past 24 hours, " - f"*{total_non_alerts}* release tests finished successfully, and " - f"*{total_alerts}* release tests failed.", - ] - - markdown_lines.append("") - - if total_alerts: - markdown_lines.append(f"*Failing:* {', '.join(alert_detail)}") - for c, a in category_alerts.items(): - markdown_lines.append(f" *{c}*: {', '.join(sorted(a))}") - else: - markdown_lines.append("*Failing:* None") - - markdown_lines.append("") - - if total_non_alerts: - markdown_lines.append(f"*Passing:* {', '.join(non_alert_detail)}") - else: - markdown_lines.append("*Passing:* None") - - slack_url = GLOBAL_CONFIG["SLACK_WEBHOOK"] - - resp = requests.post( - slack_url, - json={ - "text": "\n".join(markdown_lines), - "channel": channel, - "username": "Fail Bot", - "icon_emoji": ":red_circle:", - }, - ) - print(resp.status_code) - print(resp.text) - - -def handle_results_and_get_alerts( - rds_data_client, - fetch_since: Optional[datetime.datetime] = None, - always_try_alert: bool = False, - no_status_update: bool = False, -): - # First build a map of last notifications - last_notifications_map = {} - for ( - category, - test_suite, - test_name, - last_result_hash, - last_notification_dt, - ) in fetch_latest_alerts(rds_data_client): - last_notifications_map[(category, test_suite, test_name)] = ( - last_result_hash, - last_notification_dt, - ) - - alerts = [] - non_alerts = Counter() - - # Then fetch latest results - for ( - result_hash, - created_on, - category, - test_suite, - test_name, - status, - results, - artifacts, - last_logs, - ) in fetch_latest_results(rds_data_client, fetch_since=fetch_since): - key = (category, test_suite, test_name) - - try_alert = always_try_alert - if key in last_notifications_map: - # If we have an alert for this key, fetch info - last_result_hash, last_notification_dt = last_notifications_map[key] - - if last_result_hash != result_hash: - # If we got a new result, handle new result - try_alert = True - # Todo: maybe alert again after some time? 
- else: - try_alert = True - - if try_alert: - handle_fn = SUITE_TO_FN.get(test_suite, None) - if not handle_fn: - logger.warning(f"No handle for suite {test_suite}") - alert = default_handle_result( - created_on, - category, - test_suite, - test_name, - status, - results, - artifacts, - last_logs, - ) - else: - alert = handle_fn( - created_on, - category, - test_suite, - test_name, - status, - results, - artifacts, - last_logs, - ) - - if alert: - logger.warning( - f"Alert raised for test {test_suite}/{test_name} " - f"({category}): {alert}" - ) - - alerts.append((category, test_suite, test_name, alert)) - else: - logger.debug( - f"No alert raised for test {test_suite}/{test_name} " - f"({category})" - ) - non_alerts[category] += 1 - - if not no_status_update: - mark_as_handled( - rds_data_client, - key in last_notifications_map, - category, - test_suite, - test_name, - result_hash, - datetime.datetime.now(), - ) - - return alerts, non_alerts - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--stats", - action="store_true", - default=False, - help="Finish quickly for training.", - ) - args = parser.parse_args() - - maybe_fetch_slack_webhook() - - rds_data_client = boto3.client("rds-data", region_name="us-west-2") - - if args.stats: - # Only update last 24 hour stats - fetch_since = datetime.datetime.now() - datetime.timedelta(days=1) - alerts, non_alerts = handle_results_and_get_alerts( - rds_data_client, - fetch_since=fetch_since, - always_try_alert=True, - no_status_update=True, - ) - post_statistics_to_slack(GLOBAL_CONFIG["SLACK_CHANNEL"], alerts, non_alerts) - - else: - alerts, non_alerts = handle_results_and_get_alerts(rds_data_client) - post_alerts_to_slack(GLOBAL_CONFIG["SLACK_CHANNEL"], alerts, non_alerts) diff --git a/release/benchmarks/benchmark_tests.yaml b/release/benchmarks/benchmark_tests.yaml deleted file mode 100644 index a89e3deb9..000000000 --- a/release/benchmarks/benchmark_tests.yaml +++ /dev/null @@ -1,145 +0,0 @@ -- name: single_node - team: core - cluster: - app_config: app_config.yaml - compute_template: single_node.yaml - - run: - timeout: 12000 - prepare: sleep 0 - script: python single_node/test_single_node.py - -- name: object_store - team: core - cluster: - app_config: app_config.yaml - compute_template: object_store.yaml - - run: - timeout: 3600 - prepare: python distributed/wait_cluster.py --num-nodes=50 - script: python object_store/test_object_store.py - -- name: many_actors - team: core - cluster: - app_config: app_config.yaml - compute_template: distributed.yaml - - run: - timeout: 3600 # 1hr - prepare: python distributed/wait_cluster.py --num-nodes=65 - script: python distributed/test_many_actors.py - -- name: many_actors_smoke_test - team: core - cluster: - app_config: app_config.yaml - compute_template: distributed_smoke_test.yaml - - run: - timeout: 3600 # 1hr - prepare: python distributed/wait_cluster.py --num-nodes=2 - script: SMOKE_TEST=1 python distributed/test_many_actors.py - -- name: many_tasks - team: core - cluster: - app_config: app_config.yaml - compute_template: distributed.yaml - - run: - timeout: 3600 # 1hr - prepare: python distributed/wait_cluster.py --num-nodes=65 - script: python distributed/test_many_tasks.py --num-tasks=10000 - -- name: many_tasks_smoke_test - team: core - cluster: - app_config: app_config.yaml - compute_template: distributed_smoke_test.yaml - - run: - timeout: 3600 # 1hr - prepare: python distributed/wait_cluster.py --num-nodes=2 - script: python 
distributed/test_many_tasks.py --num-tasks=100 - -- name: many_pgs - team: core - cluster: - app_config: app_config.yaml - compute_template: distributed.yaml - - run: - timeout: 3600 # 1hr - prepare: python distributed/wait_cluster.py --num-nodes=65 - script: python distributed/test_many_pgs.py - -- name: many_pgs_smoke_test - team: core - cluster: - app_config: app_config.yaml - compute_template: distributed_smoke_test.yaml - - run: - timeout: 3600 # 1hr - prepare: python distributed/wait_cluster.py --num-nodes=2 - script: SMOKE_TEST=1 python distributed/test_many_pgs.py - -# NOTE: No smoke test since this shares a script with the many_tasks_smoke_test -- name: many_nodes - team: core - cluster: - app_config: app_config.yaml - compute_template: many_nodes.yaml - - run: - timeout: 3600 # 1hr - prepare: python distributed/wait_cluster.py --num-nodes=250 - script: python distributed/test_many_tasks.py --num-tasks=1000 - -- name: scheduling_test_many_0s_tasks_single_node - team: core - cluster: - app_config: app_config.yaml - compute_template: scheduling.yaml - - run: - timeout: 3600 - prepare: python distributed/wait_cluster.py --num-nodes=32 - script: python distributed/test_scheduling.py --total-num-task=1984000 --num-cpu-per-task=1 --task-duration-s=0 --total-num-actors=1 --num-actors-per-nodes=1 - -- name: scheduling_test_many_0s_tasks_many_nodes - team: core - cluster: - app_config: app_config.yaml - compute_template: scheduling.yaml - - run: - timeout: 3600 - prepare: python distributed/wait_cluster.py --num-nodes=32 - script: python distributed/test_scheduling.py --total-num-task=1984000 --num-cpu-per-task=1 --task-duration-s=0 --total-num-actors=32 --num-actors-per-nodes=1 - -- name: scheduling_test_many_5s_tasks_single_node - team: core - cluster: - app_config: app_config.yaml - compute_template: scheduling.yaml - - run: - timeout: 3600 - prepare: python distributed/wait_cluster.py --num-nodes=32 - script: python distributed/test_scheduling.py --total-num-task=1984000 --num-cpu-per-task=1 --task-duration-s=5 --total-num-actors=1 --num-actors-per-nodes=1 - stable: false - -- name: scheduling_test_many_5s_tasks_many_nodes - team: core - cluster: - app_config: app_config.yaml - compute_template: scheduling.yaml - - run: - timeout: 3600 - prepare: python distributed/wait_cluster.py --num-nodes=32 - script: python distributed/test_scheduling.py --total-num-task=1984000 --num-cpu-per-task=1 --task-duration-s=5 --total-num-actors=32 --num-actors-per-nodes=1 - stable: false diff --git a/release/benchmarks/distributed/wait_cluster.py b/release/benchmarks/distributed/wait_cluster.py deleted file mode 100644 index 12a8a1677..000000000 --- a/release/benchmarks/distributed/wait_cluster.py +++ /dev/null @@ -1,24 +0,0 @@ -import click -import ray -import time - - -def num_alive_nodes(): - n = 0 - for node in ray.nodes(): - if node["Alive"]: - n += 1 - return n - - -@click.command() -@click.option("--num-nodes", required=True, type=int, help="The target number of nodes") -def wait_cluster(num_nodes: int): - ray.init(address="auto") - while num_alive_nodes() != num_nodes: - print(f"Waiting for nodes: {num_alive_nodes()}/{num_nodes}") - time.sleep(5) - - -if __name__ == "__main__": - wait_cluster() diff --git a/release/benchmarks/wait_cluster.py b/release/benchmarks/wait_cluster.py deleted file mode 100644 index f70088289..000000000 --- a/release/benchmarks/wait_cluster.py +++ /dev/null @@ -1,54 +0,0 @@ -import argparse -import time - -import ray - -ray.init(address="auto") - -parser = 
argparse.ArgumentParser() -parser.add_argument( - "num_nodes", type=int, help="Wait for this number of nodes (includes head)" -) - -parser.add_argument("max_time_s", type=int, help="Wait for this number of seconds") - -parser.add_argument( - "--feedback_interval_s", - type=int, - default=10, - help="Wait for this number of seconds", -) - -args = parser.parse_args() - -curr_nodes = 0 -start = time.time() -next_feedback = start -max_time = start + args.max_time_s - -while not curr_nodes >= args.num_nodes: - now = time.time() - - if now >= max_time: - raise RuntimeError( - f"Maximum wait time reached, but only " - f"{curr_nodes}/{args.num_nodes} nodes came up. Aborting." - ) - - if now >= next_feedback: - passed = now - start - print( - f"Waiting for more nodes to come up: " - f"{curr_nodes}/{args.num_nodes} " - f"({passed:.0f} seconds passed)" - ) - next_feedback = now + args.feedback_interval_s - - time.sleep(5) - curr_nodes = len(ray.nodes()) - -passed = time.time() - start -print( - f"Cluster is up: {curr_nodes}/{args.num_nodes} nodes online after " - f"{passed:.0f} seconds" -) diff --git a/release/config_generator.html b/release/config_generator.html deleted file mode 100644 index 179bd6320..000000000 --- a/release/config_generator.html +++ /dev/null @@ -1,214 +0,0 @@ - - - - - Releaser config generator - - - - -
[Remainder of the deleted config_generator.html lost to extraction: the page rendered a "Releaser config generator" form with the blurb "Use this form to generate a list of environment variables. These variables can be passed to Buildkite to run a subset of release tests and choose the correct wheels/release test branch", followed by a Set / Value / Description table of input fields.]
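The variable names that generator emitted are the same ones build_pipeline.py (earlier in this diff) reads at startup. For orientation only, a sketch of that hand-off with purely illustrative values (the branch, file and test names are examples taken from elsewhere in this diff, not recovered form contents):

    import os

    # Hypothetical output of the form; the names come from build_pipeline.py,
    # the values are examples only.
    example_form_output = {
        "RAY_BRANCH": "releases/1.x.0",
        "RELEASE_TEST_SUITE": "nightly",      # one of the SUITES keys
        "FILTER_FILE": "xgboost_tests.yaml",  # substring match against test files
        "FILTER_TEST": "tune_small",          # substring match against test names
    }
    os.environ.update(example_form_output)

    # build_pipeline.py then picks these up from the environment:
    RELEASE_TEST_SUITE = os.environ.get("RELEASE_TEST_SUITE", "nightly")
    FILTER_FILE = os.environ.get("FILTER_FILE", "")
    FILTER_TEST = os.environ.get("FILTER_TEST", "")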
- - \ No newline at end of file diff --git a/release/e2e.py b/release/e2e.py deleted file mode 100644 index 3f458d56a..000000000 --- a/release/e2e.py +++ /dev/null @@ -1,2585 +0,0 @@ -""" -This is an end to end release test automation script used to kick off periodic -release tests, running on Anyscale. - -The tool leverages app configs and compute templates. - -Calling this script will run a single release test. - -Example: - -python e2e.py --test-config ~/ray/release/xgboost_tests/xgboost_tests.yaml --test-name tune_small - -The following steps are then performed: - -1. It will look up the test tune_small in the file xgboost_tests.yaml -2. It will fetch the specified app config and compute template and register - those with anyscale (if they don’t exist yet) -3. It waits until the app config is built -4. It then kicks off the script defined in the run block -5. When the script is finished, it will fetch the latest logs, the full log - output, and any artifacts specified in the artifacts block. -6. The full logs and artifacts will be stored in a s3 bucket -7. It will also fetch the json file specified in the run block as results. - This is the file where you should write your metrics to. -8. All results are then stored in a database. - Specifically it will store the following fields: - - Timestamp - - Test name - - Status (finished, error, timeout, invalid) - - Last logs (50 lines) - - results (see above) - - artifacts (links to s3 files) - -Then the script exits. If an error occurs at any time, a fail result is -written to the database. - -Exit codes ----------- -The script exits with code 0 on success, i.e. if the test has been run -end to end without failures and the subsequent results checks have passed. -In all other cases, an exit code > 0 is returned. - -Exit code 1 is the general failure exit code returned by Python when we -encounter an error that isn't caught by the rest of the script. - -Generally, we try to catch errors as they occur, and return a specific exit -code that can be used in automation tools to e.g. retry a test when nodes -didn't come up in time. - -These exit codes are defined in the ``ExitCode`` enum below. - -Writing a new release test --------------------------- -Each release test requires the following: - -1. It has to be added in a release test yaml file, describing meta information - about the test (e.g. name, command to run, timeout) -2. You need an app config yaml -3. You need a compute template yaml -4. You need to define a command to run. This is usually a python script. - The command should accept (or ignore) a single optional - `--smoke-test` argument. - Usually the command should write its result metrics to a json file. - The json filename is available in the TEST_OUTPUT_JSON env variable. -5. Add your test in release/.buildkite/build_pipeline.py. - -The script will have access to these environment variables: - - "RAY_ADDRESS": os.environ.get("RAY_ADDRESS", "auto") - "TEST_OUTPUT_JSON": results_json_filename - "IS_SMOKE_TEST": "1" if smoke_test else "0" - -For an example, take a look at the XGBoost test suite: - -https://github.com/ray-project/ray/blob/master/release/xgboost_tests/xgboost_tests.yaml - -These all use the same app configs and similar compute templates. This means -that app configs can be re-used across runs and only have to be built ones. - -App configs and compute templates can interpret environment variables. -A notable one is the `RAY_WHEELS` variable which points to the wheels that -should be tested (e.g. latest master wheels). 
You might want to include -something like this in your `post_build_cmds`: - - - pip3 uninstall ray -y || true - - pip3 install -U {{ env["RAY_WHEELS"] | default("ray") }} - -If you want to force rebuilds, consider using something like - - - echo {{ env["TIMESTAMP"] }} - -so that your app configs changes each time the script is executed. If you -only want to trigger rebuilds once per day, use `DATESTAMP` instead: - - - echo {{ env["DATESTAMP"] }} - -Local testing -------------- -Make sure to set these environment variables: - -- ANYSCALE_CLI_TOKEN (should contain your anyscale credential token) -- ANYSCALE_PROJECT (should point to a project ID you have access to) - -A test can then be run like this: - -python e2e.py --test-config ~/ray/release/xgboost_tests/xgboost_tests.yaml --test-name tune_small - -Using Compilation on Product + App Config Override --------------------------------------------------- -For quick iteration when debugging a release test, go/compile-on-product allows -you to easily modify and recompile Ray, such that the recompilation happens -within an app build step and can benefit from a warm Bazel cache. See -go/compile-on-product for more information. - -After kicking off the app build, you can give the app config ID to this script -as an app config override, where the indicated app config will be used instead -of the app config given in the test config. E.g., running - -python e2e.py --test-config ~/ray/benchmarks/benchmark_tests.yaml --test-name=single_node --app-config-id-override=apt_TBngEXXXrhipMXgexVcrpC9i - -would run the single_node benchmark test with the apt_TBngEXXXrhipMXgexVcrpC9i -app config instead of the app config given in -~/ray/benchmarks/benchmark_tests.yaml. If the build for the app config is still -in progress, the script will wait until it completes, same as for a locally -defined app config. - -Running on Head Node vs Running with Anyscale Connect ------------------------------------------------------ -By default release tests run their drivers on the head node. Support is being -added to run release tests that execute the driver as a subprocess and run -the workload on Anyscale product via Anyscale connect. -Note that when the driver in the test is a subprocess of releaser, releaser -cannot be terminated before the test finishes. -Other known feature gaps when running with Anyscale connect: -- Kicking off a test or checking progress is not supported. -- Downloading / uploading logs and artifacts are unsupported. -- Logs from remote may not have finished streaming, before the driver exits. - -Long running tests ------------------- -Long running tests can be kicked off with by adding the --kick-off-only -parameters to the e2e script. The status can then be checked with the ---check command. - -Long running test sessions will be terminated after `timeout` seconds, after -which the latest result in the TEST_OUTPUT_JSON will be reported. Thus, -long running release tests should update this file periodically. - -There are also two config options to configure behavior. The `time_key` is -needed to track the latest update of the TEST_OUTPUT_JSON and should contain -a floating point number (usually `time.time()`). The `max_update_delay` then -specified the maximum time in seconds that can be passed without an update -to the results json. If the output file hasn't been updated in e.g. 60 seconds, -this could indicate that the command is stale/frozen, and thus should fail. 
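A minimal sketch of such a periodic update, assuming only the TEST_OUTPUT_JSON and `time_key` conventions described above (the helper name `report_progress` is made up for illustration):

    import json
    import os
    import time

    results_file = os.environ.get("TEST_OUTPUT_JSON", "/tmp/release_test_out.json")

    def report_progress(metrics: dict) -> None:
        # "last_update" matches the `time_key` configured in the test yaml, so the
        # harness can see that the workload is still making progress.
        metrics["last_update"] = time.time()
        with open(results_file, "w") as f:
            json.dump(metrics, f)

    # Call this at least once every `max_update_delay` seconds, e.g. per iteration:
    report_progress({"iterations_finished": 0})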
- -Release test yaml example -------------------------- -- name: example - owner: - mail: "kai@anyscale.com" # Currently not used - slack: "@tune-team" # Currentl not used - - cluster: - app_config: app_config.yaml # Relative to the release test yaml - compute_template: tpl_cpu.yaml - - run: - timeout: 600 # in seconds - prepare: python wait_cluster.py 4 600 # prepare cmd to run before test - script: python workloads/train.py # actual release test command - - # Only needed for long running test - time_key: last_update # Key in the results json indicating current time - max_update_delay: 30 # If state hasn't been updated in 30s, terminate - - # This block is optional - artifacts: - # Artifact name: location on head node - - detailed_output: detailed_output.csv - - # This block is optional. If present, the contents will be - # deep updated for smoke testing - smoke_test: - cluster: - compute_template: tpl_cpu_smoketest.yaml - -""" # noqa: E501 -import argparse -import enum -import random -import shlex -import string - -import boto3 -import collections -import copy -import datetime -import hashlib -import jinja2 -import json -import logging -import multiprocessing -import os -import requests -import shutil -import subprocess -import sys -import re -import tempfile -import time -from queue import Empty -from typing import Any, Dict, Optional, Tuple, List - -import yaml - -import anyscale -import anyscale.conf -from anyscale.authenticate import get_auth_api_client -from anyscale.controllers.session_controller import SessionController -from anyscale.sdk.anyscale_client.sdk import AnyscaleSDK - -logger = logging.getLogger() -logger.setLevel(logging.INFO) -handler = logging.StreamHandler(stream=sys.stdout) -formatter = logging.Formatter( - fmt="[%(levelname)s %(asctime)s] " "%(filename)s: %(lineno)d " "%(message)s" -) -handler.setFormatter(formatter) -logger.addHandler(handler) - - -def _format_link(link: str): - # Use ANSI escape code to allow link to be clickable - # https://buildkite.com/docs/pipelines/links-and-images - # -in-log-output - return "\033]1339;url='" + link + "'\a\n" - - -def getenv_default(key: str, default: Optional[str] = None): - """Return environment variable with default value""" - # If the environment variable is set but "", still return default - return os.environ.get(key, None) or default - - -GLOBAL_CONFIG = { - "ANYSCALE_USER": getenv_default("ANYSCALE_USER", "release-automation@anyscale.com"), - "ANYSCALE_HOST": getenv_default("ANYSCALE_HOST", "https://console.anyscale.com"), - "ANYSCALE_CLI_TOKEN": getenv_default("ANYSCALE_CLI_TOKEN"), - "ANYSCALE_CLOUD_ID": getenv_default( - "ANYSCALE_CLOUD_ID", "cld_4F7k8814aZzGG8TNUGPKnc" - ), # anyscale_default_cloud - "ANYSCALE_PROJECT": getenv_default("ANYSCALE_PROJECT", ""), - "RAY_VERSION": getenv_default("RAY_VERSION", "2.0.0.dev0"), - "RAY_REPO": getenv_default("RAY_REPO", "https://github.com/ray-project/ray.git"), - "RAY_BRANCH": getenv_default("RAY_BRANCH", "master"), - "RELEASE_AWS_BUCKET": getenv_default( - "RELEASE_AWS_BUCKET", "ray-release-automation-results" - ), - "RELEASE_AWS_LOCATION": getenv_default("RELEASE_AWS_LOCATION", "dev"), - "RELEASE_AWS_DB_NAME": getenv_default("RELEASE_AWS_DB_NAME", "ray_ci"), - "RELEASE_AWS_DB_TABLE": getenv_default( - "RELEASE_AWS_DB_TABLE", "release_test_result" - ), - "RELEASE_AWS_DB_SECRET_ARN": getenv_default( - "RELEASE_AWS_DB_SECRET_ARN", - "arn:aws:secretsmanager:us-west-2:029272617770:secret:" - "rds-db-credentials/cluster-7RB7EYTTBK2EUC3MMTONYRBJLE/ray_ci-MQN2hh", - ), - 
"RELEASE_AWS_DB_RESOURCE_ARN": getenv_default( - "RELEASE_AWS_DB_RESOURCE_ARN", - "arn:aws:rds:us-west-2:029272617770:cluster:ci-reporting", - ), - "RELEASE_RESULTS_DIR": getenv_default( - "RELEASE_RESULTS_DIR", "/tmp/ray_release_test_artifacts" - ), - "DATESTAMP": str(datetime.datetime.now().strftime("%Y%m%d")), - "TIMESTAMP": str(int(datetime.datetime.now().timestamp())), - "EXPIRATION_1D": str( - (datetime.datetime.now() + datetime.timedelta(days=1)).strftime("%Y-%m-%d") - ), - "EXPIRATION_2D": str( - (datetime.datetime.now() + datetime.timedelta(days=2)).strftime("%Y-%m-%d") - ), - "EXPIRATION_3D": str( - (datetime.datetime.now() + datetime.timedelta(days=3)).strftime("%Y-%m-%d") - ), - "REPORT_RESULT": getenv_default("REPORT_RESULT", ""), -} - -REPORT_S = 30 -RETRY_MULTIPLIER = 2 -VALID_TEAMS = ["ml", "core", "serve"] - - -class ExitCode(enum.Enum): - # If you change these, also change the `retry` section - # in `build_pipeline.py` and the `reason()` function in `run_e2e.sh` - UNSPECIFIED = 2 - UNKNOWN = 3 - RUNTIME_ERROR = 4 - COMMAND_ERROR = 5 - COMMAND_TIMEOUT = 6 - PREPARE_TIMEOUT = 7 - FILESYNC_TIMEOUT = 8 - SESSION_TIMEOUT = 9 - PREPARE_ERROR = 10 - APPCONFIG_BUILD_ERROR = 11 - INFRA_ERROR = 12 - - -def exponential_backoff_retry(f, retry_exceptions, initial_retry_delay_s, max_retries): - retry_cnt = 0 - retry_delay_s = initial_retry_delay_s - while True: - try: - return f() - except retry_exceptions as e: - retry_cnt += 1 - if retry_cnt > max_retries: - raise - logger.info( - f"Retry function call failed due to {e} " - f"in {retry_delay_s} seconds..." - ) - time.sleep(retry_delay_s) - retry_delay_s *= RETRY_MULTIPLIER - - -def maybe_fetch_api_token(): - if GLOBAL_CONFIG["ANYSCALE_CLI_TOKEN"] is None: - logger.info("Missing ANYSCALE_CLI_TOKEN, retrieving from AWS secrets store") - # NOTE(simon) This should automatically retrieve - # release-automation@anyscale.com's anyscale token - GLOBAL_CONFIG["ANYSCALE_CLI_TOKEN"] = boto3.client( - "secretsmanager", region_name="us-west-2" - ).get_secret_value( - SecretId="arn:aws:secretsmanager:us-west-2:029272617770:secret:" - "release-automation/" - "anyscale-token20210505220406333800000001-BcUuKB" - )[ - "SecretString" - ] - - -class PrepareCommandRuntimeError(RuntimeError): - pass - - -class ReleaseTestRuntimeError(RuntimeError): - pass - - -class ReleaseTestInfraError(ReleaseTestRuntimeError): - pass - - -class ReleaseTestTimeoutError(ReleaseTestRuntimeError): - pass - - -class SessionTimeoutError(ReleaseTestTimeoutError): - pass - - -class FileSyncTimeoutError(ReleaseTestTimeoutError): - pass - - -class CommandTimeoutError(ReleaseTestTimeoutError): - pass - - -class PrepareCommandTimeoutError(ReleaseTestTimeoutError): - pass - - -# e.g., App config failure. 
-class AppConfigBuildFailure(RuntimeError): - pass - - -class State: - def __init__(self, state: str, timestamp: float, data: Any): - self.state = state - self.timestamp = timestamp - self.data = data - - -class CommandRunnerHack: - def __init__(self): - self.subprocess_pool: Dict[int, subprocess.Popen] = dict() - self.start_time: Dict[int, float] = dict() - self.counter = 0 - - def run_command(self, session_name, cmd_to_run, env_vars) -> int: - self.counter += 1 - command_id = self.counter - env = os.environ.copy() - env["RAY_ADDRESS"] = f"anyscale://{session_name}" - env["ANYSCALE_CLI_TOKEN"] = GLOBAL_CONFIG["ANYSCALE_CLI_TOKEN"] - env["ANYSCALE_HOST"] = GLOBAL_CONFIG["ANYSCALE_HOST"] - full_cmd = " ".join(f"{k}={v}" for k, v in env_vars.items()) + " " + cmd_to_run - logger.info(f"Executing {cmd_to_run} with {env_vars} via ray job submit") - proc = subprocess.Popen( - f"ray job submit -- bash -c {shlex.quote(full_cmd)}", - shell=True, - stdout=sys.stdout, - stderr=sys.stderr, - env=env, - ) - self.subprocess_pool[command_id] = proc - self.start_time[command_id] = time.time() - return command_id - - def wait_command(self, command_id: int): - retcode = self.subprocess_pool[command_id].wait() - duration = time.time() - self.start_time[command_id] - return retcode, duration - - -global_command_runner = CommandRunnerHack() - - -class S3SyncSessionController(SessionController): - def __init__(self, sdk, result_queue): - self.sdk = sdk - self.result_queue = result_queue - self.s3_client = boto3.client("s3") - self.bucket = GLOBAL_CONFIG["RELEASE_AWS_BUCKET"] - super().__init__() - - def _generate_tmp_s3_path(self): - fn = "".join(random.choice(string.ascii_lowercase) for i in range(10)) - location = f"tmp/{fn}" - return location - - def pull(self, session_name, source, target): - remote_upload_to = self._generate_tmp_s3_path() - # remote source -> s3 - cid = global_command_runner.run_command( - session_name, - ( - f"pip install -q awscli && aws s3 cp {source} " - f"s3://{self.bucket}/{remote_upload_to} " - "--acl bucket-owner-full-control" - ), - {}, - ) - global_command_runner.wait_command(cid) - - # s3 -> local target - self.s3_client.download_file( - Bucket=self.bucket, - Key=remote_upload_to, - Filename=target, - ) - - def _push_local_dir(self, session_name): - remote_upload_to = self._generate_tmp_s3_path() - # pack local dir - _, local_path = tempfile.mkstemp() - shutil.make_archive(local_path, "gztar", os.getcwd()) - # local source -> s3 - self.s3_client.upload_file( - Filename=local_path + ".tar.gz", - Bucket=self.bucket, - Key=remote_upload_to, - ) - # s3 -> remote target - cid = global_command_runner.run_command( - session_name, - ( - "pip install -q awscli && " - f"aws s3 cp s3://{self.bucket}/{remote_upload_to} " - f"archive.tar.gz && " - "tar xf archive.tar.gz" - ), - {}, - ) - global_command_runner.wait_command(cid) - - def push( - self, - session_name: str, - source: Optional[str], - target: Optional[str], - config: Optional[str], - all_nodes: bool, - no_warning: bool = False, - ): - if source is None and target is None: - self._push_local_dir(session_name) - return - - assert isinstance(source, str) - assert isinstance(target, str) - - remote_upload_to = self._generate_tmp_s3_path() - # local source -> s3 - self.s3_client.upload_file( - Filename=source, - Bucket=self.bucket, - Key=remote_upload_to, - ) - # s3 -> remote target - cid = global_command_runner.run_command( - session_name, - "pip install -q awscli && " - f"aws s3 cp s3://{self.bucket}/{remote_upload_to} {target}", - 
{}, - ) - global_command_runner.wait_command(cid) - - -sys.path.insert(0, anyscale.ANYSCALE_RAY_DIR) - - -def anyscale_project_url(project_id: str): - return ( - f"{GLOBAL_CONFIG['ANYSCALE_HOST']}" - f"/o/anyscale-internal/projects/{project_id}" - f"/?tab=session-list" - ) - - -def anyscale_session_url(project_id: str, session_id: str): - return ( - f"{GLOBAL_CONFIG['ANYSCALE_HOST']}" - f"/o/anyscale-internal/projects/{project_id}" - f"/clusters/{session_id}" - ) - - -def anyscale_compute_tpl_url(compute_tpl_id: str): - return ( - f"{GLOBAL_CONFIG['ANYSCALE_HOST']}" - f"/o/anyscale-internal/configurations/cluster-computes" - f"/{compute_tpl_id}" - ) - - -def anyscale_app_config_build_url(build_id: str): - return ( - f"{GLOBAL_CONFIG['ANYSCALE_HOST']}" - f"/o/anyscale-internal/configurations/app-config-details" - f"/{build_id}" - ) - - -def wheel_url(ray_version, git_branch, git_commit): - return ( - f"https://s3-us-west-2.amazonaws.com/ray-wheels/" - f"{git_branch}/{git_commit}/" - f"ray-{ray_version}-cp37-cp37m-manylinux2014_x86_64.whl" - ) - - -def wheel_exists(ray_version, git_branch, git_commit): - url = wheel_url(ray_version, git_branch, git_commit) - return requests.head(url).status_code == 200 - - -def commit_or_url(commit_or_url: str) -> str: - if commit_or_url.startswith("http"): - url = None - # Directly return the S3 url - if "s3" in commit_or_url and "amazonaws.com" in commit_or_url: - url = commit_or_url - # Resolve the redirects for buildkite artifacts - # This is needed because otherwise pip won't recognize the file name. - elif "buildkite.com" in commit_or_url and "artifacts" in commit_or_url: - url = requests.head(commit_or_url, allow_redirects=True).url - if url is not None: - # Extract commit from url so that we can do the - # commit sanity check later. 
- p = re.compile("/([a-f0-9]{40})/") - m = p.search(url) - if m is not None: - os.environ["RAY_COMMIT"] = m.group(1) - return url - - # Else, assume commit - os.environ["RAY_COMMIT"] = commit_or_url - return wheel_url( - GLOBAL_CONFIG["RAY_VERSION"], GLOBAL_CONFIG["RAY_BRANCH"], commit_or_url - ) - - -def get_latest_commits(repo: str, branch: str = "master") -> List[str]: - cur = os.getcwd() - with tempfile.TemporaryDirectory() as tmpdir: - os.chdir(tmpdir) - - clone_cmd = [ - "git", - "clone", - "--filter=tree:0", - "--no-checkout", - # "--single-branch", - # "--depth=10", - f"--branch={branch}", - repo, - tmpdir, - ] - log_cmd = [ - "git", - "log", - "-n", - "10", - "--pretty=format:%H", - ] - - subprocess.check_output(clone_cmd) - commits = ( - subprocess.check_output(log_cmd).decode(sys.stdout.encoding).split("\n") - ) - os.chdir(cur) - return commits - - -def find_ray_wheels(repo: str, branch: str, version: str): - url = None - commits = get_latest_commits(repo, branch) - logger.info(f"Latest 10 commits for branch {branch}: {commits}") - for commit in commits: - if wheel_exists(version, branch, commit): - url = wheel_url(version, branch, commit) - os.environ["RAY_WHEELS"] = url - os.environ["RAY_COMMIT"] = commit - logger.info( - f"Found wheels URL for Ray {version}, branch {branch}: " f"{url}" - ) - break - return url - - -def populate_wheels_sanity_check(commit: Optional[str] = None): - if not commit: - cmd = ( - "python -c 'import ray; print(" - '"No commit sanity check available, but this is the ' - "Ray wheel commit:\", ray.__commit__)'" - ) - else: - cmd = ( - f"python -c 'import ray; " - f'assert ray.__commit__ == "{commit}", ray.__commit__\'' - ) - os.environ["RAY_WHEELS_SANITY_CHECK"] = cmd - - -def _check_stop(stop_event: multiprocessing.Event, timeout_type: str): - if stop_event.is_set(): - if timeout_type == "prepare_command": - raise PrepareCommandTimeoutError( - "Process timed out in the prepare command stage." - ) - if timeout_type == "command": - raise CommandTimeoutError("Process timed out while running a command.") - elif timeout_type == "file_sync": - raise FileSyncTimeoutError("Process timed out while syncing files.") - elif timeout_type == "session": - raise SessionTimeoutError("Process timed out while starting a session.") - else: - assert False, "Unexpected timeout type." 
- - -def _deep_update(d, u): - for k, v in u.items(): - if isinstance(v, collections.abc.Mapping): - d[k] = _deep_update(d.get(k, {}), v) - else: - d[k] = v - return d - - -def _dict_hash(dt: Dict[Any, Any]) -> str: - json_str = json.dumps(dt, sort_keys=True, ensure_ascii=True) - sha = hashlib.sha256() - sha.update(json_str.encode()) - return sha.hexdigest() - - -def _load_config(local_dir: str, config_file: Optional[str]) -> Optional[Dict]: - if not config_file: - return None - - config_path = os.path.join(local_dir, config_file) - with open(config_path, "rt") as f: - # Todo: jinja2 render - content = f.read() - - env = copy.deepcopy(os.environ) - env.update(GLOBAL_CONFIG) - - content = jinja2.Template(content).render(env=env) - return yaml.safe_load(content) - - -def has_errored(result: Dict[Any, Any]) -> bool: - return result.get("status", "invalid") != "finished" - - -def maybe_get_alert_for_result(result_dict: Dict[str, Any]) -> Optional[str]: - # If we get a result dict, check if any alerts should be raised - from alert import SUITE_TO_FN, default_handle_result - - logger.info("Checking if results are valid...") - - # Copy dict because we modify kwargs here - handle_result_kwargs = result_dict.copy() - handle_result_kwargs["created_on"] = None - - test_suite = handle_result_kwargs.get("test_suite", None) - - handle_fn = SUITE_TO_FN.get(test_suite, None) - if not handle_fn: - logger.warning(f"No handle for suite {test_suite}") - alert = default_handle_result(**handle_result_kwargs) - else: - alert = handle_fn(**handle_result_kwargs) - - return alert - - -def report_result( - *, - test_suite: str, - test_name: str, - status: str, - last_logs: str, - results: Dict[Any, Any], - artifacts: Dict[Any, Any], - category: str, - team: str, -): - # session_url: str, commit_url: str, - # runtime: float, stable: bool, frequency: str, return_code: int): - """Report the test result to database.""" - now = datetime.datetime.utcnow() - rds_data_client = boto3.client("rds-data", region_name="us-west-2") - - schema = GLOBAL_CONFIG["RELEASE_AWS_DB_TABLE"] - - parameters = [ - { - "name": "created_on", - "typeHint": "TIMESTAMP", - "value": {"stringValue": now.strftime("%Y-%m-%d %H:%M:%S")}, - }, - {"name": "test_suite", "value": {"stringValue": test_suite}}, - {"name": "test_name", "value": {"stringValue": test_name}}, - {"name": "status", "value": {"stringValue": status}}, - {"name": "last_logs", "value": {"stringValue": last_logs}}, - { - "name": "results", - "typeHint": "JSON", - "value": {"stringValue": json.dumps(results)}, - }, - { - "name": "artifacts", - "typeHint": "JSON", - "value": {"stringValue": json.dumps(artifacts)}, - }, - {"name": "category", "value": {"stringValue": category}}, - {"name": "team", "value": {"stringValue": team}}, - ] - columns = [param["name"] for param in parameters] - values = [f":{param['name']}" for param in parameters] - column_str = ", ".join(columns).strip(", ") - value_str = ", ".join(values).strip(", ") - - sql = f"INSERT INTO {schema} " f"({column_str}) " f"VALUES ({value_str})" - - logger.info(f"Query: {sql}") - - # Default boto3 call timeout is 45 seconds. 
- retry_delay_s = 64 - MAX_RDS_RETRY = 3 - exponential_backoff_retry( - lambda: rds_data_client.execute_statement( - database=GLOBAL_CONFIG["RELEASE_AWS_DB_NAME"], - parameters=parameters, - secretArn=GLOBAL_CONFIG["RELEASE_AWS_DB_SECRET_ARN"], - resourceArn=GLOBAL_CONFIG["RELEASE_AWS_DB_RESOURCE_ARN"], - schema=schema, - sql=sql, - ), - retry_exceptions=rds_data_client.exceptions.StatementTimeoutException, - initial_retry_delay_s=retry_delay_s, - max_retries=MAX_RDS_RETRY, - ) - logger.info("Result has been persisted to the database") - - # TODO(jjyao) Migrate to new infra later - logger.info("Persisting results to the databricks delta lake...") - - result_json = { - "_table": "release_test_result", - "created_on": now.strftime("%Y-%m-%d %H:%M:%S"), - "status": status, - "results": results, - "test_name": test_name, - "team": team, - "cluster_url": results["_session_url"], - "wheel_url": results["_commit_url"], - "runtime": results["_runtime"], - "stable": results["_stable"], - } - - logger.debug(f"Result json: {json.dumps(result_json)}") - - firehose_client = boto3.client("firehose", region_name="us-west-2") - firehose_client.put_record( - DeliveryStreamName="ray-ci-results", Record={"Data": json.dumps(result_json)} - ) - - logger.info("Result has been persisted to the databricks delta lake") - - -def log_results_and_artifacts(result: Dict): - results = result.get("results", {}) - if results: - msg = "Observed the following results:\n\n" - - for key, val in results.items(): - msg += f" {key} = {val}\n" - else: - msg = "Did not find any results." - logger.info(msg) - - artifacts = result.get("artifacts", {}) - if artifacts: - msg = "Saved the following artifacts:\n\n" - - for key, val in artifacts.items(): - msg += f" {key} = {val}\n" - else: - msg = "Did not find any artifacts." - logger.info(msg) - - -def _cleanup_session(sdk: AnyscaleSDK, session_id: str): - if session_id: - # Just trigger a request. No need to wait until session shutdown. - sdk.terminate_session(session_id=session_id, terminate_session_options={}) - - -def search_running_session( - sdk: AnyscaleSDK, project_id: str, session_name: str -) -> Optional[str]: - session_id = None - - logger.info(f"Looking for existing session with name {session_name}") - - result = sdk.search_sessions( - project_id=project_id, sessions_query=dict(name=dict(equals=session_name)) - ) - - if len(result.results) > 0 and result.results[0].state == "Running": - logger.info("Found existing session.") - session_id = result.results[0].id - return session_id - - -def find_cloud_by_name( - sdk: AnyscaleSDK, cloud_name: str, _repeat: bool = True -) -> Optional[str]: - cloud_id = None - logger.info(f"Looking up cloud with name `{cloud_name}`. 
") - - paging_token = None - while not cloud_id: - result = sdk.search_clouds( - clouds_query=dict(paging=dict(count=50, paging_token=paging_token)) - ) - - paging_token = result.metadata.next_paging_token - - for res in result.results: - if res.name == cloud_name: - cloud_id = res.id - logger.info(f"Found cloud with name `{cloud_name}` as `{cloud_id}`") - break - - if not paging_token or cloud_id or not len(result.results): - break - - return cloud_id - - -def create_or_find_compute_template( - sdk: AnyscaleSDK, project_id: str, compute_tpl: Dict[Any, Any], _repeat: bool = True -) -> Tuple[Optional[str], Optional[str]]: - compute_tpl_id = None - compute_tpl_name = None - if compute_tpl: - # As of Anyscale 0.4.1, it is an error to use the same compute template - # name within the same organization, between different projects. - compute_tpl_name = f"{project_id}/compute/{_dict_hash(compute_tpl)}" - - logger.info( - f"Tests uses compute template " - f"with name {compute_tpl_name}. Looking up existing " - f"templates." - ) - - paging_token = None - while not compute_tpl_id: - result = sdk.search_compute_templates( - dict( - project_id=project_id, - name=dict(equals=compute_tpl_name), - include_anonymous=True, - ), - paging_token=paging_token, - ) - paging_token = result.metadata.next_paging_token - - for res in result.results: - if res.name == compute_tpl_name: - compute_tpl_id = res.id - logger.info(f"Template already exists with ID {compute_tpl_id}") - break - - if not paging_token: - break - - if not compute_tpl_id: - logger.info( - f"Compute template not found. " - f"Creating with name {compute_tpl_name}." - ) - try: - result = sdk.create_compute_template( - dict( - name=compute_tpl_name, project_id=project_id, config=compute_tpl - ) - ) - compute_tpl_id = result.result.id - except Exception as e: - if _repeat: - logger.warning( - f"Got exception when trying to create compute " - f"template: {e}. Sleeping for 10 seconds and then " - f"try again once..." - ) - time.sleep(10) - return create_or_find_compute_template( - sdk=sdk, - project_id=project_id, - compute_tpl=compute_tpl, - _repeat=False, - ) - - raise e - - logger.info(f"Compute template created with ID {compute_tpl_id}") - - return compute_tpl_id, compute_tpl_name - - -def create_or_find_app_config( - sdk: AnyscaleSDK, project_id: str, app_config: Dict[Any, Any], _repeat: bool = True -) -> Tuple[Optional[str], Optional[str]]: - app_config_id = None - app_config_name = None - if app_config: - app_config_name = f"{project_id}-{_dict_hash(app_config)}" - - logger.info( - f"Test uses an app config with hash {app_config_name}. " - f"Looking up existing app configs with this name." - ) - - paging_token = None - while not app_config_id: - result = sdk.list_app_configs( - project_id=project_id, count=50, paging_token=paging_token - ) - paging_token = result.metadata.next_paging_token - - for res in result.results: - if res.name == app_config_name: - app_config_id = res.id - logger.info(f"App config already exists with ID {app_config_id}") - break - - if not paging_token or app_config_id: - break - - if not app_config_id: - logger.info("App config not found. Creating new one.") - try: - result = sdk.create_app_config( - dict( - name=app_config_name, - project_id=project_id, - config_json=app_config, - ) - ) - app_config_id = result.result.id - except Exception as e: - if _repeat: - logger.warning( - f"Got exception when trying to create app " - f"config: {e}. Sleeping for 10 seconds and then " - f"try again once..." 
- ) - time.sleep(10) - return create_or_find_app_config( - sdk=sdk, - project_id=project_id, - app_config=app_config, - _repeat=False, - ) - - raise e - - logger.info(f"App config created with ID {app_config_id}") - - return app_config_id, app_config_name - - -def run_bash_script(local_dir: str, bash_script: str): - previous_dir = os.getcwd() - - bash_script_local_dir = os.path.dirname(bash_script) - file_name = os.path.basename(bash_script) - - full_local_dir = os.path.join(local_dir, bash_script_local_dir) - os.chdir(full_local_dir) - - subprocess.run("./" + file_name, shell=True, check=True) - - os.chdir(previous_dir) - - -def install_app_config_packages(app_config: Dict[Any, Any]): - os.environ.update(app_config.get("env_vars", {})) - packages = app_config["python"]["pip_packages"] - for package in packages: - subprocess.check_output(["pip", "install", "-U", package], text=True) - - -def install_matching_ray(): - wheel = os.environ.get("RAY_WHEELS", None) - if not wheel: - return - assert "manylinux2014_x86_64" in wheel, wheel - if sys.platform == "darwin": - platform = "macosx_10_15_intel" - elif sys.platform == "win32": - platform = "win_amd64" - else: - platform = "manylinux2014_x86_64" - wheel = wheel.replace("manylinux2014_x86_64", platform) - subprocess.check_output(["pip", "uninstall", "-y", "ray"], text=True) - subprocess.check_output(["pip", "install", "-U", wheel], text=True) - - -def wait_for_build_or_raise( - sdk: AnyscaleSDK, app_config_id: Optional[str] -) -> Optional[str]: - if not app_config_id: - return None - - # Fetch build - build_id = None - last_status = None - result = sdk.list_builds(app_config_id) - for build in sorted(result.results, key=lambda b: b.created_at): - build_id = build.id - last_status = build.status - - if build.status == "failed": - continue - - if build.status == "succeeded": - logger.info( - f"Link to app config build: " - f"{_format_link(anyscale_app_config_build_url(build_id))}" - ) - return build_id - - if last_status == "failed": - raise AppConfigBuildFailure("App config build failed.") - - if not build_id: - raise AppConfigBuildFailure("No build found for app config.") - - # Build found but not failed/finished yet - completed = False - start_wait = time.time() - next_report = start_wait + REPORT_S - logger.info(f"Waiting for build {build_id} to finish...") - logger.info( - f"Track progress here: " - f"{_format_link(anyscale_app_config_build_url(build_id))}" - ) - while not completed: - now = time.time() - if now > next_report: - logger.info( - f"... still waiting for build {build_id} to finish " - f"({int(now - start_wait)} seconds) ..." - ) - next_report = next_report + REPORT_S - - result = sdk.get_build(build_id) - build = result.result - - if build.status == "failed": - raise AppConfigBuildFailure( - f"App config build failed. Please see " - f"{anyscale_app_config_build_url(build_id)} for details" - ) - - if build.status == "succeeded": - logger.info("Build succeeded.") - return build_id - - completed = build.status not in ["in_progress", "pending"] - - if completed: - raise AppConfigBuildFailure( - f"Unknown build status: {build.status}. 
Please see " - f"{anyscale_app_config_build_url(build_id)} for details" - ) - - time.sleep(1) - - return build_id - - -def run_job( - cluster_name: str, - compute_tpl_name: str, - cluster_env_name: str, - job_name: str, - min_workers: str, - script: str, - script_args: List[str], - env_vars: Dict[str, str], - autosuspend: int, -) -> Tuple[int, str]: - # Start cluster and job - address = f"anyscale://{cluster_name}?autosuspend={autosuspend}" - logger.info(f"Starting job {job_name} with Ray address: {address}") - env = copy.deepcopy(os.environ) - env.update(GLOBAL_CONFIG) - env.update(env_vars) - env["RAY_ADDRESS"] = address - env["RAY_JOB_NAME"] = job_name - env["RAY_RELEASE_MIN_WORKERS"] = str(min_workers) - proc = subprocess.Popen( - script.split(" ") + script_args, - env=env, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - ) - proc.stdout.reconfigure(line_buffering=True) - logs = "" - for line in proc.stdout: - logs += line - sys.stdout.write(line) - proc.wait() - return proc.returncode, logs - - -def create_and_wait_for_session( - sdk: AnyscaleSDK, - stop_event: multiprocessing.Event, - session_name: str, - session_options: Dict[Any, Any], - project_id: str, -) -> str: - # Create session - logger.info(f"Creating session {session_name}") - result = sdk.create_session(session_options) - session_id = result.result.id - - # Trigger session start - logger.info(f"Starting session {session_name} ({session_id})") - session_url = anyscale_session_url( - project_id=GLOBAL_CONFIG["ANYSCALE_PROJECT"], session_id=session_id - ) - logger.info(f"URL: {session_url}") - logger.info(f"Link to session: {_format_link(session_url)}") - - result = sdk.start_session(session_id, start_session_options={}) - sop_id = result.result.id - completed = result.result.completed - - # Wait for session - logger.info(f"Waiting for session {session_name}...") - start_wait = time.time() - next_report = start_wait + REPORT_S - while not completed: - # Sleep 1 sec before next check. - time.sleep(1) - - session_operation_response = sdk.get_session_operation( - sop_id, _request_timeout=30 - ) - session_operation = session_operation_response.result - completed = session_operation.completed - - try: - _check_stop(stop_event, "session") - except SessionTimeoutError as e: - # Always queue session termination. - # We can't do this later as we won't return anything here - # and the session ID will not be set in the control loop - _cleanup_session(sdk=sdk, session_id=session_id) - raise e - - now = time.time() - if now > next_report: - logger.info( - f"... still waiting for session {session_name} " - f"({int(now - start_wait)} seconds) ..." - ) - next_report = next_report + REPORT_S - - result = sdk.get_session(session_id) - if not result.result.state != "Active": - raise ReleaseTestInfraError( - f"Cluster did not come up - most likely the nodes are currently " - f"not available. 
Please check the cluster startup logs: " - f"{anyscale_session_url(project_id, session_id)}" - ) - - return session_id - - -def run_session_command( - sdk: AnyscaleSDK, - session_id: str, - cmd_to_run: str, - result_queue: multiprocessing.Queue, - env_vars: Dict[str, str], - state_str: str = "CMD_RUN", -) -> Tuple[str, int]: - full_cmd = " ".join(f"{k}={v}" for k, v in env_vars.items()) + " " + cmd_to_run - - logger.info(f"Running command in session {session_id}: \n" f"{full_cmd}") - session_url = anyscale_session_url( - project_id=GLOBAL_CONFIG["ANYSCALE_PROJECT"], session_id=session_id - ) - logger.info(f"URL: {session_url}") - logger.info(f"Link to session: {_format_link(session_url)}") - result_queue.put(State(state_str, time.time(), None)) - result = sdk.create_session_command( - dict(session_id=session_id, shell_command=full_cmd) - ) - - scd_id = result.result.id - return scd_id, result - - -def wait_for_session_command_to_complete( - create_session_command_result, - sdk: AnyscaleSDK, - scd_id: str, - stop_event: multiprocessing.Event, - state_str: str = "CMD_RUN", -): - result = create_session_command_result - completed = result.result.finished_at is not None - start_wait = time.time() - next_report = start_wait + REPORT_S - while not completed: - # Sleep 1 sec before next check. - time.sleep(1) - - result = exponential_backoff_retry( - lambda: sdk.get_session_command(session_command_id=scd_id), - retry_exceptions=Exception, - initial_retry_delay_s=10, - max_retries=3, - ) - completed = result.result.finished_at - - if state_str == "CMD_RUN": - _check_stop(stop_event, "command") - elif state_str == "CMD_PREPARE": - _check_stop(stop_event, "prepare_command") - - now = time.time() - if now > next_report: - logger.info( - f"... still waiting for command to finish " - f"({int(now - start_wait)} seconds) ..." 
- ) - next_report = next_report + REPORT_S - - status_code = result.result.status_code - runtime = time.time() - start_wait - - if status_code != 0: - if state_str == "CMD_RUN": - raise RuntimeError(f"Command returned non-success status: {status_code}") - elif state_str == "CMD_PREPARE": - raise PrepareCommandRuntimeError( - f"Prepare command returned non-success status: {status_code}" - ) - - return status_code, runtime - - -def get_command_logs( - session_controller: SessionController, scd_id: str, lines: int = 50 -): - result = exponential_backoff_retry( - lambda: session_controller.api_client.get_execution_logs_api_v2_session_commands_session_command_id_execution_logs_get( # noqa: E501 - session_command_id=scd_id, start_line=-1 * lines, end_line=0 - ), - retry_exceptions=Exception, - initial_retry_delay_s=10, - max_retries=3, - ) - - return result.result.lines - - -def get_remote_json_content( - temp_dir: str, - session_name: str, - remote_file: Optional[str], - session_controller: SessionController, -): - if not remote_file: - logger.warning("No remote file specified, returning empty dict") - return {} - local_target_file = os.path.join(temp_dir, ".tmp.json") - session_controller.pull( - session_name=session_name, source=remote_file, target=local_target_file - ) - with open(local_target_file, "rt") as f: - return json.load(f) - - -def get_local_json_content( - local_file: Optional[str], -): - if not local_file: - logger.warning("No local file specified, returning empty dict") - return {} - with open(local_file, "rt") as f: - return json.load(f) - - -def pull_artifacts_and_store_in_cloud( - temp_dir: str, - logs: str, - session_name: str, - test_name: str, - artifacts: Optional[Dict[Any, Any]], - session_controller: SessionController, -): - output_log_file = os.path.join(temp_dir, "output.log") - with open(output_log_file, "wt") as f: - f.write(logs) - - bucket = GLOBAL_CONFIG["RELEASE_AWS_BUCKET"] - location = f"{GLOBAL_CONFIG['RELEASE_AWS_LOCATION']}" f"/{session_name}/{test_name}" - saved_artifacts = {} - - s3_client = boto3.client("s3") - s3_client.upload_file(output_log_file, bucket, f"{location}/output.log") - saved_artifacts["output.log"] = f"s3://{bucket}/{location}/output.log" - - # Download artifacts - if artifacts: - for name, remote_file in artifacts.items(): - logger.info(f"Downloading artifact `{name}` from " f"{remote_file}") - local_target_file = os.path.join(temp_dir, name) - session_controller.pull( - session_name=session_name, source=remote_file, target=local_target_file - ) - - # Upload artifacts to s3 - s3_client.upload_file(local_target_file, bucket, f"{location}/{name}") - saved_artifacts[name] = f"s3://{bucket}/{location}/{name}" - - return saved_artifacts - - -def find_session_by_test_name( - sdk: AnyscaleSDK, - session_controller: SessionController, - temp_dir: str, - state_json: str, - project_id: str, - test_name: str, -) -> Optional[Tuple[str, str, Dict[Any, Any]]]: - paging_token = None - - while True: # Will break if paging_token is None after first search - result = sdk.search_sessions( - project_id=project_id, - sessions_query=dict( - name=dict(contains=test_name), - state_filter=["Running"], - paging=dict(count=20, paging_token=paging_token), - ), - ) - - for session in result.results: - logger.info(f"Found sessions {session.name}") - if not session.name.startswith(test_name): - continue - - try: - session_state = get_remote_json_content( - temp_dir=temp_dir, - session_name=session.name, - remote_file=state_json, - 
session_controller=session_controller, - ) - except Exception as exc: - raise RuntimeError( - f"Could not get remote json content " f"for session {session.name}" - ) from exc - - if session_state.get("test_name") == test_name: - return session.id, session.name, session_state - - session_token = result.metadata.next_paging_token - - if not session_token: - return None - - -def get_latest_running_command_id( - sdk: AnyscaleSDK, session_id: str -) -> Tuple[Optional[str], Optional[bool]]: - scd_id = None - paging_token = None - - success = None - - while not scd_id: - result = sdk.list_session_commands( - session_id=session_id, paging_token=paging_token - ) - - paging_token = result.metadata.next_paging_token - - for cmd in result.results: - if not scd_id: - scd_id = cmd.id - - completed = cmd.finished_at is not None - - if completed: - if success is None: - success = True - - success = success and cmd.status_code == 0 - - if not completed: - return cmd.id, None - - return scd_id, success or False - - -def run_test_config( - local_dir: str, - project_id: str, - test_name: str, - test_config: Dict[Any, Any], - commit_url: str, - session_name: str = None, - smoke_test: bool = False, - no_terminate: bool = False, - kick_off_only: bool = False, - check_progress: bool = False, - upload_artifacts: bool = True, - keep_results_dir: bool = False, - app_config_id_override: Optional[str] = None, -) -> Dict[Any, Any]: - """ - - Returns: - Dict with the following entries: - status (str): One of [finished, error, timeout] - command_link (str): Link to command (Anyscale web UI) - last_logs (str): Last logs (excerpt) to send to owner - artifacts (dict): Dict of artifacts - Key: Name - Value: S3 URL - """ - stop_event = multiprocessing.Event() - result_queue = multiprocessing.Queue() - - if not session_name: - session_name = f"{test_name}_{int(time.time())}" - - temp_dir = tempfile.mkdtemp() - - # Result and state files - results_json = test_config["run"].get("results", None) - if results_json is None: - results_json = "/tmp/release_test_out.json" - - state_json = test_config["run"].get("state", None) - if state_json is None: - state_json = "/tmp/release_test_state.json" - - env_vars = { - "RAY_ADDRESS": os.environ.get("RAY_ADDRESS", "auto"), - "TEST_OUTPUT_JSON": results_json, - "TEST_STATE_JSON": state_json, - "IS_SMOKE_TEST": "1" if smoke_test else "0", - } - - with open(os.path.join(local_dir, ".anyscale.yaml"), "wt") as f: - f.write(f"project_id: {project_id}") - os.chdir(local_dir) - - # Setup interface - # Unfortunately, there currently seems to be no great way to - # transfer files with the Anyscale SDK. - # So we use the session controller instead. - sdk = AnyscaleSDK( - auth_token=GLOBAL_CONFIG["ANYSCALE_CLI_TOKEN"], - host=GLOBAL_CONFIG["ANYSCALE_HOST"], - ) - - get_auth_api_client( - cli_token=GLOBAL_CONFIG["ANYSCALE_CLI_TOKEN"], - host=GLOBAL_CONFIG["ANYSCALE_HOST"], - ) - on_k8s = test_config["cluster"].get("compute_on_k8s") - if on_k8s: - session_controller = S3SyncSessionController(sdk, result_queue) - else: - session_controller = SessionController() - - cloud_id = test_config["cluster"].get("cloud_id", None) - cloud_name = test_config["cluster"].get("cloud_name", None) - if cloud_id and cloud_name: - raise RuntimeError( - f"You can't supply both a `cloud_name` ({cloud_name}) and a " - f"`cloud_id` ({cloud_id}) in the test cluster configuration. " - f"Please provide only one." 
- ) - elif cloud_name and not cloud_id: - cloud_id = find_cloud_by_name(sdk, cloud_name) - if not cloud_id: - raise RuntimeError(f"Couldn't find cloud with name `{cloud_name}`.") - else: - cloud_id = cloud_id or GLOBAL_CONFIG["ANYSCALE_CLOUD_ID"] - - # Overwrite global config so that `_load_config` sets the correct cloud - GLOBAL_CONFIG["ANYSCALE_CLOUD_ID"] = cloud_id - - cluster_config_rel_path = test_config["cluster"].get("cluster_config", None) - cluster_config = _load_config(local_dir, cluster_config_rel_path) - - app_config_rel_path = test_config["cluster"].get("app_config", None) - app_config = _load_config(local_dir, app_config_rel_path) - if app_config.get("env_vars") is None: - app_config["env_vars"] = {} - # A lot of staging tests share the same app config yaml, except the flags. - # `app_env_vars` in test config will help this one. - # Here we extend the env_vars to use the one specified in the test config. - if test_config.get("app_env_vars") is not None: - app_config["env_vars"].update(test_config["app_env_vars"]) - logger.info(f"Using app config:\n{app_config}") - - # Flags for redisless ray. - # TODO: remove them once done. - app_config["env_vars"]["MATCH_AUTOSCALER_AND_RAY_IMAGES"] = "1" - app_config["env_vars"]["RAY_bootstrap_with_gcs"] = "1" - app_config["env_vars"]["RAY_gcs_storage"] = "memory" - app_config["env_vars"]["RAY_USAGE_STATS_ENABLED"] = "1" - app_config["env_vars"]["RAY_USAGE_STATS_SOURCE"] = "nightly-tests" - - compute_tpl_rel_path = test_config["cluster"].get("compute_template", None) - compute_tpl = _load_config(local_dir, compute_tpl_rel_path) - - timeout = test_config["run"].get("timeout", 1800) - if "RELEASE_OVERRIDE_TIMEOUT" in os.environ: - previous_timeout = timeout - timeout = int(os.environ.get("RELEASE_OVERRIDE_TIMEOUT", str(timeout))) - logger.warning( - f"Release test timeout override: {timeout} " - f"(would have been {previous_timeout})" - ) - - # If a test is long running, timeout does not mean it failed - is_long_running = test_config["run"].get("long_running", False) - - build_id_override = None - if test_config["run"].get("use_connect"): - autosuspend_mins = test_config["run"].get("autosuspend_mins", 5) - assert not kick_off_only, "Unsupported for running with Anyscale connect." - if app_config_id_override is not None: - logger.info( - "Using connect and an app config override, waiting until " - "build finishes so we can fetch the app config in order to " - "install its pip packages locally." - ) - build_id_override = wait_for_build_or_raise(sdk, app_config_id_override) - response = sdk.get_cluster_environment_build(build_id_override) - app_config = response.result.config_json - install_app_config_packages(app_config) - install_matching_ray() - elif "autosuspend_mins" in test_config["run"]: - raise ValueError( - "'autosuspend_mins' is only supported if 'use_connect' is True." 
- ) - - # Add information to results dict - def _update_results(results: Dict): - if "last_update" in results: - results["last_update_diff"] = time.time() - results["last_update"] - if smoke_test: - results["smoke_test"] = True - - def _process_finished_command( - session_controller: SessionController, - scd_id: str, - results: Optional[Dict] = None, - runtime: int = None, - commit_url: str = None, - session_url: str = None, - ): - logger.info("Command finished successfully.") - if results_json: - results = results or get_remote_json_content( - temp_dir=temp_dir, - session_name=session_name, - remote_file=results_json, - session_controller=session_controller, - ) - else: - results = {"passed": 1} - - _update_results(results) - - if scd_id: - try: - logs = get_command_logs( - session_controller, scd_id, test_config.get("log_lines", 50) - ) - except Exception as e: - raise ReleaseTestInfraError( - f"Could not fetch command logs: {e}. This is an " - f"infrastructure error on the Anyscale side." - ) - else: - logs = "No command found to fetch logs for" - - if upload_artifacts: - saved_artifacts = pull_artifacts_and_store_in_cloud( - temp_dir=temp_dir, - logs=logs, # Also save logs in cloud - session_name=session_name, - test_name=test_name, - artifacts=test_config.get("artifacts", {}), - session_controller=session_controller, - ) - - logger.info("Fetched results and stored on the cloud. Returning.") - else: - saved_artifacts = {} - logger.info( - "Usually I would have fetched the results and " - "artifacts and stored them on S3." - ) - - # Add these metadata here to avoid changing SQL schema. - results["_runtime"] = runtime - results["_session_url"] = session_url - results["_commit_url"] = commit_url - results["_stable"] = test_config.get("stable", True) - result_queue.put( - State( - "END", - time.time(), - { - "status": "finished", - "last_logs": logs, - "results": results, - "artifacts": saved_artifacts, - }, - ) - ) - - # When running the test script in client mode, the finish command is a - # completed local process. - def _process_finished_client_command(returncode: int, logs: str): - if returncode != 0: - raise RuntimeError(f"Client returned non-success status: {returncode}") - if upload_artifacts: - saved_artifacts = pull_artifacts_and_store_in_cloud( - temp_dir=temp_dir, - logs=logs, # Also save logs in cloud - session_name=session_name, - test_name=test_name, - artifacts=None, - session_controller=None, - ) - logger.info("Stored results on the cloud. Returning.") - else: - saved_artifacts = {} - logger.info( - "Usually I would have fetched the results and " - "artifacts and stored them on S3." - ) - - if results_json: - results = get_local_json_content( - local_file=results_json, - ) - else: - results = { - "passed": int(returncode == 0), - } - - results["returncode"] = returncode - - _update_results(results) - - result_queue.put( - State( - "END", - time.time(), - { - "status": "finished", - "last_logs": logs, - "results": results, - "artifacts": saved_artifacts, - }, - ) - ) - - def _run(logger): - # These values will be set as the test runs. 
- session_url = None - runtime = None - anyscale.conf.CLI_TOKEN = GLOBAL_CONFIG["ANYSCALE_CLI_TOKEN"] - test_uses_ray_connect = test_config["run"].get("use_connect") - - session_id = None - scd_id = None - try: - # First, look for running sessions - session_id = search_running_session(sdk, project_id, session_name) - compute_tpl_name = None - app_config_id = app_config_id_override - app_config_name = None - build_id = build_id_override - if not session_id: - logger.info("No session found.") - # Start session - session_options = dict(name=session_name, project_id=project_id) - - if cluster_config is not None: - logging.info("Starting session with cluster config") - cluster_config_str = json.dumps(cluster_config) - session_options["cluster_config"] = cluster_config_str - session_options["cloud_id"] = cloud_id - session_options["uses_app_config"] = False - else: - logging.info("Starting session with app/compute config") - - # Find/create compute template - compute_tpl_id, compute_tpl_name = create_or_find_compute_template( - sdk, project_id, compute_tpl - ) - - url = _format_link(anyscale_compute_tpl_url(compute_tpl_id)) - - logger.info(f"Link to compute template: {url}") - - # Find/create app config - if app_config_id is None: - ( - app_config_id, - app_config_name, - ) = create_or_find_app_config(sdk, project_id, app_config) - else: - logger.info(f"Using override app config {app_config_id}") - app_config_name = sdk.get_app_config(app_config_id).result.name - if build_id is None: - # We might have already retrieved the build ID when - # installing app config packages locally if using - # connect, so only get the build ID if it's not set. - build_id = wait_for_build_or_raise(sdk, app_config_id) - - session_options["compute_template_id"] = compute_tpl_id - session_options["build_id"] = build_id - session_options["uses_app_config"] = True - - # Start session - session_id = create_and_wait_for_session( - sdk=sdk, - stop_event=stop_event, - session_name=session_name, - session_options=session_options, - project_id=project_id, - ) - - prepare_command = test_config["run"].get("prepare") - - # Write test state json - test_state_file = os.path.join(local_dir, "test_state.json") - with open(test_state_file, "wt") as f: - json.dump({"start_time": time.time(), "test_name": test_name}, f) - - on_k8s = test_config["cluster"].get("compute_on_k8s") - if prepare_command or not test_uses_ray_connect: - if test_uses_ray_connect: - logger.info( - "Found a prepare command, so pushing it " "to the session." 
- ) - # Rsync up - logger.info("Syncing files to session...") - session_controller.push( - session_name=session_name, - source=None, - target=None, - config=None, - all_nodes=False, - ) - - logger.info("Syncing test state to session...") - session_controller.push( - session_name=session_name, - source=test_state_file, - target=state_json, - config=None, - all_nodes=False, - ) - - session_url = anyscale_session_url( - project_id=GLOBAL_CONFIG["ANYSCALE_PROJECT"], session_id=session_id - ) - _check_stop(stop_event, "file_sync") - - # Optionally run preparation command - if prepare_command: - logger.info(f"Running preparation command: {prepare_command}") - if on_k8s: - cid = global_command_runner.run_command( - session_name, prepare_command, env_vars - ) - status_code, _ = global_command_runner.wait_command(cid) - if status_code != 0: - raise PrepareCommandRuntimeError() - else: - scd_id, result = run_session_command( - sdk=sdk, - session_id=session_id, - cmd_to_run=prepare_command, - result_queue=result_queue, - env_vars=env_vars, - state_str="CMD_PREPARE", - ) - _, _ = wait_for_session_command_to_complete( - result, - sdk=sdk, - scd_id=scd_id, - stop_event=stop_event, - state_str="CMD_PREPARE", - ) - - if test_uses_ray_connect: - script_args = test_config["run"].get("args", []) - if smoke_test: - script_args += ["--smoke-test"] - min_workers = 0 - for node_type in compute_tpl["worker_node_types"]: - min_workers += node_type["min_workers"] - # Build completed, use job timeout - result_queue.put(State("CMD_RUN", time.time(), None)) - returncode, logs = run_job( - cluster_name=session_name, - compute_tpl_name=compute_tpl_name, - cluster_env_name=app_config_name, - job_name=session_name, - min_workers=min_workers, - script=test_config["run"]["script"], - script_args=script_args, - env_vars=env_vars, - autosuspend=autosuspend_mins, - ) - _process_finished_client_command(returncode, logs) - return - - # Run release test command - cmd_to_run = test_config["run"]["script"] + " " - - args = test_config["run"].get("args", []) - if args: - cmd_to_run += " ".join(args) + " " - - if smoke_test: - cmd_to_run += " --smoke-test" - - if on_k8s: - cmd_id = global_command_runner.run_command( - session_name, cmd_to_run, env_vars=env_vars - ) - else: - scd_id, result = run_session_command( - sdk=sdk, - session_id=session_id, - cmd_to_run=cmd_to_run, - result_queue=result_queue, - env_vars=env_vars, - state_str="CMD_RUN", - ) - - if not kick_off_only: - if on_k8s: - retcode, runtime = global_command_runner.wait_command(cmd_id) - if retcode != 0: - raise RuntimeError("Command errored") - _process_finished_command( - session_controller=session_controller, - scd_id="", - runtime=runtime, - session_url=session_url, - commit_url=commit_url, - ) - else: - _, runtime = wait_for_session_command_to_complete( - result, - sdk=sdk, - scd_id=scd_id, - stop_event=stop_event, - state_str="CMD_RUN", - ) - _process_finished_command( - session_controller=session_controller, - scd_id=scd_id, - runtime=runtime, - session_url=session_url, - commit_url=commit_url, - ) - else: - result_queue.put( - State("END", time.time(), {"status": "kickoff", "last_logs": ""}) - ) - - except (ReleaseTestTimeoutError, Exception) as e: - logger.error(e, exc_info=True) - - logs = str(e) - if scd_id is not None: - try: - logs = ( - logs - + "; Command logs:" - + get_command_logs( - session_controller, scd_id, test_config.get("log_lines", 50) - ) - ) - except Exception as e2: - logger.error(e2, exc_info=True) - - # Long running tests are "finished" 
successfully when - # timed out - if isinstance(e, ReleaseTestTimeoutError) and is_long_running: - _process_finished_command( - session_controller=session_controller, scd_id=scd_id - ) - else: - runtime = None - if isinstance(e, CommandTimeoutError): - error_type = "timeout" - runtime = 0 - exit_code = ExitCode.COMMAND_TIMEOUT - elif isinstance(e, PrepareCommandTimeoutError): - error_type = "infra_timeout" - runtime = None - exit_code = ExitCode.PREPARE_TIMEOUT - elif isinstance(e, FileSyncTimeoutError): - error_type = "infra_timeout" - runtime = None - exit_code = ExitCode.FILESYNC_TIMEOUT - elif isinstance(e, SessionTimeoutError): - error_type = "infra_timeout" - runtime = None - exit_code = ExitCode.SESSION_TIMEOUT - elif isinstance(e, PrepareCommandRuntimeError): - error_type = "infra_timeout" - runtime = None - exit_code = ExitCode.PREPARE_ERROR - elif isinstance(e, AppConfigBuildFailure): - error_type = "infra_timeout" - runtime = None - exit_code = ExitCode.APPCONFIG_BUILD_ERROR - elif isinstance(e, ReleaseTestInfraError): - error_type = "infra_error" - exit_code = ExitCode.INFRA_ERROR - elif isinstance(e, RuntimeError): - error_type = "runtime_error" - runtime = 0 - exit_code = ExitCode.RUNTIME_ERROR - else: - error_type = "unknown timeout" - runtime = None - exit_code = ExitCode.UNKNOWN - - # Add these metadata here to avoid changing SQL schema. - results = {} - results["_runtime"] = runtime - results["_session_url"] = session_url - results["_commit_url"] = commit_url - results["_stable"] = test_config.get("stable", True) - result_queue.put( - State( - "END", - time.time(), - { - "status": error_type, - "last_logs": logs, - "results": results, - "exit_code": exit_code.value, - }, - ) - ) - finally: - if no_terminate: - logger.warning( - "`no_terminate` is set to True, so the session will " - "*not* be terminated!" - ) - else: - _cleanup_session(sdk, session_id) - - def _check_progress(logger): - anyscale.conf.CLI_TOKEN = GLOBAL_CONFIG["ANYSCALE_CLI_TOKEN"] - - should_terminate = False - session_id = None - scd_id = None - try: - existing_session = find_session_by_test_name( - sdk=sdk, - session_controller=session_controller, - temp_dir=temp_dir, - state_json=state_json, - project_id=project_id, - test_name=test_name, - ) - - if existing_session is None: - logger.info(f"Found no existing session for {test_name}") - result_queue.put( - State("END", time.time(), {"status": "nosession", "last_logs": ""}) - ) - return - - session_id, session_name, session_state = existing_session - - logger.info(f"Found existing session for {test_name}: " f"{session_name}") - - scd_id, success = get_latest_running_command_id( - sdk=sdk, session_id=session_id - ) - - latest_result = get_remote_json_content( - temp_dir=temp_dir, - session_name=session_name, - remote_file=results_json, - session_controller=session_controller, - ) - - # Fetch result json and check if it has been updated recently - result_time_key = test_config["run"].get("time_key", None) - maximum_update_delay = test_config["run"].get("max_update_delay", None) - - if result_time_key and maximum_update_delay: - last_update = latest_result.get(result_time_key, None) - - if not last_update: - result_queue.put( - State( - "END", - time.time(), - { - "status": "error", - "last_logs": f"Test did not store " - f"{result_time_key} in the " - f"results json.", - }, - ) - ) - return - - delay = time.time() - last_update - logger.info( - f"Last update was at {last_update:.2f}. 
" - f"This was {delay:.2f} seconds ago " - f"(maximum allowed: {maximum_update_delay})" - ) - - if delay > maximum_update_delay: - raise RuntimeError( - f"Test did not update the results json within " - f"the last {maximum_update_delay} seconds." - ) - - if time.time() - session_state["start_time"] > timeout: - # Long running test reached timeout - logger.info(f"Test command reached timeout after {timeout} seconds") - _process_finished_command( - session_controller=session_controller, - scd_id=scd_id, - results=latest_result, - ) - should_terminate = True - - elif success: - logger.info("All commands finished.") - _process_finished_command( - session_controller=session_controller, - scd_id=scd_id, - results=latest_result, - ) - should_terminate = True - - else: - rest_time = timeout - time.time() + session_state["start_time"] - logger.info( - f"Test command should continue running " f"for {rest_time} seconds" - ) - result_queue.put( - State( - "END", - time.time(), - {"status": "kickoff", "last_logs": "Test is still running"}, - ) - ) - - except Exception as e: - logger.error(e, exc_info=True) - - logs = str(e) - if scd_id is not None: - try: - logs = get_command_logs( - session_controller, scd_id, test_config.get("log_lines", 50) - ) - logs += f"\n{str(e)}" - except Exception as e2: - logger.error(e2, exc_info=True) - - result_queue.put( - State("END", time.time(), {"status": "error", "last_logs": logs}) - ) - should_terminate = True - finally: - if should_terminate: - logger.warning("Terminating session") - _cleanup_session(sdk, session_id) - - if not check_progress: - process = multiprocessing.Process(target=_run, args=(logger,)) - else: - process = multiprocessing.Process(target=_check_progress, args=(logger,)) - - build_timeout = test_config["run"].get("build_timeout", 1800) - prepare_timeout = test_config["run"].get("prepare_timeout", timeout) - - project_url = anyscale_project_url(project_id=GLOBAL_CONFIG["ANYSCALE_PROJECT"]) - logger.info(f"Link to project: {_format_link(project_url)}") - - msg = f"This will now run test {test_name}." - if smoke_test: - msg += " This is a smoke test." - if is_long_running: - msg += " This is a long running test." - logger.info(msg) - - logger.info( - f"Starting process with timeout {timeout} " - f"(prepare timeout {prepare_timeout}, " - f"build timeout {build_timeout})" - ) - process.start() - - # The timeout time will be updated after the build finished - # Build = App config + compute template build and session start - timeout_time = time.time() + build_timeout - - result = {} - while process.is_alive(): - try: - state: State = result_queue.get(timeout=1) - except (Empty, TimeoutError): - if time.time() > timeout_time: - stop_event.set() - logger.warning("Process timed out.") - - if not is_long_running: - logger.warning("Terminating process in 10 seconds.") - time.sleep(10) - logger.warning("Terminating process now.") - process.terminate() - else: - logger.info( - "Process is long running. Give 2 minutes to " - "fetch result and terminate." 
- ) - start_terminate = time.time() - while time.time() < start_terminate + 120 and process.is_alive(): - time.sleep(1) - if process.is_alive(): - logger.warning("Terminating forcefully now.") - process.terminate() - else: - logger.info("Long running results collected.") - break - continue - - if not isinstance(state, State): - raise RuntimeError(f"Expected `State` object, got {result}") - - if state.state == "CMD_PREPARE": - # Reset timeout after build finished - timeout_time = state.timestamp + prepare_timeout - - if state.state == "CMD_RUN": - # Reset timeout after prepare command or build finished - timeout_time = state.timestamp + timeout - - elif state.state == "END": - result = state.data - break - - while not result_queue.empty(): - state = result_queue.get_nowait() - result = state.data - - logger.info("Final check if everything worked.") - try: - result.setdefault("status", "error (status not found)") - except (TimeoutError, Empty): - result = {"status": "timeout", "last_logs": "Test timed out."} - - logger.info(f"Final results: {result}") - - log_results_and_artifacts(result) - - if not keep_results_dir: - logger.info(f"Removing results dir {temp_dir}") - shutil.rmtree(temp_dir) - else: - # Write results.json - with open(os.path.join(temp_dir, "results.json"), "wt") as fp: - json.dump(result, fp) - - out_dir = os.path.expanduser(GLOBAL_CONFIG["RELEASE_RESULTS_DIR"]) - - logger.info( - f"Moving results dir {temp_dir} to persistent location " f"{out_dir}" - ) - - try: - shutil.rmtree(out_dir) - except Exception: - logger.exception( - f"Ran into error when clearing the destination dir: {out_dir}" - ) - - try: - # Use distutils.dir_util.copy_tree() instead of shutil.cptree(), - # which allows existing output directory. - from distutils.dir_util import copy_tree - - copy_tree(temp_dir, out_dir) - except Exception: - logger.exception( - "Ran into error when copying results dir to persistent " - f"location: {out_dir}" - ) - - logger.info(f"Dir contents: {os.listdir(out_dir)}") - - return result - - -def run_test( - test_config_file: str, - test_name: str, - project_id: str, - commit_url: str, - category: str = "unspecified", - smoke_test: bool = False, - no_terminate: bool = False, - kick_off_only: bool = False, - check_progress: bool = False, - report: bool = True, - keep_results_dir: bool = False, - session_name: Optional[str] = None, - app_config_id_override=None, -) -> Dict[str, Any]: - with open(test_config_file, "rt") as f: - test_configs = yaml.safe_load(f) - - test_config_dict = {} - for test_config in test_configs: - name = test_config.pop("name") - test_config_dict[name] = test_config - - if test_name not in test_config_dict: - raise ValueError( - f"Test with name `{test_name}` not found in test config file " - f"at `{test_config_file}`." - ) - - test_config = test_config_dict[test_name] - - if smoke_test and "smoke_test" in test_config: - smoke_test_config = test_config.pop("smoke_test") - test_config = _deep_update(test_config, smoke_test_config) - - local_dir = os.path.dirname(test_config_file) - if "local_dir" in test_config: - # local_dir is relative to test_config_file - local_dir = os.path.join(local_dir, test_config["local_dir"]) - - if test_config["run"].get("use_connect"): - assert not kick_off_only, ( - "--kick-off-only is unsupported when running with " "Anyscale connect." - ) - assert ( - not check_progress - ), "--check is unsupported when running with Anyscale connect." 
- if test_config.get("artifacts", {}): - logger.error( - "Saving artifacts are not yet supported when running with " - "Anyscale connect." - ) - - # Perform necessary driver side setup. - driver_setup_script = test_config.get("driver_setup", None) - if driver_setup_script: - run_bash_script(local_dir, driver_setup_script) - logger.info(test_config) - team = test_config.get("team", "unspecified").strip(" ").lower() - # When running local test, this validates the team name. - # If the team name is not specified, they will be recorded as "unspecified" - if not report and team not in VALID_TEAMS: - logger.warning( - f"Incorrect team name {team} has given." - "Please specify team under the name field in the test config. " - "For example, within nightly_tests.yaml,\n" - "\tname: test_xxx\n" - f"\tteam: {'|'.join(VALID_TEAMS)}\n" - "\tcluster:..." - ) - - result = run_test_config( - local_dir, - project_id, - test_name, - test_config, - commit_url, - session_name=session_name, - smoke_test=smoke_test, - no_terminate=no_terminate, - kick_off_only=kick_off_only, - check_progress=check_progress, - upload_artifacts=report, - keep_results_dir=keep_results_dir, - app_config_id_override=app_config_id_override, - ) - - status = result.get("status", "invalid") - - if kick_off_only: - if status != "kickoff": - raise RuntimeError("Error kicking off test.") - - logger.info( - "Kicked off test. It's now up to the `--check` " - "part of the script to track its process." - ) - return {} - else: - # `--check` or no kick off only - - if status == "nosession": - logger.info( - f"No running session found for test {test_name}, so " - f"assuming everything is fine." - ) - return {} - - if status == "kickoff": - logger.info(f"Test {test_name} is still running.") - return {} - - last_logs = result.get("last_logs", "No logs.") - - test_suite = os.path.basename(test_config_file).replace(".yaml", "") - - report_kwargs = dict( - test_suite=test_suite, - test_name=test_name, - status=status, - last_logs=last_logs, - results=result.get("results", {}), - artifacts=result.get("artifacts", {}), - category=category, - team=team, - ) - - if not has_errored(result): - # Check if result are met if test succeeded - alert = maybe_get_alert_for_result(report_kwargs) - - if alert: - # If we get an alert, the test failed. - logger.error( - f"Alert has been raised for " - f"{test_suite}/{test_name} " - f"({category}): {alert}" - ) - result["status"] = "error (alert raised)" - report_kwargs["status"] = "error (alert raised)" - - # For printing/reporting to the database - report_kwargs["last_logs"] = alert - last_logs = alert - else: - logger.info( - f"No alert raised for test " - f"{test_suite}/{test_name} " - f"({category}) - the test successfully passed!" - ) - - if report: - try: - report_result(**report_kwargs) - except Exception as e: - # On database error the test should still pass - # Todo: flag somewhere else? - logger.exception(f"Error persisting results to database: {e}") - else: - logger.info( - f"Usually I would now report the following results:\n" - f"{report_kwargs}" - ) - - if has_errored(result): - # If the script terminates due to an uncaught error, it - # will return exit code 1, so we use 2 per default to - # catch these cases. 
- exit_code = result.get("exit_code", ExitCode.UNSPECIFIED.value) - logger.error(last_logs) - logger.info(f"Exiting with exit code {exit_code}") - sys.exit(exit_code) - - return report_kwargs - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter - ) - parser.add_argument( - "--test-config", type=str, required=True, help="Test config file" - ) - parser.add_argument("--test-name", type=str, help="Test name in config") - parser.add_argument( - "--ray-wheels", required=False, type=str, help="URL to ray wheels" - ) - parser.add_argument( - "--no-terminate", - action="store_true", - default=False, - help="Don't terminate session after failure", - ) - parser.add_argument( - "--report", - action="store_true", - default=False, - help="Whether to report results and upload to S3", - ) - parser.add_argument( - "--kick-off-only", - action="store_true", - default=False, - help="Kick off only (don't wait for command to finish)", - ) - parser.add_argument( - "--check", - action="store_true", - default=False, - help="Check (long running) status", - ) - parser.add_argument( - "--keep-results-dir", - action="store_true", - default=False, - help="Keep results in directory (named RELEASE_RESULTS_DIR), e.g. " - "for Buildkite artifact upload.", - ) - parser.add_argument( - "--category", - type=str, - default="unspecified", - help="Category name, e.g. `release-1.3.0` (will be saved in database)", - ) - parser.add_argument( - "--smoke-test", action="store_true", help="Finish quickly for testing" - ) - parser.add_argument( - "--session-name", - required=False, - type=str, - help="Name of the session to run this test.", - ) - parser.add_argument( - "--app-config-id-override", - required=False, - type=str, - help=("An app config ID, which will override the test config app " "config."), - ) - args, _ = parser.parse_known_args() - - if not GLOBAL_CONFIG["ANYSCALE_PROJECT"]: - raise RuntimeError("You have to set the ANYSCALE_PROJECT environment variable!") - - ray_wheels = args.ray_wheels or os.environ.get("RAY_WHEELS", "") - - maybe_fetch_api_token() - if ray_wheels: - logger.info(f"Using Ray wheels provided from URL/commit: " f"{ray_wheels}") - url = commit_or_url(str(ray_wheels)) - logger.info(f"Resolved url link is: {url}") - # Overwrite with actual URL - os.environ["RAY_WHEELS"] = url - elif not args.check: - url = find_ray_wheels( - GLOBAL_CONFIG["RAY_REPO"], - GLOBAL_CONFIG["RAY_BRANCH"], - GLOBAL_CONFIG["RAY_VERSION"], - ) - if not url: - raise RuntimeError( - f"Could not find wheels for " - f"Ray {GLOBAL_CONFIG['RAY_VERSION']}, " - f"branch {GLOBAL_CONFIG['RAY_BRANCH']}" - ) - - # RAY_COMMIT is set by commit_or_url and find_ray_wheels - populate_wheels_sanity_check(os.environ.get("RAY_COMMIT", "")) - - test_config_file = os.path.abspath(os.path.expanduser(args.test_config)) - - # Override it from the global variable. 
- report = GLOBAL_CONFIG["REPORT_RESULT"] - if report.lower() == "1" or report.lower() == "true": - report = True - else: - report = args.report - - run_test( - test_config_file=test_config_file, - test_name=args.test_name, - project_id=GLOBAL_CONFIG["ANYSCALE_PROJECT"], - commit_url=url, - category=args.category, - smoke_test=args.smoke_test, - no_terminate=args.no_terminate or args.kick_off_only, - kick_off_only=args.kick_off_only, - check_progress=args.check, - report=report, - session_name=args.session_name, - keep_results_dir=args.keep_results_dir, - app_config_id_override=args.app_config_id_override, - ) diff --git a/release/horovod_tests/horovod_tests.yaml b/release/horovod_tests/horovod_tests.yaml deleted file mode 100644 index ce0abe719..000000000 --- a/release/horovod_tests/horovod_tests.yaml +++ /dev/null @@ -1,15 +0,0 @@ -- name: horovod_test - team: ml - cluster: - app_config: app_config_master.yaml - compute_template: compute_tpl.yaml - - run: - timeout: 36000 - prepare: python wait_cluster.py 3 600 - script: python workloads/horovod_tune_test.py - long_running: True - - smoke_test: - run: - timeout: 1800 diff --git a/release/horovod_tests/wait_cluster.py b/release/horovod_tests/wait_cluster.py deleted file mode 100644 index c02330db2..000000000 --- a/release/horovod_tests/wait_cluster.py +++ /dev/null @@ -1,53 +0,0 @@ -import argparse -import time - -import ray - -ray.init(address="auto") - -parser = argparse.ArgumentParser() -parser.add_argument( - "num_nodes", type=int, help="Wait for this number of nodes (includes head)" -) - -parser.add_argument("max_time_s", type=int, help="Wait for this number of seconds") - -parser.add_argument( - "--feedback_interval_s", - type=int, - default=10, - help="Wait for this number of seconds", -) - -args = parser.parse_args() - -curr_nodes = 0 -start = time.time() -next_feedback = start -max_time = start + args.max_time_s -while not curr_nodes >= args.num_nodes: - now = time.time() - - if now >= max_time: - raise RuntimeError( - f"Maximum wait time reached, but only " - f"{curr_nodes}/{args.num_nodes} nodes came up. Aborting." 
- ) - - if now >= next_feedback: - passed = now - start - print( - f"Waiting for more nodes to come up: " - f"{curr_nodes}/{args.num_nodes} " - f"({passed:.0f} seconds passed)" - ) - next_feedback = now + args.feedback_interval_s - - time.sleep(5) - curr_nodes = len(ray.nodes()) - -passed = time.time() - start -print( - f"Cluster is up: {curr_nodes}/{args.num_nodes} nodes online after " - f"{passed:.0f} seconds" -) diff --git a/release/lightgbm_tests/lightgbm_tests.yaml b/release/lightgbm_tests/lightgbm_tests.yaml deleted file mode 100644 index 07aa9e5cf..000000000 --- a/release/lightgbm_tests/lightgbm_tests.yaml +++ /dev/null @@ -1,92 +0,0 @@ -- name: train_small - team: ml - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_small.yaml - - run: - use_connect: True - autosuspend_mins: 10 - timeout: 600 - prepare: python wait_cluster.py 4 600 - script: python workloads/train_small.py - -- name: train_moderate - team: ml - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_moderate.yaml - - run: - timeout: 600 - prepare: python wait_cluster.py 32 600 - script: python workloads/train_moderate.py - -- name: train_gpu - team: ml - cluster: - app_config: app_config_gpu.yaml - compute_template: tpl_gpu_small.yaml - - run: - timeout: 600 - prepare: python wait_cluster.py 5 600 - script: python workloads/train_gpu.py - -- name: distributed_api_test - team: ml - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_small.yaml - results: - - run: - timeout: 600 - prepare: python wait_cluster.py 4 600 - script: python workloads/distributed_api_test.py - results: "" - -- name: ft_small_non_elastic - team: ml - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_small.yaml - - run: - timeout: 900 - prepare: python wait_cluster.py 4 600 - script: python workloads/ft_small_non_elastic.py - results: "" - -- name: tune_small - team: ml - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_small.yaml - - run: - timeout: 600 - prepare: python wait_cluster.py 4 600 - script: python workloads/tune_small.py - -- name: tune_32x4 - team: ml - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_moderate.yaml - - run: - timeout: 900 - prepare: python wait_cluster.py 32 600 - script: python workloads/tune_32x4.py - -- name: tune_4x32 - team: ml - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_moderate.yaml - - run: - timeout: 900 - prepare: python wait_cluster.py 32 600 - script: python workloads/tune_4x32.py diff --git a/release/lightgbm_tests/wait_cluster.py b/release/lightgbm_tests/wait_cluster.py deleted file mode 100644 index c02330db2..000000000 --- a/release/lightgbm_tests/wait_cluster.py +++ /dev/null @@ -1,53 +0,0 @@ -import argparse -import time - -import ray - -ray.init(address="auto") - -parser = argparse.ArgumentParser() -parser.add_argument( - "num_nodes", type=int, help="Wait for this number of nodes (includes head)" -) - -parser.add_argument("max_time_s", type=int, help="Wait for this number of seconds") - -parser.add_argument( - "--feedback_interval_s", - type=int, - default=10, - help="Wait for this number of seconds", -) - -args = parser.parse_args() - -curr_nodes = 0 -start = time.time() -next_feedback = start -max_time = start + args.max_time_s -while not curr_nodes >= args.num_nodes: - now = time.time() - - if now >= max_time: - raise RuntimeError( - f"Maximum wait time reached, but only " - f"{curr_nodes}/{args.num_nodes} nodes came up. Aborting." 
- ) - - if now >= next_feedback: - passed = now - start - print( - f"Waiting for more nodes to come up: " - f"{curr_nodes}/{args.num_nodes} " - f"({passed:.0f} seconds passed)" - ) - next_feedback = now + args.feedback_interval_s - - time.sleep(5) - curr_nodes = len(ray.nodes()) - -passed = time.time() - start -print( - f"Cluster is up: {curr_nodes}/{args.num_nodes} nodes online after " - f"{passed:.0f} seconds" -) diff --git a/release/long_running_distributed_tests/long_running_distributed.yaml b/release/long_running_distributed_tests/long_running_distributed.yaml deleted file mode 100644 index 189ffd3f9..000000000 --- a/release/long_running_distributed_tests/long_running_distributed.yaml +++ /dev/null @@ -1,13 +0,0 @@ -- name: pytorch_pbt_failure - team: ml - cluster: - app_config: app_config.yaml - compute_template: compute_tpl.yaml - - run: - timeout: 86400 - script: python workloads/pytorch_pbt_failure.py - long_running: True - - smoke_test: - timeout: 3600 diff --git a/release/long_running_tests/long_running_tests.yaml b/release/long_running_tests/long_running_tests.yaml deleted file mode 100644 index 05d4245d0..000000000 --- a/release/long_running_tests/long_running_tests.yaml +++ /dev/null @@ -1,196 +0,0 @@ -- name: actor_deaths - team: core - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_1.yaml - - run: - timeout: 86400 - prepare: ray stop - script: python workloads/actor_deaths.py - long_running: True - - smoke_test: - run: - timeout: 3600 - -- name: apex - team: ml - cluster: - app_config: ../rllib_tests/app_config.yaml - compute_template: tpl_cpu_3.yaml - - run: - timeout: 86400 - prepare: python wait_cluster.py 3 600 - script: python workloads/apex.py - long_running: True - - smoke_test: - run: - timeout: 3600 - - -- name: impala - team: ml - cluster: - app_config: app_config_np.yaml - compute_template: tpl_cpu_1_large.yaml - - run: - timeout: 86400 - script: python workloads/impala.py - long_running: True - - smoke_test: - run: - timeout: 3600 - -- name: many_actor_tasks - team: core - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_1.yaml - - run: - timeout: 86400 - prepare: ray stop - script: python workloads/many_actor_tasks.py - long_running: True - - smoke_test: - run: - timeout: 3600 - - -- name: many_drivers - team: core - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_1.yaml - - run: - timeout: 86400 - prepare: ray stop - script: python workloads/many_drivers.py --iteration-num=4000 - long_running: True - - smoke_test: - run: - timeout: 3600 - - -- name: many_ppo - team: ml - cluster: - app_config: ../rllib_tests/app_config.yaml - compute_template: many_ppo.yaml - - run: - timeout: 86400 - prepare: python wait_cluster.py 1 600 - script: python workloads/many_ppo.py - long_running: True - - smoke_test: - run: - timeout: 3600 - -- name: many_tasks - team: core - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_1.yaml - - run: - timeout: 86400 - prepare: ray stop - script: python workloads/many_tasks.py - long_running: True - - smoke_test: - run: - timeout: 3600 - -- name: many_tasks_serialized_ids - team: core - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_1.yaml - - run: - timeout: 86400 - prepare: ray stop - script: python workloads/many_tasks_serialized_ids.py - long_running: True - - smoke_test: - run: - timeout: 3600 - - -- name: node_failures - team: core - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_1.yaml - - run: - timeout: 86400 - prepare: 
ray stop - script: python workloads/node_failures.py - long_running: True - - smoke_test: - run: - timeout: 3600 - -- name: pbt - team: ml - cluster: - app_config: ../rllib_tests/app_config.yaml - compute_template: tpl_cpu_1.yaml - - run: - timeout: 86400 - prepare: ray stop - script: python workloads/pbt.py - long_running: True - - smoke_test: - run: - timeout: 3600 - -- name: serve - team: serve - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_1.yaml - - run: - timeout: 86400 - prepare: ray stop - script: python workloads/serve.py - long_running: True - - smoke_test: - run: - timeout: 3600 - -- name: serve_failure - team: serve - cluster: - app_config: app_config.yaml - compute_template: tpl_cpu_1.yaml - - run: - timeout: 86400 - prepare: ray stop - script: python workloads/serve_failure.py - long_running: True - - smoke_test: - run: - timeout: 600 - - stable: False diff --git a/release/long_running_tests/wait_cluster.py b/release/long_running_tests/wait_cluster.py deleted file mode 100644 index c02330db2..000000000 --- a/release/long_running_tests/wait_cluster.py +++ /dev/null @@ -1,53 +0,0 @@ -import argparse -import time - -import ray - -ray.init(address="auto") - -parser = argparse.ArgumentParser() -parser.add_argument( - "num_nodes", type=int, help="Wait for this number of nodes (includes head)" -) - -parser.add_argument("max_time_s", type=int, help="Wait for this number of seconds") - -parser.add_argument( - "--feedback_interval_s", - type=int, - default=10, - help="Wait for this number of seconds", -) - -args = parser.parse_args() - -curr_nodes = 0 -start = time.time() -next_feedback = start -max_time = start + args.max_time_s -while not curr_nodes >= args.num_nodes: - now = time.time() - - if now >= max_time: - raise RuntimeError( - f"Maximum wait time reached, but only " - f"{curr_nodes}/{args.num_nodes} nodes came up. Aborting." 
- ) - - if now >= next_feedback: - passed = now - start - print( - f"Waiting for more nodes to come up: " - f"{curr_nodes}/{args.num_nodes} " - f"({passed:.0f} seconds passed)" - ) - next_feedback = now + args.feedback_interval_s - - time.sleep(5) - curr_nodes = len(ray.nodes()) - -passed = time.time() - start -print( - f"Cluster is up: {curr_nodes}/{args.num_nodes} nodes online after " - f"{passed:.0f} seconds" -) diff --git a/release/microbenchmark/microbenchmark.yaml b/release/microbenchmark/microbenchmark.yaml deleted file mode 100644 index 7b1c6c336..000000000 --- a/release/microbenchmark/microbenchmark.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# - name: microbenchmark -# team: core -# cluster: -# app_config: app_config.yaml -# compute_template: tpl_64.yaml - -# run: -# timeout: 1800 -# script: OMP_NUM_THREADS=64 RAY_ADDRESS= python run_microbenchmark.py diff --git a/release/ml_user_tests/ml_user_tests.yaml b/release/ml_user_tests/ml_user_tests.yaml deleted file mode 100644 index 8c6a8162e..000000000 --- a/release/ml_user_tests/ml_user_tests.yaml +++ /dev/null @@ -1,124 +0,0 @@ -- name: horovod_user_test_latest - team: ml - cluster: - app_config: horovod/app_config.yaml - compute_template: horovod/compute_tpl.yaml - - - driver_setup: horovod/driver_setup_latest.sh - - run: - use_connect: True - autosuspend_mins: 10 - timeout: 1200 - script: python horovod/horovod_user_test.py - -- name: horovod_user_test_master - team: ml - cluster: - app_config: ../horovod_tests/app_config_master.yaml - compute_template: horovod/compute_tpl.yaml - - driver_setup: horovod/driver_setup_master.sh - - run: - use_connect: True - autosuspend_mins: 10 - timeout: 1200 - script: python horovod/horovod_user_test.py - - -- name: train_tensorflow_mnist_test - team: ml - cluster: - app_config: train/app_config.yaml - compute_template: train/compute_tpl.yaml - - driver_setup: train/driver_setup.sh - - run: - use_connect: True - timeout: 36000 - script: python train/train_tensorflow_mnist_test.py - -- name: train_torch_linear_test - team: ml - cluster: - app_config: train/app_config.yaml - compute_template: train/compute_tpl.yaml - - driver_setup: train/driver_setup.sh - - run: - use_connect: True - timeout: 36000 - script: python train/train_torch_linear_test.py - - -- name: xgboost_gpu_connect_latest - team: ml - cluster: - app_config: xgboost/app_config_gpu.yaml - compute_template: xgboost/tpl_gpu_small_scaling.yaml - - run: - use_connect: True - timeout: 1200 - script: python xgboost/train_gpu_connect.py - -- name: xgboost_gpu_connect_master - team: ml - cluster: - app_config: xgboost/app_config_gpu_master.yaml - compute_template: xgboost/tpl_gpu_small_scaling.yaml - - run: - use_connect: True - timeout: 1200 - script: python xgboost/train_gpu_connect.py - -- name: ray_lightning_user_test_latest - team: ml - cluster: - app_config: ray-lightning/app_config.yaml - compute_template: ray-lightning/compute_tpl.yaml - - driver_setup: ray-lightning/driver_setup.sh - - run: - use_connect: True - autosuspend_mins: 10 - timeout: 1200 - script: python ray-lightning/ray_lightning_user_test.py - - -- name: ray_lightning_user_test_master - team: ml - cluster: - app_config: ray-lightning/app_config_master.yaml - compute_template: ray-lightning/compute_tpl.yaml - - - driver_setup: ray-lightning/driver_setup.sh - - run: - use_connect: True - autosuspend_mins: 10 - timeout: 1200 - script: python ray-lightning/ray_lightning_user_test.py - - -- name: tune_rllib_connect_test - team: ml - cluster: - app_config: 
../rllib_tests/app_config.yaml - compute_template: tune_rllib/compute_tpl.yaml - - - driver_setup: tune_rllib/driver_setup.sh - - run: - use_connect: True - autosuspend_mins: 10 - timeout: 1200 - script: python tune_rllib/run_connect_tests.py \ No newline at end of file diff --git a/release/nightly_tests/chaos_test.yaml b/release/nightly_tests/chaos_test.yaml deleted file mode 100644 index f24cdcf16..000000000 --- a/release/nightly_tests/chaos_test.yaml +++ /dev/null @@ -1,64 +0,0 @@ -# -# Chaos tests. -# - -# Run the test that invokes many tasks without object store usage. -- name: chaos_many_tasks_no_object_store - team: core - cluster: - app_config: chaos_test/app_config.yaml - compute_template: chaos_test/compute_template.yaml - - run: - timeout: 3600 - prepare: python wait_cluster.py 10 600; python setup_chaos.py --no-start - script: python chaos_test/test_chaos_basic.py --workload=tasks - -- name: chaos_many_actors - team: core - cluster: - app_config: chaos_test/app_config.yaml - compute_template: chaos_test/compute_template.yaml - - run: - timeout: 3600 - prepare: python wait_cluster.py 10 600; python setup_chaos.py --no-start - script: python chaos_test/test_chaos_basic.py --workload=actors - -- name: chaos_dask_on_ray_large_scale_test_no_spilling - team: core - cluster: - app_config: chaos_test/dask_on_ray_app_config_reconstruction.yaml - compute_template: dask_on_ray/dask_on_ray_stress_compute.yaml - - run: - timeout: 7200 - # Total run time without failures is about 300-400s. - prepare: python wait_cluster.py 21 600; python setup_chaos.py --node-kill-interval 100 - script: python dask_on_ray/large_scale_test.py --num_workers 20 --worker_obj_store_size_in_gb 20 --error_rate 0 --data_save_path /tmp/ray - -# Test large scale dask on ray test with spilling. -- name: chaos_dask_on_ray_large_scale_test_spilling - team: core - cluster: - app_config: chaos_test/dask_on_ray_app_config_reconstruction.yaml - compute_template: dask_on_ray/dask_on_ray_stress_compute.yaml - - run: - timeout: 7200 - # Total run time without failures is about 300-400s. 
- prepare: python wait_cluster.py 21 600; python setup_chaos.py --node-kill-interval 100 - script: python dask_on_ray/large_scale_test.py --num_workers 150 --worker_obj_store_size_in_gb 70 --error_rate 0 --data_save_path /tmp/ray - -- name: chaos_pipelined_ingestion_1500_gb_15_windows - team: core - cluster: - app_config: dataset/pipelined_ingestion_app.yaml - compute_template: dataset/pipelined_ingestion_compute.yaml - - run: - timeout: 7200 - prepare: python wait_cluster.py 21 2400; python setup_chaos.py --node-kill-interval 300 - script: python dataset/pipelined_training.py --epochs 1 --num-windows 15 --num-files 915 --debug - - stable: false diff --git a/release/nightly_tests/dataset/dataset_test.yaml b/release/nightly_tests/dataset/dataset_test.yaml deleted file mode 100644 index 8ac02a36a..000000000 --- a/release/nightly_tests/dataset/dataset_test.yaml +++ /dev/null @@ -1,95 +0,0 @@ -- name: inference - team: core - cluster: - app_config: app_config.yaml - compute_template: inference.yaml - - run: - timeout: 600 - prepare: python wait_cluster.py 2 600 - script: python inference.py - -- name: shuffle_data_loader - team: core - cluster: - app_config: shuffle_app_config.yaml - compute_template: shuffle_compute.yaml - - run: - timeout: 1800 - script: python dataset_shuffle_data_loader.py - -- name: parquet_metadata_resolution - team: core - cluster: - app_config: pipelined_training_app.yaml - compute_template: pipelined_training_compute.yaml - - run: - timeout: 1200 - prepare: python wait_cluster.py 15 1200 - script: python parquet_metadata_resolution.py --num-files 915 - -- name: pipelined_training_50_gb - team: core - cluster: - app_config: pipelined_training_app.yaml - compute_template: pipelined_training_compute.yaml - - run: - timeout: 4800 - prepare: python wait_cluster.py 15 1200 - script: python pipelined_training.py --epochs 1 - -- name: pipelined_ingestion_1500_gb - team: core - cluster: - app_config: pipelined_ingestion_app.yaml - compute_template: pipelined_ingestion_compute.yaml - - run: - timeout: 9600 - prepare: python wait_cluster.py 21 2400 - script: python pipelined_training.py --epochs 2 --num-windows 2 --num-files 915 --debug - -- name: datasets_ingest_train_infer - team: core - cluster: - app_config: ray_sgd_training_app.yaml - compute_template: ray_sgd_training_compute.yaml - - run: - timeout: 14400 - prepare: python wait_cluster.py 66 2400 - script: python ray_sgd_training.py --address auto --use-s3 --num-workers 16 --use-gpu --large-dataset - - smoke_test: - cluster: - app_config: ray_sgd_training_app.yaml - compute_template: ray_sgd_training_smoke_compute.yaml - - run: - timeout: 3600 - prepare: python wait_cluster.py 8 2400 - script: python ray_sgd_training.py --address auto --use-s3 --num-workers 8 --use-gpu - -- name: datasets_preprocess_ingest - team: core - cluster: - app_config: ray_sgd_training_app.yaml - compute_template: ray_sgd_training_compute_no_gpu.yaml - - run: - timeout: 7200 - prepare: python wait_cluster.py 21 2400 - script: python ray_sgd_training.py --address auto --use-s3 --num-workers 16 --use-gpu --large-dataset --debug - -- name: datasets_ingest_400G - team: core - cluster: - app_config: ray_sgd_training_app.yaml - compute_template: dataset_ingest_400G_compute.yaml - - run: - timeout: 7200 - script: python ray_sgd_runner.py --address auto --use-gpu --num-epochs 1 diff --git a/release/nightly_tests/dataset/wait_cluster.py b/release/nightly_tests/dataset/wait_cluster.py deleted file mode 100644 index c02330db2..000000000 --- 
a/release/nightly_tests/dataset/wait_cluster.py +++ /dev/null @@ -1,53 +0,0 @@ -import argparse -import time - -import ray - -ray.init(address="auto") - -parser = argparse.ArgumentParser() -parser.add_argument( - "num_nodes", type=int, help="Wait for this number of nodes (includes head)" -) - -parser.add_argument("max_time_s", type=int, help="Wait for this number of seconds") - -parser.add_argument( - "--feedback_interval_s", - type=int, - default=10, - help="Wait for this number of seconds", -) - -args = parser.parse_args() - -curr_nodes = 0 -start = time.time() -next_feedback = start -max_time = start + args.max_time_s -while not curr_nodes >= args.num_nodes: - now = time.time() - - if now >= max_time: - raise RuntimeError( - f"Maximum wait time reached, but only " - f"{curr_nodes}/{args.num_nodes} nodes came up. Aborting." - ) - - if now >= next_feedback: - passed = now - start - print( - f"Waiting for more nodes to come up: " - f"{curr_nodes}/{args.num_nodes} " - f"({passed:.0f} seconds passed)" - ) - next_feedback = now + args.feedback_interval_s - - time.sleep(5) - curr_nodes = len(ray.nodes()) - -passed = time.time() - start -print( - f"Cluster is up: {curr_nodes}/{args.num_nodes} nodes online after " - f"{passed:.0f} seconds" -) diff --git a/release/nightly_tests/nightly_tests.yaml b/release/nightly_tests/nightly_tests.yaml deleted file mode 100644 index 2d0f90b94..000000000 --- a/release/nightly_tests/nightly_tests.yaml +++ /dev/null @@ -1,390 +0,0 @@ -# -# Single node shuffle -# -# Test basic single node 10GB shuffle with a small number of partitions. -# This doesn't require object spilling. -# - name: shuffle_10gb -# team: core -# cluster: -# app_config: shuffle/shuffle_app_config.yaml -# compute_template: shuffle/shuffle_compute_single.yaml - -# run: -# timeout: 3000 -# script: python shuffle/shuffle_test.py --num-partitions=50 --partition-size=200e6 - -# Test single node 50GB shuffle with a large number of partitions. -- name: shuffle_50gb - team: core - cluster: - app_config: shuffle/shuffle_app_config.yaml - compute_template: shuffle/shuffle_compute_single.yaml - - run: - timeout: 3000 - script: python shuffle/shuffle_test.py --num-partitions=50 --partition-size=1e9 - -# Test single node 50GB shuffle with a large number of partitions. -- name: shuffle_50gb_large_partition - team: core - cluster: - app_config: shuffle/shuffle_app_config.yaml - compute_template: shuffle/shuffle_compute_single.yaml - - run: - timeout: 3000 - script: python shuffle/shuffle_test.py --num-partitions=500 --partition-size=100e6 - -# Test non streaming shuffle in a single node with a small number of partition. -- name: non_streaming_shuffle_50gb - team: core - cluster: - app_config: shuffle/shuffle_app_config.yaml - compute_template: shuffle/shuffle_compute_single.yaml - - run: - timeout: 3000 - script: python shuffle/shuffle_test.py --num-partitions=50 --partition-size=1e9 --no-streaming - -# Test non streaming shuffle in a single node with a large number of partition. 
-- name: non_streaming_shuffle_50gb_large_partition - team: core - cluster: - app_config: shuffle/shuffle_app_config.yaml - compute_template: shuffle/shuffle_compute_single.yaml - - run: - timeout: 3000 - script: python shuffle/shuffle_test.py --num-partitions=500 --partition-size=100e6 --no-streaming - -- name: dask_on_ray_10gb_sort - team: core - cluster: - app_config: dask_on_ray/dask_on_ray_app_config.yaml - compute_template: dask_on_ray/dask_on_ray_sort_compute_template.yaml - - run: - timeout: 7200 - script: python dask_on_ray/dask_on_ray_sort.py --nbytes 10_000_000_000 --npartitions 50 --num-nodes 1 --ray --data-dir /tmp/ray --file-path /tmp/ray - -- name: dask_on_ray_100gb_sort - team: core - cluster: - app_config: dask_on_ray/dask_on_ray_app_config.yaml - compute_template: dask_on_ray/dask_on_ray_sort_compute_template.yaml - - run: - timeout: 7200 - script: python dask_on_ray/dask_on_ray_sort.py --nbytes 100_000_000_000 --npartitions 200 --num-nodes 1 --ray --data-dir /tmp/ray --file-path /tmp/ray - -# -# Multi node shuffle -# - -# Test multi nodes 100GB shuffle with a small number of partitions. -- name: shuffle_100gb - team: core - cluster: - app_config: shuffle/shuffle_app_config.yaml - compute_template: shuffle/shuffle_compute_multi.yaml - - run: - timeout: 3000 - prepare: python wait_cluster.py 4 600 - script: python shuffle/shuffle_test.py --num-partitions=200 --partition-size=500e6 - -# Test non streaming multi nodes 100GB shuffle with a small number of partitions. -- name: non_streaming_shuffle_100gb - team: core - cluster: - app_config: shuffle/shuffle_app_config.yaml - compute_template: shuffle/shuffle_compute_multi.yaml - - run: - timeout: 3000 - prepare: python wait_cluster.py 4 600 - script: python shuffle/shuffle_test.py --num-partitions=200 --partition-size=500e6 --no-streaming - -# Test autoscaling 1TB streaming shuffle with a large number of partitions. -- name: autoscaling_shuffle_1tb_1000_partitions - team: core - cluster: - app_config: shuffle/shuffle_app_config.yaml - compute_template: shuffle/shuffle_compute_autoscaling.yaml - - run: - timeout: 4000 - script: python shuffle/shuffle_test.py --num-partitions=1000 --partition-size=1e9 --no-streaming - -# Test multi nodes 1TB streaming shuffle with a large number of partitions. -- name: shuffle_1tb_1000_partition - team: core - cluster: - app_config: shuffle/shuffle_app_config.yaml - compute_template: shuffle/shuffle_compute_large_scale.yaml - - run: - timeout: 3000 - prepare: python wait_cluster.py 20 900 - script: python shuffle/shuffle_test.py --num-partitions=1000 --partition-size=1e9 - -# Test multi nodes 1TB non streaming shuffle with a large number of partitions. -- name: non_streaming_shuffle_1tb_1000_partition - team: core - cluster: - app_config: shuffle/shuffle_app_config.yaml - compute_template: shuffle/shuffle_compute_large_scale.yaml - - run: - timeout: 3000 - prepare: python wait_cluster.py 20 900 - script: python shuffle/shuffle_test.py --num-partitions=1000 --partition-size=1e9 --no-streaming - -# Stress test for 1TB multi node streaming shuffle. -- name: shuffle_1tb_5000_partitions - team: core - cluster: - app_config: shuffle/shuffle_app_config.yaml - compute_template: shuffle/shuffle_compute_large_scale.yaml - - run: - timeout: 9000 - prepare: python wait_cluster.py 20 900 - script: python shuffle/shuffle_test.py --num-partitions=5000 --partition-size=200e6 - -# Stress test for 1TB multi node non-streaming shuffle. 
-# - name: non_streaming_shuffle_1tb_5000_partitions -# team: core -# stable: False -# cluster: -# app_config: shuffle/shuffle_app_config.yaml -# compute_template: shuffle/shuffle_compute_large_scale.yaml - -# run: -# timeout: 7200 -# prepare: python wait_cluster.py 20 900 -# script: python shuffle/shuffle_test.py --num-partitions=5000 --partition-size=200e6 --no-streaming - -- name: k8s_dask_on_ray_large_scale_test_no_spilling - team: core - cluster: - app_config: dask_on_ray/large_scale_dask_on_ray_app_config.yaml - compute_template: dask_on_ray/dask_on_ray_stress_compute_k8s.yaml - compute_on_k8s: True - - run: - timeout: 7200 - prepare: python wait_cluster.py 21 600 - script: python dask_on_ray/large_scale_test.py --num_workers 20 --worker_obj_store_size_in_gb 20 --error_rate 0 --data_save_path /tmp/ray - stable: false - -# # Test large scale dask on ray test without spilling. -# - name: dask_on_ray_large_scale_test_no_spilling -# team: core -# cluster: -# app_config: dask_on_ray/large_scale_dask_on_ray_app_config.yaml -# compute_template: dask_on_ray/dask_on_ray_stress_compute.yaml - -# run: -# timeout: 7200 -# prepare: python wait_cluster.py 21 600 -# script: python dask_on_ray/large_scale_test.py --num_workers 20 --worker_obj_store_size_in_gb 20 --error_rate 0 --data_save_path /tmp/ray - -# smoke_test: -# cluster: -# app_config: dask_on_ray/large_scale_dask_on_ray_app_config.yaml -# compute_template: dask_on_ray/large_scale_dask_on_ray_compute_template.yaml - -# run: -# timeout: 7200 -# prepare: python wait_cluster.py 5 600 -# script: python dask_on_ray/large_scale_test.py --num_workers 4 --worker_obj_store_size_in_gb 20 --error_rate 0 --data_save_path /tmp/ray - -# Test large scale dask on ray test with spilling. -- name: dask_on_ray_large_scale_test_spilling - team: core - cluster: - app_config: dask_on_ray/large_scale_dask_on_ray_app_config.yaml - compute_template: dask_on_ray/dask_on_ray_stress_compute.yaml - - run: - timeout: 7200 - prepare: python wait_cluster.py 21 600 - script: python dask_on_ray/large_scale_test.py --num_workers 150 --worker_obj_store_size_in_gb 70 --error_rate 0 --data_save_path /tmp/ray - - smoke_test: - cluster: - app_config: dask_on_ray/large_scale_dask_on_ray_app_config.yaml - compute_template: dask_on_ray/large_scale_dask_on_ray_compute_template.yaml - - run: - timeout: 7200 - prepare: python wait_cluster.py 5 600 - script: python dask_on_ray/large_scale_test.py --num_workers 32 --worker_obj_store_size_in_gb 70 --error_rate 0 --data_save_path /tmp/ray - -# Stress tests with many tasks -- name: stress_test_many_tasks - team: core - cluster: - app_config: stress_tests/stress_tests_app_config.yaml - compute_template: stress_tests/stress_tests_compute.yaml - - run: - timeout: 7200 - script: python stress_tests/test_many_tasks.py - - smoke_test: - cluster: - app_config: stress_tests/stress_tests_app_config.yaml - compute_template: stress_tests/smoke_test_compute.yaml - - run: - timeout: 3600 - script: python stress_tests/test_many_tasks.py --num-nodes=4 --smoke-test - -# Stress tests with dead actors -- name: stress_test_dead_actors - team: core - cluster: - app_config: stress_tests/stress_tests_app_config.yaml - compute_template: stress_tests/stress_tests_compute.yaml - - run: - timeout: 7200 - script: python stress_tests/test_dead_actors.py - - smoke_test: - cluster: - app_config: stress_tests/stress_tests_app_config.yaml - compute_template: stress_tests/smoke_test_compute.yaml - - run: - timeout: 3600 - script: python stress_tests/test_dead_actors.py 
--num-nodes=4 --num-parents=3 --num-children=3 - -# Stress tests with placement groups -- name: stress_test_placement_group - team: core - cluster: - app_config: stress_tests/stress_tests_app_config.yaml - compute_template: stress_tests/placement_group_tests_compute.yaml - - run: - timeout: 7200 - script: python stress_tests/test_placement_group.py - -# Stress tests with many threaded actors. -- name: threaded_actors_stress_test - team: core - cluster: - app_config: stress_tests/stress_tests_app_config.yaml - compute_template: stress_tests/stress_test_threaded_actor_compute.yaml - - run: - timeout: 7200 - prepare: python wait_cluster.py 201 600 - script: python stress_tests/test_threaded_actors.py --test-runtime 3600 --kill-interval_s 60 - - smoke_test: - cluster: - app_config: stress_tests/stress_tests_app_config.yaml - compute_template: stress_tests/smoke_test_compute.yaml - - run: - timeout: 3600 - prepare: python wait_cluster.py 5 600 - script: python stress_tests/test_threaded_actors.py --test-runtime 1800 --kill-interval_s 30 - stable: false - -- name: k8s_threaded_actors_stress_test - team: core - cluster: - app_config: stress_tests/stress_tests_app_config.yaml - compute_template: stress_tests/k8s_stress_test_threaded_actor_compute.yaml - compute_on_k8s: True - - run: - timeout: 7200 - prepare: python wait_cluster.py 201 600 - script: python stress_tests/test_threaded_actors.py --test-runtime 3600 --kill-interval_s 60 - - run: - timeout: 3600 - prepare: python wait_cluster.py 5 600 - script: python stress_tests/test_threaded_actors.py --test-runtime 1800 --kill-interval_s 30 - stable: false - -# Test decision tree on autoscaling compute cluster. -- name: decision_tree_autoscaling - team: core - cluster: - app_config: decision_tree/decision_tree_app_config.yaml - compute_template: decision_tree/autoscaling_compute.yaml - - run: - timeout: 3000 - script: python decision_tree/cart_with_tree.py - -# Test 20 concurrent decision tree runs on autoscaling compute cluster. 
-- name: decision_tree_autoscaling_20_runs - team: core - cluster: - app_config: decision_tree/decision_tree_app_config.yaml - compute_template: decision_tree/autoscaling_compute.yaml - run: - timeout: 9600 - script: python decision_tree/cart_with_tree.py --concurrency=20 - -- name: dask_on_ray_1tb_sort - team: core - cluster: - app_config: dask_on_ray/dask_on_ray_app_config.yaml - compute_template: dask_on_ray/1tb_sort_compute.yaml - - run: - timeout: 7200 - prepare: python wait_cluster.py 32 1000 - script: python dask_on_ray/dask_on_ray_sort.py --nbytes 1_000_000_000_000 --npartitions 1000 --num-nodes 31 --ray --data-dir /tmp/ray --s3-bucket core-nightly-test - -- name: many_nodes_actor_test - team: core - cluster: - app_config: many_nodes_tests/app_config.yaml - compute_template: many_nodes_tests/compute_config.yaml - - run: - timeout: 7200 - prepare: python wait_cluster.py 251 5400 - script: python many_nodes_tests/actor_test.py - -- name: pg_autoscaling_regression_test - team: core - cluster: - app_config: placement_group_tests/app_config.yaml - compute_template: placement_group_tests/compute.yaml - - run: - timeout: 1200 - script: python placement_group_tests/pg_run.py - -- name: pg_long_running_performance_test - team: core - cluster: - app_config: placement_group_tests/app_config.yaml - compute_template: placement_group_tests/long_running_test_compute.yaml - - run: - timeout: 3600 - prepare: python wait_cluster.py 2 600 - script: python placement_group_tests/long_running_performance_test.py --num-stages 2000 - -- name: placement_group_performance_test - team: core - cluster: - app_config: placement_group_tests/app_config.yaml - compute_template: placement_group_tests/pg_perf_test_compute.yaml - - run: - timeout: 1200 - prepare: python wait_cluster.py 5 600 - script: python placement_group_tests/placement_group_performance_test.py diff --git a/release/nightly_tests/wait_cluster.py b/release/nightly_tests/wait_cluster.py deleted file mode 100644 index f70088289..000000000 --- a/release/nightly_tests/wait_cluster.py +++ /dev/null @@ -1,54 +0,0 @@ -import argparse -import time - -import ray - -ray.init(address="auto") - -parser = argparse.ArgumentParser() -parser.add_argument( - "num_nodes", type=int, help="Wait for this number of nodes (includes head)" -) - -parser.add_argument("max_time_s", type=int, help="Wait for this number of seconds") - -parser.add_argument( - "--feedback_interval_s", - type=int, - default=10, - help="Wait for this number of seconds", -) - -args = parser.parse_args() - -curr_nodes = 0 -start = time.time() -next_feedback = start -max_time = start + args.max_time_s - -while not curr_nodes >= args.num_nodes: - now = time.time() - - if now >= max_time: - raise RuntimeError( - f"Maximum wait time reached, but only " - f"{curr_nodes}/{args.num_nodes} nodes came up. Aborting." - ) - - if now >= next_feedback: - passed = now - start - print( - f"Waiting for more nodes to come up: " - f"{curr_nodes}/{args.num_nodes} " - f"({passed:.0f} seconds passed)" - ) - next_feedback = now + args.feedback_interval_s - - time.sleep(5) - curr_nodes = len(ray.nodes()) - -passed = time.time() - start -print( - f"Cluster is up: {curr_nodes}/{args.num_nodes} nodes online after " - f"{passed:.0f} seconds" -) diff --git a/release/rllib_tests/rllib_tests.yaml b/release/rllib_tests/rllib_tests.yaml deleted file mode 100644 index d0b15dc07..000000000 --- a/release/rllib_tests/rllib_tests.yaml +++ /dev/null @@ -1,103 +0,0 @@ -# Heavy learning tests (Atari and HalfCheetah) for major algos. 
-- name: learning_tests - team: ml - cluster: - app_config: app_config.yaml - compute_template: 8gpus_64cpus.yaml - - run: - timeout: 14400 - script: python learning_tests/run.py - - smoke_test: - run: - timeout: 1200 - -# 2-GPU learning tests (CartPole and RepeatAfterMeEnv) for major algos. -- name: multi_gpu_learning_tests - team: ml - cluster: - app_config: app_config.yaml - compute_template: 8gpus_96cpus.yaml - - run: - timeout: 7200 - script: python multi_gpu_learning_tests/run.py - -# 2-GPU learning tests (StatelessCartPole) + use_lstm=True for major algos -# (that support RNN models). -- name: multi_gpu_with_lstm_learning_tests - team: ml - cluster: - app_config: app_config.yaml - compute_template: 8gpus_96cpus.yaml - - run: - timeout: 7200 - script: python multi_gpu_with_lstm_learning_tests/run.py - -# 2-GPU learning tests (StatelessCartPole) + use_attention=True for major -# algos (that support RNN models). -- name: multi_gpu_with_attention_learning_tests - team: ml - cluster: - app_config: app_config.yaml - compute_template: 8gpus_96cpus.yaml - - run: - timeout: 7200 - script: python multi_gpu_with_attention_learning_tests/run.py - -# We'll have these as per-PR tests soon. -# - name: example_scripts_on_gpu_tests -# team: ml -# cluster: -# app_config: app_config.yaml -# compute_template: 1gpu_4cpus.yaml - -# run: -# timeout: 7200 -# script: bash unit_gpu_tests/run.sh - -# IMPALA large machine stress tests (4x Atari). -- name: stress_tests - team: ml - cluster: - app_config: app_config.yaml - compute_template: 4gpus_544_cpus.yaml - - run: - timeout: 5400 - prepare: python wait_cluster.py 6 600 - script: python stress_tests/run_stress_tests.py - - smoke_test: - run: - timeout: 2000 - -# Tests that exercise auto-scaling and Anyscale connect. -- name: connect_tests - team: ml - cluster: - app_config: app_config.yaml - compute_template: auto_scale.yaml - - run: - use_connect: True - timeout: 3000 - script: python connect_tests/run_connect_tests.py - -# Nightly performance regression for popular algorithms. -# These algorithms run nightly for pre-determined amount of time without -# passing criteria. -# Performance metrics, such as reward achieved and throughput, are then -# collected and tracked over time. -- name: performance_tests - team: ml - cluster: - app_config: app_config.yaml - compute_template: 12gpus_192cpus.yaml - - run: - timeout: 10800 - script: python performance_tests/run.py diff --git a/release/rllib_tests/wait_cluster.py b/release/rllib_tests/wait_cluster.py deleted file mode 100644 index c02330db2..000000000 --- a/release/rllib_tests/wait_cluster.py +++ /dev/null @@ -1,53 +0,0 @@ -import argparse -import time - -import ray - -ray.init(address="auto") - -parser = argparse.ArgumentParser() -parser.add_argument( - "num_nodes", type=int, help="Wait for this number of nodes (includes head)" -) - -parser.add_argument("max_time_s", type=int, help="Wait for this number of seconds") - -parser.add_argument( - "--feedback_interval_s", - type=int, - default=10, - help="Wait for this number of seconds", -) - -args = parser.parse_args() - -curr_nodes = 0 -start = time.time() -next_feedback = start -max_time = start + args.max_time_s -while not curr_nodes >= args.num_nodes: - now = time.time() - - if now >= max_time: - raise RuntimeError( - f"Maximum wait time reached, but only " - f"{curr_nodes}/{args.num_nodes} nodes came up. Aborting." 
- ) - - if now >= next_feedback: - passed = now - start - print( - f"Waiting for more nodes to come up: " - f"{curr_nodes}/{args.num_nodes} " - f"({passed:.0f} seconds passed)" - ) - next_feedback = now + args.feedback_interval_s - - time.sleep(5) - curr_nodes = len(ray.nodes()) - -passed = time.time() - start -print( - f"Cluster is up: {curr_nodes}/{args.num_nodes} nodes online after " - f"{passed:.0f} seconds" -) diff --git a/release/run_e2e.sh b/release/run_e2e.sh deleted file mode 100755 index 9f1ae16fc..000000000 --- a/release/run_e2e.sh +++ /dev/null @@ -1,176 +0,0 @@ -#!/bin/bash - -set -ex - -cd "${0%/*}" || exit 1 - -reason() { - # Keep in sync with e2e.py ExitCode enum - case $1 in - 0) - REASON="success" - ;; - 2) - REASON="unspecified" - ;; - 3) - REASON="unknown" - ;; - 4) - REASON="runtime error" - ;; - 5) - REASON="command error" - ;; - 6) - REASON="command timeout" - ;; - 7) - REASON="prepare timeout" - ;; - 8) - REASON="filesync timeout" - ;; - 9) - REASON="session timeout" - ;; - 10) - REASON="prepare error" - ;; - 11) - REASON="app config build error" - ;; - 12) - REASON="infra error" - ;; - *) - REASON="untracked error" - ;; - esac - echo "${REASON}" -} - -while [[ $# -gt 0 ]] -do -key="$1" -case $key in - --ray-repo) - shift - RAY_REPO=$1 - ;; - --ray-branch) - shift - RAY_BRANCH=$1 - ;; - --ray-version) - shift - RAY_VERSION=$1 - ;; - --ray-wheels) - shift - RAY_WHEELS=$1 - ;; - --ray-test-repo) - shift - RAY_TEST_REPO=$1 - ;; - --ray-test-branch) - shift - RAY_TEST_BRANCH=$1 - ;; - --release-results-dir) - shift - RELEASE_RESULTS_DIR=$1 - ;; - *) - break -esac -shift -done - -RAY_TEST_REPO=${RAY_TEST_REPO-https://github.com/ray-project/ray.git} -RAY_TEST_BRANCH=${RAY_TEST_BRANCH-master} -RELEASE_RESULTS_DIR=${RELEASE_RESULTS_DIR-/tmp/artifacts} - -export RAY_REPO RAY_BRANCH RAY_VERSION RAY_WHEELS RAY_TEST_REPO RAY_TEST_BRANCH RELEASE_RESULTS_DIR - -pip uninstall -q -y ray -pip install -q -r requirements.txt -pip install -q -U boto3 botocore -git clone -b "${RAY_TEST_BRANCH}" "${RAY_TEST_REPO}" ~/ray - -RETRY_NUM=0 -MAX_RETRIES=${MAX_RETRIES-3} - -if [ "${BUILDKITE_RETRY_COUNT-0}" -ge 1 ]; then - echo "This is a manually triggered retry from the Buildkite web UI, so we set the number of infra retries to 1." - MAX_RETRIES=1 -fi - -ALL_EXIT_CODES=() -while [ "$RETRY_NUM" -lt "$MAX_RETRIES" ]; do - RETRY_NUM=$((RETRY_NUM + 1)) - - if [ "$RETRY_NUM" -gt 1 ]; then - # Sleep for random time between 30 and 90 minutes - SLEEP_TIME=$((1800 + RANDOM % 5400)) - echo "----------------------------------------" - echo "Retry count: ${RETRY_NUM}/${MAX_RETRIES}. Sleeping for ${SLEEP_TIME} seconds before retrying the run." - echo "----------------------------------------" - sleep ${SLEEP_TIME} - fi - - sudo rm -rf "${RELEASE_RESULTS_DIR}"/* || true - - python e2e.py "$@" - EXIT_CODE=$? - REASON=$(reason "${EXIT_CODE}") - ALL_EXIT_CODES[${#ALL_EXIT_CODES[@]}]=$EXIT_CODE - - case ${EXIT_CODE} in - 0) - echo "Script finished successfully on try ${RETRY_NUM}/${MAX_RETRIES}" - break - ;; - 7 | 9 | 10) - echo "Script failed on try ${RETRY_NUM}/${MAX_RETRIES} with exit code ${EXIT_CODE} (${REASON})." - ;; - *) - echo "Script failed on try ${RETRY_NUM}/${MAX_RETRIES} with exit code ${EXIT_CODE} (${REASON}), aborting." 
- break - ;; - esac - -done - -sudo rm -rf /tmp/ray_release_test_artifacts/* || true -sudo cp -rf "${RELEASE_RESULTS_DIR}"/* /tmp/ray_release_test_artifacts/ || true - -echo "----------------------------------------" -echo "e2e test finished with final exit code ${EXIT_CODE} after ${RETRY_NUM}/${MAX_RETRIES} tries" -echo "Run results:" - -COUNTER=1 -for EX in "${ALL_EXIT_CODES[@]}"; do - REASON=$(reason "${EX}") - echo " Run $COUNTER: Exit code = ${EX} (${REASON})" - COUNTER=$((COUNTER + 1)) -done - -echo "----------------------------------------" - -REASON=$(reason "${EXIT_CODE}") -echo "Final e2e exit code is ${EXIT_CODE} (${REASON})" - -case ${EXIT_CODE} in - 0) - ;; - 7 | 9 | 10) - echo "RELEASE MANAGER: This is likely an infra error that can be solved by RESTARTING this test." - ;; - *) - echo "RELEASE MANAGER: This could be an error in the test. Please REVIEW THE LOGS and ping the test owner." - ;; -esac - -exit $EXIT_CODE diff --git a/release/runtime_env_tests/runtime_env_tests.yaml b/release/runtime_env_tests/runtime_env_tests.yaml deleted file mode 100644 index 7a55da490..000000000 --- a/release/runtime_env_tests/runtime_env_tests.yaml +++ /dev/null @@ -1,34 +0,0 @@ -- name: rte_many_tasks_actors - team: serve - cluster: - app_config: app_config.yaml - compute_template: rte_small.yaml - - run: - timeout: 600 - prepare: python wait_cluster.py 4 600 - script: python workloads/rte_many_tasks_actors.py - -- name: wheel_urls - team: serve - cluster: - app_config: app_config.yaml - compute_template: rte_minimal.yaml - - run: - timeout: 9000 # 2h30m - prepare: python wait_cluster.py 1 600 - script: python workloads/wheel_urls.py - -- name: rte_ray_client - team: serve - cluster: - app_config: app_config.yaml - compute_template: rte_minimal.yaml - - run: - use_connect: True - autosuspend_mins: 10 - timeout: 600 - prepare: python wait_cluster.py 1 600 - script: python workloads/rte_ray_client.py diff --git a/release/runtime_env_tests/wait_cluster.py b/release/runtime_env_tests/wait_cluster.py deleted file mode 100644 index c02330db2..000000000 --- a/release/runtime_env_tests/wait_cluster.py +++ /dev/null @@ -1,53 +0,0 @@ -import argparse -import time - -import ray - -ray.init(address="auto") - -parser = argparse.ArgumentParser() -parser.add_argument( - "num_nodes", type=int, help="Wait for this number of nodes (includes head)" -) - -parser.add_argument("max_time_s", type=int, help="Wait for this number of seconds") - -parser.add_argument( - "--feedback_interval_s", - type=int, - default=10, - help="Wait for this number of seconds", -) - -args = parser.parse_args() - -curr_nodes = 0 -start = time.time() -next_feedback = start -max_time = start + args.max_time_s -while not curr_nodes >= args.num_nodes: - now = time.time() - - if now >= max_time: - raise RuntimeError( - f"Maximum wait time reached, but only " - f"{curr_nodes}/{args.num_nodes} nodes came up. Aborting." 
- ) - - if now >= next_feedback: - passed = now - start - print( - f"Waiting for more nodes to come up: " - f"{curr_nodes}/{args.num_nodes} " - f"({passed:.0f} seconds passed)" - ) - next_feedback = now + args.feedback_interval_s - - time.sleep(5) - curr_nodes = len(ray.nodes()) - -passed = time.time() - start -print( - f"Cluster is up: {curr_nodes}/{args.num_nodes} nodes online after " - f"{passed:.0f} seconds" -) diff --git a/release/serve_tests/serve_tests.yaml b/release/serve_tests/serve_tests.yaml deleted file mode 100644 index 87058d891..000000000 --- a/release/serve_tests/serve_tests.yaml +++ /dev/null @@ -1,101 +0,0 @@ -- name: single_deployment_1k_noop_replica - team: serve - cluster: - app_config: app_config.yaml - compute_template: compute_tpl_32_cpu.yaml - - run: - timeout: 7200 - long_running: False - script: python workloads/single_deployment_1k_noop_replica.py - - smoke_test: - timeout: 600 - -- name: multi_deployment_1k_noop_replica - team: serve - cluster: - app_config: app_config.yaml - compute_template: compute_tpl_32_cpu.yaml - - run: - timeout: 7200 - long_running: False - script: python workloads/multi_deployment_1k_noop_replica.py - - smoke_test: - timeout: 600 - -- name: autoscaling_single_deployment - team: serve - cluster: - app_config: app_config.yaml - compute_template: compute_tpl_8_cpu_autoscaling.yaml - - run: - timeout: 7200 - long_running: False - script: python workloads/autoscaling_single_deployment.py - - smoke_test: - timeout: 600 - -- name: autoscaling_multi_deployment - team: serve - cluster: - app_config: app_config.yaml - compute_template: compute_tpl_8_cpu_autoscaling.yaml - - run: - timeout: 7200 - long_running: False - script: python workloads/autoscaling_multi_deployment.py - - smoke_test: - timeout: 600 - -- name: serve_micro_benchmark - team: serve - cluster: - app_config: app_config.yaml - # 16 CPUS - compute_template: compute_tpl_single_node.yaml - - run: - timeout: 7200 - long_running: False - script: python workloads/serve_micro_benchmark.py - - smoke_test: - timeout: 600 - -- name: serve_micro_benchmark_k8s - team: serve - cluster: - app_config: app_config.yaml - # 16 CPUS - compute_template: compute_tpl_single_node_k8s.yaml - compute_on_k8s: True - - run: - timeout: 7200 - long_running: False - script: python workloads/serve_micro_benchmark.py - - smoke_test: - timeout: 600 - -- name: serve_cluster_fault_tolerance - team: serve - cluster: - app_config: app_config.yaml - # 16 CPUS - compute_template: compute_tpl_single_node.yaml - - run: - timeout: 7200 - long_running: False - script: python workloads/serve_cluster_fault_tolerance.py - - smoke_test: - timeout: 600 diff --git a/release/sgd_tests/sgd_tests.yaml b/release/sgd_tests/sgd_tests.yaml deleted file mode 100644 index cb0d4d5c3..000000000 --- a/release/sgd_tests/sgd_tests.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Test multi-node, multi-GPU Ray SGD example. 
-- name: sgd_gpu - team: ml - cluster: - app_config: sgd_gpu/sgd_gpu_app_config.yaml - compute_template: sgd_gpu/sgd_gpu_compute.yaml - - run: - timeout: 3000 - prepare: python wait_cluster.py 2 600 - script: python sgd_gpu/sgd_gpu_test.py --num-workers=2 --use-gpu --address=auto \ No newline at end of file diff --git a/release/sgd_tests/wait_cluster.py b/release/sgd_tests/wait_cluster.py deleted file mode 100644 index c02330db2..000000000 --- a/release/sgd_tests/wait_cluster.py +++ /dev/null @@ -1,53 +0,0 @@ -import argparse -import time - -import ray - -ray.init(address="auto") - -parser = argparse.ArgumentParser() -parser.add_argument( - "num_nodes", type=int, help="Wait for this number of nodes (includes head)" -) - -parser.add_argument("max_time_s", type=int, help="Wait for this number of seconds") - -parser.add_argument( - "--feedback_interval_s", - type=int, - default=10, - help="Wait for this number of seconds", -) - -args = parser.parse_args() - -curr_nodes = 0 -start = time.time() -next_feedback = start -max_time = start + args.max_time_s -while not curr_nodes >= args.num_nodes: - now = time.time() - - if now >= max_time: - raise RuntimeError( - f"Maximum wait time reached, but only " - f"{curr_nodes}/{args.num_nodes} nodes came up. Aborting." - ) - - if now >= next_feedback: - passed = now - start - print( - f"Waiting for more nodes to come up: " - f"{curr_nodes}/{args.num_nodes} " - f"({passed:.0f} seconds passed)" - ) - next_feedback = now + args.feedback_interval_s - - time.sleep(5) - curr_nodes = len(ray.nodes()) - -passed = time.time() - start -print( - f"Cluster is up: {curr_nodes}/{args.num_nodes} nodes online after " - f"{passed:.0f} seconds" -) diff --git a/release/test_owners.yaml b/release/test_owners.yaml deleted file mode 100644 index b898529a8..000000000 --- a/release/test_owners.yaml +++ /dev/null @@ -1,27 +0,0 @@ -# Specify the test owners (teams) here. -# The root key should be the name of the test yaml file without the .yaml. -# To specify owners of subtests, use a sub dict (see e.g. long_running_tests). 
-golden_notebook_tests: ml -horovod_tests: ml -lightgbm_tests: ml -long_running_distributed_tests: ml -long_running_tests: - actor_deaths: core - apex: ml - impala: ml - many_actor_tasks: core - many_drivers: core - many_ppo: core - many_tasks: core - many_tasks_serialized_ids: core - node_failures: core - pbt: ml - serve: serve - serve_failure: serve -microbenchmark: core -nightly_tests: core -rllib_tests: ml -runtime_env_tests: serve -serve_tests: serve -sgd_tests: ml -xgboost_tests: ml diff --git a/release/tune_tests/cloud_tests/tune_cloud_tests.yaml b/release/tune_tests/cloud_tests/tune_cloud_tests.yaml deleted file mode 100644 index 72279931e..000000000 --- a/release/tune_tests/cloud_tests/tune_cloud_tests.yaml +++ /dev/null @@ -1,118 +0,0 @@ -- name: aws_no_sync_down - team: ml - cluster: - app_config: app_config.yaml - compute_template: tpl_aws_4x2.yaml - - run: - timeout: 600 - prepare: python wait_cluster.py 4 600 - script: python workloads/run_cloud_test.py no_sync_down - -- name: aws_ssh_sync - team: ml - cluster: - app_config: app_config.yaml - compute_template: tpl_aws_4x2.yaml - - run: - timeout: 600 - prepare: python wait_cluster.py 4 600 - script: python workloads/run_cloud_test.py ssh_sync - -- name: aws_durable_upload - team: ml - cluster: - app_config: app_config.yaml - compute_template: tpl_aws_4x2.yaml - - run: - timeout: 600 - prepare: python wait_cluster.py 4 600 - script: python workloads/run_cloud_test.py durable_upload --bucket s3://data-test-ilr/durable_upload - -- name: aws_durable_upload_rllib_str - team: ml - cluster: - app_config: app_config_ml.yaml - compute_template: tpl_aws_4x2.yaml - - run: - timeout: 600 - prepare: python wait_cluster.py 4 600 - script: python workloads/run_cloud_test.py durable_upload --trainable rllib_str --bucket s3://data-test-ilr/durable_upload_rllib_str - -- name: aws_durable_upload_rllib_trainer - team: ml - cluster: - app_config: app_config_ml.yaml - compute_template: tpl_aws_4x2.yaml - - run: - timeout: 600 - prepare: python wait_cluster.py 4 600 - script: python workloads/run_cloud_test.py durable_upload --trainable rllib_trainer --bucket s3://data-test-ilr/durable_upload_rllib_trainer - -- name: aws_no_durable_upload - team: ml - cluster: - app_config: app_config.yaml - compute_template: tpl_aws_4x2.yaml - - run: - timeout: 600 - prepare: python wait_cluster.py 4 600 - script: python workloads/run_cloud_test.py no_durable_upload --bucket s3://data-test-ilr/durable_upload - -- name: gcp_k8s_no_sync_down - team: ml - cluster: - app_config: app_config.yaml - compute_template: tpl_gcp_k8s_4x8.yaml - cloud_id: cld_k8WcxPgjUtSE8RVmfZpTLuKM # anyscale_k8s_gcp_cloud - - run: - use_connect: True - timeout: 600 - # Remove --cpus-per-trial 8 once n2-standard-2 is supported - script: python workloads/run_cloud_test.py no_sync_down --cpus-per-trial 8 - -- name: gcp_k8s_ssh_sync - team: ml - cluster: - app_config: app_config.yaml - compute_template: tpl_gcp_k8s_4x8.yaml - cloud_id: cld_k8WcxPgjUtSE8RVmfZpTLuKM # anyscale_k8s_gcp_cloud - - run: - use_connect: True - timeout: 600 - # Remove --cpus-per-trial 8 once n2-standard-2 is supported - script: python workloads/run_cloud_test.py ssh_sync --cpus-per-trial 8 - -- name: gcp_k8s_durable_upload - team: ml - cluster: - app_config: app_config.yaml - compute_template: tpl_gcp_k8s_4x8.yaml - cloud_id: cld_k8WcxPgjUtSE8RVmfZpTLuKM # anyscale_k8s_gcp_cloud - - run: - use_connect: True - timeout: 600 - # Remove --cpus-per-trial 8 once n2-standard-2 is supported - script: python 
workloads/run_cloud_test.py durable_upload --cpus-per-trial 8 --bucket gs://jun-riot-test/durable_upload
-
-
-- name: gcp_k8s_no_durable_upload
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_gcp_k8s_4x8.yaml
-    cloud_id: cld_k8WcxPgjUtSE8RVmfZpTLuKM # anyscale_k8s_gcp_cloud
-
-  run:
-    use_connect: True
-    timeout: 600
-    # Remove --cpus-per-trial 8 once n2-standard-2 is supported
-    script: python workloads/run_cloud_test.py no_durable_upload --cpus-per-trial 8 --bucket gs://jun-riot-test/durable_upload
diff --git a/release/tune_tests/cloud_tests/wait_cluster.py b/release/tune_tests/cloud_tests/wait_cluster.py
deleted file mode 100644
index f70088289..000000000
--- a/release/tune_tests/cloud_tests/wait_cluster.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import argparse
-import time
-
-import ray
-
-ray.init(address="auto")
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    "num_nodes", type=int, help="Wait for this number of nodes (includes head)"
-)
-
-parser.add_argument("max_time_s", type=int, help="Wait for this number of seconds")
-
-parser.add_argument(
-    "--feedback_interval_s",
-    type=int,
-    default=10,
-    help="Wait for this number of seconds",
-)
-
-args = parser.parse_args()
-
-curr_nodes = 0
-start = time.time()
-next_feedback = start
-max_time = start + args.max_time_s
-
-while not curr_nodes >= args.num_nodes:
-    now = time.time()
-
-    if now >= max_time:
-        raise RuntimeError(
-            f"Maximum wait time reached, but only "
-            f"{curr_nodes}/{args.num_nodes} nodes came up. Aborting."
-        )
-
-    if now >= next_feedback:
-        passed = now - start
-        print(
-            f"Waiting for more nodes to come up: "
-            f"{curr_nodes}/{args.num_nodes} "
-            f"({passed:.0f} seconds passed)"
-        )
-        next_feedback = now + args.feedback_interval_s
-
-    time.sleep(5)
-    curr_nodes = len(ray.nodes())
-
-passed = time.time() - start
-print(
-    f"Cluster is up: {curr_nodes}/{args.num_nodes} nodes online after "
-    f"{passed:.0f} seconds"
-)
diff --git a/release/tune_tests/scalability_tests/tune_tests.yaml b/release/tune_tests/scalability_tests/tune_tests.yaml
deleted file mode 100644
index ba8a5a230..000000000
--- a/release/tune_tests/scalability_tests/tune_tests.yaml
+++ /dev/null
@@ -1,90 +0,0 @@
-- name: bookkeeping_overhead
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_1x16.yaml
-
-  run:
-    timeout: 1200
-    script: python workloads/test_bookkeeping_overhead.py
-
-
-- name: durable_trainable
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_16x2.yaml
-
-  run:
-    timeout: 900
-    prepare: python wait_cluster.py 16 600
-    script: python workloads/test_durable_trainable.py --bucket data-test-ilr
-
-- name: long_running_large_checkpoints
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_1x32_hd.yaml
-
-  run:
-    timeout: 86400
-    script: python workloads/test_long_running_large_checkpoints.py
-    long_running: True
-
-  smoke_test:
-    run:
-      timeout: 3600
-
-
-- name: network_overhead
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_100x2.yaml
-
-  run:
-    timeout: 900
-    prepare_timeout: 1200
-    prepare: python wait_cluster.py 100 1200
-    script: python workloads/test_network_overhead.py
-
-  smoke_test:
-    cluster:
-      compute_template: tpl_20x2.yaml
-
-    run:
-      timeout: 400
-      prepare_timeout: 600
-      prepare: python wait_cluster.py 20 600
-
-- name: result_throughput_cluster
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_16x64.yaml
-
-  run:
-    timeout: 600
-    prepare: python wait_cluster.py 16 600
-    script: python workloads/test_result_throughput_cluster.py
-
-- name: result_throughput_single_node
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_1x96.yaml
-
-  run:
-    timeout: 600
-    script: python workloads/test_result_throughput_single_node.py
-
-- name: xgboost_sweep
-  team: ml
-  cluster:
-    app_config: app_config_data.yaml
-    compute_template: tpl_16x64.yaml
-
-  run:
-    timeout: 3600
-    prepare: python wait_cluster.py 16 600
-    script: python workloads/test_xgboost_sweep.py
diff --git a/release/tune_tests/scalability_tests/wait_cluster.py b/release/tune_tests/scalability_tests/wait_cluster.py
deleted file mode 100644
index c02330db2..000000000
--- a/release/tune_tests/scalability_tests/wait_cluster.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import argparse
-import time
-
-import ray
-
-ray.init(address="auto")
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    "num_nodes", type=int, help="Wait for this number of nodes (includes head)"
-)
-
-parser.add_argument("max_time_s", type=int, help="Wait for this number of seconds")
-
-parser.add_argument(
-    "--feedback_interval_s",
-    type=int,
-    default=10,
-    help="Wait for this number of seconds",
-)
-
-args = parser.parse_args()
-
-curr_nodes = 0
-start = time.time()
-next_feedback = start
-max_time = start + args.max_time_s
-while not curr_nodes >= args.num_nodes:
-    now = time.time()
-
-    if now >= max_time:
-        raise RuntimeError(
-            f"Maximum wait time reached, but only "
-            f"{curr_nodes}/{args.num_nodes} nodes came up. Aborting."
-        )
-
-    if now >= next_feedback:
-        passed = now - start
-        print(
-            f"Waiting for more nodes to come up: "
-            f"{curr_nodes}/{args.num_nodes} "
-            f"({passed:.0f} seconds passed)"
-        )
-        next_feedback = now + args.feedback_interval_s
-
-    time.sleep(5)
-    curr_nodes = len(ray.nodes())
-
-passed = time.time() - start
-print(
-    f"Cluster is up: {curr_nodes}/{args.num_nodes} nodes online after "
-    f"{passed:.0f} seconds"
-)
diff --git a/release/util/wait_cluster.py b/release/util/wait_cluster.py
deleted file mode 100644
index c02330db2..000000000
--- a/release/util/wait_cluster.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import argparse
-import time
-
-import ray
-
-ray.init(address="auto")
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    "num_nodes", type=int, help="Wait for this number of nodes (includes head)"
-)
-
-parser.add_argument("max_time_s", type=int, help="Wait for this number of seconds")
-
-parser.add_argument(
-    "--feedback_interval_s",
-    type=int,
-    default=10,
-    help="Wait for this number of seconds",
-)
-
-args = parser.parse_args()
-
-curr_nodes = 0
-start = time.time()
-next_feedback = start
-max_time = start + args.max_time_s
-while not curr_nodes >= args.num_nodes:
-    now = time.time()
-
-    if now >= max_time:
-        raise RuntimeError(
-            f"Maximum wait time reached, but only "
-            f"{curr_nodes}/{args.num_nodes} nodes came up. Aborting."
-        )
-
-    if now >= next_feedback:
-        passed = now - start
-        print(
-            f"Waiting for more nodes to come up: "
-            f"{curr_nodes}/{args.num_nodes} "
-            f"({passed:.0f} seconds passed)"
-        )
-        next_feedback = now + args.feedback_interval_s
-
-    time.sleep(5)
-    curr_nodes = len(ray.nodes())
-
-passed = time.time() - start
-print(
-    f"Cluster is up: {curr_nodes}/{args.num_nodes} nodes online after "
-    f"{passed:.0f} seconds"
-)
diff --git a/release/xgboost_tests/wait_cluster.py b/release/xgboost_tests/wait_cluster.py
deleted file mode 100644
index c02330db2..000000000
--- a/release/xgboost_tests/wait_cluster.py
+++ /dev/null
@@ -1,53 +0,0 @@
-import argparse
-import time
-
-import ray
-
-ray.init(address="auto")
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    "num_nodes", type=int, help="Wait for this number of nodes (includes head)"
-)
-
-parser.add_argument("max_time_s", type=int, help="Wait for this number of seconds")
-
-parser.add_argument(
-    "--feedback_interval_s",
-    type=int,
-    default=10,
-    help="Wait for this number of seconds",
-)
-
-args = parser.parse_args()
-
-curr_nodes = 0
-start = time.time()
-next_feedback = start
-max_time = start + args.max_time_s
-while not curr_nodes >= args.num_nodes:
-    now = time.time()
-
-    if now >= max_time:
-        raise RuntimeError(
-            f"Maximum wait time reached, but only "
-            f"{curr_nodes}/{args.num_nodes} nodes came up. Aborting."
-        )
-
-    if now >= next_feedback:
-        passed = now - start
-        print(
-            f"Waiting for more nodes to come up: "
-            f"{curr_nodes}/{args.num_nodes} "
-            f"({passed:.0f} seconds passed)"
-        )
-        next_feedback = now + args.feedback_interval_s
-
-    time.sleep(5)
-    curr_nodes = len(ray.nodes())
-
-passed = time.time() - start
-print(
-    f"Cluster is up: {curr_nodes}/{args.num_nodes} nodes online after "
-    f"{passed:.0f} seconds"
-)
diff --git a/release/xgboost_tests/xgboost_tests.yaml b/release/xgboost_tests/xgboost_tests.yaml
deleted file mode 100644
index 264443308..000000000
--- a/release/xgboost_tests/xgboost_tests.yaml
+++ /dev/null
@@ -1,104 +0,0 @@
-- name: train_small
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_cpu_small.yaml
-
-  run:
-    use_connect: True
-    autosuspend_mins: 10
-    timeout: 600
-    prepare: python wait_cluster.py 4 600
-    script: python workloads/train_small.py
-
-- name: train_moderate
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_cpu_moderate.yaml
-
-  run:
-    timeout: 600
-    prepare: python wait_cluster.py 32 600
-    script: python workloads/train_moderate.py
-
-- name: train_gpu
-  team: ml
-  cluster:
-    app_config: app_config_gpu.yaml
-    compute_template: tpl_gpu_small.yaml
-
-  run:
-    timeout: 600
-    prepare: python wait_cluster.py 5 600
-    script: python workloads/train_gpu.py
-
-- name: distributed_api_test
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_cpu_small.yaml
-    results:
-
-  run:
-    timeout: 600
-    prepare: python wait_cluster.py 4 600
-    script: python workloads/distributed_api_test.py
-    results: ""
-
-- name: ft_small_elastic
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_cpu_small.yaml
-
-  run:
-    timeout: 900
-    prepare: python wait_cluster.py 4 600
-    script: python workloads/ft_small_elastic.py
-    results: ""
-
-- name: ft_small_non_elastic
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_cpu_small.yaml
-
-  run:
-    timeout: 900
-    prepare: python wait_cluster.py 4 600
-    script: python workloads/ft_small_non_elastic.py
-    results: ""
-
-- name: tune_small
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_cpu_small.yaml
-
-  run:
-    timeout: 600
-    prepare: python wait_cluster.py 4 600
-    script: python workloads/tune_small.py
-
-- name: tune_32x4
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_cpu_moderate.yaml
-
-  run:
-    timeout: 900
-    prepare: python wait_cluster.py 32 600
-    script: python workloads/tune_32x4.py
-
-- name: tune_4x32
-  team: ml
-  cluster:
-    app_config: app_config.yaml
-    compute_template: tpl_cpu_moderate.yaml
-
-  run:
-    timeout: 900
-    prepare: python wait_cluster.py 32 600
-    script: python workloads/tune_4x32.py
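
For orientation when reading the deletions above: every `prepare:` line in these YAML entries invokes the co-located `wait_cluster.py`, and the deleted copies are essentially identical (the three with index c02330db2 are byte-for-byte the same; the cloud_tests copy differs only by a blank line). A minimal sketch of that polling pattern as a single reusable helper follows; the function name `wait_for_nodes` and its keyword defaults are illustrative assumptions and not part of this diff, while the Ray calls (`ray.init(address="auto")`, `ray.nodes()`) are taken directly from the deleted scripts.

# Sketch only: a consolidated equivalent of the deleted wait_cluster.py copies.
# The function name and defaults are assumptions; the polling logic mirrors the
# deleted scripts (poll ray.nodes() until enough nodes report in, or time out).
import time

import ray


def wait_for_nodes(num_nodes: int, max_time_s: int, feedback_interval_s: int = 10) -> None:
    """Block until the cluster reports `num_nodes` nodes (head included)."""
    ray.init(address="auto")
    start = time.time()
    next_feedback = start
    curr_nodes = 0
    while curr_nodes < num_nodes:
        now = time.time()
        if now - start >= max_time_s:
            raise RuntimeError(
                f"Maximum wait time reached, but only "
                f"{curr_nodes}/{num_nodes} nodes came up. Aborting."
            )
        if now >= next_feedback:
            print(
                f"Waiting for more nodes to come up: {curr_nodes}/{num_nodes} "
                f"({now - start:.0f} seconds passed)"
            )
            next_feedback = now + feedback_interval_s
        time.sleep(5)
        curr_nodes = len(ray.nodes())
    print(f"Cluster is up: {curr_nodes}/{num_nodes} nodes online.")


if __name__ == "__main__":
    # Matches e.g. `prepare: python wait_cluster.py 4 600` in the YAML above.
    wait_for_nodes(num_nodes=4, max_time_s=600)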