[release] move release testing end to end script to main ray repo (#17070)
This commit is contained in:
parent
92f19170ab
commit
ed131f87da
12 changed files with 2823 additions and 0 deletions
307 release/.buildkite/build_pipeline.py Normal file
@@ -0,0 +1,307 @@
import copy
import logging
import os
import sys

import yaml

# Env variables:

# RAY_REPO            Repo to use for finding the wheel
# RAY_BRANCH          Branch to find the wheel
# RAY_TEST_REPO       Repo to use for test scripts
# RAY_TEST_BRANCH     Branch for test scripts
# FILTER_FILE         File filter
# FILTER_TEST         Test name filter
# RELEASE_TEST_SUITE  Release test suite (e.g. manual, nightly)


class ReleaseTest:
    def __init__(self, name: str, smoke_test: bool = False, retry: int = 0):
        self.name = name
        self.smoke_test = smoke_test
        self.retry = retry

    def __str__(self):
        return self.name

    def __repr__(self):
        return self.name

    def __contains__(self, item):
        return self.name.__contains__(item)

    def __iter__(self):
        return iter(self.name)

    def __len__(self):
        return len(self.name)


class SmokeTest(ReleaseTest):
    def __init__(self, name: str, retry: int = 0):
        super(SmokeTest, self).__init__(
            name=name, smoke_test=True, retry=retry)


CORE_NIGHTLY_TESTS = {
    "~/ray/release/nightly_tests/nightly_tests.yaml": [
        "shuffle_10gb",
        "shuffle_50gb",
        "shuffle_50gb_large_partition",
        "shuffle_100gb",
        "non_streaming_shuffle_100gb",
        "non_streaming_shuffle_50gb_large_partition",
        "non_streaming_shuffle_50gb",
        "dask_on_ray_10gb_sort",
        "dask_on_ray_100gb_sort",
        "dask_on_ray_large_scale_test_no_spilling",
        "dask_on_ray_large_scale_test_spilling",
        "stress_test_placement_group",
        "shuffle_1tb_1000_partition",
        "non_streaming_shuffle_1tb_1000_partition",
        "shuffle_1tb_5000_partitions",
        "non_streaming_shuffle_1tb_5000_partitions",
        "decision_tree_autoscaling",
        "autoscaling_shuffle_1tb_1000_partitions",
        SmokeTest("stress_test_many_tasks"),
        SmokeTest("stress_test_dead_actors"),
    ],
    "~/ray/benchmarks/benchmark_tests.yaml": [
        "single_node",
        "object_store",
    ],
}

NIGHTLY_TESTS = {
    # "~/ray/release/horovod_tests/horovod_tests.yaml": [
    #     SmokeTest("horovod_test"),
    # ],  # Should we enable this?
    "~/ray/release/golden_notebook_tests/golden_notebook_tests.yaml": [
        "dask_xgboost_test",
        "modin_xgboost_test",
        "torch_tune_serve_test",
    ],
    "~/ray/release/long_running_tests/long_running_tests.yaml": [
        SmokeTest("actor_deaths"),
        SmokeTest("apex"),
        SmokeTest("impala"),
        SmokeTest("many_actor_tasks"),
        SmokeTest("many_drivers"),
        SmokeTest("many_ppo"),
        SmokeTest("many_tasks"),
        SmokeTest("many_tasks_serialized_ids"),
        SmokeTest("node_failures"),
        SmokeTest("pbt"),
        # SmokeTest("serve"),
        # SmokeTest("serve_failure"),
    ],
    "~/ray/release/microbenchmark/microbenchmark.yaml": [
        "microbenchmark",
    ],
    "~/ray/release/sgd_tests/sgd_tests.yaml": [
        "sgd_gpu",
    ],
    "~/ray/release/tune_tests/scalability_tests/tune_tests.yaml": [
        "bookkeeping_overhead",
        "durable_trainable",
        SmokeTest("long_running_large_checkpoints"),
        SmokeTest("network_overhead"),
        "result_throughput_cluster",
        "result_throughput_single_node",
        "xgboost_sweep",
    ],
    "~/ray/release/xgboost_tests/xgboost_tests.yaml": [
        "train_small",
        "train_moderate",
        "train_gpu",
        "tune_small",
        "tune_4x32",
        "tune_32x4",
        "ft_small_elastic",
        "ft_small_non_elastic",
        "distributed_api_test",
    ],
}

WEEKLY_TESTS = {
    "~/ray/benchmarks/benchmark_tests.yaml": [
        "distributed",
    ],
    "~/ray/release/nightly_tests/nightly_tests.yaml": [
        "stress_test_many_tasks",
        "stress_test_dead_actors",
    ],
    "~/ray/release/horovod_tests/horovod_tests.yaml": [
        "horovod_test",
    ],
    "~/ray/release/long_running_distributed_tests"
    "/long_running_distributed.yaml": [
        "pytorch_pbt_failure",
    ],
    # Full long running tests (1 day runtime)
    "~/ray/release/long_running_tests/long_running_tests.yaml": [
        "actor_deaths",
        "apex",
        "impala",
        "many_actor_tasks",
        "many_drivers",
        "many_ppo",
        "many_tasks",
        "many_tasks_serialized_ids",
        "node_failures",
        "pbt",
        # "serve",
        # "serve_failure",
    ],
    "~/ray/release/tune_tests/scalability_tests/tune_tests.yaml": [
        "network_overhead",
        "long_running_large_checkpoints",
    ],
}

MANUAL_TESTS = {
    "~/ray/release/rllib_tests/rllib_tests.yaml": [
        "learning_tests",
        "example_scripts_on_gpu_tests",
        "stress_tests",
    ],
    "~/ray/release/long_running_tests/long_running_tests.yaml": [
        SmokeTest("serve"),
        SmokeTest("serve_failure"),
    ]
}

SUITES = {
    "core-nightly": CORE_NIGHTLY_TESTS,
    "nightly": NIGHTLY_TESTS,
    "weekly": WEEKLY_TESTS,
    "manual": MANUAL_TESTS,
}

DEFAULT_STEP_TEMPLATE = {
    "env": {
        "ANYSCALE_CLOUD_ID": "cld_4F7k8814aZzGG8TNUGPKnc",
        "ANYSCALE_PROJECT": "prj_2xR6uT6t7jJuu1aCwWMsle",
        "RELEASE_AWS_BUCKET": "ray-release-automation-results",
        "RELEASE_AWS_LOCATION": "dev",
        "RELEASE_AWS_DB_NAME": "ray_ci",
        "RELEASE_AWS_DB_TABLE": "release_test_result",
        "AWS_REGION": "us-west-2"
    },
    "agents": {
        "queue": "runner_queue_branch"
    },
    "plugins": [{
        "docker#v3.8.0": {
            "image": "rayproject/ray",
            "propagate-environment": True
        }
    }],
    "commands": []
}


def build_pipeline(steps):
    all_steps = []

    RAY_BRANCH = os.environ.get("RAY_BRANCH", "master")
    RAY_REPO = os.environ.get("RAY_REPO",
                              "https://github.com/ray-project/ray.git")

    RAY_TEST_BRANCH = os.environ.get("RAY_TEST_BRANCH", RAY_BRANCH)
    RAY_TEST_REPO = os.environ.get("RAY_TEST_REPO", RAY_REPO)

    FILTER_FILE = os.environ.get("FILTER_FILE", "")
    FILTER_TEST = os.environ.get("FILTER_TEST", "")

    logging.info(
        f"Building pipeline \n"
        f"Ray repo/branch to test:\n"
        f"  RAY_REPO = {RAY_REPO}\n"
        f"  RAY_BRANCH = {RAY_BRANCH}\n\n"
        f"Ray repo/branch containing the test configurations and scripts:\n"
        f"  RAY_TEST_REPO = {RAY_TEST_REPO}\n"
        f"  RAY_TEST_BRANCH = {RAY_TEST_BRANCH}\n\n"
        f"Filtering for these tests:\n"
        f"  FILTER_FILE = {FILTER_FILE}\n"
        f"  FILTER_TEST = {FILTER_TEST}\n\n")

    for test_file, test_names in steps.items():
        if FILTER_FILE and FILTER_FILE not in test_file:
            continue

        test_base = os.path.basename(test_file)
        for test_name in test_names:
            if FILTER_TEST and FILTER_TEST not in test_name:
                continue

            if not isinstance(test_name, ReleaseTest):
                test_name = ReleaseTest(name=test_name)

            logging.info(f"Adding test: {test_base}/{test_name}")

            cmd = (f"python release/e2e.py "
                   f"--ray-branch {RAY_BRANCH} "
                   f"--category {RAY_BRANCH} "
                   f"--test-config {test_file} "
                   f"--test-name {test_name}")

            if test_name.smoke_test:
                logging.info("This test will run as a smoke test.")
                cmd += " --smoke-test"

            step_conf = copy.deepcopy(DEFAULT_STEP_TEMPLATE)

            if test_name.retry:
                logging.info(f"This test will be retried up to "
                             f"{test_name.retry} times.")
                step_conf["retry"] = {
                    "automatic": [{
                        "exit_status": "*",
                        "limit": test_name.retry
                    }]
                }

            step_conf["commands"] = [
                "pip install -q -r release/requirements.txt",
                "pip install -U boto3 botocore",
                f"git clone -b {RAY_TEST_BRANCH} {RAY_TEST_REPO} ~/ray",
                cmd,
            ]

            step_conf["label"] = f"{test_name} ({RAY_BRANCH}) - " \
                                 f"{RAY_TEST_BRANCH}/{test_base}"
            all_steps.append(step_conf)

    return all_steps


def alert_pipeline(stats: bool = False):
    step_conf = copy.deepcopy(DEFAULT_STEP_TEMPLATE)

    cmd = "python release/alert.py"
    if stats:
        cmd += " --stats"

    step_conf["commands"] = [
        "pip install -q -r release/requirements.txt",
        "pip install -U boto3 botocore",
        cmd,
    ]
    step_conf["label"] = f"Send periodic alert (stats_only = {stats})"
    return [step_conf]


if __name__ == "__main__":
    alert = os.environ.get("RELEASE_ALERT", "0")

    if alert in ["1", "stats"]:
        steps = alert_pipeline(alert == "stats")
    else:
        TEST_SUITE = os.environ.get("RELEASE_TEST_SUITE", "nightly")
        PIPELINE_SPEC = SUITES[TEST_SUITE]

        steps = build_pipeline(PIPELINE_SPEC)

    yaml.dump({"steps": steps}, sys.stdout)
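The string-style dunder methods on ReleaseTest are what let the FILTER_TEST substring check treat plain strings and SmokeTest entries uniformly: `FILTER_TEST not in test_name` works whether test_name is a str or a ReleaseTest. A minimal illustrative sketch of that behavior, assuming the ReleaseTest/SmokeTest classes defined above (the test names here are picked for illustration):

tests = ["shuffle_10gb", SmokeTest("stress_test_many_tasks", retry=2)]

for test_name in tests:
    # Substring filtering works for both entry types: str uses native `in`,
    # ReleaseTest delegates to __contains__ on self.name.
    if "stress" not in test_name:
        continue
    # build_pipeline wraps plain strings in ReleaseTest before this point,
    # so attribute access is safe for the surviving entry.
    print(test_name, test_name.smoke_test, test_name.retry)
# -> stress_test_many_tasks True 2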
0 release/__init__.py Normal file
401 release/alert.py Normal file
@@ -0,0 +1,401 @@
import argparse
from collections import defaultdict, Counter
from typing import Any, List, Tuple, Mapping, Optional
import datetime
import hashlib
import json
import logging
import os
import requests
import sys

import boto3

from e2e import GLOBAL_CONFIG

from alerts.default import handle_result as default_handle_result
from alerts.rllib_tests import handle_result as rllib_tests_handle_result
from alerts.long_running_tests import handle_result as \
    long_running_tests_handle_result
from alerts.tune_tests import handle_result as tune_tests_handle_result
from alerts.xgboost_tests import handle_result as xgboost_tests_handle_result

SUITE_TO_FN = {
    "long_running_tests": long_running_tests_handle_result,
    "rllib_tests": rllib_tests_handle_result,
    "tune_tests": tune_tests_handle_result,
    "xgboost_tests": xgboost_tests_handle_result,
}

GLOBAL_CONFIG["RELEASE_AWS_DB_STATE_TABLE"] = "alert_state"
GLOBAL_CONFIG["SLACK_WEBHOOK"] = os.environ.get("SLACK_WEBHOOK", "")
GLOBAL_CONFIG["SLACK_CHANNEL"] = os.environ.get("SLACK_CHANNEL",
                                                "#oss-test-cop")

logger = logging.getLogger()
logger.setLevel(logging.INFO)
handler = logging.StreamHandler(stream=sys.stdout)
formatter = logging.Formatter(fmt="[%(levelname)s %(asctime)s] "
                                  "%(filename)s: %(lineno)d "
                                  "%(message)s")
handler.setFormatter(formatter)
logger.addHandler(handler)


def maybe_fetch_slack_webhook():
    if GLOBAL_CONFIG["SLACK_WEBHOOK"] in [None, ""]:
        print("Missing SLACK_WEBHOOK, retrieving from AWS secrets store")
        GLOBAL_CONFIG["SLACK_WEBHOOK"] = boto3.client(
            "secretsmanager", region_name="us-west-2"
        ).get_secret_value(
            SecretId="arn:aws:secretsmanager:us-west-2:029272617770:secret:"
                     "release-automation/"
                     "slack-webhook-Na0CFP")["SecretString"]


def _obj_hash(obj: Any) -> str:
    json_str = json.dumps(obj, sort_keys=True, ensure_ascii=True)
    sha = hashlib.sha256()
    sha.update(json_str.encode())
    return sha.hexdigest()


def fetch_latest_alerts(rds_data_client):
    schema = GLOBAL_CONFIG["RELEASE_AWS_DB_STATE_TABLE"]

    sql = (f"""
        SELECT DISTINCT ON (category, test_suite, test_name)
            category, test_suite, test_name, last_result_hash,
            last_notification_dt
        FROM {schema}
        ORDER BY category, test_suite, test_name, last_notification_dt DESC
        """)

    result = rds_data_client.execute_statement(
        database=GLOBAL_CONFIG["RELEASE_AWS_DB_NAME"],
        secretArn=GLOBAL_CONFIG["RELEASE_AWS_DB_SECRET_ARN"],
        resourceArn=GLOBAL_CONFIG["RELEASE_AWS_DB_RESOURCE_ARN"],
        schema=schema,
        sql=sql,
    )
    for row in result["records"]:
        category, test_suite, test_name, last_result_hash, \
            last_notification_dt = (
                r["stringValue"]
                if "stringValue" in r else None
                for r in row
            )
        last_notification_dt = datetime.datetime.strptime(
            last_notification_dt, "%Y-%m-%d %H:%M:%S")
        yield category, test_suite, test_name, last_result_hash, \
            last_notification_dt


def fetch_latest_results(rds_data_client,
                         fetch_since: Optional[datetime.datetime] = None):
    schema = GLOBAL_CONFIG["RELEASE_AWS_DB_TABLE"]

    sql = (f"""
        SELECT DISTINCT ON (category, test_suite, test_name)
            created_on, category, test_suite, test_name, status, results,
            artifacts, last_logs
        FROM {schema} """)

    parameters = []
    if fetch_since is not None:
        sql += "WHERE created_on >= :created_on "
        parameters = [
            {
                "name": "created_on",
                "typeHint": "TIMESTAMP",
                "value": {
                    "stringValue": fetch_since.strftime("%Y-%m-%d %H:%M:%S")
                },
            },
        ]

    sql += "ORDER BY category, test_suite, test_name, created_on DESC"

    result = rds_data_client.execute_statement(
        database=GLOBAL_CONFIG["RELEASE_AWS_DB_NAME"],
        secretArn=GLOBAL_CONFIG["RELEASE_AWS_DB_SECRET_ARN"],
        resourceArn=GLOBAL_CONFIG["RELEASE_AWS_DB_RESOURCE_ARN"],
        schema=schema,
        sql=sql,
        parameters=parameters,
    )
    for row in result["records"]:
        created_on, category, test_suite, test_name, status, results, \
            artifacts, last_logs = (
                r["stringValue"] if "stringValue" in r else None for r in row)

        # Calculate hash before converting strings to objects
        result_obj = (created_on, category, test_suite, test_name, status,
                      results, artifacts, last_logs)
        result_json = json.dumps(result_obj)
        result_hash = _obj_hash(result_json)

        # Convert some strings to python objects
        created_on = datetime.datetime.strptime(created_on,
                                                "%Y-%m-%d %H:%M:%S")
        results = json.loads(results)
        artifacts = json.loads(artifacts)

        yield result_hash, created_on, category, test_suite, test_name, \
            status, results, artifacts, last_logs


def mark_as_handled(rds_data_client, update: bool, category: str,
                    test_suite: str, test_name: str, result_hash: str,
                    last_notification_dt: datetime.datetime):
    schema = GLOBAL_CONFIG["RELEASE_AWS_DB_STATE_TABLE"]

    if not update:
        sql = (f"""
            INSERT INTO {schema}
            (category, test_suite, test_name,
             last_result_hash, last_notification_dt)
            VALUES (:category, :test_suite, :test_name,
                    :last_result_hash, :last_notification_dt)
            """)
    else:
        sql = (f"""
            UPDATE {schema}
            SET last_result_hash=:last_result_hash,
                last_notification_dt=:last_notification_dt
            WHERE category=:category AND test_suite=:test_suite
                AND test_name=:test_name
            """)

    rds_data_client.execute_statement(
        database=GLOBAL_CONFIG["RELEASE_AWS_DB_NAME"],
        parameters=[
            {
                "name": "category",
                "value": {
                    "stringValue": category
                }
            },
            {
                "name": "test_suite",
                "value": {
                    "stringValue": test_suite or ""
                }
            },
            {
                "name": "test_name",
                "value": {
                    "stringValue": test_name
                }
            },
            {
                "name": "last_result_hash",
                "value": {
                    "stringValue": result_hash
                }
            },
            {
                "name": "last_notification_dt",
                "typeHint": "TIMESTAMP",
                "value": {
                    "stringValue": last_notification_dt.strftime(
                        "%Y-%m-%d %H:%M:%S")
                },
            },
        ],
        secretArn=GLOBAL_CONFIG["RELEASE_AWS_DB_SECRET_ARN"],
        resourceArn=GLOBAL_CONFIG["RELEASE_AWS_DB_RESOURCE_ARN"],
        schema=schema,
        sql=sql,
    )


def post_alerts_to_slack(channel: str, alerts: List[Tuple[str, str, str, str]],
                         non_alerts: Mapping[str, int]):
    if len(alerts) == 0:
        logger.info("No alerts to post to slack.")
        return

    markdown_lines = [
        f"*{len(alerts)} new release test failures found!*",
        "",
    ]

    category_alerts = defaultdict(list)
    for (category, test_suite, test_name, alert) in alerts:
        category_alerts[category].append(
            f"  *{test_suite}/{test_name}* failed: {alert}")

    for category, alert_list in category_alerts.items():
        markdown_lines.append(f"Branch: *{category}*")
        markdown_lines.extend(alert_list)
        markdown_lines.append("")

    total_non_alerts = sum(n for n in non_alerts.values())
    non_alert_detail = [f"{n} on {c}" for c, n in non_alerts.items()]

    markdown_lines += [
        f"Additionally, {total_non_alerts} tests passed successfully "
        f"({', '.join(non_alert_detail)})."
    ]

    slack_url = GLOBAL_CONFIG["SLACK_WEBHOOK"]

    resp = requests.post(
        slack_url,
        json={
            "text": "\n".join(markdown_lines),
            "channel": channel,
            "username": "Fail Bot",
            "icon_emoji": ":red_circle:",
        },
    )
    print(resp.status_code)
    print(resp.text)


def post_statistics_to_slack(channel: str,
                             alerts: List[Tuple[str, str, str, str]],
                             non_alerts: Mapping[str, int]):
    total_alerts = len(alerts)

    category_alerts = defaultdict(list)
    for (category, test_suite, test_name, alert) in alerts:
        category_alerts[category].append(f"`{test_suite}/{test_name}`")

    alert_detail = [f"{len(a)} on {c}" for c, a in category_alerts.items()]

    total_non_alerts = sum(n for n in non_alerts.values())
    non_alert_detail = [f"{n} on {c}" for c, n in non_alerts.items()]

    markdown_lines = [
        "*Periodic release test report*", "", f"In the past 24 hours, "
        f"*{total_non_alerts}* release tests finished successfully, and "
        f"*{total_alerts}* release tests failed."
    ]

    markdown_lines.append("")

    if total_alerts:
        markdown_lines.append(f"*Failing:* {', '.join(alert_detail)}")
        for c, a in category_alerts.items():
            markdown_lines.append(f"  *{c}*: {', '.join(sorted(a))}")
    else:
        markdown_lines.append("*Failing:* None")

    markdown_lines.append("")

    if total_non_alerts:
        markdown_lines.append(f"*Passing:* {', '.join(non_alert_detail)}")
    else:
        markdown_lines.append("*Passing:* None")

    slack_url = GLOBAL_CONFIG["SLACK_WEBHOOK"]

    resp = requests.post(
        slack_url,
        json={
            "text": "\n".join(markdown_lines),
            "channel": channel,
            "username": "Fail Bot",
            "icon_emoji": ":red_circle:",
        },
    )
    print(resp.status_code)
    print(resp.text)


def handle_results_and_get_alerts(
        rds_data_client,
        fetch_since: Optional[datetime.datetime] = None,
        always_try_alert: bool = False,
        no_status_update: bool = False):
    # First build a map of last notifications
    last_notifications_map = {}
    for category, test_suite, test_name, last_result_hash, \
            last_notification_dt in fetch_latest_alerts(rds_data_client):
        last_notifications_map[(category, test_suite,
                                test_name)] = (last_result_hash,
                                               last_notification_dt)

    alerts = []
    non_alerts = Counter()

    # Then fetch latest results
    for result_hash, created_on, category, test_suite, test_name, status, \
            results, artifacts, last_logs in fetch_latest_results(
                rds_data_client, fetch_since=fetch_since):
        key = (category, test_suite, test_name)

        try_alert = always_try_alert
        if key in last_notifications_map:
            # If we have an alert for this key, fetch info
            last_result_hash, last_notification_dt = last_notifications_map[
                key]

            if last_result_hash != result_hash:
                # If we got a new result, handle new result
                try_alert = True
            # Todo: maybe alert again after some time?
        else:
            try_alert = True

        if try_alert:
            handle_fn = SUITE_TO_FN.get(test_suite, None)
            if not handle_fn:
                logger.warning(f"No handle for suite {test_suite}")
                alert = default_handle_result(created_on, category, test_suite,
                                              test_name, status, results,
                                              artifacts, last_logs)
            else:
                alert = handle_fn(created_on, category, test_suite, test_name,
                                  status, results, artifacts, last_logs)

            if alert:
                logger.warning(
                    f"Alert raised for test {test_suite}/{test_name} "
                    f"({category}): {alert}")

                alerts.append((category, test_suite, test_name, alert))
            else:
                logger.debug(
                    f"No alert raised for test {test_suite}/{test_name} "
                    f"({category})")
                non_alerts[category] += 1

            if not no_status_update:
                mark_as_handled(rds_data_client, key in last_notifications_map,
                                category, test_suite, test_name, result_hash,
                                datetime.datetime.now())

    return alerts, non_alerts


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--stats",
        action="store_true",
        default=False,
        help="Send a periodic statistics report instead of failure alerts.")
    args = parser.parse_args()

    maybe_fetch_slack_webhook()

    rds_data_client = boto3.client("rds-data", region_name="us-west-2")

    if args.stats:
        # Only update last 24 hour stats
        fetch_since = datetime.datetime.now() - datetime.timedelta(days=1)
        alerts, non_alerts = handle_results_and_get_alerts(
            rds_data_client,
            fetch_since=fetch_since,
            always_try_alert=True,
            no_status_update=True)
        post_statistics_to_slack(GLOBAL_CONFIG["SLACK_CHANNEL"], alerts,
                                 non_alerts)

    else:
        alerts, non_alerts = handle_results_and_get_alerts(rds_data_client)
        post_alerts_to_slack(GLOBAL_CONFIG["SLACK_CHANNEL"], alerts,
                             non_alerts)
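The alert deduplication hinges on _obj_hash being deterministic: json.dumps with sort_keys=True canonicalizes the payload, so an unchanged result re-read on the next run hashes to the same value stored in alert_state and no duplicate notification fires. A small standalone sketch of that property (values invented for illustration):

import hashlib
import json


def obj_hash(obj):
    # Same approach as _obj_hash above: canonical JSON, then SHA-256.
    json_str = json.dumps(obj, sort_keys=True, ensure_ascii=True)
    return hashlib.sha256(json_str.encode()).hexdigest()


a = obj_hash({"status": "finished", "results": {"time_taken": 42}})
b = obj_hash({"results": {"time_taken": 42}, "status": "finished"})
assert a == b  # key order is irrelevant, so unchanged results dedupe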
0 release/alerts/__init__.py Normal file
13 release/alerts/default.py Normal file
@@ -0,0 +1,13 @@
import datetime

from typing import Dict, Optional


def handle_result(created_on: datetime.datetime, category: str,
                  test_suite: str, test_name: str, status: str, results: Dict,
                  artifacts: Dict, last_logs: str) -> Optional[str]:

    if not status == "finished":
        return f"Test script did not finish successfully ({status})."

    return None
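All alert handlers share this signature and contract: return None when the result looks healthy, or a human-readable message when an alert should fire; alert.py dispatches to suite-specific handlers via SUITE_TO_FN and falls back to this default. A hypothetical invocation (argument values invented for illustration; the import assumes the working directory is release/):

import datetime

from alerts.default import handle_result

alert = handle_result(
    created_on=datetime.datetime.now(), category="master",
    test_suite="some_suite", test_name="some_test", status="error",
    results={}, artifacts={}, last_logs="")
print(alert)  # -> Test script did not finish successfully (error).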
32 release/alerts/long_running_tests.py Normal file
@@ -0,0 +1,32 @@
import datetime

from typing import Dict, Optional


def handle_result(created_on: datetime.datetime, category: str,
                  test_suite: str, test_name: str, status: str, results: Dict,
                  artifacts: Dict, last_logs: str) -> Optional[str]:
    assert test_suite == "long_running_tests"

    # elapsed_time = results.get("elapsed_time", 0.)
    last_update_diff = results.get("last_update_diff", float("inf"))

    if test_name in [
            "actor_deaths", "many_actor_tasks", "many_drivers", "many_tasks",
            "many_tasks_serialized_ids", "node_failures",
            "object_spilling_shuffle", "serve", "serve_failure"
    ]:
        # Core tests
        target_update_diff = 120

    elif test_name in ["apex", "impala", "many_ppo", "pbt"]:
        # Tune/RLLib style tests
        target_update_diff = 360
    else:
        return None

    if last_update_diff > target_update_diff:
        return f"Last update to results json was too long ago " \
               f"({last_update_diff:.2f} > {target_update_diff})"

    return None
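Long-running tests report a last_update_diff, the number of seconds since the test last wrote its results file, and the handler alerts on staleness: more than 120s for core tests, more than 360s for the Tune/RLlib-style tests. A hypothetical example (payload invented for illustration; the import assumes the working directory is release/):

import datetime

from alerts.long_running_tests import handle_result

# "many_drivers" is in the core list, so the freshness threshold is 120s.
alert = handle_result(
    datetime.datetime.now(), "nightly", "long_running_tests", "many_drivers",
    "finished", {"last_update_diff": 200.0}, {}, "")
print(alert)  # -> Last update to results json was too long ago (200.00 > 120)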
14 release/alerts/rllib_tests.py Normal file
@@ -0,0 +1,14 @@
import datetime

from typing import Dict, Optional


def handle_result(created_on: datetime.datetime, category: str,
                  test_suite: str, test_name: str, status: str, results: Dict,
                  artifacts: Dict, last_logs: str) -> Optional[str]:
    assert test_suite == "rllib_tests"

    if not status == "finished":
        return f"Test script did not finish successfully ({status})."

    return None
60 release/alerts/tune_tests.py Normal file
@@ -0,0 +1,60 @@
import datetime

from typing import Dict, Optional


def handle_result(created_on: datetime.datetime, category: str,
                  test_suite: str, test_name: str, status: str, results: Dict,
                  artifacts: Dict, last_logs: str) -> Optional[str]:
    assert test_suite == "tune_tests"

    msg = ""
    success = status == "finished"
    time_taken = results.get("time_taken", float("inf"))
    num_terminated = results.get("trial_states", {}).get("TERMINATED", 0)
    was_smoke_test = results.get("smoke_test", False)

    if not success:
        if status == "timeout":
            msg += "Test timed out."
        else:
            msg += "Test script failed. "

    if test_name == "long_running_large_checkpoints":
        last_update_diff = results.get("last_update_diff", float("inf"))
        target_update_diff = 360

        if last_update_diff > target_update_diff:
            return f"Last update to results json was too long ago " \
                   f"({last_update_diff:.2f} > {target_update_diff})"
        return None

    elif test_name == "bookkeeping_overhead":
        target_terminated = 10000
        target_time = 800
    elif test_name == "durable_trainable":
        target_terminated = 16
        target_time = 600
    elif test_name == "network_overhead":
        target_terminated = 100 if not was_smoke_test else 20
        target_time = 900 if not was_smoke_test else 400
    elif test_name == "result_throughput_cluster":
        target_terminated = 1000
        target_time = 120
    elif test_name == "result_throughput_single_node":
        target_terminated = 96
        target_time = 120
    elif test_name == "xgboost_sweep":
        target_terminated = 31
        target_time = 3600
    else:
        return None

    if num_terminated < target_terminated:
        msg += f"Some trials failed " \
               f"(num_terminated={num_terminated} < {target_terminated}). "
    if time_taken > target_time:
        msg += f"Took too long to complete " \
               f"(time_taken={time_taken:.2f} > {target_time}). "

    return msg or None
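Each Tune test is judged on two axes, trial completion count and wall-clock time, and the handler accumulates a message covering every violated target rather than stopping at the first. A hypothetical run against the durable_trainable targets of 16 terminated trials within 600s (payload invented for illustration; the import assumes the working directory is release/):

import datetime

from alerts.tune_tests import handle_result

results = {"time_taken": 700.0, "trial_states": {"TERMINATED": 12}}
alert = handle_result(
    datetime.datetime.now(), "nightly", "tune_tests", "durable_trainable",
    "finished", results, {}, "")
print(alert)
# -> Some trials failed (num_terminated=12 < 16). Took too long to
#    complete (time_taken=700.00 > 600).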
58 release/alerts/xgboost_tests.py Normal file
@@ -0,0 +1,58 @@
import datetime

from typing import Dict, Optional


def handle_result(created_on: datetime.datetime, category: str,
                  test_suite: str, test_name: str, status: str, results: Dict,
                  artifacts: Dict, last_logs: str) -> Optional[str]:
    assert test_suite == "xgboost_tests"

    time_taken = results.get("time_taken", float("inf"))
    num_terminated = results.get("trial_states", {}).get("TERMINATED", 0)

    if test_name in [
            "distributed_api_test", "ft_small_elastic", "ft_small_non_elastic"
    ]:
        if not status == "finished":
            return f"Test script did not finish successfully ({status})."

        return None
    elif test_name.startswith("tune_"):
        msg = ""
        if test_name == "tune_small":
            target_terminated = 4
            target_time = 90
        elif test_name == "tune_4x32":
            target_terminated = 4
            target_time = 120
        elif test_name == "tune_32x4":
            target_terminated = 32
            target_time = 600
        else:
            return None

        if num_terminated < target_terminated:
            msg += f"Some trials failed " \
                   f"(num_terminated={num_terminated} < {target_terminated}). "
        if time_taken > target_time:
            msg += f"Took too long to complete " \
                   f"(time_taken={time_taken} > {target_time}). "

        return msg or None
    else:
        # train scripts
        if test_name == "train_small":
            target_time = 30
        elif test_name == "train_moderate":
            target_time = 60
        elif test_name == "train_gpu":
            target_time = 40
        else:
            return None

        if time_taken > target_time:
            return f"Took too long to complete " \
                   f"(time_taken={time_taken:.2f} > {target_time}). "

        return None
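The xgboost handler splits on test flavor: API and fault-tolerance tests only need a clean finish, tune_* tests are held to trial-count and runtime targets, and train_* scripts only to runtime. A hypothetical passing train_small run (payload invented for illustration; the import assumes the working directory is release/):

import datetime

from alerts.xgboost_tests import handle_result

alert = handle_result(
    datetime.datetime.now(), "nightly", "xgboost_tests", "train_small",
    "finished", {"time_taken": 25.0}, {}, "")
assert alert is None  # 25s is within the 30s target, so no alert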
214 release/config_generator.html Normal file
@@ -0,0 +1,214 @@
<!doctype html>
<html>
<head>
  <meta charset="utf-8">
  <title>Releaser config generator</title>
  <style type="text/css">
    html {
      background: #cccccc;
    }
    body {
      background: #ffffff;
      font-family: sans-serif;
      padding: 1em 2em;
      max-width: 800px;
      margin: 0 auto;
    }
    textarea {
      width: 600px;
      height: 200px;
    }
    form .use {
      white-space: nowrap;
      padding-right: 1em;
    }
    form .val {
      min-width: 300px;
    }
    form .val input {
      width: 90%;
    }
    form .desc {
    }
  </style>
  <script type="text/javascript">
    var env_vars = [
      {
        "name": "RAY_TEST_REPO",
        "short": "Git repo with test files",
        "long": "Repository containing the test files you would like to run. Note that this doesn't have to be the same repo from which the wheels are installed.",
        "default": "https://github.com/ray-project/ray.git",
        "enabled": false,
      },
      {
        "name": "RAY_TEST_BRANCH",
        "short": "Git branch for test repo",
        "long": "Git branch that is checked out from RAY_TEST_REPO and which contains the test files you would like to run. Note that this doesn't have to be the same branch you're fetching the Ray wheels from.",
        "default": "master",
        "enabled": false,
      },
      {
        "name": "RAY_REPO",
        "short": "Git repo for the Ray wheels",
        "long": "Repository from which to fetch the latest commits to find the Ray wheels",
        "default": "https://github.com/ray-project/ray.git",
        "enabled": false,
      },
      {
        "name": "RAY_BRANCH",
        "short": "Git branch for the Ray wheels",
        "long": "Branch that is checked out from RAY_REPO and from which the latest commits are fetched to find the Ray wheels",
        "default": "master",
        "enabled": true,
      },
      {
        "name": "RELEASE_TEST_SUITE",
        "short": "Release test suite (nightly/weekly/manual)",
        "long": "Release test suite as defined in releaser's build_pipeline.py",
        "default": "nightly",
        "enabled": true,
      },
      {
        "name": "FILTER_FILE",
        "short": "Filter test file by this string",
        "long": "Only test files (e.g. xgboost_tests.yaml) that match this string will be included in the test",
        "default": "",
        "enabled": false,
      },
      {
        "name": "FILTER_TEST",
        "short": "Filter test name by this string",
        "long": "Only test names (e.g. tune_4x32) that match this string will be included in the test",
        "default": "",
        "enabled": false,
      },
    ]

    window.addEventListener('load', function () {

      var table = document.getElementById("gen_table");

      for (var env_var of env_vars) {

        var use_td = document.createElement("td");
        use_td.setAttribute("class", "use");

        var use_input = document.createElement("input");
        use_input.setAttribute("type", "checkbox");
        use_input.setAttribute("data-activate", env_var["name"] + "_val");
        use_input.setAttribute("id", env_var["name"] + "_use");
        use_input.setAttribute("class", "input_use");
        if (env_var["enabled"]) {
          use_input.checked = true;
        }

        var use_label = document.createElement("label");
        use_label.setAttribute("for", env_var["name"] + "_use");
        use_label.innerHTML = env_var["name"];

        use_td.append(use_input);
        use_td.append(use_label);

        var val_td = document.createElement("td");
        val_td.setAttribute("class", "val");

        var val_input = document.createElement("input");
        val_input.setAttribute("type", "text");
        if (!env_var["enabled"]) {
          val_input.setAttribute("disabled", "disabled");
        }
        val_input.setAttribute("id", env_var["name"] + "_val");
        val_input.setAttribute("name", env_var["name"]);
        val_input.setAttribute("value", env_var["default"]);
        val_input.setAttribute("class", "input_val");

        val_td.append(val_input);

        use_input.addEventListener("click", function(e) {
          var toggle_val = document.getElementById(e.target.getAttribute("data-activate"))

          if (toggle_val.disabled) {
            toggle_val.removeAttribute("disabled");
          } else {
            toggle_val.setAttribute("disabled", "disabled");
          }
          generate_snippet();
        });

        val_input.addEventListener("change", function() { generate_snippet(); });
        val_input.addEventListener("keydown", function() { generate_snippet(); });
        val_input.addEventListener("keyup", function() { generate_snippet(); });

        var desc_td = document.createElement("td");
        desc_td.setAttribute("class", "desc");

        var desc_a = document.createElement("a");
        desc_a.setAttribute("title", env_var["long"]);
        desc_a.innerHTML = env_var["short"];

        desc_td.append(desc_a);

        var tr = document.createElement("tr");
        tr.append(use_td);
        tr.append(val_td);
        tr.append(desc_td);

        table.append(tr);
      }

      var button = document.getElementById("generate");
      button.addEventListener("click", function() {
        generate_snippet();
      })

      generate_snippet()
    })

    function generate_snippet() {
      var full_snippet = ""
      for (var env_var of env_vars) {
        var val_input = document.getElementById(env_var["name"] + "_val")

        if (!val_input.disabled) {
          full_snippet += env_var["name"] + "=\"" + val_input.value + "\"\n"
        }
      }

      document.getElementById("snippet").innerHTML = full_snippet;
    }

  </script>
</head>
<body>
  <header class="header">
    <h1>Releaser config generator</h1>
    <p>Use this form to generate a list of environment variables.</p>
    <p>These variables can be passed to Buildkite to run a subset of release tests
      and choose the correct wheels/release test branch</p>
  </header>
  <section class="main">
    <form id="gen">
      <table id="gen_table">
        <tr>
          <th>Set</th>
          <th>Value</th>
          <th>Description</th>
        </tr>

      </table>

    </form>

    <div>
      <button id="generate">Generate snippet</button>
    </div>

    <div>
      <textarea id="snippet">
      </textarea>
    </div>
  </section>
</body>
</html>
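With the defaults above, only RAY_BRANCH and RELEASE_TEST_SUITE start out enabled, so on page load the snippet textarea would be filled with:

RAY_BRANCH="master"
RELEASE_TEST_SUITE="nightly"

Pasting such a snippet into a Buildkite build's environment selects the wheels branch and the test suite defined in build_pipeline.py.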
1709 release/e2e.py Normal file
File diff suppressed because it is too large
15 release/requirements.txt Normal file
@@ -0,0 +1,15 @@
ray
click
anyscale
slackclient
boto3
PyGithub
pydantic
pyyaml
typer[all]
toml
python-dotenv
expiringdict
requests
pytz
git+https://github.com/ray-project/xgboost_ray.git#xgboost_ray