
This PR migrates the scalability tests to the new infra. I had to copy the benchmarks folder into the release folder to make it work. I will remove some unnecessary files (e.g., benchmark.yaml or the wait_for_cluster file). Alternatively, we could support a path other than /release from the tool, but I think this way is cleaner. I am open to suggestions though. cc @krfricke
680 lines
22 KiB
Python
import copy
import logging
import os
import re
import sys

import yaml

# If you update or reorganize the periodic tests, please ensure the
# relevant portions of the Ray release instructions (go/release-ray)
# (in particular, running periodic tests and collecting release logs)
# are up to date. If you need access, please contact @zhe-thoughts.

# Env variables:

# RAY_REPO            Repo to use for finding the wheel
# RAY_BRANCH          Branch to find the wheel
# RAY_VERSION         Version to find the wheel
# RAY_WHEELS          Direct Ray wheel URL
# RAY_TEST_REPO       Repo to use for test scripts
# RAY_TEST_BRANCH     Branch for test scripts
# FILTER_FILE         File filter
# FILTER_TEST         Test name filter
# RELEASE_TEST_SUITE  Release test suite (e.g. manual, nightly)
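
# Example invocation (illustrative; the suite name is an assumption): with the
# variables above exported, the pipeline is regenerated and uploaded the same
# way the run_again_step below does it, e.g.
#   RELEASE_TEST_SUITE=core-nightly AUTOMATIC=1 \
#       python3 ~/ray/release/.buildkite/build_pipeline.py | buildkite-agent pipeline upload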

class ReleaseTest:
    def __init__(
        self,
        name: str,
        smoke_test: bool = False,
        retry: int = 0,
    ):
        self.name = name
        self.smoke_test = smoke_test
        self.retry = retry

    def __str__(self):
        return self.name

    def __repr__(self):
        return self.name

    def __contains__(self, item):
        return self.name.__contains__(item)

    def __iter__(self):
        return iter(self.name)

    def __len__(self):
        return len(self.name)


class SmokeTest(ReleaseTest):
    def __init__(self, name: str, retry: int = 0):
        super(SmokeTest, self).__init__(name=name, smoke_test=True, retry=retry)
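
# Note: ReleaseTest intentionally behaves like its name string (__str__,
# __contains__, __iter__, __len__), so the suite definitions below can mix
# plain strings with ReleaseTest/SmokeTest instances, and the FILTER_TEST
# substring check works on both. Minimal illustration (documentation only):
#   >>> "spilling" in SmokeTest("dask_on_ray_large_scale_test_no_spilling")
#   True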

CORE_NIGHTLY_TESTS = {
    "~/ray/release/nightly_tests/nightly_tests.yaml": [
        # "shuffle_10gb",
        "shuffle_50gb",
        "shuffle_50gb_large_partition",
        "shuffle_100gb",
        "non_streaming_shuffle_100gb",
        "non_streaming_shuffle_50gb_large_partition",
        "non_streaming_shuffle_50gb",
        SmokeTest("dask_on_ray_large_scale_test_no_spilling"),
        SmokeTest("dask_on_ray_large_scale_test_spilling"),
        "stress_test_placement_group",
        "shuffle_1tb_1000_partition",
        "non_streaming_shuffle_1tb_1000_partition",
        "shuffle_1tb_5000_partitions",
        # TODO(sang): It doesn't even work without spilling
        # as it hits the scalability limit.
        # "non_streaming_shuffle_1tb_5000_partitions",
        "decision_tree_autoscaling",
        "decision_tree_autoscaling_20_runs",
        "autoscaling_shuffle_1tb_1000_partitions",
        SmokeTest("stress_test_many_tasks"),
        SmokeTest("stress_test_dead_actors"),
        SmokeTest("threaded_actors_stress_test"),
        "pg_long_running_performance_test",
    ],
    # "~/ray/benchmarks/benchmark_tests.yaml": [
    # "single_node",
    # "object_store",
    # "many_actors_smoke_test",
    # "many_tasks_smoke_test",
    # "many_pgs_smoke_test",
    # ],
    "~/ray/release/nightly_tests/dataset/dataset_test.yaml": [
        "inference",
        "shuffle_data_loader",
        "parquet_metadata_resolution",
        "pipelined_training_50_gb",
        "pipelined_ingestion_1500_gb",
        "datasets_preprocess_ingest",
        "datasets_ingest_400G",
        SmokeTest("datasets_ingest_train_infer"),
    ],
    "~/ray/release/nightly_tests/chaos_test.yaml": [
        "chaos_many_actors",
        "chaos_many_tasks_no_object_store",
        "chaos_pipelined_ingestion_1500_gb_15_windows",
    ],
    # "~/ray/release/microbenchmark/microbenchmark.yaml": [
    # "microbenchmark",
    # ],
}

SERVE_NIGHTLY_TESTS = {
    "~/ray/release/long_running_tests/long_running_tests.yaml": [
        SmokeTest("serve"),
        SmokeTest("serve_failure"),
    ],
    "~/ray/release/serve_tests/serve_tests.yaml": [
        "single_deployment_1k_noop_replica",
        "multi_deployment_1k_noop_replica",
        "autoscaling_single_deployment",
        "autoscaling_multi_deployment",
        "serve_micro_benchmark",
        # TODO(architkulkarni) Reenable after K8s migration. Currently failing
        # "serve_micro_benchmark_k8s",
        "serve_cluster_fault_tolerance",
    ],
}

CORE_DAILY_TESTS = {
    "~/ray/release/nightly_tests/nightly_tests.yaml": [
        "k8s_dask_on_ray_large_scale_test_no_spilling",
        "dask_on_ray_large_scale_test_no_spilling",
        "dask_on_ray_large_scale_test_spilling",
        "pg_autoscaling_regression_test",
        "threaded_actors_stress_test",
        "k8s_threaded_actors_stress_test",
        "stress_test_many_tasks",
        "stress_test_dead_actors",
    ],
    "~/ray/release/nightly_tests/chaos_test.yaml": [
        "chaos_dask_on_ray_large_scale_test_no_spilling",
        "chaos_dask_on_ray_large_scale_test_spilling",
    ],
}

CORE_SCALABILITY_TESTS_DAILY = {
    # "~/ray/benchmarks/benchmark_tests.yaml": [
    # "many_actors",
    # "many_tasks",
    # "many_pgs",
    # "many_nodes",
    # ],
}

CORE_SCHEDULING_DAILY = {
    # "~/ray/benchmarks/benchmark_tests.yaml": [
    # "scheduling_test_many_0s_tasks_single_node",
    # "scheduling_test_many_0s_tasks_many_nodes",
    # # Reenable these two once we got right setup
    # # "scheduling_test_many_5s_tasks_single_node",
    # # "scheduling_test_many_5s_tasks_many_nodes",
    # ],
    "~/ray/release/nightly_tests/nightly_tests.yaml": [
        "many_nodes_actor_test",
        "dask_on_ray_10gb_sort",
        "dask_on_ray_100gb_sort",
        "dask_on_ray_1tb_sort",
        "placement_group_performance_test",
    ],
}

NIGHTLY_TESTS = {
    # "~/ray/release/horovod_tests/horovod_tests.yaml": [
    # SmokeTest("horovod_test"),
    # ], # Should we enable this?
    "~/ray/release/golden_notebook_tests/golden_notebook_tests.yaml": [
        "dask_xgboost_test",
        "modin_xgboost_test",
        "torch_tune_serve_test",
    ],
    "~/ray/release/long_running_tests/long_running_tests.yaml": [
        SmokeTest("actor_deaths"),
        SmokeTest("apex"),
        SmokeTest("impala"),
        SmokeTest("many_actor_tasks"),
        SmokeTest("many_drivers"),
        SmokeTest("many_ppo"),
        SmokeTest("many_tasks"),
        SmokeTest("many_tasks_serialized_ids"),
        SmokeTest("node_failures"),
        SmokeTest("pbt"),
        # SmokeTest("serve"),
        # SmokeTest("serve_failure"),
        # Full long running tests (1 day runtime)
        "actor_deaths",
        "apex",
        "impala",
        "many_actor_tasks",
        "many_drivers",
        "many_ppo",
        "many_tasks",
        "many_tasks_serialized_ids",
        "node_failures",
        "pbt",
        "serve",
        "serve_failure",
    ],
    "~/ray/release/sgd_tests/sgd_tests.yaml": [
        "sgd_gpu",
    ],
    # "~/ray/release/tune_tests/cloud_tests/tune_cloud_tests.yaml": [
    # "aws_no_sync_down",
    # "aws_ssh_sync",
    # "aws_durable_upload",
    # "aws_durable_upload_rllib_str",
    # "aws_durable_upload_rllib_trainer",
    # "gcp_k8s_durable_upload",
    # ],
    # "~/ray/release/tune_tests/scalability_tests/tune_tests.yaml": [
    # "bookkeeping_overhead",
    # "durable_trainable",
    # SmokeTest("long_running_large_checkpoints"),
    # SmokeTest("network_overhead"),
    # "result_throughput_cluster",
    # "result_throughput_single_node",
    # ],
    # "~/ray/release/xgboost_tests/xgboost_tests.yaml": [
    # "train_small",
    # "train_moderate",
    # "train_gpu",
    # "tune_small",
    # "tune_4x32",
    # "tune_32x4",
    # "ft_small_elastic",
    # "ft_small_non_elastic",
    # "distributed_api_test",
    # ],
    "~/ray/release/rllib_tests/rllib_tests.yaml": [
        SmokeTest("learning_tests"),
        SmokeTest("stress_tests"),
        "performance_tests",
        "multi_gpu_learning_tests",
        "multi_gpu_with_lstm_learning_tests",
        "multi_gpu_with_attention_learning_tests",
        # We'll have these as per-PR tests soon.
        # "example_scripts_on_gpu_tests",
    ],
    "~/ray/release/runtime_env_tests/runtime_env_tests.yaml": [
        "rte_many_tasks_actors",
        "wheel_urls",
        "rte_ray_client",
    ],
}

WEEKLY_TESTS = {
    "~/ray/release/horovod_tests/horovod_tests.yaml": [
        "horovod_test",
    ],
    "~/ray/release/long_running_distributed_tests"
    "/long_running_distributed.yaml": [
        "pytorch_pbt_failure",
    ],
    # "~/ray/release/tune_tests/scalability_tests/tune_tests.yaml": [
    # "network_overhead",
    # "long_running_large_checkpoints",
    # "xgboost_sweep",
    # ],
    "~/ray/release/rllib_tests/rllib_tests.yaml": [
        "learning_tests",
        "stress_tests",
    ],
}

# This test suite holds "user" tests to test important user workflows
# in a particular environment.
# All workloads in this test suite should:
# 1. Be run in a distributed (multi-node) fashion
# 2. Use autoscaling/scale up (no wait_cluster.py)
# 3. Use GPUs if applicable
# 4. Have the `use_connect` flag set.
USER_TESTS = {
    "~/ray/release/ml_user_tests/ml_user_tests.yaml": [
        "train_tensorflow_mnist_test",
        "train_torch_linear_test",
        "ray_lightning_user_test_latest",
        "ray_lightning_user_test_master",
        "horovod_user_test_latest",
        "horovod_user_test_master",
        "xgboost_gpu_connect_latest",
        "xgboost_gpu_connect_master",
        "tune_rllib_connect_test",
    ]
}
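
# SUITES maps the valid RELEASE_TEST_SUITE values to the test collections
# above; each collection maps a test YAML file (as checked out under ~/ray on
# the runner) to the tests to run from that file.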

SUITES = {
    "core-nightly": CORE_NIGHTLY_TESTS,
    "serve-nightly": SERVE_NIGHTLY_TESTS,
    "core-daily": CORE_DAILY_TESTS,
    "core-scalability": CORE_SCALABILITY_TESTS_DAILY,
    "nightly": {**NIGHTLY_TESTS, **USER_TESTS},
    "core-scheduling-daily": CORE_SCHEDULING_DAILY,
    "weekly": WEEKLY_TESTS,
}
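
# Base Buildkite step configuration. create_test_step() and alert_pipeline()
# deep-copy this template before filling in per-step fields (command, label,
# retry), so per-step mutations never leak between steps.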

DEFAULT_STEP_TEMPLATE = {
    "env": {
        "ANYSCALE_CLOUD_ID": "cld_4F7k8814aZzGG8TNUGPKnc",
        "ANYSCALE_PROJECT": "prj_2xR6uT6t7jJuu1aCwWMsle",
        "RELEASE_AWS_BUCKET": "ray-release-automation-results",
        "RELEASE_AWS_LOCATION": "dev",
        "RELEASE_AWS_DB_NAME": "ray_ci",
        "RELEASE_AWS_DB_TABLE": "release_test_result",
        "AWS_REGION": "us-west-2",
    },
    "agents": {"queue": "runner_queue_branch"},
    "plugins": [
        {
            "docker#v3.9.0": {
                "image": "rayproject/ray",
                "propagate-environment": True,
                "volumes": [
                    "/tmp/ray_release_test_artifacts:" "/tmp/ray_release_test_artifacts"
                ],
            }
        }
    ],
    "artifact_paths": ["/tmp/ray_release_test_artifacts/**/*"],
}
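
# ask_configuration() serves manually triggered builds: it emits an input step
# that collects the variables documented at the top of this file, plus a
# follow-up step that re-runs this script with AUTOMATIC=1 so the actual test
# steps are generated and uploaded.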

def ask_configuration():
    RAY_BRANCH = os.environ.get("RAY_BRANCH", "master")
    RAY_REPO = os.environ.get("RAY_REPO", "https://github.com/ray-project/ray.git")
    RAY_VERSION = os.environ.get("RAY_VERSION", "")
    RAY_WHEELS = os.environ.get("RAY_WHEELS", "")

    RAY_TEST_BRANCH = os.environ.get("RAY_TEST_BRANCH", RAY_BRANCH)
    RAY_TEST_REPO = os.environ.get("RAY_TEST_REPO", RAY_REPO)

    RELEASE_TEST_SUITE = os.environ.get("RELEASE_TEST_SUITE", "nightly")
    FILTER_FILE = os.environ.get("FILTER_FILE", "")
    FILTER_TEST = os.environ.get("FILTER_TEST", "")

    input_ask_step = {
        "input": "Input required: Please specify tests to run",
        "fields": [
            {
                "text": (
                    "RAY_REPO: Please specify the Ray repository used "
                    "to find the wheel."
                ),
                "hint": (
                    "Repository from which to fetch the latest "
                    "commits to find the Ray wheels. Usually you don't "
                    "need to change this."
                ),
                "default": RAY_REPO,
                "key": "ray_repo",
            },
            {
                "text": (
                    "RAY_BRANCH: Please specify the Ray branch used "
                    "to find the wheel."
                ),
                "hint": "For releases, this will be e.g. `releases/1.x.0`",
                "default": RAY_BRANCH,
                "key": "ray_branch",
            },
            {
                "text": (
                    "RAY_VERSION: Please specify the Ray version used "
                    "to find the wheel."
                ),
                "hint": (
                    "Leave empty for latest master. For releases, "
                    "specify the release version."
                ),
                "required": False,
                "default": RAY_VERSION,
                "key": "ray_version",
            },
            {
                "text": "RAY_WHEELS: Please specify the Ray wheel URL.",
                "hint": (
                    "ATTENTION: If you provide this, RAY_REPO, "
                    "RAY_BRANCH and RAY_VERSION will be ignored! "
                    "Please also make sure to provide the wheels URL "
                    "for Python 3.7 on Linux.\n"
                    "You can also insert a commit hash here instead "
                    "of a full URL.\n"
                    "NOTE: You can specify multiple commits or URLs "
                    "for easy bisection (one per line) - this will "
                    "run each test on each of the specified wheels."
                ),
                "required": False,
                "default": RAY_WHEELS,
                "key": "ray_wheels",
            },
            {
                "text": (
                    "RAY_TEST_REPO: Please specify the Ray repository "
                    "used to find the tests you would like to run."
                ),
                "hint": (
                    "If you're developing a new release test, this "
                    "will most likely be your GitHub fork."
                ),
                "default": RAY_TEST_REPO,
                "key": "ray_test_repo",
            },
            {
                "text": (
                    "RAY_TEST_BRANCH: Please specify the Ray branch used "
                    "to find the tests you would like to run."
                ),
                "hint": (
                    "If you're developing a new release test, this "
                    "will most likely be a branch living on your "
                    "GitHub fork."
                ),
                "default": RAY_TEST_BRANCH,
                "key": "ray_test_branch",
            },
            {
                "select": (
                    "RELEASE_TEST_SUITE: Please specify the release "
                    "test suite containing the tests you would like "
                    "to run."
                ),
                "hint": (
                    "Check in the `build_pipeline.py` if you're "
                    "unsure which suite contains your tests."
                ),
                "required": True,
                "options": sorted(SUITES.keys()),
                "default": RELEASE_TEST_SUITE,
                "key": "release_test_suite",
            },
            {
                "text": (
                    "FILTER_FILE: Please specify a filter for the "
                    "test files that should be included in this build."
                ),
                "hint": (
                    "Only test files (e.g. xgboost_tests.yml) that "
                    "match this string will be included in the test"
                ),
                "default": FILTER_FILE,
                "required": False,
                "key": "filter_file",
            },
            {
                "text": (
                    "FILTER_TEST: Please specify a filter for the "
                    "test names that should be included in this build."
                ),
                "hint": (
                    "Only test names (e.g. tune_4x32) that match "
                    "this string will be included in the test"
                ),
                "default": FILTER_TEST,
                "required": False,
                "key": "filter_test",
            },
        ],
        "key": "input_ask_step",
    }

    run_again_step = {
        "commands": [
            f'export {v}=$(buildkite-agent meta-data get "{k}")'
            for k, v in {
                "ray_branch": "RAY_BRANCH",
                "ray_repo": "RAY_REPO",
                "ray_version": "RAY_VERSION",
                "ray_wheels": "RAY_WHEELS",
                "ray_test_branch": "RAY_TEST_BRANCH",
                "ray_test_repo": "RAY_TEST_REPO",
                "release_test_suite": "RELEASE_TEST_SUITE",
                "filter_file": "FILTER_FILE",
                "filter_test": "FILTER_TEST",
            }.items()
        ]
        + [
            "export AUTOMATIC=1",
            "python3 -m pip install --user pyyaml",
            "rm -rf ~/ray || true",
            "git clone -b $${RAY_TEST_BRANCH} $${RAY_TEST_REPO} ~/ray",
            (
                "python3 ~/ray/release/.buildkite/build_pipeline.py "
                "| buildkite-agent pipeline upload"
            ),
        ],
        "label": ":pipeline: Again",
        "agents": {"queue": "runner_queue_branch"},
        "depends_on": "input_ask_step",
        "key": "run_again_step",
    }

    return [
        input_ask_step,
        run_again_step,
    ]
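
# For a single test, create_test_step() below produces a command of roughly
# this shape (values are illustrative placeholders, not from a real run):
#   ./release/run_e2e.sh --ray-repo "<repo>" --ray-branch "<branch>"
#       --ray-version "" --ray-wheels "" --ray-test-repo "<repo>"
#       --ray-test-branch "<branch>" --category <branch>
#       --test-config <test_file> --test-name <test_name> --keep-results-dir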

def create_test_step(
    ray_repo: str,
    ray_branch: str,
    ray_version: str,
    ray_wheels: str,
    ray_test_repo: str,
    ray_test_branch: str,
    test_file: str,
    test_name: ReleaseTest,
):
    custom_commit_str = "custom_wheels_url"
    if ray_wheels:
        # Extract commit from url
        p = re.compile(r"([a-f0-9]{40})")
        m = p.search(ray_wheels)
        if m is not None:
            custom_commit_str = m.group(1)

    ray_wheels_str = f" ({ray_wheels}) " if ray_wheels else ""

    logging.info(f"Creating step for {test_file}/{test_name}{ray_wheels_str}")

    cmd = (
        f"./release/run_e2e.sh "
        f'--ray-repo "{ray_repo}" '
        f'--ray-branch "{ray_branch}" '
        f'--ray-version "{ray_version}" '
        f'--ray-wheels "{ray_wheels}" '
        f'--ray-test-repo "{ray_test_repo}" '
        f'--ray-test-branch "{ray_test_branch}" '
    )

    args = (
        f"--category {ray_branch} "
        f"--test-config {test_file} "
        f"--test-name {test_name} "
        f"--keep-results-dir"
    )

    if test_name.smoke_test:
        logging.info("This test will run as a smoke test.")
        args += " --smoke-test"

    step_conf = copy.deepcopy(DEFAULT_STEP_TEMPLATE)

    if test_name.retry:
        logging.info(f"This test will be retried up to " f"{test_name.retry} times.")
        step_conf["retry"] = {
            "automatic": [{"exit_status": "*", "limit": test_name.retry}]
        }
    else:
        # Default retry logic
        # Warning: Exit codes are currently not correctly propagated to
        # buildkite! Thus, actual retry logic is currently implemented in
        # the run_e2e.sh script!
        step_conf["retry"] = {
            "automatic": [
                {"exit_status": 7, "limit": 2},  # Prepare timeout
                {"exit_status": 9, "limit": 2},  # Session timeout
                {"exit_status": 10, "limit": 2},  # Prepare error
            ],
        }

    step_conf["command"] = cmd + args

    step_conf["label"] = (
        f"{test_name} "
        f"({custom_commit_str if ray_wheels_str else ray_branch}) - "
        f"{ray_test_branch}/{ray_test_repo}"
    )
    return step_conf
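
# build_pipeline() expands a suite dict into one Buildkite step per
# (test, wheel) combination; passing multiple wheels via RAY_WHEELS (one per
# line) therefore yields a simple bisection across those wheels.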

def build_pipeline(steps):
    all_steps = []

    RAY_BRANCH = os.environ.get("RAY_BRANCH", "master")
    RAY_REPO = os.environ.get("RAY_REPO", "https://github.com/ray-project/ray.git")
    RAY_VERSION = os.environ.get("RAY_VERSION", "")
    RAY_WHEELS = os.environ.get("RAY_WHEELS", "")

    RAY_TEST_BRANCH = os.environ.get("RAY_TEST_BRANCH", RAY_BRANCH)
    RAY_TEST_REPO = os.environ.get("RAY_TEST_REPO", RAY_REPO)

    FILTER_FILE = os.environ.get("FILTER_FILE", "")
    FILTER_TEST = os.environ.get("FILTER_TEST", "")

    ray_wheels_list = [""]
    if RAY_WHEELS:
        ray_wheels_list = RAY_WHEELS.split("\n")

    if len(ray_wheels_list) > 1:
        logging.info(
            f"This will run a bisection on the following URLs/commits: "
            f"{ray_wheels_list}"
        )

    logging.info(
        f"Building pipeline \n"
        f"Ray repo/branch to test:\n"
        f" RAY_REPO = {RAY_REPO}\n"
        f" RAY_BRANCH = {RAY_BRANCH}\n\n"
        f" RAY_VERSION = {RAY_VERSION}\n\n"
        f" RAY_WHEELS = {RAY_WHEELS}\n\n"
        f"Ray repo/branch containing the test configurations and scripts:\n"
        f" RAY_TEST_REPO = {RAY_TEST_REPO}\n"
        f" RAY_TEST_BRANCH = {RAY_TEST_BRANCH}\n\n"
        f"Filtering for these tests:\n"
        f" FILTER_FILE = {FILTER_FILE}\n"
        f" FILTER_TEST = {FILTER_TEST}\n\n"
    )

    for test_file, test_names in steps.items():
        if FILTER_FILE and FILTER_FILE not in test_file:
            continue

        test_base = os.path.basename(test_file)
        for test_name in test_names:
            if FILTER_TEST and FILTER_TEST not in test_name:
                continue

            if not isinstance(test_name, ReleaseTest):
                test_name = ReleaseTest(name=test_name)

            logging.info(f"Adding test: {test_base}/{test_name}")

            for ray_wheels in ray_wheels_list:
                step_conf = create_test_step(
                    ray_repo=RAY_REPO,
                    ray_branch=RAY_BRANCH,
                    ray_version=RAY_VERSION,
                    ray_wheels=ray_wheels,
                    ray_test_repo=RAY_TEST_REPO,
                    ray_test_branch=RAY_TEST_BRANCH,
                    test_file=test_file,
                    test_name=test_name,
                )

                all_steps.append(step_conf)

    return all_steps
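
# alert_pipeline() emits a single step that runs release/alert.py (optionally
# with --stats) instead of any release tests.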

def alert_pipeline(stats: bool = False):
    step_conf = copy.deepcopy(DEFAULT_STEP_TEMPLATE)

    cmd = "python release/alert.py"
    if stats:
        cmd += " --stats"

    step_conf["commands"] = [
        "pip install -q -r release/requirements.txt",
        "pip install -U boto3 botocore",
        cmd,
    ]
    step_conf["label"] = f"Send periodic alert (stats_only = {stats})"
    return [step_conf]


if __name__ == "__main__":
    alert = os.environ.get("RELEASE_ALERT", "0")

    ask_for_config = not bool(int(os.environ.get("AUTOMATIC", "0")))

    if alert in ["1", "stats"]:
        steps = alert_pipeline(alert == "stats")
    elif ask_for_config:
        steps = ask_configuration()
    else:
        TEST_SUITE = os.environ.get("RELEASE_TEST_SUITE", "nightly")
        PIPELINE_SPEC = SUITES[TEST_SUITE]

        steps = build_pipeline(PIPELINE_SPEC)

    yaml.dump({"steps": steps}, sys.stdout)
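
# Sketch of the emitted pipeline YAML (illustrative only, not exhaustive; the
# test name and values are placeholders):
#   steps:
#     - label: "shuffle_100gb (master) - master/https://github.com/ray-project/ray.git"
#       command: ./release/run_e2e.sh ... --test-name shuffle_100gb --keep-results-dir
#       env: {...}
#       agents: {queue: runner_queue_branch}
#       retry: {automatic: [...]}
#       artifact_paths: ["/tmp/ray_release_test_artifacts/**/*"]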