mirror of
https://github.com/vale981/ray
synced 2025-03-06 02:21:39 -05:00
[Nightly tests] Migrate rest of core tests (#23085)
Migrate the rest of the core tests.
This commit is contained in:
parent
04ea180dfb
commit
2b38fe89e2
2 changed files with 370 additions and 44 deletions
|
@ -57,7 +57,7 @@ class SmokeTest(ReleaseTest):
|
|||
|
||||
|
||||
CORE_NIGHTLY_TESTS = {
|
||||
"~/ray/release/nightly_tests/nightly_tests.yaml": [
|
||||
# "~/ray/release/nightly_tests/nightly_tests.yaml": [
|
||||
# "shuffle_10gb",
|
||||
# "shuffle_50gb",
|
||||
# "shuffle_50gb_large_partition",
|
||||
|
@ -81,7 +81,7 @@ CORE_NIGHTLY_TESTS = {
|
|||
# SmokeTest("stress_test_dead_actors"),
|
||||
# SmokeTest("threaded_actors_stress_test"),
|
||||
# "pg_long_running_performance_test",
|
||||
],
|
||||
# ],
|
||||
# "~/ray/benchmarks/benchmark_tests.yaml": [
|
||||
# "single_node",
|
||||
# "object_store",
|
||||
|
@ -89,21 +89,21 @@ CORE_NIGHTLY_TESTS = {
|
|||
# "many_tasks_smoke_test",
|
||||
# "many_pgs_smoke_test",
|
||||
# ],
|
||||
"~/ray/release/nightly_tests/dataset/dataset_test.yaml": [
|
||||
"inference",
|
||||
"shuffle_data_loader",
|
||||
"parquet_metadata_resolution",
|
||||
"pipelined_training_50_gb",
|
||||
"pipelined_ingestion_1500_gb",
|
||||
"datasets_preprocess_ingest",
|
||||
"datasets_ingest_400G",
|
||||
SmokeTest("datasets_ingest_train_infer"),
|
||||
],
|
||||
"~/ray/release/nightly_tests/chaos_test.yaml": [
|
||||
"chaos_many_actors",
|
||||
"chaos_many_tasks_no_object_store",
|
||||
"chaos_pipelined_ingestion_1500_gb_15_windows",
|
||||
],
|
||||
# "~/ray/release/nightly_tests/dataset/dataset_test.yaml": [
|
||||
# "inference",
|
||||
# "shuffle_data_loader",
|
||||
# "parquet_metadata_resolution",
|
||||
# "pipelined_training_50_gb",
|
||||
# "pipelined_ingestion_1500_gb",
|
||||
# "datasets_preprocess_ingest",
|
||||
# "datasets_ingest_400G",
|
||||
# SmokeTest("datasets_ingest_train_infer"),
|
||||
# ],
|
||||
# "~/ray/release/nightly_tests/chaos_test.yaml": [
|
||||
# "chaos_many_actors",
|
||||
# "chaos_many_tasks_no_object_store",
|
||||
# "chaos_pipelined_ingestion_1500_gb_15_windows",
|
||||
# ],
|
||||
# "~/ray/release/microbenchmark/microbenchmark.yaml": [
|
||||
# "microbenchmark",
|
||||
# ],
|
||||
|
@ -137,10 +137,10 @@ CORE_DAILY_TESTS = {
|
|||
# "stress_test_many_tasks",
|
||||
# "stress_test_dead_actors",
|
||||
# ],
|
||||
"~/ray/release/nightly_tests/chaos_test.yaml": [
|
||||
"chaos_dask_on_ray_large_scale_test_no_spilling",
|
||||
"chaos_dask_on_ray_large_scale_test_spilling",
|
||||
],
|
||||
# "~/ray/release/nightly_tests/chaos_test.yaml": [
|
||||
# "chaos_dask_on_ray_large_scale_test_no_spilling",
|
||||
# "chaos_dask_on_ray_large_scale_test_spilling",
|
||||
# ],
|
||||
}
|
||||
|
||||
CORE_SCALABILITY_TESTS_DAILY = {
|
||||
|
|
|
@ -2391,6 +2391,204 @@
|
|||
# file_manager: sdk
|
||||
# stable: false
|
||||
|
||||
###############
|
||||
# Dataset tests
|
||||
###############
|
||||
|
||||
- name: inference
|
||||
group: core-dataset-tests
|
||||
working_dir: dataset
|
||||
legacy:
|
||||
test_name: inference
|
||||
test_suite: dataset_test
|
||||
|
||||
frequency: multi
|
||||
team: core
|
||||
cluster:
|
||||
cluster_env: app_config.yaml
|
||||
cluster_compute: inference.yaml
|
||||
|
||||
run:
|
||||
timeout: 600
|
||||
script: python inference.py
|
||||
wait_for_nodes:
|
||||
num_nodes: 2
|
||||
timeout: 600
|
||||
|
||||
type: sdk_command
|
||||
file_manager: sdk
|
||||
|
||||
- name: shuffle_data_loader
|
||||
group: core-dataset-tests
|
||||
working_dir: dataset
|
||||
legacy:
|
||||
test_name: shuffle_data_loader
|
||||
test_suite: dataset_test
|
||||
|
||||
frequency: multi
|
||||
team: core
|
||||
cluster:
|
||||
cluster_env: shuffle_app_config.yaml
|
||||
cluster_compute: shuffle_compute.yaml
|
||||
|
||||
run:
|
||||
timeout: 1800
|
||||
script: python dataset_shuffle_data_loader.py
|
||||
type: sdk_command
|
||||
file_manager: sdk
|
||||
|
||||
- name: parquet_metadata_resolution
|
||||
group: core-dataset-tests
|
||||
working_dir: dataset
|
||||
legacy:
|
||||
test_name: parquet_metadata_resolution
|
||||
test_suite: dataset_test
|
||||
|
||||
frequency: multi
|
||||
team: core
|
||||
cluster:
|
||||
cluster_env: pipelined_training_app.yaml
|
||||
cluster_compute: pipelined_training_compute.yaml
|
||||
|
||||
run:
|
||||
timeout: 1200
|
||||
script: python parquet_metadata_resolution.py --num-files 915
|
||||
wait_for_nodes:
|
||||
num_nodes: 15
|
||||
timeout: 1200
|
||||
|
||||
type: sdk_command
|
||||
file_manager: sdk
|
||||
|
||||
- name: pipelined_training_50_gb
|
||||
group: core-dataset-tests
|
||||
working_dir: dataset
|
||||
legacy:
|
||||
test_name: pipelined_training_50_gb
|
||||
test_suite: dataset_test
|
||||
|
||||
frequency: multi
|
||||
team: core
|
||||
cluster:
|
||||
cluster_env: pipelined_training_app.yaml
|
||||
cluster_compute: pipelined_training_compute.yaml
|
||||
|
||||
run:
|
||||
timeout: 4800
|
||||
script: python pipelined_training.py --epochs 1
|
||||
wait_for_nodes:
|
||||
num_nodes: 15
|
||||
timeout: 1200
|
||||
|
||||
type: sdk_command
|
||||
file_manager: sdk
|
||||
|
||||
- name: pipelined_ingestion_1500_gb
|
||||
group: core-dataset-tests
|
||||
working_dir: dataset
|
||||
legacy:
|
||||
test_name: pipelined_ingestion_1500_gb
|
||||
test_suite: dataset_test
|
||||
|
||||
frequency: multi
|
||||
team: core
|
||||
cluster:
|
||||
cluster_env: pipelined_ingestion_app.yaml
|
||||
cluster_compute: pipelined_ingestion_compute.yaml
|
||||
|
||||
run:
|
||||
timeout: 9600
|
||||
script: python pipelined_training.py --epochs 2 --num-windows 2 --num-files 915
|
||||
--debug
|
||||
|
||||
wait_for_nodes:
|
||||
num_nodes: 21
|
||||
timeout: 2400
|
||||
|
||||
type: sdk_command
|
||||
file_manager: sdk
|
||||
|
||||
- name: datasets_ingest_train_infer
|
||||
group: core-dataset-tests
|
||||
working_dir: dataset
|
||||
legacy:
|
||||
test_name: datasets_ingest_train_infer
|
||||
test_suite: dataset_test
|
||||
|
||||
frequency: multi
|
||||
team: core
|
||||
cluster:
|
||||
cluster_env: ray_sgd_training_app.yaml
|
||||
cluster_compute: ray_sgd_training_compute.yaml
|
||||
|
||||
run:
|
||||
timeout: 14400
|
||||
script: python ray_sgd_training.py --address auto --use-s3 --num-workers 16 --use-gpu
|
||||
--large-dataset
|
||||
|
||||
wait_for_nodes:
|
||||
num_nodes: 66
|
||||
timeout: 2400
|
||||
|
||||
type: sdk_command
|
||||
file_manager: sdk
|
||||
|
||||
smoke_test:
|
||||
cluster:
|
||||
app_config: ray_sgd_training_app.yaml
|
||||
compute_template: ray_sgd_training_smoke_compute.yaml
|
||||
|
||||
run:
|
||||
timeout: 3600
|
||||
script: python ray_sgd_training.py --address auto --use-s3 --num-workers 8 --use-gpu
|
||||
wait_for_nodes:
|
||||
num_nodes: 8
|
||||
timeout: 2400
|
||||
|
||||
- name: datasets_preprocess_ingest
|
||||
group: core-dataset-tests
|
||||
working_dir: dataset
|
||||
legacy:
|
||||
test_name: datasets_preprocess_ingest
|
||||
test_suite: dataset_test
|
||||
|
||||
frequency: multi
|
||||
team: core
|
||||
cluster:
|
||||
cluster_env: ray_sgd_training_app.yaml
|
||||
cluster_compute: ray_sgd_training_compute_no_gpu.yaml
|
||||
|
||||
run:
|
||||
timeout: 7200
|
||||
script: python ray_sgd_training.py --address auto --use-s3 --num-workers 16 --use-gpu
|
||||
--large-dataset --debug
|
||||
|
||||
wait_for_nodes:
|
||||
num_nodes: 21
|
||||
timeout: 2400
|
||||
|
||||
type: sdk_command
|
||||
file_manager: sdk
|
||||
|
||||
- name: datasets_ingest_400G
|
||||
group: core-dataset-tests
|
||||
working_dir: dataset
|
||||
legacy:
|
||||
test_name: datasets_ingest_400G
|
||||
test_suite: dataset_test
|
||||
|
||||
frequency: multi
|
||||
team: core
|
||||
cluster:
|
||||
cluster_env: ray_sgd_training_app.yaml
|
||||
cluster_compute: dataset_ingest_400G_compute.yaml
|
||||
|
||||
run:
|
||||
timeout: 7200
|
||||
script: python ray_sgd_runner.py --address auto --use-gpu --num-epochs 1
|
||||
type: sdk_command
|
||||
file_manager: sdk
|
||||
|
||||
################
|
||||
# Core K8s tests
|
||||
################
|
||||
|
@ -2472,3 +2670,131 @@
|
|||
file_manager: job
|
||||
|
||||
stable: false
|
||||
|
||||
##################
|
||||
# Core Chaos tests
|
||||
##################
|
||||
|
||||
- name: chaos_many_tasks_no_object_store
|
||||
group: core-dataset-tests
|
||||
working_dir: nightly_tests
|
||||
legacy:
|
||||
test_name: chaos_many_tasks_no_object_store
|
||||
test_suite: chaos_test
|
||||
|
||||
frequency: multi
|
||||
team: core
|
||||
cluster:
|
||||
cluster_env: chaos_test/app_config.yaml
|
||||
cluster_compute: chaos_test/compute_template.yaml
|
||||
|
||||
run:
|
||||
timeout: 3600
|
||||
wait_for_nodes:
|
||||
num_nodes: 10
|
||||
timeout: 600
|
||||
prepare: python setup_chaos.py --no-start
|
||||
script: python chaos_test/test_chaos_basic.py --workload=tasks
|
||||
|
||||
type: sdk_command
|
||||
file_manager: sdk
|
||||
|
||||
- name: chaos_many_actors
|
||||
group: core-dataset-tests
|
||||
working_dir: nightly_tests
|
||||
legacy:
|
||||
test_name: chaos_many_actors
|
||||
test_suite: chaos_test
|
||||
|
||||
frequency: multi
|
||||
team: core
|
||||
cluster:
|
||||
cluster_env: chaos_test/app_config.yaml
|
||||
cluster_compute: chaos_test/compute_template.yaml
|
||||
|
||||
run:
|
||||
timeout: 3600
|
||||
wait_for_nodes:
|
||||
num_nodes: 10
|
||||
timeout: 600
|
||||
prepare: python setup_chaos.py --no-start
|
||||
script: python chaos_test/test_chaos_basic.py --workload=actors
|
||||
|
||||
type: sdk_command
|
||||
file_manager: sdk
|
||||
|
||||
- name: chaos_dask_on_ray_large_scale_test_no_spilling
|
||||
group: core-dataset-tests
|
||||
working_dir: nightly_tests
|
||||
legacy:
|
||||
test_name: chaos_dask_on_ray_large_scale_test_no_spilling
|
||||
test_suite: chaos_test
|
||||
|
||||
frequency: nightly
|
||||
team: core
|
||||
cluster:
|
||||
cluster_env: chaos_test/dask_on_ray_app_config_reconstruction.yaml
|
||||
cluster_compute: dask_on_ray/dask_on_ray_stress_compute.yaml
|
||||
|
||||
run:
|
||||
timeout: 7200
|
||||
wait_for_nodes:
|
||||
num_nodes: 21
|
||||
timeout: 600
|
||||
prepare: python setup_chaos.py --node-kill-interval 100
|
||||
script: python dask_on_ray/large_scale_test.py --num_workers 20 --worker_obj_store_size_in_gb
|
||||
20 --error_rate 0 --data_save_path /tmp/ray
|
||||
|
||||
type: sdk_command
|
||||
file_manager: sdk
|
||||
|
||||
- name: chaos_dask_on_ray_large_scale_test_spilling
|
||||
group: core-dataset-tests
|
||||
working_dir: nightly_tests
|
||||
legacy:
|
||||
test_name: chaos_dask_on_ray_large_scale_test_spilling
|
||||
test_suite: chaos_test
|
||||
|
||||
frequency: nightly
|
||||
team: core
|
||||
cluster:
|
||||
cluster_env: chaos_test/dask_on_ray_app_config_reconstruction.yaml
|
||||
cluster_compute: dask_on_ray/dask_on_ray_stress_compute.yaml
|
||||
|
||||
run:
|
||||
timeout: 7200
|
||||
wait_for_nodes:
|
||||
num_nodes: 21
|
||||
timeout: 600
|
||||
prepare: python setup_chaos.py --node-kill-interval 100
|
||||
script: python dask_on_ray/large_scale_test.py --num_workers 150 --worker_obj_store_size_in_gb
|
||||
70 --error_rate 0 --data_save_path /tmp/ray
|
||||
|
||||
type: sdk_command
|
||||
file_manager: sdk
|
||||
|
||||
- name: chaos_pipelined_ingestion_1500_gb_15_windows
|
||||
group: core-dataset-tests
|
||||
working_dir: nightly_tests
|
||||
legacy:
|
||||
test_name: chaos_pipelined_ingestion_1500_gb_15_windows
|
||||
test_suite: chaos_test
|
||||
|
||||
frequency: multi
|
||||
team: core
|
||||
cluster:
|
||||
cluster_env: dataset/pipelined_ingestion_app.yaml
|
||||
cluster_compute: dataset/pipelined_ingestion_compute.yaml
|
||||
|
||||
run:
|
||||
timeout: 7200
|
||||
wait_for_nodes:
|
||||
num_nodes: 21
|
||||
timeout: 2400
|
||||
prepare: ' python setup_chaos.py --node-kill-interval 300'
|
||||
script: python dataset/pipelined_training.py --epochs 1 --num-windows 15 --num-files
|
||||
915 --debug
|
||||
|
||||
type: sdk_command
|
||||
file_manager: sdk
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue