Add chaos test for dataset shuffle (#25161)

Add chaos tests for dataset shuffle: both push-based and non-push-based.
This commit is contained in:
Jiajun Yao 2022-05-24 15:12:20 -07:00 committed by GitHub
parent 93ff0beb4e
commit 00cdd8dce5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -3667,3 +3667,51 @@
type: sdk_command
file_manager: sdk
# Chaos test: dataset sort (1000 partitions, --partition-size=1e9) with
# push-based shuffle enabled via RAY_DATASET_PUSH_BASED_SHUFFLE=1.
- name: chaos_dataset_shuffle_push_based_sort_1tb
  group: core-dataset-tests
  working_dir: nightly_tests
  # Identifiers this test carried in the legacy test-suite layout.
  legacy:
    test_name: chaos_dataset_shuffle_push_based_sort_1tb
    test_suite: chaos_test
  # Marked unstable.
  stable: false
  frequency: nightly
  team: core
  cluster:
    cluster_env: shuffle/shuffle_app_config.yaml
    cluster_compute: shuffle/datasets_large_scale_compute_small_instances.yaml
  run:
    # Presumably seconds (2 hours) - confirm against the test runner.
    timeout: 7200
    # Runs before the workload and configures the node killer: at most 3 nodes,
    # kill interval 1200 (presumably seconds - confirm in setup_chaos.py).
    prepare: ' python setup_chaos.py --node-kill-interval 1200 --max-nodes-to-kill 3'
    script: RAY_DATASET_PUSH_BASED_SHUFFLE=1 python dataset/sort.py --num-partitions=1000 --partition-size=1e9
    wait_for_nodes:
      num_nodes: 20
    # NOTE(review): type/file_manager are assumed to belong to `run` - confirm
    # against the release test schema.
    type: sdk_command
    file_manager: sdk
# Chaos test: dataset sort (1000 partitions, --partition-size=1e9) using the
# default shuffle path (RAY_DATASET_PUSH_BASED_SHUFFLE is not set here).
- name: chaos_dataset_shuffle_sort_1tb
  group: core-dataset-tests
  working_dir: nightly_tests
  # Identifiers this test carried in the legacy test-suite layout.
  legacy:
    test_name: chaos_dataset_shuffle_sort_1tb
    test_suite: chaos_test
  # Marked unstable.
  stable: false
  frequency: nightly
  team: core
  cluster:
    cluster_env: shuffle/shuffle_app_config.yaml
    cluster_compute: shuffle/datasets_large_scale_compute_small_instances.yaml
  run:
    # Presumably seconds (2 hours) - confirm against the test runner.
    timeout: 7200
    # Runs before the workload and configures the node killer: at most 3 nodes,
    # kill interval 900 (presumably seconds - confirm in setup_chaos.py).
    prepare: ' python setup_chaos.py --node-kill-interval 900 --max-nodes-to-kill 3'
    script: python dataset/sort.py --num-partitions=1000 --partition-size=1e9
    wait_for_nodes:
      num_nodes: 20
    # NOTE(review): type/file_manager are assumed to belong to `run` - confirm
    # against the release test schema.
    type: sdk_command
    file_manager: sdk