ray/release/nightly_tests/chaos_test.yaml
Stephanie Wang 1b45582e43
[tests] Enable chaos testing for Dask-on-Ray (#22927)
Turns on failures for Dask-on-Ray chaos tests.
2022-03-09 18:08:41 -05:00

64 lines
2.2 KiB
YAML

#
# Chaos tests.
#
# Run the test that invokes many tasks without object store usage.
- name: chaos_many_tasks_no_object_store
  team: core
  cluster:
    app_config: chaos_test/app_config.yaml
    compute_template: chaos_test/compute_template.yaml
  run:
    # Presumably seconds — TODO confirm against the release test runner.
    timeout: 3600
    # NOTE(review): the two commands are joined with ';', so under a POSIX
    # shell setup_chaos.py still runs if wait_cluster.py exits non-zero.
    # Consider '&&' if a failed cluster wait should abort the prepare step.
    prepare: python wait_cluster.py 10 600; python setup_chaos.py --no-start
    script: python chaos_test/test_chaos_basic.py --workload=tasks
# Run the basic chaos test with the actors workload.
- name: chaos_many_actors
  team: core
  cluster:
    app_config: chaos_test/app_config.yaml
    compute_template: chaos_test/compute_template.yaml
  run:
    # Presumably seconds — TODO confirm against the release test runner.
    timeout: 3600
    # NOTE(review): the two commands are joined with ';', so under a POSIX
    # shell setup_chaos.py still runs if wait_cluster.py exits non-zero.
    # Consider '&&' if a failed cluster wait should abort the prepare step.
    prepare: python wait_cluster.py 10 600; python setup_chaos.py --no-start
    script: python chaos_test/test_chaos_basic.py --workload=actors
# Large-scale Dask-on-Ray test without spilling, run with chaos node kills.
- name: chaos_dask_on_ray_large_scale_test_no_spilling
  team: core
  cluster:
    app_config: chaos_test/dask_on_ray_app_config_reconstruction.yaml
    compute_template: dask_on_ray/dask_on_ray_stress_compute.yaml
  run:
    timeout: 7200
    # Total run time without failures is about 300-400s.
    # NOTE(review): the two commands are joined with ';', so under a POSIX
    # shell setup_chaos.py still runs if wait_cluster.py exits non-zero.
    # Consider '&&' if a failed cluster wait should abort the prepare step.
    prepare: python wait_cluster.py 21 600; python setup_chaos.py --node-kill-interval 100
    # Folded scalar: the lines below are joined with single spaces, giving
    # the same one-line command as before.
    script: >-
      python dask_on_ray/large_scale_test.py
      --num_workers 20
      --worker_obj_store_size_in_gb 20
      --error_rate 0
      --data_save_path /tmp/ray
# Large-scale Dask-on-Ray test with spilling, run with chaos node kills.
- name: chaos_dask_on_ray_large_scale_test_spilling
  team: core
  cluster:
    app_config: chaos_test/dask_on_ray_app_config_reconstruction.yaml
    compute_template: dask_on_ray/dask_on_ray_stress_compute.yaml
  run:
    timeout: 7200
    # Total run time without failures is about 300-400s.
    # NOTE(review): the two commands are joined with ';', so under a POSIX
    # shell setup_chaos.py still runs if wait_cluster.py exits non-zero.
    # Consider '&&' if a failed cluster wait should abort the prepare step.
    prepare: python wait_cluster.py 21 600; python setup_chaos.py --node-kill-interval 100
    # Folded scalar: the lines below are joined with single spaces, giving
    # the same one-line command as before.
    script: >-
      python dask_on_ray/large_scale_test.py
      --num_workers 150
      --worker_obj_store_size_in_gb 70
      --error_rate 0
      --data_save_path /tmp/ray
# Pipelined dataset ingestion (15 windows, 915 files) run with chaos node kills.
- name: chaos_pipelined_ingestion_1500_gb_15_windows
  team: core
  cluster:
    app_config: dataset/pipelined_ingestion_app.yaml
    compute_template: dataset/pipelined_ingestion_compute.yaml
  run:
    timeout: 7200
    # NOTE(review): the two commands are joined with ';', so under a POSIX
    # shell setup_chaos.py still runs if wait_cluster.py exits non-zero.
    # Consider '&&' if a failed cluster wait should abort the prepare step.
    prepare: python wait_cluster.py 21 2400; python setup_chaos.py --node-kill-interval 300
    script: python dataset/pipelined_training.py --epochs 1 --num-windows 15 --num-files 915 --debug
  # NOTE(review): placed at test level (sibling of `run`); the original
  # nesting was lost — confirm against the release test schema.
  stable: false