ray/release/nightly_tests/chaos_test.yaml
Chen Shen d0e79a36f9
[chaos-test] chaos test pipeline ingestion (#20929)
since it has been passing my test run; i'll land it and mark it as unstable.
2021-12-09 13:43:00 -08:00

65 lines
2.1 KiB
YAML

#
# Chaos tests.
#
# Runs test_chaos_basic.py with the "tasks" workload: many tasks, without
# object store usage.
- name: chaos_many_tasks_no_object_store
  cluster:
    app_config: chaos_test/app_config.yaml
    compute_template: chaos_test/compute_template.yaml
  run:
    timeout: 3600
    # `&&` rather than `;`: setup_chaos.py only runs when wait_cluster.py
    # exits successfully, so a non-zero exit from the wait is not masked by
    # the exit status of the second command.
    # NOTE(review): the wait_cluster.py arguments are presumably
    # <node count> <timeout> -- confirm against the script.
    prepare: python wait_cluster.py 10 600 && python setup_chaos.py --no-start
    script: python chaos_test/test_chaos_basic.py --workload=tasks
# Runs test_chaos_basic.py with the "actors" workload. Uses the same cluster
# configuration and prepare step as chaos_many_tasks_no_object_store.
- name: chaos_many_actors
  cluster:
    app_config: chaos_test/app_config.yaml
    compute_template: chaos_test/compute_template.yaml
  run:
    timeout: 3600
    # `&&` rather than `;`: setup_chaos.py only runs when wait_cluster.py
    # exits successfully, so a non-zero exit from the wait is not masked by
    # the exit status of the second command.
    # NOTE(review): the wait_cluster.py arguments are presumably
    # <node count> <timeout> -- confirm against the script.
    prepare: python wait_cluster.py 10 600 && python setup_chaos.py --no-start
    script: python chaos_test/test_chaos_basic.py --workload=actors
# Large-scale Dask-on-Ray run, no-spilling variant: 20 workers with
# --worker_obj_store_size_in_gb 20.
# NOTE(review): unlike the other chaos entries in this file, the prepare step
# does not invoke setup_chaos.py -- confirm chaos is injected elsewhere
# (e.g. via the app config).
- name: chaos_dask_on_ray_large_scale_test_no_spilling
  cluster:
    app_config: chaos_test/dask_on_ray_app_config_reconstruction.yaml
    compute_template: dask_on_ray/dask_on_ray_stress_compute.yaml
  run:
    timeout: 7200
    prepare: python wait_cluster.py 21 600
    # Folded scalar: the lines below are joined with single spaces into one
    # command line, with no trailing newline.
    script: >-
      python dask_on_ray/large_scale_test.py
      --num_workers 20
      --worker_obj_store_size_in_gb 20
      --error_rate 0
      --data_save_path /tmp/ray
  # NOTE(review): nesting was reconstructed; confirm `stable` belongs at the
  # test level rather than under `run`.
  stable: false
# Large-scale Dask-on-Ray run, spilling variant: 150 workers with
# --worker_obj_store_size_in_gb 70.
# NOTE(review): unlike the other chaos entries in this file, the prepare step
# does not invoke setup_chaos.py -- confirm chaos is injected elsewhere
# (e.g. via the app config).
- name: chaos_dask_on_ray_large_scale_test_spilling
  cluster:
    app_config: chaos_test/dask_on_ray_app_config_reconstruction.yaml
    compute_template: dask_on_ray/dask_on_ray_stress_compute.yaml
  run:
    timeout: 7200
    prepare: python wait_cluster.py 21 600
    # Folded scalar: the lines below are joined with single spaces into one
    # command line, with no trailing newline.
    script: >-
      python dask_on_ray/large_scale_test.py
      --num_workers 150
      --worker_obj_store_size_in_gb 70
      --error_rate 0
      --data_save_path /tmp/ray
  # NOTE(review): nesting was reconstructed; confirm `stable` belongs at the
  # test level rather than under `run`.
  stable: false
# Pipelined dataset ingestion/training workload (15 windows, 915 files, one
# epoch) run after setup_chaos.py is invoked with --node-kill-interval 300.
- name: chaos_pipelined_ingestion_1500_gb_15_windows
  owner:
    mail: "core@anyscale.com"
    slack: "@Chen Shen"
  cluster:
    app_config: dataset/pipelined_ingestion_app.yaml
    compute_template: dataset/pipelined_ingestion_compute.yaml
  run:
    timeout: 7200
    # `&&` rather than `;`: setup_chaos.py only runs when wait_cluster.py
    # exits successfully, so a non-zero exit from the wait is not masked by
    # the exit status of the second command.
    # NOTE(review): the --node-kill-interval unit is not visible here
    # (presumably seconds) -- confirm against setup_chaos.py.
    prepare: >-
      python wait_cluster.py 21 2400 &&
      python setup_chaos.py --node-kill-interval 300
    # Folded scalar: the lines below are joined with single spaces into one
    # command line, with no trailing newline.
    script: >-
      python dataset/pipelined_training.py
      --epochs 1
      --num-windows 15
      --num-files 915
      --debug
  # NOTE(review): nesting was reconstructed; confirm `stable` belongs at the
  # test level rather than under `run`.
  stable: false