diff --git a/release/.buildkite/build_pipeline.py b/release/.buildkite/build_pipeline.py index 93a4b389c..8712f731f 100644 --- a/release/.buildkite/build_pipeline.py +++ b/release/.buildkite/build_pipeline.py @@ -132,6 +132,7 @@ CORE_DAILY_TESTS = { "dask_on_ray_large_scale_test_spilling", "pg_autoscaling_regression_test", "threaded_actors_stress_test", + "k8s_threaded_actors_stress_test", "stress_test_many_tasks", "stress_test_dead_actors", "many_nodes_actor_test", diff --git a/release/nightly_tests/nightly_tests.yaml b/release/nightly_tests/nightly_tests.yaml index 19e7c5b88..4e994ad00 100644 --- a/release/nightly_tests/nightly_tests.yaml +++ b/release/nightly_tests/nightly_tests.yaml @@ -295,6 +295,24 @@ script: python stress_tests/test_threaded_actors.py --test-runtime 1800 --kill-interval_s 30 stable: false +- name: k8s_threaded_actors_stress_test + team: core + cluster: + app_config: stress_tests/stress_tests_app_config.yaml + compute_template: stress_tests/k8s_stress_test_threaded_actor_compute.yaml + compute_on_k8s: True + + run: + timeout: 7200 + prepare: python wait_cluster.py 201 600 + script: python stress_tests/test_threaded_actors.py --test-runtime 3600 --kill-interval_s 60 + + run: + timeout: 3600 + prepare: python wait_cluster.py 5 600 + script: python stress_tests/test_threaded_actors.py --test-runtime 1800 --kill-interval_s 30 + stable: false + # Test decision tree on autoscaling compute cluster. - name: decision_tree_autoscaling team: core diff --git a/release/nightly_tests/stress_tests/k8s_stress_test_threaded_actor_compute.yaml b/release/nightly_tests/stress_tests/k8s_stress_test_threaded_actor_compute.yaml new file mode 100644 index 000000000..5eab17027 --- /dev/null +++ b/release/nightly_tests/stress_tests/k8s_stress_test_threaded_actor_compute.yaml @@ -0,0 +1,26 @@ +cloud_id: cld_HSrCZdMCYDe1NmMCJhYRgQ4p +region: us-west-2 + +head_node_type: + name: head_node + instance_type: m5.8xlarge + resources: + cpu: 0 + +worker_node_types: + - name: worker_node + instance_type: m5.4xlarge + min_workers: 200 + max_workers: 200 + use_spot: false + resources: + cpu: 32 + +aws: + TagSpecifications: + - ResourceType: "instance" + Tags: + - Key: anyscale-user + Value: '{{env["ANYSCALE_USER"]}}' + - Key: anyscale-expiration + Value: '{{env["EXPIRATION_1D"]}}'