diff --git a/release/core_tests/app_cpu_nightly.yaml b/release/core_tests/app_cpu_nightly.yaml
new file mode 100644
index 000000000..758c463fb
--- /dev/null
+++ b/release/core_tests/app_cpu_nightly.yaml
@@ -0,0 +1,13 @@
+base_image: {{ env["RAY_IMAGE_NIGHTLY_CPU"] | default("anyscale/ray:nightly-py37") }}
+env_vars: {}
+debian_packages:
+  - curl
+
+python:
+  pip_packages:
+    - pytest
+  conda_packages: []
+
+post_build_cmds:
+  - pip3 uninstall -y ray || true && pip3 install -U {{ env["RAY_WHEELS"] | default("ray") }}
+  - {{ env["RAY_WHEELS_SANITY_CHECK"] | default("echo No Ray wheels sanity check") }}
diff --git a/release/core_tests/compute_2_cpu.yaml b/release/core_tests/compute_2_cpu.yaml
new file mode 100644
index 000000000..e9ad74fc9
--- /dev/null
+++ b/release/core_tests/compute_2_cpu.yaml
@@ -0,0 +1,15 @@
+cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
+region: us-west-2
+
+max_workers: 1
+
+head_node_type:
+    name: head_node
+    instance_type: m5.2xlarge
+
+worker_node_types:
+    - name: worker_node
+      instance_type: m5.2xlarge
+      max_workers: 1
+      min_workers: 1
+      use_spot: false
diff --git a/release/core_tests/oom_actor_test.py b/release/core_tests/oom_actor_test.py
new file mode 100644
index 000000000..67f7d544a
--- /dev/null
+++ b/release/core_tests/oom_actor_test.py
@@ -0,0 +1,69 @@
+"""Out-of-memory (OOM) retry release test
+
+This test submits a Ray task with unlimited retries that allocates as many
+bytes as the driver's node needs to reach a target fraction of its memory.
+
+Test owner: architkulkarni
+
+Acceptance criteria: Should run through and exit without error.
+NOTE(review): the script never prints "PASSED"; confirm the pass signal.
+"""
+
+from math import ceil
+import time
+import ray
+import psutil
+
+
+def get_additional_bytes_to_reach_memory_usage_pct(pct: float) -> int:
+    """Return how many more bytes must be used to reach a memory usage level.
+
+    Args:
+        pct: Target usage as a fraction of the node's total memory
+            (e.g. 0.5 for 50%, 1 for 100%).
+
+    Returns:
+        The number of additional bytes, rounded up to a whole byte.
+
+    Raises:
+        AssertionError: If no additional bytes are needed to reach the target.
+    """
+    node_mem = psutil.virtual_memory()
+    used = node_mem.total - node_mem.available
+    bytes_needed = node_mem.total * pct - used
+    assert bytes_needed > 0, "node has less memory than what is requested"
+    return ceil(bytes_needed)
+
+
+@ray.remote(max_retries=-1)
+def inf_retry(
+    allocate_bytes: int, num_chunks: int = 10, allocate_interval_s: float = 0
+) -> float:
+    """Allocate memory in chunks and return the elapsed wall-clock seconds.
+
+    Declared with ``max_retries=-1`` so that Ray retries it without limit.
+
+    Args:
+        allocate_bytes: Approximate total number of bytes to allocate.
+        num_chunks: Number of allocation steps the total is split into.
+        allocate_interval_s: Seconds to sleep after each allocation step.
+
+    Returns:
+        Seconds elapsed between the first allocation and the last sleep.
+    """
+    start = time.time()
+    # Divide by 8 as each element in the list occupies 8 bytes.
+    elements_per_chunk = ceil(allocate_bytes / 8 / num_chunks)
+    # Keep a reference to every chunk so the memory stays allocated.
+    chunks = []
+    for _ in range(num_chunks):
+        chunks.append([0] * elements_per_chunk)
+        time.sleep(allocate_interval_s)
+    end = time.time()
+    return end - start
+
+
+if __name__ == "__main__":
+    # Request enough bytes to reach 100% memory usage (pct=1).
+    bytes_to_alloc = get_additional_bytes_to_reach_memory_usage_pct(1)
+    ray.get(inf_retry.remote(bytes_to_alloc))
diff --git a/release/release_tests.yaml b/release/release_tests.yaml
index 7b1cdbb9f..67ff4c264 100644
--- a/release/release_tests.yaml
+++ b/release/release_tests.yaml
@@ -4629,6 +4629,24 @@
     type: sdk_command
     file_manager: sdk
 
+- name: oom_actor_tests
+  group: core-daily-test
+  working_dir: core_tests
+
+  frequency: nightly
+  team: core
+  cluster:
+    cluster_env: app_cpu_nightly.yaml
+    cluster_compute: compute_2_cpu.yaml
+
+  run:
+    timeout: 600
+    script: python oom_actor_test.py
+    wait_for_nodes:
+      num_nodes: 2
+    type: sdk_command
+    file_manager: sdk
+
 - name: k8s_serve_ha_test
   group: k8s-test
   working_dir: k8s_tests