mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
oom release test
Signed-off-by: Clarence Ng <clarence.wyng@gmail.com>
This commit is contained in:
parent
5cec2492bb
commit
670c7da148
4 changed files with 89 additions and 0 deletions
13
release/core_tests/app_cpu_nightly.yaml
Normal file
13
release/core_tests/app_cpu_nightly.yaml
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
base_image: {{ env["RAY_IMAGE_NIGHTLY_CPU"] | default("anyscale/ray:nightly-py37") }}
|
||||||
|
env_vars: {}
|
||||||
|
debian_packages:
|
||||||
|
- curl
|
||||||
|
|
||||||
|
python:
|
||||||
|
pip_packages:
|
||||||
|
- pytest
|
||||||
|
conda_packages: []
|
||||||
|
|
||||||
|
post_build_cmds:
|
||||||
|
- pip3 uninstall -y ray || true && pip3 install -U {{ env["RAY_WHEELS"] | default("ray") }}
|
||||||
|
- {{ env["RAY_WHEELS_SANITY_CHECK"] | default("echo No Ray wheels sanity check") }}
|
15
release/core_tests/compute_2_cpu.yaml
Normal file
15
release/core_tests/compute_2_cpu.yaml
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
|
||||||
|
region: us-west-2
|
||||||
|
|
||||||
|
max_workers: 1
|
||||||
|
|
||||||
|
head_node_type:
|
||||||
|
name: head_node
|
||||||
|
instance_type: m5.2xlarge
|
||||||
|
|
||||||
|
worker_node_types:
|
||||||
|
- name: worker_node
|
||||||
|
instance_type: m5.2xlarge
|
||||||
|
max_workers: 1
|
||||||
|
min_workers: 1
|
||||||
|
use_spot: false
|
43
release/core_tests/oom_actor_test.py
Normal file
43
release/core_tests/oom_actor_test.py
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
|
||||||
|
"""Job submission test
|
||||||
|
|
||||||
|
This test runs a Ray task with unlimited retries that allocates enough memory to bring the node to 100% memory usage.
|
||||||
|
|
||||||
|
Test owner: architkulkarni
|
||||||
|
|
||||||
|
Acceptance criteria: Should run through and print "PASSED"
|
||||||
|
"""
|
||||||
|
|
||||||
|
from math import ceil
|
||||||
|
import time
|
||||||
|
import ray
|
||||||
|
import psutil
|
||||||
|
|
||||||
|
|
||||||
|
def get_additional_bytes_to_reach_memory_usage_pct(pct: float) -> float:
    """Return how many more bytes must be used for node memory usage to reach ``pct``.

    Args:
        pct: Target memory usage as a fraction of the node's total memory
            (the script entry point in this file passes ``1``, i.e. 100%).

    Returns:
        ``total * pct - used``, where ``used`` is ``total - available`` as
        reported by ``psutil.virtual_memory()``.

    Raises:
        AssertionError: If the node's current usage is already at or above
            the requested fraction, so there is nothing left to allocate.
    """
    node_mem = psutil.virtual_memory()
    used = node_mem.total - node_mem.available
    bytes_needed = node_mem.total * pct - used
    # Raise explicitly instead of using ``assert`` so the check is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if not bytes_needed > 0:
        raise AssertionError(
            "node memory usage is already at or above the requested percentage"
        )
    return bytes_needed
|
||||||
|
|
||||||
|
|
||||||
|
@ray.remote(max_retries=-1)
def inf_retry(
    allocate_bytes: int, num_chunks: int = 10, allocate_interval_s: float = 0
):
    """Allocate about ``allocate_bytes`` of list storage in steps and time it.

    The allocation is split into ``num_chunks`` lists that are all kept alive
    until the function returns, with a pause of ``allocate_interval_s``
    seconds after each one.

    Args:
        allocate_bytes: Total number of bytes to allocate across all chunks.
        num_chunks: How many separate lists to allocate.
        allocate_interval_s: Seconds to sleep after each chunk is allocated.

    Returns:
        Wall-clock seconds elapsed from the start of the allocation to its end.
    """
    # NOTE(review): max_retries=-1 is presumably Ray's "retry forever"
    # setting (matches the function name) - confirm against the Ray docs.
    began = time.time()
    # Each list element occupies 8 bytes, so turn the per-chunk byte budget
    # into a whole number of elements.
    elements_per_chunk = ceil(allocate_bytes / 8 / num_chunks)
    held = []
    for _ in range(num_chunks):
        held.append([0] * elements_per_chunk)
        time.sleep(allocate_interval_s)
    return time.time() - began
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Work out how many extra bytes bring this node to 100% memory usage
    # (pct=1), then submit the allocating task and wait on its result.
    target_bytes = get_additional_bytes_to_reach_memory_usage_pct(1)
    result_ref = inf_retry.remote(target_bytes)
    ray.get(result_ref)
|
||||||
|
|
|
@ -4629,6 +4629,24 @@
|
||||||
type: sdk_command
|
type: sdk_command
|
||||||
file_manager: sdk
|
file_manager: sdk
|
||||||
|
|
||||||
|
- name: oom_actor_tests
|
||||||
|
group: core-daily-test
|
||||||
|
working_dir: core_tests
|
||||||
|
|
||||||
|
frequency: nightly
|
||||||
|
team: core
|
||||||
|
cluster:
|
||||||
|
cluster_env: app_cpu_nightly.yaml
|
||||||
|
cluster_compute: compute_2_cpu.yaml
|
||||||
|
|
||||||
|
run:
|
||||||
|
timeout: 600
|
||||||
|
script: python oom_actor_test.py
|
||||||
|
wait_for_nodes:
|
||||||
|
num_nodes: 2
|
||||||
|
type: sdk_command
|
||||||
|
file_manager: sdk
|
||||||
|
|
||||||
- name: k8s_serve_ha_test
|
- name: k8s_serve_ha_test
|
||||||
group: k8s-test
|
group: k8s-test
|
||||||
working_dir: k8s_tests
|
working_dir: k8s_tests
|
||||||
|
|
Loading…
Add table
Reference in a new issue