mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
oom release test
Signed-off-by: Clarence Ng <clarence.wyng@gmail.com>
This commit is contained in:
parent
5cec2492bb
commit
670c7da148
4 changed files with 89 additions and 0 deletions
13
release/core_tests/app_cpu_nightly.yaml
Normal file
13
release/core_tests/app_cpu_nightly.yaml
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Cluster environment for the core nightly CPU release tests.
# The base image comes from RAY_IMAGE_NIGHTLY_CPU, with a nightly py37 default.
base_image: {{ env["RAY_IMAGE_NIGHTLY_CPU"] | default("anyscale/ray:nightly-py37") }}
env_vars: {}
debian_packages:
  - curl

python:
  pip_packages:
    - pytest
  conda_packages: []

# Replace any preinstalled Ray with the wheel under test, then run the
# optional sanity-check command.
post_build_cmds:
  - pip3 uninstall -y ray || true && pip3 install -U {{ env["RAY_WHEELS"] | default("ray") }}
  - {{ env["RAY_WHEELS_SANITY_CHECK"] | default("echo No Ray wheels sanity check") }}
|
15
release/core_tests/compute_2_cpu.yaml
Normal file
15
release/core_tests/compute_2_cpu.yaml
Normal file
|
@ -0,0 +1,15 @@
|
|||
# Cluster compute config: one head node plus exactly one worker node,
# both m5.2xlarge, on-demand (no spot instances).
cloud_id: {{env["ANYSCALE_CLOUD_ID"]}}
region: us-west-2

max_workers: 1

head_node_type:
  name: head_node
  instance_type: m5.2xlarge

worker_node_types:
  - name: worker_node
    instance_type: m5.2xlarge
    # min == max pins the worker count at one.
    max_workers: 1
    min_workers: 1
    use_spot: false
|
43
release/core_tests/oom_actor_test.py
Normal file
43
release/core_tests/oom_actor_test.py
Normal file
|
@ -0,0 +1,43 @@
|
|||
|
||||
"""OOM task retry release test
|
||||
|
||||
This test submits a Ray task with unlimited retries that allocates enough memory to bring the node to 100% memory usage.
|
||||
|
||||
Test owner: architkulkarni
|
||||
|
||||
Acceptance criteria: Should run through and print "PASSED"
|
||||
"""
|
||||
|
||||
from math import ceil
|
||||
import time
|
||||
import ray
|
||||
import psutil
|
||||
|
||||
|
||||
def get_additional_bytes_to_reach_memory_usage_pct(pct: float) -> int:
    """Return how many more bytes must be used for node memory usage to hit ``pct``.

    Usage is measured as ``total - available`` from ``psutil.virtual_memory()``.

    Args:
        pct: Target memory usage as a fraction of total memory (``1`` is 100%).

    Returns:
        The number of additional bytes, truncated to a whole number.

    Raises:
        AssertionError: If current usage is already at or above the target,
            so no positive number of bytes would reach it.
    """
    node_mem = psutil.virtual_memory()
    used = node_mem.total - node_mem.available
    bytes_needed = node_mem.total * pct - used
    # Raise explicitly (instead of ``assert``) so the check survives ``python -O``.
    if not bytes_needed > 0:
        raise AssertionError("node has less memory than what is requested")
    # ``total * pct`` is a float for fractional targets; callers want a byte count.
    return int(bytes_needed)
|
||||
|
||||
|
||||
@ray.remote(max_retries=-1)
def inf_retry(
    allocate_bytes: int, num_chunks: int = 10, allocate_interval_s: float = 0
):
    """Allocate roughly ``allocate_bytes`` of memory in steps and time it.

    The allocation is split into ``num_chunks`` lists that are all kept alive
    until the function returns, with a pause after each one.

    Args:
        allocate_bytes: Total number of bytes to allocate.
        num_chunks: Number of separate lists the allocation is split into.
        allocate_interval_s: Seconds to sleep after allocating each chunk.

    Returns:
        Elapsed wall-clock time in seconds for the whole allocation loop.
    """
    began_at = time.time()
    # Each list element is counted as 8 bytes, so convert bytes to elements.
    elements_per_chunk = allocate_bytes / 8 / num_chunks
    held = []
    for _ in range(num_chunks):
        held.append([0] * ceil(elements_per_chunk))
        time.sleep(allocate_interval_s)
    return time.time() - began_at
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # A target of 1 asks for enough bytes to reach 100% of node memory.
    target_bytes = get_additional_bytes_to_reach_memory_usage_pct(1)
    # Submit the allocating task and block until it returns.
    pending = inf_retry.remote(target_bytes)
    ray.get(pending)
|
||||
|
|
@ -4629,6 +4629,24 @@
|
|||
type: sdk_command
|
||||
file_manager: sdk
|
||||
|
||||
# Nightly core test: runs oom_actor_test.py on a two-node CPU cluster.
- name: oom_actor_tests
  group: core-daily-test
  working_dir: core_tests

  frequency: nightly
  team: core
  cluster:
    # These names must match the config files that live in working_dir.
    cluster_env: app_cpu_nightly.yaml
    cluster_compute: compute_2_cpu.yaml

  run:
    timeout: 600
    script: python oom_actor_test.py
    # Head node plus one worker.
    wait_for_nodes:
      num_nodes: 2
    type: sdk_command
    file_manager: sdk
|
||||
|
||||
- name: k8s_serve_ha_test
|
||||
group: k8s-test
|
||||
working_dir: k8s_tests
|
||||
|
|
Loading…
Add table
Reference in a new issue