mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00

Adds a unit-tested and restructured ray_release package for running release tests. Relevant changes in behavior: By default, Buildkite will wait for the wheels of the current commit to be available. Alternatively, users can specify a) a different commit hash, b) a wheels URL (which we will also wait for to be available), or c) a branch (or user/branch combination), in which case the latest available wheels will be used (e.g. if master is passed, behavior matches the old default behavior). The main subpackages are: Cluster manager: creates cluster envs/computes, starts cluster, terminates cluster; Command runner: runs commands, e.g. as client command or SDK command; File manager: uploads/downloads files to/from session; Reporter: reports results (e.g. to database). Much of the code base is unit tested, but there are probably some pieces missing. Example build (waited for wheels to be built): https://buildkite.com/ray-project/kf-dev/builds/51#_ Wheel build: https://buildkite.com/ray-project/ray-builders-branch/builds/6023
92 lines
2.1 KiB
Python
92 lines
2.1 KiB
Python
import tarfile
|
|
import tempfile
|
|
from typing import Optional
|
|
|
|
from ray_release.file_manager.file_manager import FileManager
|
|
|
|
|
|
def _pack(source_dir: str) -> bytes:
    """Pack ``source_dir`` into a gzip-compressed tar archive held in memory.

    Args:
        source_dir: Path of the directory to archive.

    Returns:
        The raw bytes of the resulting ``tar.gz`` archive.
    """
    # An anonymous, self-deleting temporary file is used instead of
    # ``tempfile.mktemp()``: ``mktemp`` only returns a name (another process
    # can claim it first) and the file written under that name was never
    # removed, so every call left an archive behind in the temp directory.
    with tempfile.TemporaryFile() as archive:
        with tarfile.open(fileobj=archive, mode="w:gz") as tar:
            # arcname="" stores members relative to the archive root rather
            # than under the source directory's own path.
            tar.add(source_dir, arcname="")

        # The tar stream is only complete once the TarFile is closed;
        # rewind afterwards to read the whole archive back.
        archive.seek(0)
        return archive.read()
|
|
|
|
|
|
def _unpack(stream: bytes, target_dir: str) -> None:
    """Extract an in-memory tar archive into ``target_dir``.

    Args:
        stream: Raw bytes of a tar archive (for example the output of
            ``_pack``); compression is auto-detected by ``tarfile``.
        target_dir: Directory the archive members are extracted into.
    """
    # An anonymous, self-deleting temporary file is used instead of
    # ``tempfile.mktemp()``: ``mktemp`` only returns a name (another process
    # can claim it first) and the file written under that name was never
    # removed, so every call left an archive behind in the temp directory.
    with tempfile.TemporaryFile() as archive:
        archive.write(stream)
        archive.seek(0)

        with tarfile.open(fileobj=archive) as tar:
            # NOTE(review): extractall() trusts the member paths stored in
            # the archive. Callers in this module only pass archives built
            # by _pack -- confirm before feeding it untrusted data.
            tar.extractall(target_dir)
|
|
|
|
|
|
def send_dir_to_node(
    node_ip: str,
    local_dir: str,
    remote_dir: str,
):
    """Copy the contents of ``local_dir`` to ``remote_dir`` via a Ray task.

    The directory is archived locally with ``_pack`` and the archive is
    handed to a remote ``_unpack`` task that requests ``0.01`` of the
    ``node:<node_ip>`` resource (presumably so that Ray schedules it on the
    node with that IP -- verify against the Ray scheduling docs).

    This is best-effort: any exception is caught and reported as a printed
    warning instead of being raised.

    Args:
        node_ip: IP used to build the ``node:<ip>`` resource label.
        local_dir: Directory on this machine to send.
        remote_dir: Directory the remote task extracts into.
    """
    import ray

    try:
        archive = _pack(local_dir)

        node_resources = {f"node:{node_ip}": 0.01}
        remote_unpack = ray.remote(resources=node_resources)(_unpack)
        pending = remote_unpack.remote(archive, remote_dir)

        # Block until the remote extraction has finished (or failed).
        ray.get(pending)
    except Exception as err:
        print(
            "Warning: Could not send remote directory contents. Message: "
            f"{err!s}"
        )
|
|
|
|
|
|
def fetch_dir_from_node(
    node_ip: str,
    remote_dir: str,
    local_dir: str,
):
    """Copy the contents of ``remote_dir`` into ``local_dir`` via a Ray task.

    A remote ``_pack`` task that requests ``0.01`` of the ``node:<node_ip>``
    resource archives ``remote_dir`` (presumably on the node with that IP --
    verify against the Ray scheduling docs); the returned archive is then
    extracted locally with ``_unpack``.

    This is best-effort: any exception is caught and reported as a printed
    warning instead of being raised.

    Args:
        node_ip: IP used to build the ``node:<ip>`` resource label.
        remote_dir: Directory the remote task archives.
        local_dir: Directory on this machine to extract into.
    """
    import ray

    try:
        node_resources = {f"node:{node_ip}": 0.01}
        remote_pack = ray.remote(resources=node_resources)(_pack)
        pending = remote_pack.remote(remote_dir)

        # Block until the archive bytes have been returned to this process.
        archive = ray.get(pending)
        _unpack(archive, local_dir)
    except Exception as err:
        print(
            "Warning: Could not fetch remote directory contents. Message: "
            f"{err!s}"
        )
|
|
|
|
|
|
def _get_head_ip():
    """Return the node IP address reported by ``ray.util``.

    The value describes whichever process executes this function.
    NOTE(review): the name implies the head node, but nothing in this
    function pins execution there -- confirm where callers schedule it.
    """
    import ray

    ip_address = ray.util.get_node_ip_address()
    return ip_address
|
|
|
|
|
|
def send_dir_to_head(local_dir: str, remote_dir: str):
    """Send ``local_dir`` to ``remote_dir`` on the node found by ``_get_head_ip``.

    The target IP is obtained by running ``_get_head_ip`` as a Ray task; the
    transfer itself is delegated to ``send_dir_to_node``.

    Args:
        local_dir: Directory on this machine to send.
        remote_dir: Destination directory on the remote node.

    Returns:
        Whatever ``send_dir_to_node`` returns.
    """
    import ray

    ip_lookup = ray.remote(_get_head_ip)
    head_ip = ray.get(ip_lookup.remote())

    return send_dir_to_node(
        node_ip=head_ip,
        local_dir=local_dir,
        remote_dir=remote_dir,
    )
|
|
|
|
|
|
def fetch_dir_fom_head(local_dir: str, remote_dir: str):
    """Fetch ``remote_dir`` from the node found by ``_get_head_ip`` into ``local_dir``.

    The source IP is obtained by running ``_get_head_ip`` as a Ray task; the
    transfer itself is delegated to ``fetch_dir_from_node``.

    Note the parameter order: the local destination comes first and the
    remote source second.

    Args:
        local_dir: Directory on this machine to extract into.
        remote_dir: Directory on the remote node to fetch.

    Returns:
        Whatever ``fetch_dir_from_node`` returns.
    """
    import ray

    ip_lookup = ray.remote(_get_head_ip)
    head_ip = ray.get(ip_lookup.remote())

    return fetch_dir_from_node(
        node_ip=head_ip,
        remote_dir=remote_dir,
        local_dir=local_dir,
    )
|
|
|
|
|
|
class RemoteTaskFileManager(FileManager):
    """File manager that transfers directories through Ray remote tasks."""

    def upload(self, source: Optional[str] = None, target: Optional[str] = None):
        """Send the local directory ``source`` to the remote directory ``target``.

        Delegates to ``send_dir_to_head``, which reports failures as a
        printed warning instead of raising.
        """
        send_dir_to_head(local_dir=source, remote_dir=target)

    def download(self, source: str, target: str):
        """Fetch the remote directory ``source`` into the local directory ``target``.

        Delegates to ``fetch_dir_fom_head``, which reports failures as a
        printed warning instead of raising.
        """
        # fetch_dir_fom_head's signature is (local_dir, remote_dir). Passing
        # (source, target) positionally treated the remote source as the
        # local directory and vice versa, so bind the arguments by name.
        fetch_dir_fom_head(local_dir=target, remote_dir=source)
|