ray/release/ray_release/file_manager/remote_task.py
Kai Fricke 331b71ea8d
[ci/release] Refactor release test e2e into package (#22351)
Adds a unit-tested and restructured ray_release package for running release tests.

Relevant changes in behavior:

By default, Buildkite will wait for the wheels of the current commit to be available. Alternatively, users can specify a) a different commit hash, b) a wheels URL (which we will also wait for to be available), or c) a branch (or user/branch combination), in which case the latest available wheels will be used (e.g. if master is passed, behavior matches the old default behavior).

The main subpackages are:

    Cluster manager: Creates cluster envs/computes, starts cluster, terminates cluster
    Command runner: Runs commands, e.g. as client command or sdk command
    File manager: Uploads/downloads files to/from session
    Reporter: Reports results (e.g. to database)

Much of the code base is unit tested, but there are probably some pieces missing.

Example build (waited for wheels to be built): https://buildkite.com/ray-project/kf-dev/builds/51#_
Wheel build: https://buildkite.com/ray-project/ray-builders-branch/builds/6023
2022-02-16 17:35:02 +00:00

92 lines
2.1 KiB
Python

import tarfile
import tempfile
from typing import Optional
from ray_release.file_manager.file_manager import FileManager
def _pack(source_dir: str) -> bytes:
    """Pack a directory into a gzip-compressed tar archive held in memory.

    Args:
        source_dir: Path of the directory whose contents are archived.

    Returns:
        The raw bytes of the resulting ``tar.gz`` archive.
    """
    # Function-scope import so this block does not depend on the file header.
    import io

    # Build the archive in memory instead of at a ``tempfile.mktemp()`` path:
    # ``mktemp`` is deprecated and race-prone, and the file it named was never
    # deleted, leaking one temp file per call. The result is returned as bytes
    # anyway, so no on-disk copy is needed.
    buffer = io.BytesIO()
    with tarfile.open(fileobj=buffer, mode="w:gz") as tar:
        # An empty arcname stores members relative to ``source_dir`` itself
        # rather than under its directory name.
        tar.add(source_dir, arcname="")
    return buffer.getvalue()
def _unpack(stream: bytes, target_dir: str):
    """Extract a tar archive held in memory into a directory.

    Args:
        stream: Raw bytes of a tar archive (optionally compressed), e.g. the
            value returned by ``_pack``.
        target_dir: Directory into which the archive members are extracted.
    """
    # Function-scope import so this block does not depend on the file header.
    import io

    # Read the archive straight from memory instead of first writing it to a
    # ``tempfile.mktemp()`` path: ``mktemp`` is deprecated and race-prone, and
    # the file it named was never deleted, leaking one temp file per call.
    # NOTE(review): ``extractall`` is called without an extraction filter, so
    # member paths are trusted. Presumably archives only come from nodes of
    # the same cluster - confirm before feeding this untrusted data.
    with tarfile.open(fileobj=io.BytesIO(stream)) as tar:
        tar.extractall(target_dir)
def send_dir_to_node(
    node_ip: str,
    local_dir: str,
    remote_dir: str,
):
    """Copy a local directory to a directory on a cluster node.

    The directory is packed locally with ``_pack`` and unpacked by a Ray
    task running ``_unpack``. This is best-effort: any exception raised while
    packing or transferring is caught and reported as a printed warning.

    Args:
        node_ip: IP address used to build the ``node:<ip>`` resource request.
        local_dir: Directory on this machine to send.
        remote_dir: Directory on the node to extract into.
    """
    import ray

    try:
        archive = _pack(local_dir)
        # Requesting a sliver of the ``node:<ip>`` resource presumably places
        # the task on that specific node - see Ray's custom resource docs.
        node_resource = {f"node:{node_ip}": 0.01}
        remote_unpack = ray.remote(resources=node_resource)(_unpack)
        ray.get(remote_unpack.remote(archive, remote_dir))
    except Exception as e:
        message = str(e)
        print(
            f"Warning: Could not send remote directory contents. Message: {message}"
        )
def fetch_dir_from_node(
    node_ip: str,
    remote_dir: str,
    local_dir: str,
):
    """Copy a directory from a cluster node into a local directory.

    A Ray task running ``_pack`` archives the directory, and the returned
    bytes are unpacked locally with ``_unpack``. This is best-effort: any
    exception raised while fetching or unpacking is caught and reported as a
    printed warning.

    Args:
        node_ip: IP address used to build the ``node:<ip>`` resource request.
        remote_dir: Directory on the node to fetch.
        local_dir: Directory on this machine to extract into.
    """
    import ray

    try:
        # Requesting a sliver of the ``node:<ip>`` resource presumably places
        # the task on that specific node - see Ray's custom resource docs.
        node_resource = {f"node:{node_ip}": 0.01}
        remote_pack = ray.remote(resources=node_resource)(_pack)
        archive = ray.get(remote_pack.remote(remote_dir))
        _unpack(archive, local_dir)
    except Exception as e:
        message = str(e)
        print(
            f"Warning: Could not fetch remote directory contents. Message: {message}"
        )
def _get_head_ip():
    """Return the IP address of the node this function executes on.

    NOTE(review): the name implies the head node, but the code only reports
    the address of whichever node runs it - confirm where callers schedule it.
    """
    import ray

    node_ip = ray.util.get_node_ip_address()
    return node_ip
def send_dir_to_head(local_dir: str, remote_dir: str):
    """Send a local directory to the node reported by ``_get_head_ip``.

    Args:
        local_dir: Directory on this machine to send.
        remote_dir: Directory on the receiving node to extract into.

    Returns:
        Whatever ``send_dir_to_node`` returns.
    """
    import ray

    # Run ``_get_head_ip`` as a Ray task to learn the target node's address.
    ip_ref = ray.remote(_get_head_ip).remote()
    head_ip = ray.get(ip_ref)
    return send_dir_to_node(head_ip, local_dir, remote_dir)
def fetch_dir_fom_head(local_dir: str, remote_dir: str):
    """Fetch a directory from the node reported by ``_get_head_ip``.

    The function name is misspelled ("fom"); it is kept as-is because callers
    reference it by this name.

    Args:
        local_dir: Directory on this machine to extract into.
        remote_dir: Directory on the remote node to fetch.

    Returns:
        Whatever ``fetch_dir_from_node`` returns.
    """
    import ray

    # Run ``_get_head_ip`` as a Ray task to learn the source node's address.
    ip_ref = ray.remote(_get_head_ip).remote()
    head_ip = ray.get(ip_ref)
    # Note the argument order: ``fetch_dir_from_node`` takes the remote
    # directory before the local one.
    return fetch_dir_from_node(head_ip, remote_dir, local_dir)
class RemoteTaskFileManager(FileManager):
    """File manager that transfers directories using Ray remote tasks."""

    def upload(self, source: Optional[str] = None, target: Optional[str] = None):
        """Send the local directory ``source`` to ``target`` on the remote side.

        Args:
            source: Local directory to send.
            target: Remote directory to extract into.
        """
        send_dir_to_head(source, target)

    def download(self, source: str, target: str):
        """Fetch the remote directory ``source`` into the local ``target``.

        Args:
            source: Remote directory to fetch.
            target: Local directory to extract into.
        """
        # ``fetch_dir_fom_head`` takes ``(local_dir, remote_dir)``. Passing
        # ``(source, target)`` positionally swapped the two directions, so the
        # arguments are bound by keyword to mirror ``upload`` (source is where
        # data comes from, target is where it goes).
        fetch_dir_fom_head(local_dir=target, remote_dir=source)