mirror of
https://github.com/vale981/ray
synced 2025-03-05 10:01:43 -05:00
[air/release] Improve file packing/unpacking (#23621)
We use tarfile to pack/unpack directories in several locations. Instead of going through temporary files, we can pack into and unpack from an in-memory io.BytesIO buffer, which avoids unnecessary disk writes. Note that this functionality is present in three different modules: in Ray (AIR), in the release test package, and in a specific release test. The implementations should live in the three modules independently, so we don't add a common utility for this (for example, the ray_release package should stay independent of the Ray package).
This commit is contained in:
parent
0bb82f29b6
commit
fe27dbcd9a
3 changed files with 18 additions and 43 deletions
|
@ -1,3 +1,4 @@
|
|||
import io
|
||||
import shutil
|
||||
import tarfile
|
||||
import tempfile
|
||||
|
@ -451,26 +452,15 @@ def _temporary_checkpoint_dir() -> str:
|
|||
|
||||
def _pack(path: str) -> bytes:
|
||||
"""Pack directory in ``path`` into an archive, return as bytes string."""
|
||||
_, tmpfile = tempfile.mkstemp()
|
||||
with tarfile.open(tmpfile, "w:gz") as tar:
|
||||
stream = io.BytesIO()
|
||||
with tarfile.open(fileobj=stream, mode="w:gz", format=tarfile.PAX_FORMAT) as tar:
|
||||
tar.add(path, arcname="")
|
||||
|
||||
with open(tmpfile, "rb") as f:
|
||||
stream = f.read()
|
||||
|
||||
os.remove(tmpfile)
|
||||
return stream
|
||||
return stream.getvalue()
|
||||
|
||||
|
||||
def _unpack(stream: bytes, path: str) -> str:
|
||||
"""Unpack archive in bytes string into directory in ``path``."""
|
||||
_, tmpfile = tempfile.mkstemp()
|
||||
|
||||
with open(tmpfile, "wb") as f:
|
||||
f.write(stream)
|
||||
|
||||
with tarfile.open(tmpfile) as tar:
|
||||
with tarfile.open(fileobj=io.BytesIO(stream)) as tar:
|
||||
tar.extractall(path)
|
||||
|
||||
os.remove(tmpfile)
|
||||
return path
|
||||
|
|
|
@ -1,28 +1,20 @@
|
|||
import io
|
||||
import tarfile
|
||||
import tempfile
|
||||
from typing import Optional
|
||||
|
||||
from ray_release.file_manager.file_manager import FileManager
|
||||
|
||||
|
||||
def _pack(source_dir: str) -> bytes:
|
||||
tmpfile = tempfile.mktemp()
|
||||
with tarfile.open(tmpfile, "w:gz") as tar:
|
||||
stream = io.BytesIO()
|
||||
with tarfile.open(fileobj=stream, mode="w:gz", format=tarfile.PAX_FORMAT) as tar:
|
||||
tar.add(source_dir, arcname="")
|
||||
|
||||
with open(tmpfile, "rb") as f:
|
||||
stream = f.read()
|
||||
|
||||
return stream
|
||||
return stream.getvalue()
|
||||
|
||||
|
||||
def _unpack(stream: bytes, target_dir: str):
|
||||
tmpfile = tempfile.mktemp()
|
||||
|
||||
with open(tmpfile, "wb") as f:
|
||||
f.write(stream)
|
||||
|
||||
with tarfile.open(tmpfile) as tar:
|
||||
with tarfile.open(fileobj=io.BytesIO(stream)) as tar:
|
||||
tar.extractall(target_dir)
|
||||
|
||||
|
||||
|
@ -59,9 +51,7 @@ def fetch_dir_from_node(
|
|||
)
|
||||
_unpack(packed, local_dir)
|
||||
except Exception as e:
|
||||
print(
|
||||
f"Warning: Could not fetch remote directory contents. Message: " f"{str(e)}"
|
||||
)
|
||||
print(f"Warning: Could not fetch remote directory contents. Message: {str(e)}")
|
||||
|
||||
|
||||
def _get_head_ip():
|
||||
|
|
|
@ -29,6 +29,7 @@ More details on the expected results can be found in the scenario descriptions.
|
|||
|
||||
import argparse
|
||||
import csv
|
||||
import io
|
||||
import tarfile
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
|
@ -407,22 +408,16 @@ def fetch_remote_directory_content(
|
|||
local_dir: str,
|
||||
):
|
||||
def _pack(dir: str):
|
||||
_, tmpfile = tempfile.mkstemp()
|
||||
with tarfile.open(tmpfile, "w:gz") as tar:
|
||||
stream = io.BytesIO()
|
||||
with tarfile.open(
|
||||
fileobj=stream, mode="w:gz", format=tarfile.PAX_FORMAT
|
||||
) as tar:
|
||||
tar.add(dir, arcname="")
|
||||
|
||||
with open(tmpfile, "rb") as f:
|
||||
stream = f.read()
|
||||
|
||||
return stream
|
||||
return stream.getvalue()
|
||||
|
||||
def _unpack(stream: str, dir: str):
|
||||
_, tmpfile = tempfile.mkstemp()
|
||||
|
||||
with open(tmpfile, "wb") as f:
|
||||
f.write(stream)
|
||||
|
||||
with tarfile.open(tmpfile) as tar:
|
||||
with tarfile.open(fileobj=io.BytesIO(stream)) as tar:
|
||||
tar.extractall(dir)
|
||||
|
||||
try:
|
||||
|
|
Loading…
Add table
Reference in a new issue