[air/release] Improve file packing/unpacking (#23621)

We use tarfile to pack/unpack directories in several locations. Instead of using temporary files, we can just use io.BytesIO to avoid unnecessary disk writes.

Note that this functionality is present in 3 different modules - in Ray (AIR), in the release test package, and in a specific release test. The implementations should live in the three modules independently, so we don't add a common utility for this (e.g. the ray_release package should be independent of the Ray package).
This commit is contained in:
Kai Fricke 2022-04-01 07:38:14 -07:00 committed by GitHub
parent 0bb82f29b6
commit fe27dbcd9a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 18 additions and 43 deletions

View file

@ -1,3 +1,4 @@
import io
import shutil
import tarfile
import tempfile
@ -451,26 +452,15 @@ def _temporary_checkpoint_dir() -> str:
def _pack(path: str) -> bytes:
    """Pack directory in ``path`` into an archive, return as bytes string.

    The gzip-compressed tar archive is assembled in an in-memory buffer, so
    no temporary file is written to disk.

    Args:
        path: Directory to archive. It is added under an empty archive name,
            so member names are relative to ``path``.

    Returns:
        The bytes of the resulting ``tar.gz`` archive.
    """
    stream = io.BytesIO()
    # The archive has to be closed (end of the ``with`` block) before the
    # buffer is read, otherwise the tar/gzip trailers are still missing.
    with tarfile.open(fileobj=stream, mode="w:gz", format=tarfile.PAX_FORMAT) as tar:
        tar.add(path, arcname="")
    return stream.getvalue()
def _unpack(stream: bytes, path: str) -> str:
    """Unpack archive in bytes string into directory in ``path``.

    The archive is read directly from memory; no temporary file is created.

    Args:
        stream: Bytes of a tar archive. ``tarfile.open`` is called with its
            default read mode, which detects the compression transparently.
        path: Directory the archive members are extracted into.

    Returns:
        ``path``, exactly as passed in.
    """
    # NOTE(review): ``extractall`` trusts the member names stored in the
    # archive - only feed it archives from trusted sources.
    with tarfile.open(fileobj=io.BytesIO(stream)) as tar:
        tar.extractall(path)
    return path

View file

@ -1,28 +1,20 @@
import io
import tarfile
import tempfile
from typing import Optional
from ray_release.file_manager.file_manager import FileManager
def _pack(source_dir: str) -> bytes:
    """Pack the directory ``source_dir`` into a gzipped tar archive.

    The archive is built in an in-memory buffer instead of a temporary file.

    Args:
        source_dir: Directory to archive. It is added under an empty archive
            name, so member names are relative to ``source_dir``.

    Returns:
        The bytes of the resulting ``tar.gz`` archive.
    """
    stream = io.BytesIO()
    # Read the buffer only after the ``with`` block has closed the archive,
    # so the tar/gzip trailers are included.
    with tarfile.open(fileobj=stream, mode="w:gz", format=tarfile.PAX_FORMAT) as tar:
        tar.add(source_dir, arcname="")
    return stream.getvalue()
def _unpack(stream: bytes, target_dir: str) -> None:
    """Extract the tar archive held in ``stream`` into ``target_dir``.

    The archive is read directly from memory; no temporary file is created.

    Args:
        stream: Bytes of a tar archive. ``tarfile.open`` is called with its
            default read mode, which detects the compression transparently.
        target_dir: Directory the archive members are extracted into.
    """
    # NOTE(review): ``extractall`` trusts the member names stored in the
    # archive - only feed it archives from trusted sources.
    with tarfile.open(fileobj=io.BytesIO(stream)) as tar:
        tar.extractall(target_dir)
@ -59,9 +51,7 @@ def fetch_dir_from_node(
)
_unpack(packed, local_dir)
except Exception as e:
print(
f"Warning: Could not fetch remote directory contents. Message: " f"{str(e)}"
)
print(f"Warning: Could not fetch remote directory contents. Message: {str(e)}")
def _get_head_ip():

View file

@ -29,6 +29,7 @@ More details on the expected results can be found in the scenario descriptions.
import argparse
import csv
import io
import tarfile
from dataclasses import dataclass
import json
@ -407,22 +408,16 @@ def fetch_remote_directory_content(
local_dir: str,
):
def _pack(dir: str) -> bytes:
    """Pack the directory ``dir`` into a gzipped tar archive, return its bytes.

    The archive is built in an in-memory buffer instead of a temporary file.
    ``dir`` is added under an empty archive name, so member names are
    relative to it.
    """
    stream = io.BytesIO()
    # Read the buffer only after the ``with`` block has closed the archive,
    # so the tar/gzip trailers are included.
    with tarfile.open(
        fileobj=stream, mode="w:gz", format=tarfile.PAX_FORMAT
    ) as tar:
        tar.add(dir, arcname="")
    return stream.getvalue()
def _unpack(stream: bytes, dir: str) -> None:
    """Extract the tar archive held in ``stream`` into the directory ``dir``.

    ``stream`` must be the raw bytes of the archive (it is wrapped in
    ``io.BytesIO``); the archive is read from memory without a temporary
    file.
    """
    # NOTE(review): ``extractall`` trusts the member names stored in the
    # archive - only feed it archives from trusted sources.
    with tarfile.open(fileobj=io.BytesIO(stream)) as tar:
        tar.extractall(dir)
try: