mirror of
https://github.com/vale981/ray
synced 2025-03-05 10:01:43 -05:00
[Serve] Handle name collisions when unzipping directories in unzip_package (#21723)
Currently, the `unzip_package` function relies on `extract_file_and_remove_top_level_dir` to unzip and remove the top-level directory from archive working directories. However, `extract_file_and_remove_top_level_dir` uses `os.rename()` to remove the tld by manually unzipping each file from a zip file and moving it to the tld's parent. When the tld contains directories or files with the same name as the tld, `os.rename()` fails to move these files to the tld's parent because of the name collision between the file and the tld. This change replaces `extract_file_and_remove_top_level_dir` with `remove_dir_from_filepaths`. Now, `unzip_package` unzips the entire zip file before `remove_dir_from_filepaths` moves all the tld's children to the tld's parent using `os.rename()`. This edge case is tested in the new unit test `test_unzip_with_matching_subdirectory_names`. Additionally, `extract_file_and_remove_top_level_dir`'s unit test is replaced with `TestRemoveDirFromFilepaths`, which tests the new `remove_dir_from_filepaths` function.
This commit is contained in:
parent
2954bf9a48
commit
45eebdd6e3
2 changed files with 97 additions and 64 deletions
|
@ -1,4 +1,5 @@
|
|||
from enum import Enum
|
||||
from tempfile import TemporaryDirectory
|
||||
from filelock import FileLock
|
||||
import hashlib
|
||||
import logging
|
||||
|
@ -526,22 +527,29 @@ def get_top_level_dir_from_compressed_package(package_path: str):
|
|||
return top_level_directory
|
||||
|
||||
|
||||
def extract_file_and_remove_top_level_dir(base_dir: str, fname: str,
|
||||
zip_ref: ZipFile):
|
||||
def remove_dir_from_filepaths(base_dir: str, rdir: str):
|
||||
"""
|
||||
Extracts fname file from zip_ref zip file, removes the top level directory
|
||||
from fname's file path, and stores fname in the base_dir.
|
||||
base_dir: String path of the directory containing rdir
|
||||
rdir: String path of directory relative to base_dir whose contents should
|
||||
be moved to its base_dir, its parent directory
|
||||
|
||||
Removes rdir from the filepaths of all files and directories inside it.
|
||||
In other words, moves all the files inside rdir to the directory that
|
||||
contains rdir. Assumes base_dir's contents and rdir's contents have no
|
||||
name conflicts.
|
||||
"""
|
||||
|
||||
fname_without_top_level_dir = "/".join(fname.split("/")[1:])
|
||||
# Move rdir to a temporary directory, so its contents can be moved to
|
||||
# base_dir without any name conflicts
|
||||
with TemporaryDirectory() as tmp_dir:
|
||||
os.rename(os.path.join(base_dir, rdir), os.path.join(tmp_dir, rdir))
|
||||
|
||||
# If this condition is false, it means there was no top-level directory,
|
||||
# so we do nothing
|
||||
if fname_without_top_level_dir:
|
||||
zip_ref.extract(fname, base_dir)
|
||||
# Shift children out of rdir and into base_dir
|
||||
rdir_children = os.listdir(os.path.join(tmp_dir, rdir))
|
||||
for child in rdir_children:
|
||||
os.rename(
|
||||
os.path.join(base_dir, fname),
|
||||
os.path.join(base_dir, fname_without_top_level_dir))
|
||||
os.path.join(tmp_dir, rdir, child),
|
||||
os.path.join(base_dir, child))
|
||||
|
||||
|
||||
def unzip_package(package_path: str,
|
||||
|
@ -563,6 +571,8 @@ def unzip_package(package_path: str,
|
|||
|
||||
logger.debug(f"Unpacking {package_path} to {target_dir}")
|
||||
|
||||
with ZipFile(str(package_path), "r") as zip_ref:
|
||||
zip_ref.extractall(target_dir)
|
||||
if remove_top_level_directory:
|
||||
top_level_directory = get_top_level_dir_from_compressed_package(
|
||||
package_path)
|
||||
|
@ -571,20 +581,8 @@ def unzip_package(package_path: str,
|
|||
"a single top level directory. Make sure there "
|
||||
"are no hidden files at the same level as the "
|
||||
"top level directory.")
|
||||
with ZipFile(str(package_path), "r") as zip_ref:
|
||||
for fname in zip_ref.namelist():
|
||||
extract_file_and_remove_top_level_dir(
|
||||
base_dir=target_dir, fname=fname, zip_ref=zip_ref)
|
||||
|
||||
# Remove now-empty top_level_directory and any empty subdirectories
|
||||
# left over from extract_file_and_remove_top_level_dir operations
|
||||
leftover_top_level_directory = os.path.join(
|
||||
target_dir, top_level_directory)
|
||||
if os.path.isdir(leftover_top_level_directory):
|
||||
shutil.rmtree(leftover_top_level_directory)
|
||||
else:
|
||||
with ZipFile(str(package_path), "r") as zip_ref:
|
||||
zip_ref.extractall(target_dir)
|
||||
remove_dir_from_filepaths(target_dir, top_level_directory)
|
||||
|
||||
if unlink_zip:
|
||||
Path(package_path).unlink()
|
||||
|
|
|
@ -1,12 +1,11 @@
|
|||
import os
|
||||
from pathlib import Path
|
||||
import random
|
||||
from shutil import rmtree, make_archive
|
||||
from shutil import copytree, rmtree, make_archive
|
||||
import string
|
||||
import sys
|
||||
import tempfile
|
||||
from filecmp import dircmp
|
||||
from zipfile import ZipFile
|
||||
import uuid
|
||||
|
||||
import pytest
|
||||
|
@ -16,8 +15,8 @@ from ray.experimental.internal_kv import (_internal_kv_del,
|
|||
from ray._private.runtime_env.packaging import (
|
||||
_dir_travel, get_uri_for_directory, _get_excludes,
|
||||
upload_package_if_needed, parse_uri, Protocol,
|
||||
get_top_level_dir_from_compressed_package,
|
||||
extract_file_and_remove_top_level_dir, unzip_package)
|
||||
get_top_level_dir_from_compressed_package, remove_dir_from_filepaths,
|
||||
unzip_package)
|
||||
|
||||
TOP_LEVEL_DIR_NAME = "top_level"
|
||||
ARCHIVE_NAME = "archive.zip"
|
||||
|
@ -168,26 +167,28 @@ class TestGetTopLevelDirFromCompressedPackage:
|
|||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="Fail to create temp dir.")
|
||||
class TestExtractFileAndRemoveTopLevelDir:
|
||||
def test_valid_extraction(self, random_zip_file_with_top_level_dir):
|
||||
class TestRemoveDirFromFilepaths:
|
||||
def test_valid_removal(self, random_zip_file_with_top_level_dir):
|
||||
# This test copies the TOP_LEVEL_DIR_NAME directory, and then it
|
||||
# shifts the contents of the copied directory into the base tmp_path
|
||||
# directory. Then it compares the contents of tmp_path with the
|
||||
# TOP_LEVEL_DIR_NAME directory to ensure that they match.
|
||||
|
||||
archive_path = random_zip_file_with_top_level_dir
|
||||
tmp_path = archive_path[:archive_path.rfind("/")]
|
||||
rmtree(os.path.join(tmp_path, TOP_LEVEL_DIR_NAME))
|
||||
with ZipFile(archive_path, "r") as zf:
|
||||
for fname in zf.namelist():
|
||||
extract_file_and_remove_top_level_dir(
|
||||
base_dir=tmp_path, fname=fname, zip_ref=zf)
|
||||
rmtree(os.path.join(tmp_path, TOP_LEVEL_DIR_NAME))
|
||||
with ZipFile(archive_path, "r") as zf:
|
||||
zf.extractall(tmp_path)
|
||||
original_dir_path = os.path.join(tmp_path, TOP_LEVEL_DIR_NAME)
|
||||
copy_dir_path = os.path.join(tmp_path, TOP_LEVEL_DIR_NAME + "_copy")
|
||||
copytree(original_dir_path, copy_dir_path)
|
||||
remove_dir_from_filepaths(tmp_path, TOP_LEVEL_DIR_NAME + "_copy")
|
||||
dcmp = dircmp(tmp_path, f"{tmp_path}/{TOP_LEVEL_DIR_NAME}")
|
||||
|
||||
# Since this test uses the tmp_path as the target directory, and since
|
||||
# the tmp_path also contains the zip file and the top level directory,
|
||||
# make sure that the only difference between the tmp_path's contents
|
||||
# and the top level directory's contents are the zip file and the top
|
||||
# level directory itself. This implies that all files have been
|
||||
# extracted from the top level directory and moved into the tmp_path.
|
||||
# and the top level directory's contents are the zip file from the
|
||||
# Pytest fixture and the top level directory itself. This implies that
|
||||
# all files have been extracted from the top level directory and moved
|
||||
# into the tmp_path.
|
||||
assert set(dcmp.left_only) == {ARCHIVE_NAME, TOP_LEVEL_DIR_NAME}
|
||||
|
||||
# Make sure that all the subdirectories and files have been moved to
|
||||
|
@ -198,17 +199,9 @@ class TestExtractFileAndRemoveTopLevelDir:
|
|||
@pytest.mark.skipif(sys.platform == "win32", reason="Fail to create temp dir.")
|
||||
@pytest.mark.parametrize("remove_top_level_directory", [False, True])
|
||||
@pytest.mark.parametrize("unlink_zip", [False, True])
|
||||
def test_unzip_package(random_zip_file_with_top_level_dir,
|
||||
remove_top_level_directory, unlink_zip):
|
||||
archive_path = random_zip_file_with_top_level_dir
|
||||
tmp_path = archive_path[:archive_path.rfind("/")]
|
||||
tmp_subdir = f"{tmp_path}/{TOP_LEVEL_DIR_NAME}_tmp"
|
||||
unzip_package(
|
||||
package_path=archive_path,
|
||||
target_dir=tmp_subdir,
|
||||
remove_top_level_directory=remove_top_level_directory,
|
||||
unlink_zip=unlink_zip)
|
||||
|
||||
class TestUnzipPackage:
|
||||
def dcmp_helper(self, remove_top_level_directory, unlink_zip, tmp_subdir,
|
||||
tmp_path, archive_path):
|
||||
dcmp = None
|
||||
if remove_top_level_directory:
|
||||
dcmp = dircmp(f"{tmp_subdir}", f"{tmp_path}/{TOP_LEVEL_DIR_NAME}")
|
||||
|
@ -223,6 +216,48 @@ def test_unzip_package(random_zip_file_with_top_level_dir,
|
|||
else:
|
||||
assert Path(archive_path).is_file()
|
||||
|
||||
def test_unzip_package(self, random_zip_file_with_top_level_dir,
|
||||
remove_top_level_directory, unlink_zip):
|
||||
archive_path = random_zip_file_with_top_level_dir
|
||||
tmp_path = archive_path[:archive_path.rfind("/")]
|
||||
tmp_subdir = f"{tmp_path}/{TOP_LEVEL_DIR_NAME}_tmp"
|
||||
|
||||
unzip_package(
|
||||
package_path=archive_path,
|
||||
target_dir=tmp_subdir,
|
||||
remove_top_level_directory=remove_top_level_directory,
|
||||
unlink_zip=unlink_zip)
|
||||
|
||||
self.dcmp_helper(remove_top_level_directory, unlink_zip, tmp_subdir,
|
||||
tmp_path, archive_path)
|
||||
|
||||
def test_unzip_with_matching_subdirectory_names(
|
||||
self, remove_top_level_directory, unlink_zip):
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
path = Path(tmp_dir)
|
||||
top_level_dir = path / TOP_LEVEL_DIR_NAME
|
||||
top_level_dir.mkdir(parents=True)
|
||||
next_level_dir = top_level_dir
|
||||
for _ in range(10):
|
||||
dir1 = next_level_dir / TOP_LEVEL_DIR_NAME
|
||||
dir1.mkdir(parents=True)
|
||||
next_level_dir = dir1
|
||||
make_archive(path / ARCHIVE_NAME[:ARCHIVE_NAME.rfind(".")], "zip",
|
||||
path, TOP_LEVEL_DIR_NAME)
|
||||
archive_path = str(path / ARCHIVE_NAME)
|
||||
|
||||
tmp_path = archive_path[:archive_path.rfind("/")]
|
||||
tmp_subdir = f"{tmp_path}/{TOP_LEVEL_DIR_NAME}_tmp"
|
||||
|
||||
unzip_package(
|
||||
package_path=archive_path,
|
||||
target_dir=tmp_subdir,
|
||||
remove_top_level_directory=remove_top_level_directory,
|
||||
unlink_zip=unlink_zip)
|
||||
|
||||
self.dcmp_helper(remove_top_level_directory, unlink_zip,
|
||||
tmp_subdir, tmp_path, archive_path)
|
||||
|
||||
|
||||
@pytest.mark.skipif(sys.platform == "win32", reason="Fail to create temp dir.")
|
||||
def test_travel():
|
||||
|
|
Loading…
Add table
Reference in a new issue