diff --git a/python/ray/_private/runtime_env/packaging.py b/python/ray/_private/runtime_env/packaging.py index 36b784ad7..810556652 100644 --- a/python/ray/_private/runtime_env/packaging.py +++ b/python/ray/_private/runtime_env/packaging.py @@ -1,4 +1,5 @@ from enum import Enum +from tempfile import TemporaryDirectory from filelock import FileLock import hashlib import logging @@ -526,22 +527,29 @@ def get_top_level_dir_from_compressed_package(package_path: str): return top_level_directory -def extract_file_and_remove_top_level_dir(base_dir: str, fname: str, - zip_ref: ZipFile): +def remove_dir_from_filepaths(base_dir: str, rdir: str): """ - Extracts fname file from zip_ref zip file, removes the top level directory - from fname's file path, and stores fname in the base_dir. + base_dir: String path of the directory containing rdir + rdir: String path of directory relative to base_dir whose contents should + be moved to its base_dir, its parent directory + + Removes rdir from the filepaths of all files and directories inside it. + In other words, moves all the files inside rdir to the directory that + contains rdir. Assumes base_dir's contents and rdir's contents have no + name conflicts. """ - fname_without_top_level_dir = "/".join(fname.split("/")[1:]) + # Move rdir to a temporary directory, so its contents can be moved to + # base_dir without any name conflicts + with TemporaryDirectory() as tmp_dir: + os.rename(os.path.join(base_dir, rdir), os.path.join(tmp_dir, rdir)) - # If this condition is false, it means there was no top-level directory, - # so we do nothing - if fname_without_top_level_dir: - zip_ref.extract(fname, base_dir) - os.rename( - os.path.join(base_dir, fname), - os.path.join(base_dir, fname_without_top_level_dir)) + # Shift children out of rdir and into base_dir + rdir_children = os.listdir(os.path.join(tmp_dir, rdir)) + for child in rdir_children: + os.rename( + os.path.join(tmp_dir, rdir, child), + os.path.join(base_dir, child)) def unzip_package(package_path: str, @@ -563,6 +571,8 @@ def unzip_package(package_path: str, logger.debug(f"Unpacking {package_path} to {target_dir}") + with ZipFile(str(package_path), "r") as zip_ref: + zip_ref.extractall(target_dir) if remove_top_level_directory: top_level_directory = get_top_level_dir_from_compressed_package( package_path) @@ -571,20 +581,8 @@ def unzip_package(package_path: str, "a single top level directory. Make sure there " "are no hidden files at the same level as the " "top level directory.") - with ZipFile(str(package_path), "r") as zip_ref: - for fname in zip_ref.namelist(): - extract_file_and_remove_top_level_dir( - base_dir=target_dir, fname=fname, zip_ref=zip_ref) - # Remove now-empty top_level_directory and any empty subdirectories - # left over from extract_file_and_remove_top_level_dir operations - leftover_top_level_directory = os.path.join( - target_dir, top_level_directory) - if os.path.isdir(leftover_top_level_directory): - shutil.rmtree(leftover_top_level_directory) - else: - with ZipFile(str(package_path), "r") as zip_ref: - zip_ref.extractall(target_dir) + remove_dir_from_filepaths(target_dir, top_level_directory) if unlink_zip: Path(package_path).unlink() diff --git a/python/ray/tests/test_runtime_env_packaging.py b/python/ray/tests/test_runtime_env_packaging.py index 67e90bb1f..3f787937f 100644 --- a/python/ray/tests/test_runtime_env_packaging.py +++ b/python/ray/tests/test_runtime_env_packaging.py @@ -1,12 +1,11 @@ import os from pathlib import Path import random -from shutil import rmtree, make_archive +from shutil import copytree, rmtree, make_archive import string import sys import tempfile from filecmp import dircmp -from zipfile import ZipFile import uuid import pytest @@ -16,8 +15,8 @@ from ray.experimental.internal_kv import (_internal_kv_del, from ray._private.runtime_env.packaging import ( _dir_travel, get_uri_for_directory, _get_excludes, upload_package_if_needed, parse_uri, Protocol, - get_top_level_dir_from_compressed_package, - extract_file_and_remove_top_level_dir, unzip_package) + get_top_level_dir_from_compressed_package, remove_dir_from_filepaths, + unzip_package) TOP_LEVEL_DIR_NAME = "top_level" ARCHIVE_NAME = "archive.zip" @@ -168,26 +167,28 @@ class TestGetTopLevelDirFromCompressedPackage: @pytest.mark.skipif(sys.platform == "win32", reason="Fail to create temp dir.") -class TestExtractFileAndRemoveTopLevelDir: - def test_valid_extraction(self, random_zip_file_with_top_level_dir): +class TestRemoveDirFromFilepaths: + def test_valid_removal(self, random_zip_file_with_top_level_dir): + # This test copies the TOP_LEVEL_DIR_NAME directory, and then it + # shifts the contents of the copied directory into the base tmp_path + # directory. Then it compares the contents of tmp_path with the + # TOP_LEVEL_DIR_NAME directory to ensure that they match. + archive_path = random_zip_file_with_top_level_dir tmp_path = archive_path[:archive_path.rfind("/")] - rmtree(os.path.join(tmp_path, TOP_LEVEL_DIR_NAME)) - with ZipFile(archive_path, "r") as zf: - for fname in zf.namelist(): - extract_file_and_remove_top_level_dir( - base_dir=tmp_path, fname=fname, zip_ref=zf) - rmtree(os.path.join(tmp_path, TOP_LEVEL_DIR_NAME)) - with ZipFile(archive_path, "r") as zf: - zf.extractall(tmp_path) + original_dir_path = os.path.join(tmp_path, TOP_LEVEL_DIR_NAME) + copy_dir_path = os.path.join(tmp_path, TOP_LEVEL_DIR_NAME + "_copy") + copytree(original_dir_path, copy_dir_path) + remove_dir_from_filepaths(tmp_path, TOP_LEVEL_DIR_NAME + "_copy") dcmp = dircmp(tmp_path, f"{tmp_path}/{TOP_LEVEL_DIR_NAME}") # Since this test uses the tmp_path as the target directory, and since # the tmp_path also contains the zip file and the top level directory, # make sure that the only difference between the tmp_path's contents - # and the top level directory's contents are the zip file and the top - # level directory itself. This implies that all files have been - # extracted from the top level directory and moved into the tmp_path. + # and the top level directory's contents are the zip file from the + # Pytest fixture and the top level directory itself. This implies that + # all files have been extracted from the top level directory and moved + # into the tmp_path. assert set(dcmp.left_only) == {ARCHIVE_NAME, TOP_LEVEL_DIR_NAME} # Make sure that all the subdirectories and files have been moved to @@ -198,30 +199,64 @@ class TestExtractFileAndRemoveTopLevelDir: @pytest.mark.skipif(sys.platform == "win32", reason="Fail to create temp dir.") @pytest.mark.parametrize("remove_top_level_directory", [False, True]) @pytest.mark.parametrize("unlink_zip", [False, True]) -def test_unzip_package(random_zip_file_with_top_level_dir, - remove_top_level_directory, unlink_zip): - archive_path = random_zip_file_with_top_level_dir - tmp_path = archive_path[:archive_path.rfind("/")] - tmp_subdir = f"{tmp_path}/{TOP_LEVEL_DIR_NAME}_tmp" - unzip_package( - package_path=archive_path, - target_dir=tmp_subdir, - remove_top_level_directory=remove_top_level_directory, - unlink_zip=unlink_zip) +class TestUnzipPackage: + def dcmp_helper(self, remove_top_level_directory, unlink_zip, tmp_subdir, + tmp_path, archive_path): + dcmp = None + if remove_top_level_directory: + dcmp = dircmp(f"{tmp_subdir}", f"{tmp_path}/{TOP_LEVEL_DIR_NAME}") + else: + dcmp = dircmp(f"{tmp_subdir}/{TOP_LEVEL_DIR_NAME}", + f"{tmp_path}/{TOP_LEVEL_DIR_NAME}") + assert len(dcmp.left_only) == 0 + assert len(dcmp.right_only) == 0 - dcmp = None - if remove_top_level_directory: - dcmp = dircmp(f"{tmp_subdir}", f"{tmp_path}/{TOP_LEVEL_DIR_NAME}") - else: - dcmp = dircmp(f"{tmp_subdir}/{TOP_LEVEL_DIR_NAME}", - f"{tmp_path}/{TOP_LEVEL_DIR_NAME}") - assert len(dcmp.left_only) == 0 - assert len(dcmp.right_only) == 0 + if unlink_zip: + assert not Path(archive_path).is_file() + else: + assert Path(archive_path).is_file() - if unlink_zip: - assert not Path(archive_path).is_file() - else: - assert Path(archive_path).is_file() + def test_unzip_package(self, random_zip_file_with_top_level_dir, + remove_top_level_directory, unlink_zip): + archive_path = random_zip_file_with_top_level_dir + tmp_path = archive_path[:archive_path.rfind("/")] + tmp_subdir = f"{tmp_path}/{TOP_LEVEL_DIR_NAME}_tmp" + + unzip_package( + package_path=archive_path, + target_dir=tmp_subdir, + remove_top_level_directory=remove_top_level_directory, + unlink_zip=unlink_zip) + + self.dcmp_helper(remove_top_level_directory, unlink_zip, tmp_subdir, + tmp_path, archive_path) + + def test_unzip_with_matching_subdirectory_names( + self, remove_top_level_directory, unlink_zip): + with tempfile.TemporaryDirectory() as tmp_dir: + path = Path(tmp_dir) + top_level_dir = path / TOP_LEVEL_DIR_NAME + top_level_dir.mkdir(parents=True) + next_level_dir = top_level_dir + for _ in range(10): + dir1 = next_level_dir / TOP_LEVEL_DIR_NAME + dir1.mkdir(parents=True) + next_level_dir = dir1 + make_archive(path / ARCHIVE_NAME[:ARCHIVE_NAME.rfind(".")], "zip", + path, TOP_LEVEL_DIR_NAME) + archive_path = str(path / ARCHIVE_NAME) + + tmp_path = archive_path[:archive_path.rfind("/")] + tmp_subdir = f"{tmp_path}/{TOP_LEVEL_DIR_NAME}_tmp" + + unzip_package( + package_path=archive_path, + target_dir=tmp_subdir, + remove_top_level_directory=remove_top_level_directory, + unlink_zip=unlink_zip) + + self.dcmp_helper(remove_top_level_directory, unlink_zip, + tmp_subdir, tmp_path, archive_path) @pytest.mark.skipif(sys.platform == "win32", reason="Fail to create temp dir.")