From ac39e23145336fa7d450dd170e3e12ffc3d72d83 Mon Sep 17 00:00:00 2001 From: mehrdadn Date: Thu, 16 Jul 2020 09:26:47 -0700 Subject: [PATCH] Get rid of build shell scripts and move them to Python (#6082) --- build.sh | 168 ++----------------------- python/setup.py | 317 ++++++++++++++++++++++++++++++++++++------------ 2 files changed, 247 insertions(+), 238 deletions(-) diff --git a/build.sh b/build.sh index d32b49257..2cf38501e 100755 --- a/build.sh +++ b/build.sh @@ -1,166 +1,14 @@ #!/usr/bin/env bash -set -x - -# Cause the script to exit if a single command fails. -set -e - -# As the supported Python versions change, edit this array: -SUPPORTED_PYTHONS=( "3.5" "3.6" "3.7" "3.8" ) - -ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE:-$0}")"; pwd) - -function usage() -{ - cat <] - -Options: - -h|--help print the help info - -l|--language language1[,language2] - a list of languages to build native libraries. - Supported languages include "python" and "java". - If not specified, only python library will be built. - -p|--python mypython which python executable (default: result of "which python") -EOF -} - -# Determine how many parallel jobs to use for make based on the number of cores -unamestr="$(uname)" -if [[ "$unamestr" == "Linux" ]]; then - PARALLEL=1 -elif [[ "$unamestr" == "Darwin" ]]; then - PARALLEL=$(sysctl -n hw.ncpu) -elif [[ "${OSTYPE}" == "msys" ]]; then - PARALLEL="${NUMBER_OF_PROCESSORS-1}" -else - echo "Unrecognized platform." - exit 1 +if [ "${OSTYPE}" = msys ]; then + echo "WARNING: ${0##*/} is not recommended on MSYS2, as MSYS2 alters the build environment." fi -RAY_BUILD_PYTHON="YES" -RAY_BUILD_JAVA="NO" -PYTHON_EXECUTABLE="" -BUILD_DIR="" - -# Parse options -while [[ $# -gt 0 ]]; do - key="$1" - case $key in - -h|--help) - usage - exit 0 - ;; - -l|--language) - LANGUAGE="$2" - RAY_BUILD_PYTHON="NO" - RAY_BUILD_JAVA="NO" - if [[ "$LANGUAGE" == *"python"* ]]; then - RAY_BUILD_PYTHON="YES" - fi - if [[ "$LANGUAGE" == *"java"* ]]; then - RAY_BUILD_JAVA="YES" - fi - if [ "$RAY_BUILD_PYTHON" == "NO" ] && [ "$RAY_BUILD_JAVA" == "NO" ]; then - echo "Unrecognized language: $LANGUAGE" - exit -1 - fi - shift - ;; - -p|--python) - PYTHON_EXECUTABLE="$2" - shift - ;; - *) - echo "ERROR: unknown option \"$key\"" - echo - usage - exit -1 - ;; - esac - shift -done - -if [[ -z "$PYTHON_EXECUTABLE" ]]; then - PYTHON_EXECUTABLE=$(which python) +if [ -z "${PYTHON3_BIN_PATH-}" ]; then + PYTHON3_BIN_PATH="$(command -v python3 || command -v python || echo python)" fi -PYTHON_VERSION=`"$PYTHON_EXECUTABLE" -c 'import sys; version=sys.version_info[:3]; print("{0}.{1}".format(*version))'` -found= -for allowed in ${SUPPORTED_PYTHONS[@]} -do - if [[ "$PYTHON_VERSION" == $allowed ]] - then - found=$allowed - break - fi -done -if [[ -z $found ]] -then - cat </dev/null; then - pickle5_available=1 - fi - if [ 1 -ne "${pickle5_available}" ]; then - # Install pickle5-backport. - TEMP_DIR="$(mktemp -d)" - pushd "$TEMP_DIR" - curl -f -s -L -R -o "pickle5-backport.zip" "https://github.com/pitrou/pickle5-backport/archive/c0c1a158f59366696161e0dffdd10cfe17601372.zip" - unzip -q pickle5-backport.zip - pushd pickle5-backport-c0c1a158f59366696161e0dffdd10cfe17601372 - CC=gcc "$PYTHON_EXECUTABLE" setup.py --quiet bdist_wheel - unzip -q -o dist/*.whl -d "$pickle5_path" - popd - popd - rm -rf "$TEMP_DIR" - fi - - if [ -z "$SKIP_THIRDPARTY_INSTALL" ]; then - CC=gcc "$PYTHON_EXECUTABLE" -m pip install -q psutil setproctitle \ - --target="$ROOT_DIR/python/ray/thirdparty_files" - fi - - export PYTHON3_BIN_PATH="$PYTHON_EXECUTABLE" - - "$BAZEL_EXECUTABLE" build //:ray_pkg -fi - -popd +BAZEL_SH="${SHELL}" exec \ + "${PYTHON3_BIN_PATH}" -c \ + "import runpy, sys; runpy.run_path(sys.argv.pop(), run_name='__api__')" \ + build "$@" "${0%/*}"/python/setup.py diff --git a/python/setup.py b/python/setup.py index 7a5879a56..d4dfe832c 100644 --- a/python/setup.py +++ b/python/setup.py @@ -1,12 +1,20 @@ -from itertools import chain +import argparse +import glob +import io import os import re import shutil import subprocess import sys +import tarfile +import tempfile +import zipfile -from setuptools import setup, find_packages, Distribution -import setuptools.command.build_ext as _build_ext +from itertools import chain + +import urllib.error +import urllib.parse +import urllib.request # Ideally, we could include these files by putting them in a # MANIFEST.in or using the package_data argument to setup, but the @@ -14,12 +22,20 @@ import setuptools.command.build_ext as _build_ext # before these files have been created, so we have to move the files # manually. +SUPPORTED_PYTHONS = [(3, 5), (3, 6), (3, 7), (3, 8)] + +ROOT_DIR = os.path.dirname(__file__) +BUILD_JAVA = os.getenv("RAY_INSTALL_JAVA") == "1" + exe_suffix = ".exe" if sys.platform == "win32" else "" # .pyd is the extension Python requires on Windows for shared libraries. # https://docs.python.org/3/faq/windows.html#is-a-pyd-file-the-same-as-a-dll pyd_suffix = ".pyd" if sys.platform == "win32" else ".so" +pickle5_url = ("https://github.com/pitrou/pickle5-backport/archive/" + "c0c1a158f59366696161e0dffdd10cfe17601372.tar.gz") + # NOTE: The lists below must be kept in sync with ray/BUILD.bazel. ray_files = [ "ray/core/src/ray/thirdparty/redis/src/redis-server" + exe_suffix, @@ -31,8 +47,7 @@ ray_files = [ "ray/streaming/_streaming.so", ] -build_java = os.getenv("RAY_INSTALL_JAVA") == "1" -if build_java: +if BUILD_JAVA: ray_files.append("ray/jars/ray_dist.jar") # These are the directories where automatically generated Python protobuf @@ -72,7 +87,7 @@ optional_ray_files += ray_autoscaler_files optional_ray_files += ray_project_files optional_ray_files += ray_dashboard_files -if "RAY_USE_NEW_GCS" in os.environ and os.environ["RAY_USE_NEW_GCS"] == "on": +if os.getenv("RAY_USE_NEW_GCS") == "on": ray_files += [ "ray/core/src/credis/build/src/libmember.so", "ray/core/src/credis/build/src/libmaster.so", @@ -101,83 +116,145 @@ extras["streaming"] = ["msgpack >= 0.6.2"] extras["all"] = list(set(chain.from_iterable(extras.values()))) -class build_ext(_build_ext.build_ext): - def run(self): - # Note: We are passing in sys.executable so that we use the same - # version of Python to build packages inside the build.sh script. Note - # that certain flags will not be passed along such as --user or sudo. - # TODO(rkn): Fix this. - command = ["../build.sh", "-p", sys.executable] - if sys.platform == "win32" and command[0].lower().endswith(".sh"): - # We can't run .sh files directly in Windows, so find a shell. - # Don't use "bash" instead of "sh", because that might run the Bash - # from WSL! (We want MSYS2's Bash, which is also sh by default.) - shell = os.getenv("BAZEL_SH", "sh") # NOT "bash"! (see above) - command.insert(0, shell) - if build_java: - # Also build binaries for Java if the above env variable exists. - command += ["-l", "python,java"] - subprocess.check_call(command) +def is_native_windows_or_msys(): + """Check to see if we are running on native Windows, + but NOT WSL (which is seen as Linux).""" + return sys.platform == "msys" or sys.platform == "win32" - # We also need to install pickle5 along with Ray, so make sure that the - # relevant non-Python pickle5 files get copied. - pickle5_files = self.walk_directory("./ray/pickle5_files/pickle5") - thirdparty_files = self.walk_directory("./ray/thirdparty_files") +def is_invalid_windows_platform(): + # 'GCC' check is how you detect MinGW: + # https://github.com/msys2/MINGW-packages/blob/abd06ca92d876b9db05dd65f27d71c4ebe2673a9/mingw-w64-python2/0410-MINGW-build-extensions-with-GCC.patch#L53 + platform = sys.platform + ver = sys.version + return platform == "msys" or (platform == "win32" and ver and "GCC" in ver) - files_to_include = ray_files + pickle5_files + thirdparty_files - # Copy over the autogenerated protobuf Python bindings. - for directory in generated_python_directories: - for filename in os.listdir(directory): - if filename[-3:] == ".py": - files_to_include.append(os.path.join(directory, filename)) +def download(url): + try: + result = urllib.request.urlopen(url).read() + except urllib.error.URLError: + # This fallback is necessary on Python 3.5 on macOS due to TLS 1.2. + curl_args = ["curl", "-s", "-L", "-f", "-o", "-", url] + result = subprocess.check_output(curl_args) + return result - for filename in files_to_include: - self.move_file(filename) - # Try to copy over the optional files. - for filename in optional_ray_files: +# Installs pickle5-backport into the local subdirectory. +def download_pickle5(pickle5_dir): + pickle5_file = urllib.parse.unquote( + urllib.parse.urlparse(pickle5_url).path) + pickle5_name = re.sub("\\.tar\\.gz$", ".tgz", pickle5_file, flags=re.I) + url_path_parts = os.path.splitext(pickle5_name)[0].split("/") + (project, commit) = (url_path_parts[2], url_path_parts[4]) + pickle5_archive = download(pickle5_url) + with tempfile.TemporaryDirectory() as work_dir: + tf = tarfile.open(None, "r", io.BytesIO(pickle5_archive)) + try: + tf.extractall(work_dir) + finally: + tf.close() + src_dir = os.path.join(work_dir, project + "-" + commit) + args = [sys.executable, "setup.py", "-q", "bdist_wheel"] + subprocess.check_call(args, cwd=src_dir) + for wheel in glob.glob(os.path.join(src_dir, "dist", "*.whl")): + wzf = zipfile.ZipFile(wheel, "r") try: - self.move_file(filename) - except Exception: - print("Failed to copy optional file {}. This is ok." - .format(filename)) - - def walk_directory(self, directory): - file_list = [] - for (root, dirs, filenames) in os.walk(directory): - for name in filenames: - file_list.append(os.path.join(root, name)) - return file_list - - def move_file(self, filename): - # TODO(rkn): This feels very brittle. It may not handle all cases. See - # https://github.com/apache/arrow/blob/master/python/setup.py for an - # example. - source = filename - destination = os.path.join(self.build_lib, filename) - # Create the target directory if it doesn't already exist. - os.makedirs(os.path.dirname(destination), exist_ok=True) - if not os.path.exists(destination): - print("Copying {} to {}.".format(source, destination)) - if sys.platform == "win32": - # Does not preserve file mode (needed to avoid read-only bit) - shutil.copyfile(source, destination, follow_symlinks=True) - else: - # Preserves file mode (needed to copy executable bit) - shutil.copy(source, destination, follow_symlinks=True) + wzf.extractall(pickle5_dir) + finally: + wzf.close() -class BinaryDistribution(Distribution): - def has_ext_modules(self): - return True +def build(build_python, build_java): + if tuple(sys.version_info[:2]) not in SUPPORTED_PYTHONS: + msg = ("Detected Python version {}, which is not supported. " + "Only Python {} are supported.").format( + ".".join(map(str, sys.version_info[:2])), + ", ".join(".".join(map(str, v)) for v in SUPPORTED_PYTHONS)) + raise RuntimeError(msg) + + if is_invalid_windows_platform(): + msg = ("Please use official native CPython on Windows," + " not Cygwin/MSYS/MSYS2/MinGW/etc.\n" + + "Detected: {}\n at: {!r}".format(sys.version, sys.executable)) + raise OSError(msg) + + if is_native_windows_or_msys(): + BAZEL_SH = os.getenv("BAZEL_SH") + SYSTEMROOT = os.getenv("SystemRoot") + wsl_bash = os.path.join(SYSTEMROOT, "System32", "bash.exe") + if (not BAZEL_SH) and SYSTEMROOT and os.path.isfile(wsl_bash): + msg = ("You appear to have Bash from WSL," + " which Bazel may invoke unexpectedly. " + "To avoid potential problems," + " please explicitly set the {name!r}" + " environment variable for Bazel.").format(name="BAZEL_SH") + raise RuntimeError(msg) + + # Check if the current Python already has pickle5 (either comes with newer + # Python versions, or has been installed by us before). + pickle5 = None + if sys.version_info >= (3, 8, 2): + import pickle as pickle5 + else: + try: + import pickle5 + except ImportError: + pass + if not pickle5: + download_pickle5(os.path.join(ROOT_DIR, "ray", "pickle5_files")) + + # Note: We are passing in sys.executable so that we use the same + # version of Python to build packages inside the build.sh script. Note + # that certain flags will not be passed along such as --user or sudo. + # TODO(rkn): Fix this. + if not os.getenv("SKIP_THIRDPARTY_INSTALL"): + pip_packages = ["psutil", "setproctitle"] + subprocess.check_call( + [ + sys.executable, "-m", "pip", "install", "-q", + "--target=" + os.path.join(ROOT_DIR, "ray", "thirdparty_files") + ] + pip_packages, + env=dict(os.environ, CC="gcc")) + + bazel = os.getenv("BAZEL_EXECUTABLE", "bazel") + bazel_targets = [] + bazel_targets += ["//:ray_pkg"] if build_python else [] + bazel_targets += ["//java:ray_java_pkg"] if build_java else [] + return subprocess.check_call( + [bazel, "build", "--verbose_failures", "--"] + bazel_targets, + env=dict(os.environ, PYTHON3_BIN_PATH=sys.executable)) + + +def walk_directory(directory): + file_list = [] + for (root, dirs, filenames) in os.walk(directory): + for name in filenames: + file_list.append(os.path.join(root, name)) + return file_list + + +def move_file(target_dir, filename): + # TODO(rkn): This feels very brittle. It may not handle all cases. See + # https://github.com/apache/arrow/blob/master/python/setup.py for an + # example. + source = filename + destination = os.path.join(target_dir, filename) + # Create the target directory if it doesn't already exist. + os.makedirs(os.path.dirname(destination), exist_ok=True) + if not os.path.exists(destination): + print("Copying {} to {}.".format(source, destination)) + if sys.platform == "win32": + # Does not preserve file mode (needed to avoid read-only bit) + shutil.copyfile(source, destination, follow_symlinks=True) + else: + # Preserves file mode (needed to copy executable bit) + shutil.copy(source, destination, follow_symlinks=True) def find_version(*filepath): # Extract version information from filepath - here = os.path.abspath(os.path.dirname(__file__)) - with open(os.path.join(here, *filepath)) as fp: + with open(os.path.join(ROOT_DIR, *filepath)) as fp: version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", fp.read(), re.M) if version_match: @@ -185,7 +262,7 @@ def find_version(*filepath): raise RuntimeError("Unable to find version string.") -requires = [ +install_requires = [ "aiohttp", "click >= 7.0", "colorama", @@ -201,22 +278,106 @@ requires = [ "redis >= 3.3.2, < 3.5.0", ] -setup( + +def pip_run(build_ext): + build(True, BUILD_JAVA) + + files_to_include = list(ray_files) + + # We also need to install pickle5 along with Ray, so make sure that the + # relevant non-Python pickle5 files get copied. + pickle5_dir = os.path.join(ROOT_DIR, "ray", "pickle5_files") + files_to_include += walk_directory(os.path.join(pickle5_dir, "pickle5")) + + thirdparty_dir = os.path.join(ROOT_DIR, "ray", "thirdparty_files") + files_to_include += walk_directory(thirdparty_dir) + + # Copy over the autogenerated protobuf Python bindings. + for directory in generated_python_directories: + for filename in os.listdir(directory): + if filename[-3:] == ".py": + files_to_include.append(os.path.join(directory, filename)) + + for filename in files_to_include: + move_file(build_ext.build_lib, filename) + + # Try to copy over the optional files. + for filename in optional_ray_files: + try: + move_file(build_ext.build_lib, filename) + except Exception: + print("Failed to copy optional file {}. This is ok." + .format(filename)) + + +def api_main(program, *args): + parser = argparse.ArgumentParser() + parser.add_argument("command", type=str, choices=["build", "help"]) + parser.add_argument( + "-l", + "--language", + default="python", + type=str, + help="A list of languages to build native libraries. " + "Supported languages include \"python\" and \"java\". " + "If not specified, only the Python library will be built.") + parsed_args = parser.parse_args(args) + + result = None + + if parsed_args.command == "build": + kwargs = {} + for lang in parsed_args.language.split(","): + if "python" in lang: + kwargs.update(build_python=True) + elif "java" in lang: + kwargs.update(build_java=True) + else: + raise ValueError("invalid language: {!r}".format(lang)) + result = build(**kwargs) + elif parsed_args.command == "help": + parser.print_help() + else: + raise ValueError("Invalid command: {!r}".format(parsed_args.command)) + + return result + + +if __name__ == "__api__": + api_main(*sys.argv) + +if __name__ == "__main__": + import setuptools + import setuptools.command.build_ext + + class build_ext(setuptools.command.build_ext.build_ext): + def run(self): + return pip_run(self) + + class BinaryDistribution(setuptools.Distribution): + def has_ext_modules(self): + return True + + +setuptools.setup( name="ray", version=find_version("ray", "__init__.py"), author="Ray Team", author_email="ray-dev@googlegroups.com", - description=("A system for parallel and distributed Python that unifies " - "the ML ecosystem."), - long_description=open("../README.rst").read(), + description=("A system for parallel and distributed Python that " + "unifies the ML ecosystem."), + long_description=io.open( + os.path.join(ROOT_DIR, os.path.pardir, "README.rst"), + "r", + encoding="utf-8").read(), url="https://github.com/ray-project/ray", keywords=("ray distributed parallel machine-learning " "reinforcement-learning deep-learning python"), - packages=find_packages(), + packages=setuptools.find_packages(), cmdclass={"build_ext": build_ext}, # The BinaryDistribution argument triggers build_ext. distclass=BinaryDistribution, - install_requires=requires, + install_requires=install_requires, setup_requires=["cython >= 0.29.14", "wheel"], extras_require=extras, entry_points={ @@ -227,4 +388,4 @@ setup( }, include_package_data=True, zip_safe=False, - license="Apache 2.0") + license="Apache 2.0") if __name__ == "__main__" else None