ray/python/setup.py

558 lines
19 KiB
Python
Raw Normal View History

import argparse
import errno
import glob
import io
import logging
import os
import re
import shutil
import subprocess
import sys
import tarfile
import tempfile
import zipfile
from itertools import chain
from itertools import takewhile
from enum import Enum
2016-02-22 17:35:03 -08:00
import urllib.error
import urllib.parse
import urllib.request
2016-02-07 15:50:02 -08:00
# Module-level logger, used for build warnings and clean-up errors.
logger = logging.getLogger(__name__)

# (major, minor) interpreter versions accepted by build(); these are also the
# values printed by the "python_versions" API command.
SUPPORTED_PYTHONS = [(3, 6), (3, 7), (3, 8), (3, 9)]
# Bazel version the build expects. build() only logs a warning when the
# installed Bazel is older than this.
SUPPORTED_BAZEL = (3, 4, 1)

# Directory containing this setup.py.
ROOT_DIR = os.path.dirname(__file__)
# Java artifacts are built only when RAY_INSTALL_JAVA=1 is set.
BUILD_JAVA = os.getenv("RAY_INSTALL_JAVA") == "1"

# Sub-directories (relative to ROOT_DIR) that receive the pickle5 backport and
# the pip-installed third-party packages during build().
PICKLE5_SUBDIR = os.path.join("ray", "pickle5_files")
THIRDPARTY_SUBDIR = os.path.join("ray", "thirdparty_files")
# Directories removed by the "clean" API command.
CLEANABLE_SUBDIRS = [PICKLE5_SUBDIR, THIRDPARTY_SUBDIR]

# Executables carry an ".exe" suffix on Windows only.
exe_suffix = ".exe" if sys.platform == "win32" else ""
# .pyd is the extension Python requires on Windows for shared libraries.
# https://docs.python.org/3/faq/windows.html#is-a-pyd-file-the-same-as-a-dll
pyd_suffix = ".pyd" if sys.platform == "win32" else ".so"

# Source tarball of the pickle5 backport, pinned to a single commit.
pickle5_url = ("https://github.com/pitrou/pickle5-backport/archive/"
               "e6117502435aba2901585cc6c692fb9582545f08.tar.gz")
def find_version(*filepath):
    """Return the ``__version__`` string declared in a file under ROOT_DIR.

    Args:
        *filepath: Path components, relative to ROOT_DIR, of the file to scan.

    Returns:
        The text between the quotes of the first line of the form
        ``__version__ = "..."`` (single or double quotes).

    Raises:
        RuntimeError: If no such line exists in the file.
    """
    version_file = os.path.join(ROOT_DIR, *filepath)
    with open(version_file) as fp:
        contents = fp.read()

    found = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", contents, re.M)
    if found is None:
        raise RuntimeError("Unable to find version string.")
    return found.group(1)
class SetupType(Enum):
    """Which wheel this setup.py invocation describes."""

    # The primary "ray" wheel.
    RAY = 1
    # The "ray-cpp" wheel, selected when RAY_INSTALL_CPP=1.
    RAY_CPP = 2
class BuildType(Enum):
    """Build flavour, selected through the RAY_DEBUG_BUILD environment variable."""

    # Regular build; used when RAY_DEBUG_BUILD is unset or unrecognised.
    DEFAULT = 1
    # RAY_DEBUG_BUILD=debug; Bazel is invoked with "--config debug".
    DEBUG = 2
    # RAY_DEBUG_BUILD=asan; Bazel is invoked with "--config=asan-build".
    ASAN = 3
class SetupSpec:
    """Everything setup() needs to know about the wheel being produced.

    Attributes:
        type: Which wheel is being built.
        name: Distribution name passed to setup().
        version: Version read from ray/__init__.py, with a "+dbg" or "+asan"
            suffix for debug and ASAN builds respectively.
        description: Short description passed to setup().
        build_type: Build flavour.
        files_to_include: Extra files copied into the build directory.
        install_requires: Runtime requirements.
        extras: Optional requirement groups.
    """

    def __init__(self, type: SetupType, name: str, description: str,
                 build_type: BuildType):
        base_version = find_version("ray", "__init__.py")

        # Non-default builds are marked with a suffix on the version so that
        # their wheels are distinguishable from regular ones.
        if build_type == BuildType.DEBUG:
            version_suffix = "+dbg"
        elif build_type == BuildType.ASAN:
            version_suffix = "+asan"
        else:
            version_suffix = ""

        self.type: SetupType = type
        self.name: str = name
        self.version: str = base_version + version_suffix
        self.description: str = description
        self.build_type: BuildType = build_type
        self.files_to_include: list = []
        self.install_requires: list = []
        self.extras: dict = {}

    def get_packages(self):
        """Return the Python packages to ship; only the "ray" wheel has any."""
        if self.type != SetupType.RAY:
            return []
        return setuptools.find_packages()
# Map the RAY_DEBUG_BUILD environment variable onto a BuildType; any value
# other than "debug" or "asan" (including unset) selects the default build.
build_type = os.getenv("RAY_DEBUG_BUILD")
BUILD_TYPE = {
    "debug": BuildType.DEBUG,
    "asan": BuildType.ASAN,
}.get(build_type, BuildType.DEFAULT)
# RAY_INSTALL_CPP=1 switches this script from the primary wheel to "ray-cpp".
if os.getenv("RAY_INSTALL_CPP") != "1":
    # "ray" primary wheel package.
    setup_spec = SetupSpec(
        SetupType.RAY,
        "ray",
        "Ray provides a simple, universal API for building distributed "
        "applications.",
        BUILD_TYPE)
else:
    # "ray-cpp" wheel package.
    setup_spec = SetupSpec(
        SetupType.RAY_CPP,
        "ray-cpp",
        "A subpackage of Ray which provides the Ray C++ API.",
        BUILD_TYPE)
# Ideally, we could include these files by putting them in a
# MANIFEST.in or using the package_data argument to setup, but the
# MANIFEST.in gets applied at the very beginning when setup.py runs
# before these files have been created, so we have to move the files
# manually.
2019-04-02 22:17:33 -07:00
# NOTE: The lists below must be kept in sync with ray/BUILD.bazel.
# Native binaries and extension modules added to the "ray" wheel by pip_run().
# exe_suffix / pyd_suffix supply the platform-specific file extension.
ray_files = [
    "ray/core/src/ray/thirdparty/redis/src/redis-server" + exe_suffix,
    "ray/_raylet" + pyd_suffix,
    "ray/core/src/ray/gcs/gcs_server" + exe_suffix,
    "ray/core/src/ray/raylet/raylet" + exe_suffix,
    "ray/streaming/_streaming.so",
]
2020-11-30 13:53:09 +08:00
# Include the Java jar when a Java build was requested, or when a jar already
# exists under ROOT_DIR.
if BUILD_JAVA or os.path.exists(
        os.path.join(ROOT_DIR, "ray/jars/ray_dist.jar")):
    ray_files.append("ray/jars/ray_dist.jar")
# The "ray-cpp" wheel ships the default C++ worker binary plus everything
# found under ray/cpp.
if setup_spec.type == SetupType.RAY_CPP:
    setup_spec.files_to_include.append("ray/core/src/ray/cpp/default_worker")
    # C++ API library and project template files.
    setup_spec.files_to_include.extend(
        os.path.join(cpp_dir, cpp_file)
        for cpp_dir, _, cpp_files in os.walk("ray/cpp")
        for cpp_file in cpp_files)
# These are the directories where automatically generated Python protobuf
# bindings are created. pip_run() adds every "*.py" file found directly in
# them to the files shipped with the "ray" wheel.
generated_python_directories = [
    "ray/core/generated",
    "ray/streaming/generated",
]
# Index of nightly wheels.
ray_files.append("ray/nightly-wheels.yaml")

# Autoscaler files: per-provider default configurations, provider templates
# and helper scripts, and the cluster config JSON schema.
ray_files += [
    "ray/autoscaler/aws/defaults.yaml",
    "ray/autoscaler/azure/defaults.yaml",
    "ray/autoscaler/_private/_azure/azure-vm-template.json",
    "ray/autoscaler/_private/_azure/azure-config-template.json",
    "ray/autoscaler/gcp/defaults.yaml",
    "ray/autoscaler/local/defaults.yaml",
    "ray/autoscaler/kubernetes/defaults.yaml",
    "ray/autoscaler/_private/_kubernetes/kubectl-rsync.sh",
    "ray/autoscaler/staroid/defaults.yaml",
    "ray/autoscaler/ray-schema.json",
]
# Dashboard files: every file under the dashboard client's build output.
ray_files.extend(
    os.path.join(dashboard_dir, dashboard_file)
    for dashboard_dir, _, dashboard_files in os.walk(
        "ray/new_dashboard/client/build")
    for dashboard_file in dashboard_files)
# If you're adding dependencies for ray extras, please
# also update the matching section of requirements/requirements.txt
# in this directory
if setup_spec.type == SetupType.RAY:
    setup_spec.extras = {
        "default": [
            "aiohttp",
            "aiohttp_cors",
            "aioredis < 2",
            "colorful",
            "py-spy >= 0.2.0",
            "jsonschema",
            "requests",
            "gpustat",
            "opencensus",
            "prometheus_client >= 0.7.1",
        ],
        "serve": ["uvicorn", "requests", "starlette", "fastapi"],
        "tune": ["pandas", "tabulate", "tensorboardX>=1.9", "requests"],
        "k8s": ["kubernetes", "urllib3"],
        "observability": [
            "opentelemetry-api==1.1.0", "opentelemetry-sdk==1.1.0",
            "opentelemetry-exporter-otlp==1.1.0"
        ],
        # Pin ray-cpp to exactly the version of this wheel.
        "cpp": ["ray-cpp==" + setup_spec.version]
    }

    # kopf is only added when building on Python 3.7 or newer.
    if sys.version_info >= (3, 7, 0):
        setup_spec.extras["k8s"].append("kopf")

    # RLlib needs everything Tune needs, plus its own dependencies.
    setup_spec.extras["rllib"] = setup_spec.extras["tune"] + [
        "dm_tree",
        "gym",
        "lz4",
        # matplotlib (dependency of scikit-image) 3.4.3 breaks docker build
        # Todo: Remove this when safe?
        "matplotlib!=3.4.3",
        "scikit-image",
        "pyyaml",
        "scipy",
    ]

    # "all" is the de-duplicated union of every extra above. It is sorted
    # because plain set iteration order varies between interpreter runs
    # (string hash randomization), which would make the generated package
    # metadata differ from build to build.
    setup_spec.extras["all"] = sorted(
        set(chain.from_iterable(setup_spec.extras.values())))
# These are the main dependencies for users of ray. This list
# should be carefully curated. If you change it, please reflect
# the change in the matching section of requirements/requirements.txt
# Only the primary "ray" wheel declares runtime requirements.
if setup_spec.type == SetupType.RAY:
    setup_spec.install_requires = [
        "attrs",
        "click >= 7.0",
        # Environment marker: only installed on Python older than 3.7.
        "dataclasses; python_version < '3.7'",
        "filelock",
        "grpcio >= 1.28.1",
        "msgpack >= 1.0.0, < 2.0.0",
        # The numpy lower bound differs by Python version.
        "numpy >= 1.16; python_version < '3.9'",
        "numpy >= 1.19.3; python_version >= '3.9'",
        "protobuf >= 3.15.3",
        "pyyaml",
        "redis >= 3.5.0",
    ]
def is_native_windows_or_msys():
    """Return True on native Windows or MSYS.

    WSL is not matched, because Python reports it as Linux.
    """
    return sys.platform in ("msys", "win32")
def is_invalid_windows_platform():
    """Return a truthy value when running under MSYS or a MinGW-built Python."""
    if sys.platform == "msys":
        return True
    # 'GCC' check is how you detect MinGW:
    # https://github.com/msys2/MINGW-packages/blob/abd06ca92d876b9db05dd65f27d71c4ebe2673a9/mingw-w64-python2/0410-MINGW-build-extensions-with-GCC.patch#L53
    return sys.platform == "win32" and sys.version and "GCC" in sys.version
def bazel_invoke(invoker, cmdline, *args, **kwargs):
    """Run a Bazel command through ``invoker``, trying fallback executables.

    The first candidate is ``$BAZEL_PATH`` (or plain ``bazel``, resolved via
    PATH). The fallback is ``$MINGW_DIR/bin/bazel.exe`` on Windows when
    MINGW_DIR is set, and the standard user installation path
    ``~/.bazel/bin/bazel`` everywhere else.

    Args:
        invoker: Callable such as ``subprocess.check_call``; it receives the
            full argument list followed by ``*args`` and ``**kwargs``.
        cmdline: List of arguments placed after the Bazel executable.

    Returns:
        Whatever ``invoker`` returns for the first candidate that can be run.

    Raises:
        IOError: Re-raised from ``invoker`` when the last candidate fails too.
    """
    executables = [os.getenv("BAZEL_PATH", "bazel")]
    if sys.platform == "win32":
        mingw_dir = os.getenv("MINGW_DIR")
        if mingw_dir:
            executables.append(mingw_dir + "/bin/bazel.exe")
    else:
        user_home = os.path.expanduser("~")
        executables.append(os.path.join(user_home, ".bazel", "bin", "bazel"))

    last_position = len(executables) - 1
    for position, executable in enumerate(executables):
        try:
            return invoker([executable] + cmdline, *args, **kwargs)
        except IOError:
            # Only give up once every candidate has been tried.
            if position >= last_position:
                raise
    return None
def download(url):
    """Fetch ``url`` and return the response body as bytes.

    Args:
        url: URL to retrieve.

    Returns:
        The raw bytes of the response body.

    Raises:
        subprocess.CalledProcessError: If the ``curl`` fallback fails as well.
    """
    try:
        # Use the response as a context manager so the underlying connection
        # is closed even if read() fails part-way through.
        with urllib.request.urlopen(url) as response:
            result = response.read()
    except urllib.error.URLError:
        # This fallback is necessary on Python 3.5 on macOS due to TLS 1.2.
        curl_args = ["curl", "-s", "-L", "-f", "-o", "-", url]
        result = subprocess.check_output(curl_args)
    return result
def download_pickle5(pickle5_dir):
    """Install pickle5-backport into the local subdirectory ``pickle5_dir``.

    The pinned source archive is downloaded, unpacked into a temporary
    directory and built into a wheel with the current interpreter; every wheel
    produced is then unzipped into ``pickle5_dir``.

    Args:
        pickle5_dir: Directory that receives the unpacked wheel contents.
    """
    # The archive unpacks into "<project>-<commit>", so recover both names
    # from the URL path: /<owner>/<project>/archive/<commit>.tar.gz
    url_path = urllib.parse.unquote(urllib.parse.urlparse(pickle5_url).path)
    single_suffix_path = re.sub("\\.tar\\.gz$", ".tgz", url_path, flags=re.I)
    path_parts = os.path.splitext(single_suffix_path)[0].split("/")
    project = path_parts[2]
    commit = path_parts[4]

    archive_bytes = download(pickle5_url)
    with tempfile.TemporaryDirectory() as work_dir:
        with tarfile.open(None, "r", io.BytesIO(archive_bytes)) as archive:
            archive.extractall(work_dir)

        src_dir = os.path.join(work_dir, project + "-" + commit)
        build_cmd = [sys.executable, "setup.py", "-q", "bdist_wheel"]
        subprocess.check_call(build_cmd, cwd=src_dir)

        for wheel_path in glob.glob(os.path.join(src_dir, "dist", "*.whl")):
            with zipfile.ZipFile(wheel_path, "r") as wheel:
                wheel.extractall(pickle5_dir)
def build(build_python, build_java, build_cpp):
    """Prepare build-time dependencies and build the native targets with Bazel.

    Steps, in order: validate the interpreter and platform, make sure pickle5
    is available (downloading the backport if needed), pip-install the bundled
    third-party packages unless SKIP_THIRDPARTY_INSTALL is set, check the
    Bazel version, and finally run ``bazel build`` for the selected targets.

    Args:
        build_python: Build the ``//:ray_pkg`` target.
        build_java: Build the ``//java:ray_java_pkg`` target.
        build_cpp: Build the ``//cpp:ray_cpp_pkg`` target.

    Returns:
        The return value of ``subprocess.check_call`` for the Bazel build.

    Raises:
        RuntimeError: On an unsupported Python version, or when WSL Bash is
            present on Windows and BAZEL_SH has not been set.
        OSError: When running under Cygwin/MSYS/MinGW Python on Windows.
        subprocess.CalledProcessError: If pip or Bazel exits with an error.
    """
    if tuple(sys.version_info[:2]) not in SUPPORTED_PYTHONS:
        msg = ("Detected Python version {}, which is not supported. "
               "Only Python {} are supported.").format(
                   ".".join(map(str, sys.version_info[:2])),
                   ", ".join(".".join(map(str, v)) for v in SUPPORTED_PYTHONS))
        raise RuntimeError(msg)

    if is_invalid_windows_platform():
        msg = ("Please use official native CPython on Windows,"
               " not Cygwin/MSYS/MSYS2/MinGW/etc.\n" +
               "Detected: {}\n at: {!r}".format(sys.version, sys.executable))
        raise OSError(msg)

    bazel_env = dict(os.environ, PYTHON3_BIN_PATH=sys.executable)

    if is_native_windows_or_msys():
        shell = bazel_env.get("SHELL")
        if shell:
            bazel_env.setdefault("BAZEL_SH", os.path.normpath(shell))
        # BAZEL_SH is absent when neither SHELL nor BAZEL_SH was set, so it
        # must be read with .get(): that is exactly the case the check below
        # exists to report.
        bazel_sh = bazel_env.get("BAZEL_SH", "")
        system_root = os.getenv("SystemRoot")
        # Only build the WSL path once SystemRoot is known to be set;
        # os.path.join() does not accept None.
        if not bazel_sh and system_root:
            wsl_bash = os.path.join(system_root, "System32", "bash.exe")
            if os.path.isfile(wsl_bash):
                msg = ("You appear to have Bash from WSL,"
                       " which Bazel may invoke unexpectedly. "
                       "To avoid potential problems,"
                       " please explicitly set the {name!r}"
                       " environment variable for Bazel.").format(
                           name="BAZEL_SH")
                raise RuntimeError(msg)

    # Check if the current Python already has pickle5 (either comes with newer
    # Python versions, or has been installed by us before).
    pickle5 = None
    if sys.version_info >= (3, 8, 2):
        import pickle as pickle5
    else:
        try:
            import pickle5
        except ImportError:
            pass
    if not pickle5:
        download_pickle5(os.path.join(ROOT_DIR, PICKLE5_SUBDIR))

    # Note: We are passing in sys.executable so that we use the same
    # version of Python to build packages inside the build.sh script. Note
    # that certain flags will not be passed along such as --user or sudo.
    # TODO(rkn): Fix this.
    if not os.getenv("SKIP_THIRDPARTY_INSTALL"):
        pip_packages = ["psutil", "setproctitle==1.2.2", "colorama"]
        subprocess.check_call(
            [
                sys.executable, "-m", "pip", "install", "-q",
                "--target=" + os.path.join(ROOT_DIR, THIRDPARTY_SUBDIR)
            ] + pip_packages,
            env=dict(os.environ, CC="gcc"))

    # "bazel --version" prints "<name> <version>"; keep only the leading
    # digits of each dotted component so suffixed versions still compare.
    version_info = bazel_invoke(subprocess.check_output, ["--version"])
    bazel_version_str = version_info.rstrip().decode("utf-8").split(" ", 1)[1]
    bazel_version_split = bazel_version_str.split(".")
    bazel_version_digits = [
        "".join(takewhile(str.isdigit, s)) for s in bazel_version_split
    ]
    bazel_version = tuple(map(int, bazel_version_digits))
    if bazel_version < SUPPORTED_BAZEL:
        logger.warning("Expected Bazel version {} but found {}".format(
            ".".join(map(str, SUPPORTED_BAZEL)), bazel_version_str))

    bazel_targets = []
    bazel_targets += ["//:ray_pkg"] if build_python else []
    bazel_targets += ["//cpp:ray_cpp_pkg"] if build_cpp else []
    bazel_targets += ["//java:ray_java_pkg"] if build_java else []

    bazel_flags = ["--verbose_failures"]
    if setup_spec.build_type == BuildType.DEBUG:
        bazel_flags.extend(["--config", "debug"])
    if setup_spec.build_type == BuildType.ASAN:
        bazel_flags.extend(["--config=asan-build"])

    return bazel_invoke(
        subprocess.check_call,
        ["build"] + bazel_flags + ["--"] + bazel_targets,
        env=bazel_env)
def walk_directory(directory):
    """Return the paths of all files below ``directory``, recursively.

    Each path is the walked directory joined with the file name, so the
    results are absolute exactly when ``directory`` is. A missing directory
    yields an empty list.
    """
    return [
        os.path.join(parent, entry)
        for parent, _, entries in os.walk(directory)
        for entry in entries
    ]
def copy_file(target_dir, filename, rootdir):
    """Copy ``filename`` into ``target_dir``, mirroring its path under ``rootdir``.

    Args:
        target_dir: Root of the destination tree.
        filename: File to copy. It may be an absolute path (e.g. from
            walk_directory()) or a path relative to the current directory.
        rootdir: Directory that the destination layout is computed against.

    Returns:
        1 if the file was copied, 0 if the destination already existed.
    """
    # TODO(rkn): This feels very brittle. It may not handle all cases. See
    # https://github.com/apache/arrow/blob/master/python/setup.py for an
    # example.
    # The rootdir-relative path decides only where the file lands.
    relative_path = os.path.relpath(filename, rootdir)
    destination = os.path.join(target_dir, relative_path)
    # Create the target directory if it doesn't already exist.
    os.makedirs(os.path.dirname(destination), exist_ok=True)
    if os.path.exists(destination):
        return 0
    # Read from ``filename`` itself. The rootdir-relative path would be
    # resolved against the current working directory, which is only the same
    # file when the process happens to be running inside ``rootdir``.
    if sys.platform == "win32":
        # Does not preserve file mode (needed to avoid read-only bit)
        shutil.copyfile(filename, destination, follow_symlinks=True)
    else:
        # Preserves file mode (needed to copy executable bit)
        shutil.copy(filename, destination, follow_symlinks=True)
    return 1
def pip_run(build_ext):
    """Build the native code and copy the non-Python files into the build dir.

    Args:
        build_ext: The running build_ext command; its ``build_lib`` attribute
            is the directory the files are copied into.
    """
    build(True, BUILD_JAVA, True)

    # NOTE(review): indentation was lost in the reviewed source; everything up
    # to the copy loop is assumed to apply to the "ray" wheel only -- confirm.
    if setup_spec.type == SetupType.RAY:
        extra_files = setup_spec.files_to_include
        extra_files += ray_files

        # We also need to install pickle5 along with Ray, so make sure that the
        # relevant non-Python pickle5 files get copied.
        pickle5_root = os.path.join(ROOT_DIR, PICKLE5_SUBDIR)
        extra_files += walk_directory(os.path.join(pickle5_root, "pickle5"))

        extra_files += walk_directory(
            os.path.join(ROOT_DIR, THIRDPARTY_SUBDIR))

        # Copy over the autogenerated protobuf Python bindings.
        for generated_dir in generated_python_directories:
            for entry in os.listdir(generated_dir):
                if entry.endswith(".py"):
                    extra_files.append(os.path.join(generated_dir, entry))

    copied_files = sum(
        copy_file(build_ext.build_lib, path, ROOT_DIR)
        for path in setup_spec.files_to_include)
    print("# of files copied to {}: {}".format(build_ext.build_lib,
                                               copied_files))
def api_main(program, *args):
    """Command-line style entry point for build tooling.

    Args:
        program: Name of the invoking program (``sys.argv[0]``); unused.
        *args: Command-line arguments: one command out of ``build``,
            ``bazel_version``, ``python_versions``, ``clean`` and ``help``,
            optionally followed by ``-l/--language``.

    Returns:
        The result of build() for ``build``; 1 for ``clean`` if a directory
        could not be removed; ``None`` otherwise.

    Raises:
        ValueError: If ``--language`` names an unsupported language.
    """
    parser = argparse.ArgumentParser()
    choices = ["build", "bazel_version", "python_versions", "clean", "help"]
    parser.add_argument("command", type=str, choices=choices)
    parser.add_argument(
        "-l",
        "--language",
        default="python,cpp",
        type=str,
        # The help text must match the actual default ("python,cpp") and the
        # languages accepted by the "build" branch below.
        help="A comma-separated list of languages to build native libraries "
        "for. Supported languages include \"python\", \"java\" and \"cpp\". "
        "If not specified, the Python and C++ libraries will be built.")
    parsed_args = parser.parse_args(args)

    result = None
    if parsed_args.command == "build":
        kwargs = dict(build_python=False, build_java=False, build_cpp=False)
        for lang in parsed_args.language.split(","):
            # Substring matching tolerates surrounding whitespace in the list.
            if "python" in lang:
                kwargs.update(build_python=True)
            elif "java" in lang:
                kwargs.update(build_java=True)
            elif "cpp" in lang:
                kwargs.update(build_cpp=True)
            else:
                raise ValueError("invalid language: {!r}".format(lang))
        result = build(**kwargs)
    elif parsed_args.command == "bazel_version":
        print(".".join(map(str, SUPPORTED_BAZEL)))
    elif parsed_args.command == "python_versions":
        for version in SUPPORTED_PYTHONS:
            # NOTE: On Windows this will print "\r\n" on the command line.
            # Strip it out by piping to tr -d "\r".
            print(".".join(map(str, version)))
    elif parsed_args.command == "clean":

        def onerror(function, path, excinfo):
            # A directory that does not exist is already clean; anything else
            # is logged and turns the overall result into a failure.
            nonlocal result
            if excinfo[1].errno != errno.ENOENT:
                msg = excinfo[1].strerror
                logger.error("cannot remove {}: {}".format(path, msg))
                result = 1

        for subdir in CLEANABLE_SUBDIRS:
            shutil.rmtree(os.path.join(ROOT_DIR, subdir), onerror=onerror)
    elif parsed_args.command == "help":
        parser.print_help()
    else:
        raise ValueError("Invalid command: {!r}".format(parsed_args.command))
    return result
# Tooling entry point: when this file is executed under the module name
# "__api__", dispatch to api_main() with sys.argv (sys.argv[0] becomes the
# `program` argument). The return value of api_main() is not used here.
if __name__ == "__api__":
    api_main(*sys.argv)
# setuptools is imported only when this file runs as a script.
if __name__ == "__main__":
    import setuptools
    import setuptools.command.build_ext

    class build_ext(setuptools.command.build_ext.build_ext):
        """build_ext command that delegates the whole step to pip_run()."""

        def run(self):
            return pip_run(self)

    class BinaryDistribution(setuptools.Distribution):
        """Distribution that always reports having extension modules."""

        def has_ext_modules(self):
            return True
# Ensure no remaining lib files.
# NOTE(review): indentation was lost in the reviewed source; this clean-up is
# assumed to run at module level rather than inside the __main__ guard --
# confirm.
build_dir = os.path.join(ROOT_DIR, "build")
if os.path.isdir(build_dir):
    shutil.rmtree(build_dir)
# Package metadata and the setup() call; only executed when run as a script.
if __name__ == "__main__":
    # Read the README through a context manager so the file handle is closed
    # before setup() starts its (long-running) work.
    with io.open(
            os.path.join(ROOT_DIR, os.path.pardir, "README.rst"),
            "r",
            encoding="utf-8") as readme_file:
        long_description = readme_file.read()

    setuptools.setup(
        name=setup_spec.name,
        version=setup_spec.version,
        author="Ray Team",
        author_email="ray-dev@googlegroups.com",
        description=(setup_spec.description),
        long_description=long_description,
        url="https://github.com/ray-project/ray",
        # Adjacent string literals are concatenated as-is, so the first one
        # needs a trailing space to keep "hyperparameter-tuning" and
        # "reinforcement-learning" separate keywords.
        keywords=(
            "ray distributed parallel machine-learning hyperparameter-tuning "
            "reinforcement-learning deep-learning serving python"),
        classifiers=[
            "Programming Language :: Python :: 3.6",
            "Programming Language :: Python :: 3.7",
            "Programming Language :: Python :: 3.8",
            "Programming Language :: Python :: 3.9",
        ],
        packages=setup_spec.get_packages(),
        cmdclass={"build_ext": build_ext},
        # The BinaryDistribution argument triggers build_ext.
        distclass=BinaryDistribution,
        install_requires=setup_spec.install_requires,
        setup_requires=["cython >= 0.29.15", "wheel"],
        extras_require=setup_spec.extras,
        entry_points={
            "console_scripts": [
                "ray=ray.scripts.scripts:main",
                "rllib=ray.rllib.scripts:cli [rllib]",
                "tune=ray.tune.scripts:cli",
                "ray-operator=ray.ray_operator.operator:main",
                "serve=ray.serve.scripts:cli",
            ]
        },
        include_package_data=True,
        zip_safe=False,
        license="Apache 2.0")