[docker] Build Python3.6 & Python3.8 Docker Images (#13548)

This commit is contained in:
Ian Rodney 2021-01-28 15:24:50 -08:00 committed by GitHub
parent 0c906a8b93
commit 813a7ab0e2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 173 additions and 89 deletions

View file

@ -209,10 +209,32 @@ matrix:
- . ./ci/travis/ci.sh test_wheels
- export PATH="$HOME/miniconda3/bin:$PATH"
- python -m pip install docker
- if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then python $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.py; fi
- if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then python $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.py PY37; fi
- bash ./java/build-jar-multiplatform.sh linux
cache: false
# Build Py36 & Py38 Docker Images
- os: linux
env:
- LINUX_WHEELS=1
- DOCKER_BUILD_PY36_38=1
- PYTHONWARNINGS=ignore
language: java
jdk: openjdk8
install:
- . ./ci/travis/ci.sh init RAY_CI_LINUX_WHEELS_AFFECTED
before_script:
- . ./ci/travis/ci.sh build
script:
- wget --quiet "https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" -O miniconda3.sh
- bash miniconda3.sh -b -p "$HOME/miniconda3"
- export PATH="$HOME/miniconda3/bin:$PATH"
- conda install -y python=3.7.6
- python -m pip install docker
- if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then python $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.py PY36_PY38; fi
cache: false
# Build and deploy multi-platform jars.
- os: linux
env:
@ -491,7 +513,7 @@ deploy:
- provider: script
edge: true # This supposedly opts in to deploy v2.
script: export PATH="$HOME/miniconda3/bin:$PATH"; ./ci/keep_alive python $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.py
script: export PATH="$HOME/miniconda3/bin:$PATH"; ./ci/keep_alive python $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.py PY37
skip_cleanup: true
on:
repo: ray-project/ray
@ -530,3 +552,12 @@ deploy:
repo: ray-project/ray
branch: master
condition: $MULTIPLATFORM_JARS = 1 || $MAC_JARS = 1 || $LINUX_JARS = 1
- provider: script
edge: true # This supposedly opts in to deploy v2.
script: export PATH="$HOME/miniconda3/bin:$PATH"; ./ci/keep_alive python $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.py PY36_PY38
skip_cleanup: true
on:
repo: ray-project/ray
all_branches: true
condition: $LINUX_WHEELS = 1

View file

@ -15,7 +15,7 @@ import docker
print = functools.partial(print, file=sys.stderr, flush=True)
DOCKER_USERNAME = "raytravisbot"
DOCKER_CLIENT = None
PYTHON_WHL_VERSION = "cp37m"
PYTHON_WHL_VERSION = "cp3"
DOCKER_HUB_DESCRIPTION = {
"base-deps": ("Internal Image, refer to "
@ -29,6 +29,8 @@ DOCKER_HUB_DESCRIPTION = {
"https://hub.docker.com/repository/docker/rayproject/ray-ml")
}
PY_MATRIX = {"-py36": "3.6.12", "-py37": "3.7.7", "-py38": "3.8.5"}
def _merge_build():
    """Report whether ``TRAVIS_PULL_REQUEST`` is set to ``"false"``.

    The comparison is case-insensitive.

    Returns:
        bool: True when the environment variable ``TRAVIS_PULL_REQUEST``
        equals ``"false"``; False for any other value, including when the
        variable is not set at all.
    """
    # Default to "" so an unset variable yields False instead of raising
    # AttributeError from calling .lower() on None.
    return os.environ.get("TRAVIS_PULL_REQUEST", "").lower() == "false"
@ -52,13 +54,18 @@ def _get_root_dir():
return os.path.join(_get_curr_dir(), "../../")
def _get_wheel_name():
    """Return the file name of the single manylinux wheel under ``.whl``.

    Globs ``<root>/.whl`` for ``*{PYTHON_WHL_VERSION}-manylinux*`` and
    asserts that exactly one file matches.

    Returns:
        str: base name (no directory part) of the matching wheel.

    Raises:
        AssertionError: the number of matching files is not exactly one.
    """
    pattern = f"*{PYTHON_WHL_VERSION}-manylinux*"
    candidates = glob.glob(f"{_get_root_dir()}/.whl/{pattern}")
    found = len(candidates)
    assert found == 1, f"Found ({found}) matches '{pattern}' instead of 1"
    return os.path.basename(candidates[0])
def _get_wheel_name(minor_version_number):
    """Look up built manylinux wheel file name(s) under ``<root>/.whl``.

    Args:
        minor_version_number: Python minor version appended to
            ``PYTHON_WHL_VERSION`` in the glob pattern (e.g. ``"6"``).
            Any falsy value selects every wheel that matches
            ``PYTHON_WHL_VERSION`` instead.

    Returns:
        The base name (str) of the single matching wheel when a minor
        version is given; otherwise a list with the base names of all
        matching wheels (possibly empty).

    Raises:
        AssertionError: a minor version was given and the number of
            matching wheels is not exactly one.
    """
    wheel_dir = f"{_get_root_dir()}/.whl"

    if not minor_version_number:
        # No specific version requested: hand back every matching wheel.
        any_version = f"*{PYTHON_WHL_VERSION}*-manylinux*"
        every_wheel = glob.glob(f"{wheel_dir}/{any_version}")
        return list(map(os.path.basename, every_wheel))

    pattern = f"*{PYTHON_WHL_VERSION}{minor_version_number}*-manylinux*"
    candidates = glob.glob(f"{wheel_dir}/{pattern}")
    found = len(candidates)
    assert found == 1, (
        f"Found ({found}) matches for '{pattern}' instead of 1")
    return os.path.basename(candidates[0])
def _docker_affected():
@ -81,64 +88,76 @@ def _docker_affected():
def _build_cpu_gpu_images(image_name, no_cache=True) -> List[str]:
built_images = []
for gpu in ["-cpu", "-gpu"]:
build_args = {}
if image_name == "base-deps":
build_args["BASE_IMAGE"] = (
"nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04"
if gpu == "-gpu" else "ubuntu:focal")
else:
build_args["GPU"] = gpu
for py_name, py_version in PY_MATRIX.items():
build_args = {}
build_args["PYTHON_VERSION"] = py_version
# E.g. "-py36"[-1] == "6"
build_args["PYTHON_MINOR_VERSION"] = py_name[-1]
if "ray" in image_name:
build_args["WHEEL_PATH"] = f".whl/{_get_wheel_name()}"
tagged_name = f"rayproject/{image_name}:nightly{gpu}"
for i in range(2):
output = DOCKER_CLIENT.api.build(
path=os.path.join(_get_root_dir(), "docker", image_name),
tag=tagged_name,
nocache=no_cache,
buildargs=build_args)
full_output = ""
try:
start = datetime.datetime.now()
current_iter = start
for line in output:
if datetime.datetime.now(
) - current_iter >= datetime.timedelta(minutes=5):
current_iter = datetime.datetime.now()
elapsed = datetime.datetime.now() - start
print(f"Still building {tagged_name} after "
f"{elapsed.seconds} seconds")
full_output += line.decode("utf-8")
except Exception as e:
print(f"FAILURE with error {e}")
if len(DOCKER_CLIENT.api.images(tagged_name)) == 0:
print(f"ERROR building: {tagged_name} & error below:")
print(full_output)
if (i == 1):
raise Exception("FAILED TO BUILD IMAGE")
print("TRYING AGAIN")
if image_name == "base-deps":
build_args["BASE_IMAGE"] = (
"nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04"
if gpu == "-gpu" else "ubuntu:focal")
else:
break
# NOTE(ilr) This is a bit of an abuse of the name "GPU"
build_args["GPU"] = f"{py_name}{gpu}"
print("BUILT: ", tagged_name)
built_images.append(tagged_name)
if image_name in ["ray", "ray-deps"]:
wheel = _get_wheel_name(build_args["PYTHON_MINOR_VERSION"])
build_args["WHEEL_PATH"] = f".whl/{wheel}"
tagged_name = f"rayproject/{image_name}:nightly{py_name}{gpu}"
for i in range(2):
cleanup = DOCKER_CLIENT.containers.prune().get(
"SpaceReclaimed")
if cleanup is not None:
print(f"Cleaned up {cleanup / (2**20)}MB")
output = DOCKER_CLIENT.api.build(
path=os.path.join(_get_root_dir(), "docker", image_name),
tag=tagged_name,
nocache=no_cache,
buildargs=build_args)
full_output = ""
try:
start = datetime.datetime.now()
current_iter = start
for line in output:
if datetime.datetime.now(
) - current_iter >= datetime.timedelta(minutes=5):
current_iter = datetime.datetime.now()
elapsed = datetime.datetime.now() - start
print(f"Still building {tagged_name} after "
f"{elapsed.seconds} seconds")
full_output += line.decode("utf-8")
except Exception as e:
print(f"FAILURE with error {e}")
if len(DOCKER_CLIENT.api.images(tagged_name)) == 0:
print(f"ERROR building: {tagged_name} & error below:")
print(full_output)
if (i == 1):
raise Exception("FAILED TO BUILD IMAGE")
print("TRYING AGAIN")
else:
break
print("BUILT: ", tagged_name)
built_images.append(tagged_name)
return built_images
def copy_wheels():
root_dir = _get_root_dir()
wheel = _get_wheel_name()
source = os.path.join(root_dir, ".whl", wheel)
ray_dst = os.path.join(root_dir, "docker/ray/.whl/")
ray_dep_dst = os.path.join(root_dir, "docker/ray-deps/.whl/")
os.makedirs(ray_dst, exist_ok=True)
shutil.copy(source, ray_dst)
os.makedirs(ray_dep_dst, exist_ok=True)
shutil.copy(source, ray_dep_dst)
wheels = _get_wheel_name(None)
for wheel in wheels:
source = os.path.join(root_dir, ".whl", wheel)
ray_dst = os.path.join(root_dir, "docker/ray/.whl/")
ray_dep_dst = os.path.join(root_dir, "docker/ray-deps/.whl/")
os.makedirs(ray_dst, exist_ok=True)
shutil.copy(source, ray_dst)
os.makedirs(ray_dep_dst, exist_ok=True)
shutil.copy(source, ray_dep_dst)
def build_or_pull_base_images(is_docker_affected: bool) -> List[str]:
@ -239,31 +258,48 @@ def push_and_tag_images(push_base_images: bool):
image_list.extend(["base-deps", "ray-deps"])
for image in image_list:
full_image = f"rayproject/{image}"
for py_version in PY_MATRIX.keys():
full_image = f"rayproject/{image}"
# Generate <IMAGE_NAME>:nightly from nightly-cpu
DOCKER_CLIENT.api.tag(
image=f"{full_image}:nightly-cpu",
repository=full_image,
tag="nightly")
for arch_tag in ["-cpu", "-gpu", ""]:
full_arch_tag = f"nightly{arch_tag}"
# Do not tag release builds because they are no longer up to date
# after the branch cut.
if not _release_build():
# Tag and push rayproject/<image>:nightly<arch_tag>
docker_push(full_image, full_arch_tag)
# Ex: specific_tag == "1.0.1" or "<sha>" or "<date>"
specific_tag = get_new_tag(
full_arch_tag, date_tag if "-deps" in image else sha_tag)
# Tag and push rayproject/<image>:<sha/date><arch_tag>
# Tag "nightly-py3x" from "nightly-py3x-cpu"
DOCKER_CLIENT.api.tag(
image=f"{full_image}:{full_arch_tag}",
image=f"{full_image}:nightly{py_version}-cpu",
repository=full_image,
tag=specific_tag)
docker_push(full_image, specific_tag)
tag=f"nightly{py_version}")
for arch_tag in ["-cpu", "-gpu", ""]:
full_arch_tag = f"nightly{py_version}{arch_tag}"
# Do not tag release builds because they are no longer up to
# date after the branch cut.
if not _release_build():
# Tag and push rayproject/<image>:nightly<py_tag><arch_tag>
docker_push(full_image, full_arch_tag)
# Ex: specific_tag == "1.0.1" or "<sha>" or "<date>"
specific_tag = get_new_tag(
full_arch_tag, date_tag if "-deps" in image else sha_tag)
# Tag and push rayproject/<image>:<sha/date><py_tag><arch_tag>
DOCKER_CLIENT.api.tag(
image=f"{full_image}:{full_arch_tag}",
repository=full_image,
tag=specific_tag)
docker_push(full_image, specific_tag)
if "-py37" in py_version:
non_python_specific_tag = specific_tag.replace("-py37", "")
DOCKER_CLIENT.api.tag(
image=f"{full_image}:{full_arch_tag}",
repository=full_image,
tag=non_python_specific_tag)
docker_push(full_image, non_python_specific_tag)
non_python_nightly_tag = full_arch_tag.replace("-py37", "")
DOCKER_CLIENT.api.tag(
image=f"{full_image}:{full_arch_tag}",
repository=full_image,
tag=non_python_nightly_tag)
docker_push(full_image, non_python_nightly_tag)
# Push infra here:
@ -306,6 +342,14 @@ def push_readmes():
if __name__ == "__main__":
print("RUNNING WITH: ", sys.version)
if len(sys.argv) == 2:
version_to_drop = sys.argv[1]
if version_to_drop == "PY37":
PY_MATRIX.pop("-py36")
PY_MATRIX.pop("-py38")
else:
PY_MATRIX.pop("-py37")
print("Building the following python versions: ", PY_MATRIX)
if os.environ.get("TRAVIS") == "true":
is_docker_affected = _docker_affected()
if _merge_build() or is_docker_affected:

View file

@ -124,6 +124,8 @@ if __name__ == "__main__":
for prefix in skip_prefix_list):
# nothing is run but linting in these cases
pass
elif changed_file.endswith("build-docker-images.py"):
RAY_CI_DOCKER_AFFECTED = 1
elif changed_file.startswith("src/"):
RAY_CI_TUNE_AFFECTED = 1
RAY_CI_SGD_AFFECTED = 1

View file

@ -30,6 +30,8 @@ RUN sudo apt-get update -y && sudo apt-get upgrade -y \
git \
wget \
cmake \
g++ \
zlib1g-dev \
$(if [ "$AUTOSCALER" = "autoscaler" ]; then echo \
tmux \
screen \
@ -52,12 +54,14 @@ RUN sudo apt-get update -y && sudo apt-get upgrade -y \
numpy==1.15.4 \
psutil \
blist \
atari-py \
# blist is needed for numpy (which is re-installed when ray is installed)
# atari-py is built from source for Python 3.8 (requires g++ & zlib1g-dev)
# To avoid the following error on Jenkins:
# AttributeError: 'numpy.ufunc' object has no attribute '__module__'
&& $HOME/anaconda3/bin/pip uninstall -y dask \
# We install cmake temporarily to get psutil
&& sudo apt-get autoremove -y cmake \
# We install cmake, g++ & zlib1g-dev temporarily to get psutil, blist & atari-py
&& sudo apt-get autoremove -y cmake g++ zlib1g-dev \
# Either install kubectl or remove wget
&& (if [ "$AUTOSCALER" = "autoscaler" ]; \
then wget -O - -q https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add - \

View file

@ -1,12 +1,13 @@
ARG GPU
FROM rayproject/ray:nightly"$GPU"
ARG PYTHON_MINOR_VERSION=7
# We have to uninstall wrapt this way for Tensorflow compatibility
COPY requirements.txt ./
COPY requirements_ml_docker.txt ./
COPY requirements_rllib.txt ./
# Docker image uses Python 3.7
COPY linux-py3.7-requirements_tune.txt ./requirements_tune.txt
COPY linux-py3."$PYTHON_MINOR_VERSION"-requirements_tune.txt ./requirements_tune.txt
RUN sudo apt-get update \
&& sudo apt-get install -y gcc \
@ -14,12 +15,13 @@ RUN sudo apt-get update \
libgtk2.0-dev \
zlib1g-dev \
libgl1-mesa-dev \
&& $HOME/anaconda3/bin/pip --no-cache-dir install -U -r requirements_ml_docker.txt \
&& $HOME/anaconda3/bin/pip --use-deprecated=legacy-resolver --no-cache-dir install -r requirements.txt \
&& $HOME/anaconda3/bin/pip --no-cache-dir install -r requirements_rllib.txt \
&& $HOME/anaconda3/bin/pip --no-cache-dir install -r requirements_tune.txt \
&& $HOME/anaconda3/bin/pip --no-cache-dir install -U -r requirements_ml_docker.txt \
# Remove dataclasses & typing because they are included in Py3.7
&& $HOME/anaconda3/bin/pip uninstall dataclasses typing -y \
# Remove dataclasses & typing because they are included in Python > 3.6
&& if [ $(python -c 'import sys; print(sys.version_info.minor)') != "6" ]; then \
$HOME/anaconda3/bin/pip uninstall dataclasses typing -y; fi \
&& sudo rm requirements.txt && sudo rm requirements_ml_docker.txt \
&& sudo rm requirements_tune.txt && sudo rm requirements_rllib.txt \
&& sudo apt-get clean

View file

@ -4,3 +4,4 @@ tensorflow-gpu>=2.4.0
torch==1.7.1+cu110
-f https://download.pytorch.org/whl/torch_stable.html
torchvision==0.8.2+cu110
pip; python_version > "3.7"