From 813a7ab0e260a4623ecf99deee959dde27aa81cb Mon Sep 17 00:00:00 2001 From: Ian Rodney Date: Thu, 28 Jan 2021 15:24:50 -0800 Subject: [PATCH] [docker] Build Python3.6 & Python3.8 Docker Images (#13548) --- .travis.yml | 35 ++++- ci/travis/build-docker-images.py | 204 +++++++++++++++++----------- ci/travis/determine_tests_to_run.py | 2 + docker/base-deps/Dockerfile | 8 +- docker/ray-ml/Dockerfile | 10 +- python/requirements_ml_docker.txt | 3 +- 6 files changed, 173 insertions(+), 89 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4d8f8ddd1..8cff56d41 100644 --- a/.travis.yml +++ b/.travis.yml @@ -209,10 +209,32 @@ matrix: - . ./ci/travis/ci.sh test_wheels - export PATH="$HOME/miniconda3/bin:$PATH" - python -m pip install docker - - if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then python $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.py; fi + - if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then python $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.py PY37; fi - bash ./java/build-jar-multiplatform.sh linux cache: false + + # Build Py36 & Py38 Docker Images + - os: linux + env: + - LINUX_WHEELS=1 + - DOCKER_BUILD_PY36_38=1 + - PYTHONWARNINGS=ignore + language: java + jdk: openjdk8 + install: + - . ./ci/travis/ci.sh init RAY_CI_LINUX_WHEELS_AFFECTED + before_script: + - . ./ci/travis/ci.sh build + script: + - wget --quiet "https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh" -O miniconda3.sh + - bash miniconda3.sh -b -p "$HOME/miniconda3" + - export PATH="$HOME/miniconda3/bin:$PATH" + - conda install -y python=3.7.6 + - python -m pip install docker + - if [[ "$TRAVIS_PULL_REQUEST" != "false" ]]; then python $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.py PY36_PY38; fi + cache: false + # Build and deploy multi-platform jars. - os: linux env: @@ -491,7 +513,7 @@ deploy: - provider: script edge: true # This supposedly opts in to deploy v2. - script: export PATH="$HOME/miniconda3/bin:$PATH"; ./ci/keep_alive python $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.py + script: export PATH="$HOME/miniconda3/bin:$PATH"; ./ci/keep_alive python $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.py PY37 skip_cleanup: true on: repo: ray-project/ray @@ -530,3 +552,12 @@ deploy: repo: ray-project/ray branch: master condition: $MULTIPLATFORM_JARS = 1 || $MAC_JARS = 1 || $LINUX_JARS = 1 + + - provider: script + edge: true # This supposedly opts in to deploy v2. + script: export PATH="$HOME/miniconda3/bin:$PATH"; ./ci/keep_alive python $TRAVIS_BUILD_DIR/ci/travis/build-docker-images.py PY36_PY38 + skip_cleanup: true + on: + repo: ray-project/ray + all_branches: true + condition: $LINUX_WHEELS = 1 \ No newline at end of file diff --git a/ci/travis/build-docker-images.py b/ci/travis/build-docker-images.py index c549bc95e..ad69a15db 100644 --- a/ci/travis/build-docker-images.py +++ b/ci/travis/build-docker-images.py @@ -15,7 +15,7 @@ import docker print = functools.partial(print, file=sys.stderr, flush=True) DOCKER_USERNAME = "raytravisbot" DOCKER_CLIENT = None -PYTHON_WHL_VERSION = "cp37m" +PYTHON_WHL_VERSION = "cp3" DOCKER_HUB_DESCRIPTION = { "base-deps": ("Internal Image, refer to " @@ -29,6 +29,8 @@ DOCKER_HUB_DESCRIPTION = { "https://hub.docker.com/repository/docker/rayproject/ray-ml") } +PY_MATRIX = {"-py36": "3.6.12", "-py37": "3.7.7", "-py38": "3.8.5"} + def _merge_build(): return os.environ.get("TRAVIS_PULL_REQUEST").lower() == "false" @@ -52,13 +54,18 @@ def _get_root_dir(): return os.path.join(_get_curr_dir(), "../../") -def _get_wheel_name(): - matches = glob.glob( - f"{_get_root_dir()}/.whl/*{PYTHON_WHL_VERSION}-manylinux*") - assert len(matches) == 1, ( - f"Found ({len(matches)}) matches " - f"'*{PYTHON_WHL_VERSION}-manylinux*' instead of 1") - return os.path.basename(matches[0]) +def _get_wheel_name(minor_version_number): + if minor_version_number: + matches = glob.glob(f"{_get_root_dir()}/.whl/*{PYTHON_WHL_VERSION}" + f"{minor_version_number}*-manylinux*") + assert len(matches) == 1, ( + f"Found ({len(matches)}) matches for '*{PYTHON_WHL_VERSION}" + f"{minor_version_number}*-manylinux*' instead of 1") + return os.path.basename(matches[0]) + else: + matches = glob.glob( + f"{_get_root_dir()}/.whl/*{PYTHON_WHL_VERSION}*-manylinux*") + return [os.path.basename(i) for i in matches] def _docker_affected(): @@ -81,64 +88,76 @@ def _docker_affected(): def _build_cpu_gpu_images(image_name, no_cache=True) -> List[str]: built_images = [] for gpu in ["-cpu", "-gpu"]: - build_args = {} - if image_name == "base-deps": - build_args["BASE_IMAGE"] = ( - "nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04" - if gpu == "-gpu" else "ubuntu:focal") - else: - build_args["GPU"] = gpu + for py_name, py_version in PY_MATRIX.items(): + build_args = {} + build_args["PYTHON_VERSION"] = py_version + # I.e. "-py36"[-1] == 6 + build_args["PYTHON_MINOR_VERSION"] = py_name[-1] - if "ray" in image_name: - build_args["WHEEL_PATH"] = f".whl/{_get_wheel_name()}" - - tagged_name = f"rayproject/{image_name}:nightly{gpu}" - for i in range(2): - output = DOCKER_CLIENT.api.build( - path=os.path.join(_get_root_dir(), "docker", image_name), - tag=tagged_name, - nocache=no_cache, - buildargs=build_args) - - full_output = "" - try: - start = datetime.datetime.now() - current_iter = start - for line in output: - if datetime.datetime.now( - ) - current_iter >= datetime.timedelta(minutes=5): - current_iter = datetime.datetime.now() - elapsed = datetime.datetime.now() - start - print(f"Still building {tagged_name} after " - f"{elapsed.seconds} seconds") - full_output += line.decode("utf-8") - except Exception as e: - print(f"FAILURE with error {e}") - - if len(DOCKER_CLIENT.api.images(tagged_name)) == 0: - print(f"ERROR building: {tagged_name} & error below:") - print(full_output) - if (i == 1): - raise Exception("FAILED TO BUILD IMAGE") - print("TRYING AGAIN") + if image_name == "base-deps": + build_args["BASE_IMAGE"] = ( + "nvidia/cuda:11.0-cudnn8-runtime-ubuntu18.04" + if gpu == "-gpu" else "ubuntu:focal") else: - break + # NOTE(ilr) This is a bit of an abuse of the name "GPU" + build_args["GPU"] = f"{py_name}{gpu}" - print("BUILT: ", tagged_name) - built_images.append(tagged_name) + if image_name in ["ray", "ray-deps"]: + wheel = _get_wheel_name(build_args["PYTHON_MINOR_VERSION"]) + build_args["WHEEL_PATH"] = f".whl/{wheel}" + + tagged_name = f"rayproject/{image_name}:nightly{py_name}{gpu}" + for i in range(2): + cleanup = DOCKER_CLIENT.containers.prune().get( + "SpaceReclaimed") + if cleanup is not None: + print(f"Cleaned up {cleanup / (2**20)}MB") + output = DOCKER_CLIENT.api.build( + path=os.path.join(_get_root_dir(), "docker", image_name), + tag=tagged_name, + nocache=no_cache, + buildargs=build_args) + + full_output = "" + try: + start = datetime.datetime.now() + current_iter = start + for line in output: + if datetime.datetime.now( + ) - current_iter >= datetime.timedelta(minutes=5): + current_iter = datetime.datetime.now() + elapsed = datetime.datetime.now() - start + print(f"Still building {tagged_name} after " + f"{elapsed.seconds} seconds") + full_output += line.decode("utf-8") + except Exception as e: + print(f"FAILURE with error {e}") + + if len(DOCKER_CLIENT.api.images(tagged_name)) == 0: + print(f"ERROR building: {tagged_name} & error below:") + print(full_output) + if (i == 1): + raise Exception("FAILED TO BUILD IMAGE") + print("TRYING AGAIN") + else: + break + + print("BUILT: ", tagged_name) + built_images.append(tagged_name) return built_images def copy_wheels(): root_dir = _get_root_dir() - wheel = _get_wheel_name() - source = os.path.join(root_dir, ".whl", wheel) - ray_dst = os.path.join(root_dir, "docker/ray/.whl/") - ray_dep_dst = os.path.join(root_dir, "docker/ray-deps/.whl/") - os.makedirs(ray_dst, exist_ok=True) - shutil.copy(source, ray_dst) - os.makedirs(ray_dep_dst, exist_ok=True) - shutil.copy(source, ray_dep_dst) + wheels = _get_wheel_name(None) + for wheel in wheels: + source = os.path.join(root_dir, ".whl", wheel) + ray_dst = os.path.join(root_dir, "docker/ray/.whl/") + ray_dep_dst = os.path.join(root_dir, "docker/ray-deps/.whl/") + os.makedirs(ray_dst, exist_ok=True) + shutil.copy(source, ray_dst) + os.makedirs(ray_dep_dst, exist_ok=True) + shutil.copy(source, ray_dep_dst) def build_or_pull_base_images(is_docker_affected: bool) -> List[str]: @@ -239,31 +258,48 @@ def push_and_tag_images(push_base_images: bool): image_list.extend(["base-deps", "ray-deps"]) for image in image_list: - full_image = f"rayproject/{image}" + for py_version in PY_MATRIX.keys(): + full_image = f"rayproject/{image}" - # Generate :nightly from nightly-cpu - DOCKER_CLIENT.api.tag( - image=f"{full_image}:nightly-cpu", - repository=full_image, - tag="nightly") - - for arch_tag in ["-cpu", "-gpu", ""]: - full_arch_tag = f"nightly{arch_tag}" - # Do not tag release builds because they are no longer up to date - # after the branch cut. - if not _release_build(): - # Tag and push rayproject/:nightly - docker_push(full_image, full_arch_tag) - - # Ex: specific_tag == "1.0.1" or "" or "" - specific_tag = get_new_tag( - full_arch_tag, date_tag if "-deps" in image else sha_tag) - # Tag and push rayproject/: + # Tag "nightly-py3x" from "nightly-py3x-cpu" DOCKER_CLIENT.api.tag( - image=f"{full_image}:{full_arch_tag}", + image=f"{full_image}:nightly{py_version}-cpu", repository=full_image, - tag=specific_tag) - docker_push(full_image, specific_tag) + tag=f"nightly{py_version}") + + for arch_tag in ["-cpu", "-gpu", ""]: + full_arch_tag = f"nightly{py_version}{arch_tag}" + # Do not tag release builds because they are no longer up to + # date after the branch cut. + if not _release_build(): + # Tag and push rayproject/:nightly + docker_push(full_image, full_arch_tag) + + # Ex: specific_tag == "1.0.1" or "" or "" + specific_tag = get_new_tag( + full_arch_tag, date_tag if "-deps" in image else sha_tag) + + # Tag and push rayproject/: + DOCKER_CLIENT.api.tag( + image=f"{full_image}:{full_arch_tag}", + repository=full_image, + tag=specific_tag) + docker_push(full_image, specific_tag) + + if "-py37" in py_version: + non_python_specific_tag = specific_tag.replace("-py37", "") + DOCKER_CLIENT.api.tag( + image=f"{full_image}:{full_arch_tag}", + repository=full_image, + tag=non_python_specific_tag) + docker_push(full_image, non_python_specific_tag) + + non_python_nightly_tag = full_arch_tag.replace("-py37", "") + DOCKER_CLIENT.api.tag( + image=f"{full_image}:{full_arch_tag}", + repository=full_image, + tag=non_python_nightly_tag) + docker_push(full_image, non_python_nightly_tag) # Push infra here: @@ -306,6 +342,14 @@ def push_readmes(): if __name__ == "__main__": print("RUNNING WITH: ", sys.version) + if len(sys.argv) == 2: + version_to_drop = sys.argv[1] + if version_to_drop == "PY37": + PY_MATRIX.pop("-py36") + PY_MATRIX.pop("-py38") + else: + PY_MATRIX.pop("-py37") + print("Building the following python versions: ", PY_MATRIX) if os.environ.get("TRAVIS") == "true": is_docker_affected = _docker_affected() if _merge_build() or is_docker_affected: diff --git a/ci/travis/determine_tests_to_run.py b/ci/travis/determine_tests_to_run.py index 70eefc16a..cba016fcf 100644 --- a/ci/travis/determine_tests_to_run.py +++ b/ci/travis/determine_tests_to_run.py @@ -124,6 +124,8 @@ if __name__ == "__main__": for prefix in skip_prefix_list): # nothing is run but linting in these cases pass + elif changed_file.endswith("build-docker-images.py"): + RAY_CI_DOCKER_AFFECTED = 1 elif changed_file.startswith("src/"): RAY_CI_TUNE_AFFECTED = 1 RAY_CI_SGD_AFFECTED = 1 diff --git a/docker/base-deps/Dockerfile b/docker/base-deps/Dockerfile index 278fad1ec..e00ca141c 100644 --- a/docker/base-deps/Dockerfile +++ b/docker/base-deps/Dockerfile @@ -30,6 +30,8 @@ RUN sudo apt-get update -y && sudo apt-get upgrade -y \ git \ wget \ cmake \ + g++ \ + zlib1g-dev \ $(if [ "$AUTOSCALER" = "autoscaler" ]; then echo \ tmux \ screen \ @@ -52,12 +54,14 @@ RUN sudo apt-get update -y && sudo apt-get upgrade -y \ numpy==1.15.4 \ psutil \ blist \ + atari-py \ # blist is needed for numpy (which is re-installed when ray is installed) + # atari-py is built from source for Python 3.8 (requires g++ & zlib1g-dev) # To avoid the following error on Jenkins: # AttributeError: 'numpy.ufunc' object has no attribute '__module__' && $HOME/anaconda3/bin/pip uninstall -y dask \ - # We install cmake temporarily to get psutil - && sudo apt-get autoremove -y cmake \ + # We install cmake temporarily to get psutil, blist & atari-py + && sudo apt-get autoremove -y cmake g++ zlib1g-dev \ # Either install kubectl or remove wget && (if [ "$AUTOSCALER" = "autoscaler" ]; \ then wget -O - -q https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add - \ diff --git a/docker/ray-ml/Dockerfile b/docker/ray-ml/Dockerfile index 25211085e..908351df1 100644 --- a/docker/ray-ml/Dockerfile +++ b/docker/ray-ml/Dockerfile @@ -1,12 +1,13 @@ ARG GPU FROM rayproject/ray:nightly"$GPU" +ARG PYTHON_MINOR_VERSION=7 # We have to uninstall wrapt this way for Tensorflow compatibility COPY requirements.txt ./ COPY requirements_ml_docker.txt ./ COPY requirements_rllib.txt ./ # Docker image uses Python 3.7 -COPY linux-py3.7-requirements_tune.txt ./requirements_tune.txt +COPY linux-py3."$PYTHON_MINOR_VERSION"-requirements_tune.txt ./requirements_tune.txt RUN sudo apt-get update \ && sudo apt-get install -y gcc \ @@ -14,12 +15,13 @@ RUN sudo apt-get update \ libgtk2.0-dev \ zlib1g-dev \ libgl1-mesa-dev \ + && $HOME/anaconda3/bin/pip --no-cache-dir install -U -r requirements_ml_docker.txt \ && $HOME/anaconda3/bin/pip --use-deprecated=legacy-resolver --no-cache-dir install -r requirements.txt \ && $HOME/anaconda3/bin/pip --no-cache-dir install -r requirements_rllib.txt \ && $HOME/anaconda3/bin/pip --no-cache-dir install -r requirements_tune.txt \ - && $HOME/anaconda3/bin/pip --no-cache-dir install -U -r requirements_ml_docker.txt \ - # Remove dataclasses & typing because they are included in Py3.7 - && $HOME/anaconda3/bin/pip uninstall dataclasses typing -y \ + # Remove dataclasses & typing because they are included in Python > 3.6 + && if [ $(python -c 'import sys; print(sys.version_info.minor)') != "6" ]; then \ + $HOME/anaconda3/bin/pip uninstall dataclasses typing -y; fi \ && sudo rm requirements.txt && sudo rm requirements_ml_docker.txt \ && sudo rm requirements_tune.txt && sudo rm requirements_rllib.txt \ && sudo apt-get clean diff --git a/python/requirements_ml_docker.txt b/python/requirements_ml_docker.txt index c61ba0c05..bbecb5bd8 100644 --- a/python/requirements_ml_docker.txt +++ b/python/requirements_ml_docker.txt @@ -3,4 +3,5 @@ tensorflow-gpu>=2.4.0 -f https://download.pytorch.org/whl/torch_stable.html torch==1.7.1+cu110 -f https://download.pytorch.org/whl/torch_stable.html -torchvision==0.8.2+cu110 \ No newline at end of file +torchvision==0.8.2+cu110 +pip; python_version > "3.7"