mirror of
https://github.com/vale981/ray
synced 2025-03-05 10:01:43 -05:00
This reverts commit e58fcca404
.
This commit is contained in:
parent
47744d282c
commit
db863aafc0
16 changed files with 458 additions and 229 deletions
|
@ -75,37 +75,69 @@
|
||||||
# # Upload to latest directory.
|
# # Upload to latest directory.
|
||||||
# - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination wheels --path ./.whl; fi
|
# - if [ "$BUILDKITE_BRANCH" == "master" ]; then python .buildkite/copy_files.py --destination wheels --path ./.whl; fi
|
||||||
|
|
||||||
- label: ":docker: Build Images: py36"
|
- label: ":docker: Build Images: py36 (1/2)"
|
||||||
conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
|
conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
|
||||||
commands:
|
commands:
|
||||||
- LINUX_WHEELS=1 ./ci/travis/ci.sh build
|
- LINUX_WHEELS=1 ./ci/travis/ci.sh build
|
||||||
- pip install -q docker aws_requests_auth boto3
|
- pip install -q docker aws_requests_auth boto3
|
||||||
- if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
|
- if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
|
||||||
- python ./ci/travis/build-docker-images.py --py-versions PY36 --build-type BUILDKITE --build-base
|
- python ./ci/travis/build-docker-images.py --py-versions py36 --device-types cpu cu101 cu102 --build-type BUILDKITE --build-base
|
||||||
|
|
||||||
- label: ":docker: Build Images: py37"
|
- label: ":docker: Build Images: py36 (2/2)"
|
||||||
conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
|
conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
|
||||||
commands:
|
commands:
|
||||||
- LINUX_WHEELS=1 ./ci/travis/ci.sh build
|
- LINUX_WHEELS=1 ./ci/travis/ci.sh build
|
||||||
- pip install -q docker aws_requests_auth boto3
|
- pip install -q docker aws_requests_auth boto3
|
||||||
- if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
|
- if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
|
||||||
- python ./ci/travis/build-docker-images.py --py-versions PY37 --build-type BUILDKITE --build-base
|
- python ./ci/travis/build-docker-images.py --py-versions py36 --device-types cu110 cu111 cu112 --build-type BUILDKITE --build-base
|
||||||
|
|
||||||
- label: ":docker: Build Images: py38"
|
- label: ":docker: Build Images: py37 (1/2)"
|
||||||
conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
|
conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
|
||||||
commands:
|
commands:
|
||||||
- LINUX_WHEELS=1 ./ci/travis/ci.sh build
|
- LINUX_WHEELS=1 ./ci/travis/ci.sh build
|
||||||
- pip install -q docker aws_requests_auth boto3
|
- pip install -q docker aws_requests_auth boto3
|
||||||
- if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
|
- if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
|
||||||
- python ./ci/travis/build-docker-images.py --py-versions PY38 --build-type BUILDKITE --build-base
|
- python ./ci/travis/build-docker-images.py --py-versions py37 --device-types cpu cu101 cu102 --build-type BUILDKITE --build-base
|
||||||
|
|
||||||
- label: ":docker: Build Images: py39"
|
- label: ":docker: Build Images: py37 (2/2)"
|
||||||
conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
|
conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
|
||||||
commands:
|
commands:
|
||||||
- LINUX_WHEELS=1 ./ci/travis/ci.sh build
|
- LINUX_WHEELS=1 ./ci/travis/ci.sh build
|
||||||
- pip install -q docker aws_requests_auth boto3
|
- pip install -q docker aws_requests_auth boto3
|
||||||
- if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
|
- if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
|
||||||
- python ./ci/travis/build-docker-images.py --py-versions PY39 --build-type BUILDKITE --build-base
|
- python ./ci/travis/build-docker-images.py --py-versions py37 --device-types cu110 cu111 cu112 --build-type BUILDKITE --build-base
|
||||||
|
|
||||||
|
- label: ":docker: Build Images: py38 (1/2)"
|
||||||
|
conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
|
||||||
|
commands:
|
||||||
|
- LINUX_WHEELS=1 ./ci/travis/ci.sh build
|
||||||
|
- pip install -q docker aws_requests_auth boto3
|
||||||
|
- if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
|
||||||
|
- python ./ci/travis/build-docker-images.py --py-versions py38 --device-types cpu cu101 cu102 --build-type BUILDKITE --build-base
|
||||||
|
|
||||||
|
- label: ":docker: Build Images: py38 (2/2)"
|
||||||
|
conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
|
||||||
|
commands:
|
||||||
|
- LINUX_WHEELS=1 ./ci/travis/ci.sh build
|
||||||
|
- pip install -q docker aws_requests_auth boto3
|
||||||
|
- if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
|
||||||
|
- python ./ci/travis/build-docker-images.py --py-versions py38 --device-types cu110 cu111 cu112 --build-type BUILDKITE --build-base
|
||||||
|
|
||||||
|
- label: ":docker: Build Images: py39 (1/2)"
|
||||||
|
conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
|
||||||
|
commands:
|
||||||
|
- LINUX_WHEELS=1 ./ci/travis/ci.sh build
|
||||||
|
- pip install -q docker aws_requests_auth boto3
|
||||||
|
- if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
|
||||||
|
- python ./ci/travis/build-docker-images.py --py-versions py39 --device-types cpu cu101 cu102 --build-type BUILDKITE --build-base
|
||||||
|
|
||||||
|
- label: ":docker: Build Images: py39 (2/2)"
|
||||||
|
conditions: ["RAY_CI_LINUX_WHEELS_AFFECTED"]
|
||||||
|
commands:
|
||||||
|
- LINUX_WHEELS=1 ./ci/travis/ci.sh build
|
||||||
|
- pip install -q docker aws_requests_auth boto3
|
||||||
|
- if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then python .buildkite/copy_files.py --destination docker_login; fi
|
||||||
|
- python ./ci/travis/build-docker-images.py --py-versions py39 --device-types cu110 cu111 cu112 --build-type BUILDKITE --build-base
|
||||||
|
|
||||||
- label: ":book: Lint"
|
- label: ":book: Lint"
|
||||||
commands:
|
commands:
|
||||||
|
|
|
@ -3,11 +3,13 @@ import datetime
|
||||||
import json
|
import json
|
||||||
import functools
|
import functools
|
||||||
import glob
|
import glob
|
||||||
|
import itertools
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
|
from collections import defaultdict
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
|
||||||
import docker
|
import docker
|
||||||
|
@ -24,18 +26,39 @@ DOCKER_HUB_DESCRIPTION = {
|
||||||
"https://hub.docker.com/r/rayproject/ray"),
|
"https://hub.docker.com/r/rayproject/ray"),
|
||||||
"ray": "Official Docker Images for Ray, the distributed computing API.",
|
"ray": "Official Docker Images for Ray, the distributed computing API.",
|
||||||
"ray-ml": "Developer ready Docker Image for Ray.",
|
"ray-ml": "Developer ready Docker Image for Ray.",
|
||||||
"autoscaler": (
|
|
||||||
"Deprecated image, please use: "
|
|
||||||
"https://hub.docker.com/repository/docker/rayproject/ray-ml")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
PY_MATRIX = {
|
PY_MATRIX = {
|
||||||
"-py36": "3.6.12",
|
"py36": "3.6.12",
|
||||||
"-py37": "3.7.7",
|
"py37": "3.7.7",
|
||||||
"-py38": "3.8.5",
|
"py38": "3.8.5",
|
||||||
"-py39": "3.9.5"
|
"py39": "3.9.5"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BASE_IMAGES = {
|
||||||
|
"cu112": "nvidia/cuda:11.2.0-cudnn8-devel-ubuntu18.04",
|
||||||
|
"cu111": "nvidia/cuda:11.1.1-cudnn8-devel-ubuntu18.04",
|
||||||
|
"cu110": "nvidia/cuda:11.0.3-cudnn8-devel-ubuntu18.04",
|
||||||
|
"cu102": "nvidia/cuda:10.2-cudnn8-devel-ubuntu18.04",
|
||||||
|
"cu101": "nvidia/cuda:10.1-cudnn8-devel-ubuntu18.04",
|
||||||
|
"cpu": "ubuntu:focal",
|
||||||
|
}
|
||||||
|
|
||||||
|
CUDA_FULL = {
|
||||||
|
"cu112": "CUDA 11.2",
|
||||||
|
"cu111": "CUDA 11.1",
|
||||||
|
"cu110": "CUDA 11.0",
|
||||||
|
"cu102": "CUDA 10.2",
|
||||||
|
"cu101": "CUDA 10.1"
|
||||||
|
}
|
||||||
|
|
||||||
|
# The CUDA version to use for the ML Docker image.
|
||||||
|
ML_CUDA_VERSION = "cu112"
|
||||||
|
|
||||||
|
DEFAULT_PYTHON_VERSION = "py37"
|
||||||
|
|
||||||
|
IMAGE_NAMES = list(DOCKER_HUB_DESCRIPTION.keys())
|
||||||
|
|
||||||
|
|
||||||
def _get_branch():
|
def _get_branch():
|
||||||
branch = (os.environ.get("TRAVIS_BRANCH")
|
branch = (os.environ.get("TRAVIS_BRANCH")
|
||||||
|
@ -119,83 +142,117 @@ def _check_if_docker_files_modified():
|
||||||
return affected
|
return affected
|
||||||
|
|
||||||
|
|
||||||
def _build_cpu_gpu_images(image_name, no_cache=True) -> List[str]:
|
def _build_docker_image(image_name: str,
|
||||||
built_images = []
|
py_version: str,
|
||||||
for gpu in ["-cpu", "-gpu"]:
|
image_type: str,
|
||||||
for py_name, py_version in PY_MATRIX.items():
|
no_cache=True):
|
||||||
# TODO(https://github.com/ray-project/ray/issues/16599):
|
"""Builds Docker image with the provided info.
|
||||||
# remove below after supporting ray-ml images with Python 3.9
|
|
||||||
if image_name in ["ray-ml", "autoscaler"
|
|
||||||
] and py_version.startswith("3.9"):
|
|
||||||
print(f"{image_name} image is currently unsupported with "
|
|
||||||
"Python 3.9")
|
|
||||||
continue
|
|
||||||
|
|
||||||
build_args = {}
|
image_name (str): The name of the image to build. Must be one of
|
||||||
build_args["PYTHON_VERSION"] = py_version
|
IMAGE_NAMES.
|
||||||
# I.e. "-py36"[-1] == 6
|
py_version (str): The Python version to build the image for.
|
||||||
build_args["PYTHON_MINOR_VERSION"] = py_name[-1]
|
Must be one of PY_MATRIX.keys()
|
||||||
|
image_type (str): The image type to build. Must be one of
|
||||||
|
BASE_IMAGES.keys()
|
||||||
|
no_cache (bool): If True, don't use caching when building the image.
|
||||||
|
"""
|
||||||
|
|
||||||
if image_name == "base-deps":
|
if image_name not in IMAGE_NAMES:
|
||||||
build_args["BASE_IMAGE"] = (
|
raise ValueError(
|
||||||
"nvidia/cuda:11.2.0-cudnn8-devel-ubuntu18.04"
|
f"The provided image name {image_name} is not "
|
||||||
if gpu == "-gpu" else "ubuntu:focal")
|
f"recognized. Image names must be one of {IMAGE_NAMES}")
|
||||||
else:
|
|
||||||
# NOTE(ilr) This is a bit of an abuse of the name "GPU"
|
|
||||||
build_args["GPU"] = f"{py_name}{gpu}"
|
|
||||||
|
|
||||||
if image_name in ["ray", "ray-deps", "ray-worker-container"]:
|
if py_version not in PY_MATRIX.keys():
|
||||||
wheel = _get_wheel_name(build_args["PYTHON_MINOR_VERSION"])
|
raise ValueError(f"The provided python version {py_version} is not "
|
||||||
build_args["WHEEL_PATH"] = f".whl/{wheel}"
|
f"recognized. Python version must be one of"
|
||||||
# Add pip option "--find-links .whl/" to ensure ray-cpp wheel
|
f" {PY_MATRIX.keys()}")
|
||||||
# can be found.
|
|
||||||
build_args["FIND_LINKS_PATH"] = ".whl"
|
|
||||||
|
|
||||||
tagged_name = f"rayproject/{image_name}:nightly{py_name}{gpu}"
|
if image_type not in BASE_IMAGES.keys():
|
||||||
for i in range(2):
|
raise ValueError(f"The provided CUDA version {image_type} is not "
|
||||||
cleanup = DOCKER_CLIENT.containers.prune().get(
|
f"recognized. CUDA version must be one of"
|
||||||
"SpaceReclaimed")
|
f" {image_type.keys()}")
|
||||||
if cleanup is not None:
|
|
||||||
print(f"Cleaned up {cleanup / (2**20)}MB")
|
|
||||||
output = DOCKER_CLIENT.api.build(
|
|
||||||
path=os.path.join(_get_root_dir(), "docker", image_name),
|
|
||||||
tag=tagged_name,
|
|
||||||
nocache=no_cache,
|
|
||||||
buildargs=build_args)
|
|
||||||
|
|
||||||
cmd_output = []
|
# TODO(https://github.com/ray-project/ray/issues/16599):
|
||||||
try:
|
# remove below after supporting ray-ml images with Python 3.9
|
||||||
start = datetime.datetime.now()
|
if image_name == "ray-ml" and py_version == "py39":
|
||||||
current_iter = start
|
print(f"{image_name} image is currently unsupported with "
|
||||||
for line in output:
|
"Python 3.9")
|
||||||
cmd_output.append(line.decode("utf-8"))
|
return
|
||||||
if datetime.datetime.now(
|
|
||||||
) - current_iter >= datetime.timedelta(minutes=5):
|
|
||||||
current_iter = datetime.datetime.now()
|
|
||||||
elapsed = datetime.datetime.now() - start
|
|
||||||
print(f"Still building {tagged_name} after "
|
|
||||||
f"{elapsed.seconds} seconds")
|
|
||||||
if elapsed >= datetime.timedelta(minutes=15):
|
|
||||||
print("Additional build output:")
|
|
||||||
print(*cmd_output, sep="\n")
|
|
||||||
# Clear cmd_output after printing, so the next
|
|
||||||
# iteration will not print out the same lines.
|
|
||||||
cmd_output = []
|
|
||||||
except Exception as e:
|
|
||||||
print(f"FAILURE with error {e}")
|
|
||||||
|
|
||||||
if len(DOCKER_CLIENT.api.images(tagged_name)) == 0:
|
build_args = {}
|
||||||
print(f"ERROR building: {tagged_name}. Output below:")
|
build_args["PYTHON_VERSION"] = PY_MATRIX[py_version]
|
||||||
print(*cmd_output, sep="\n")
|
# I.e. "py36"[-1] == 6
|
||||||
if (i == 1):
|
build_args["PYTHON_MINOR_VERSION"] = py_version[-1]
|
||||||
raise Exception("FAILED TO BUILD IMAGE")
|
|
||||||
print("TRYING AGAIN")
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
|
|
||||||
print("BUILT: ", tagged_name)
|
device_tag = f"{image_type}"
|
||||||
built_images.append(tagged_name)
|
|
||||||
return built_images
|
if image_name == "base-deps":
|
||||||
|
base_image = BASE_IMAGES[image_type]
|
||||||
|
else:
|
||||||
|
base_image = f"-{py_version}-{device_tag}"
|
||||||
|
|
||||||
|
if image_name != "ray-worker-container":
|
||||||
|
build_args["BASE_IMAGE"] = base_image
|
||||||
|
|
||||||
|
if image_name in ["ray", "ray-deps", "ray-worker-container"]:
|
||||||
|
wheel = _get_wheel_name(build_args["PYTHON_MINOR_VERSION"])
|
||||||
|
build_args["WHEEL_PATH"] = f".whl/{wheel}"
|
||||||
|
# Add pip option "--find-links .whl/" to ensure ray-cpp wheel
|
||||||
|
# can be found.
|
||||||
|
build_args["FIND_LINKS_PATH"] = ".whl"
|
||||||
|
|
||||||
|
tagged_name = f"rayproject/{image_name}:nightly-{py_version}-{device_tag}"
|
||||||
|
|
||||||
|
for i in range(2):
|
||||||
|
cleanup = DOCKER_CLIENT.containers.prune().get("SpaceReclaimed")
|
||||||
|
if cleanup is not None:
|
||||||
|
print(f"Cleaned up {cleanup / (2 ** 20)}MB")
|
||||||
|
|
||||||
|
labels = {
|
||||||
|
"image-name": image_name,
|
||||||
|
"python-version": PY_MATRIX[py_version]
|
||||||
|
}
|
||||||
|
if image_type in CUDA_FULL:
|
||||||
|
labels["cuda-version"] = CUDA_FULL[image_type]
|
||||||
|
|
||||||
|
output = DOCKER_CLIENT.api.build(
|
||||||
|
path=os.path.join(_get_root_dir(), "docker", image_name),
|
||||||
|
tag=tagged_name,
|
||||||
|
nocache=no_cache,
|
||||||
|
labels=labels,
|
||||||
|
buildargs=build_args)
|
||||||
|
|
||||||
|
cmd_output = []
|
||||||
|
try:
|
||||||
|
start = datetime.datetime.now()
|
||||||
|
current_iter = start
|
||||||
|
for line in output:
|
||||||
|
cmd_output.append(line.decode("utf-8"))
|
||||||
|
if datetime.datetime.now(
|
||||||
|
) - current_iter >= datetime.timedelta(minutes=5):
|
||||||
|
current_iter = datetime.datetime.now()
|
||||||
|
elapsed = datetime.datetime.now() - start
|
||||||
|
print(f"Still building {tagged_name} after "
|
||||||
|
f"{elapsed.seconds} seconds")
|
||||||
|
if elapsed >= datetime.timedelta(minutes=15):
|
||||||
|
print("Additional build output:")
|
||||||
|
print(*cmd_output, sep="\n")
|
||||||
|
# Clear cmd_output after printing, so the next
|
||||||
|
# iteration will not print out the same lines.
|
||||||
|
cmd_output = []
|
||||||
|
except Exception as e:
|
||||||
|
print(f"FAILURE with error {e}")
|
||||||
|
|
||||||
|
if len(DOCKER_CLIENT.api.images(tagged_name)) == 0:
|
||||||
|
print(f"ERROR building: {tagged_name}. Output below:")
|
||||||
|
print(*cmd_output, sep="\n")
|
||||||
|
if i == 1:
|
||||||
|
raise Exception("FAILED TO BUILD IMAGE")
|
||||||
|
print("TRYING AGAIN")
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
|
print("BUILT: ", tagged_name)
|
||||||
|
|
||||||
|
|
||||||
def copy_wheels(human_build):
|
def copy_wheels(human_build):
|
||||||
|
@ -218,41 +275,66 @@ def copy_wheels(human_build):
|
||||||
shutil.copy(source, ray_worker_container_dst)
|
shutil.copy(source, ray_worker_container_dst)
|
||||||
|
|
||||||
|
|
||||||
def build_or_pull_base_images(rebuild_base_images: bool = True) -> List[str]:
|
def check_staleness(repository, tag):
|
||||||
"""Returns images to tag and build"""
|
DOCKER_CLIENT.api.pull(repository=repository, tag=tag)
|
||||||
DOCKER_CLIENT.api.pull(repository="rayproject/base-deps", tag="nightly")
|
|
||||||
|
|
||||||
age = DOCKER_CLIENT.api.inspect_image("rayproject/base-deps:nightly")[
|
age = DOCKER_CLIENT.api.inspect_image(f"{repository}:{tag}")["Created"]
|
||||||
"Created"]
|
|
||||||
short_date = datetime.datetime.strptime(age.split("T")[0], "%Y-%m-%d")
|
short_date = datetime.datetime.strptime(age.split("T")[0], "%Y-%m-%d")
|
||||||
is_stale = (
|
is_stale = (
|
||||||
datetime.datetime.now() - short_date) > datetime.timedelta(days=14)
|
datetime.datetime.now() - short_date) > datetime.timedelta(days=14)
|
||||||
|
return is_stale
|
||||||
|
|
||||||
print("Pulling images for caching")
|
|
||||||
|
|
||||||
DOCKER_CLIENT.api.pull(
|
def build_for_all_versions(image_name, py_versions, image_types, **kwargs):
|
||||||
repository="rayproject/base-deps", tag="nightly-cpu")
|
"""Builds the given Docker image for all Python & CUDA versions"""
|
||||||
DOCKER_CLIENT.api.pull(
|
for py_version in py_versions:
|
||||||
repository="rayproject/base-deps", tag="nightly-gpu")
|
for image_type in image_types:
|
||||||
|
_build_docker_image(
|
||||||
|
image_name,
|
||||||
|
py_version=py_version,
|
||||||
|
image_type=image_type,
|
||||||
|
**kwargs)
|
||||||
|
|
||||||
DOCKER_CLIENT.api.pull(repository="rayproject/ray-deps", tag="nightly-gpu")
|
|
||||||
DOCKER_CLIENT.api.pull(repository="rayproject/ray-deps", tag="nightly-cpu")
|
|
||||||
|
|
||||||
# TODO(ilr) See if any caching happens
|
def build_base_images(py_versions, image_types):
|
||||||
if (rebuild_base_images or is_stale or _release_build()):
|
build_for_all_versions(
|
||||||
for image in ["base-deps", "ray-deps"]:
|
"base-deps", py_versions, image_types, no_cache=False)
|
||||||
_build_cpu_gpu_images(image, no_cache=False)
|
build_for_all_versions(
|
||||||
|
"ray-deps", py_versions, image_types, no_cache=False)
|
||||||
|
|
||||||
|
|
||||||
|
def build_or_pull_base_images(py_versions: List[str],
|
||||||
|
image_types: List[str],
|
||||||
|
rebuild_base_images: bool = True) -> bool:
|
||||||
|
"""Returns images to tag and build."""
|
||||||
|
repositories = ["rayproject/base-deps", "rayproject/ray-deps"]
|
||||||
|
tags = [
|
||||||
|
f"nightly-{py_version}-{image_type}"
|
||||||
|
for py_version, image_type in itertools.product(
|
||||||
|
py_versions, image_types)
|
||||||
|
]
|
||||||
|
|
||||||
|
try:
|
||||||
|
is_stale = check_staleness(repositories[0], tags[0])
|
||||||
|
|
||||||
|
# We still pull even if we have to rebuild the base images to help with
|
||||||
|
# caching.
|
||||||
|
for repository in repositories:
|
||||||
|
for tag in tags:
|
||||||
|
DOCKER_CLIENT.api.pull(repository=repository, tag=tag)
|
||||||
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
|
is_stale = True
|
||||||
|
|
||||||
|
if rebuild_base_images or _release_build() or is_stale:
|
||||||
|
build_base_images(py_versions, image_types)
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
print("Just pulling images!")
|
print("Just pulling images!")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def build_ray():
|
def prep_ray_ml():
|
||||||
return _build_cpu_gpu_images("ray")
|
|
||||||
|
|
||||||
|
|
||||||
def build_ray_ml():
|
|
||||||
root_dir = _get_root_dir()
|
root_dir = _get_root_dir()
|
||||||
requirement_files = glob.glob(
|
requirement_files = glob.glob(
|
||||||
f"{_get_root_dir()}/python/**/requirements*.txt", recursive=True)
|
f"{_get_root_dir()}/python/**/requirements*.txt", recursive=True)
|
||||||
|
@ -261,11 +343,6 @@ def build_ray_ml():
|
||||||
# Install atari roms script
|
# Install atari roms script
|
||||||
shutil.copy(f"{_get_root_dir()}/rllib/utils/install_atari_roms.sh",
|
shutil.copy(f"{_get_root_dir()}/rllib/utils/install_atari_roms.sh",
|
||||||
os.path.join(root_dir, "docker/ray-ml/"))
|
os.path.join(root_dir, "docker/ray-ml/"))
|
||||||
ray_ml_images = _build_cpu_gpu_images("ray-ml")
|
|
||||||
for img in ray_ml_images:
|
|
||||||
tag = img.split(":")[-1]
|
|
||||||
DOCKER_CLIENT.api.tag(
|
|
||||||
image=img, repository="rayproject/autoscaler", tag=tag)
|
|
||||||
|
|
||||||
|
|
||||||
def _get_docker_creds() -> Tuple[str, str]:
|
def _get_docker_creds() -> Tuple[str, str]:
|
||||||
|
@ -274,39 +351,52 @@ def _get_docker_creds() -> Tuple[str, str]:
|
||||||
return DOCKER_USERNAME, docker_password
|
return DOCKER_USERNAME, docker_password
|
||||||
|
|
||||||
|
|
||||||
def build_ray_worker_container():
|
def _docker_push(image, tag):
|
||||||
return _build_cpu_gpu_images("ray-worker-container")
|
print(f"PUSHING: {image}:{tag}, result:")
|
||||||
|
# This docker API is janky. Without "stream=True" it returns a
|
||||||
|
# massive string filled with every progress bar update, which can
|
||||||
|
# cause CI to back up.
|
||||||
|
#
|
||||||
|
# With stream=True, it's a line-at-a-time generator of the same
|
||||||
|
# info. So we can slow it down by printing every couple hundred
|
||||||
|
# lines
|
||||||
|
i = 0
|
||||||
|
for progress_line in DOCKER_CLIENT.api.push(image, tag=tag, stream=True):
|
||||||
|
if i % 100 == 0:
|
||||||
|
print(progress_line)
|
||||||
|
|
||||||
|
|
||||||
|
def _tag_and_push(full_image_name, old_tag, new_tag, merge_build=False):
|
||||||
|
# Do not tag release builds because they are no longer up to
|
||||||
|
# date after the branch cut.
|
||||||
|
if "nightly" in new_tag and _release_build():
|
||||||
|
return
|
||||||
|
if old_tag != new_tag:
|
||||||
|
DOCKER_CLIENT.api.tag(
|
||||||
|
image=f"{full_image_name}:{old_tag}",
|
||||||
|
repository=full_image_name,
|
||||||
|
tag=new_tag)
|
||||||
|
if not merge_build:
|
||||||
|
print("This is a PR Build! On a merge build, we would normally push"
|
||||||
|
f"to: {full_image_name}:{new_tag}")
|
||||||
|
else:
|
||||||
|
_docker_push(full_image_name, new_tag)
|
||||||
|
|
||||||
|
|
||||||
|
def _create_new_tags(all_tags, old_str, new_str):
|
||||||
|
new_tags = []
|
||||||
|
for full_tag in all_tags:
|
||||||
|
new_tag = full_tag.replace(old_str, new_str)
|
||||||
|
new_tags.append(new_tag)
|
||||||
|
return new_tags
|
||||||
|
|
||||||
|
|
||||||
# For non-release builds, push "nightly" & "sha"
|
# For non-release builds, push "nightly" & "sha"
|
||||||
# For release builds, push "nightly" & "latest" & "x.x.x"
|
# For release builds, push "nightly" & "latest" & "x.x.x"
|
||||||
def push_and_tag_images(push_base_images: bool, merge_build: bool = False):
|
def push_and_tag_images(py_versions: List[str],
|
||||||
def docker_push(image, tag):
|
image_types: List[str],
|
||||||
# Do not tag release builds because they are no longer up to
|
push_base_images: bool,
|
||||||
# date after the branch cut.
|
merge_build: bool = False):
|
||||||
if "nightly" in tag and _release_build():
|
|
||||||
return
|
|
||||||
if merge_build:
|
|
||||||
print(f"PUSHING: {image}:{tag}, result:")
|
|
||||||
# This docker API is janky. Without "stream=True" it returns a
|
|
||||||
# massive string filled with every progress bar update, which can
|
|
||||||
# cause CI to back up.
|
|
||||||
#
|
|
||||||
# With stream=True, it's a line-at-a-time generator of the same
|
|
||||||
# info. So we can slow it down by printing every couple hundred
|
|
||||||
# lines
|
|
||||||
i = 0
|
|
||||||
for progress_line in DOCKER_CLIENT.api.push(
|
|
||||||
image, tag=tag, stream=True):
|
|
||||||
if i % 100 == 0:
|
|
||||||
print(progress_line)
|
|
||||||
else:
|
|
||||||
print(
|
|
||||||
"This is a PR Build! On a merge build, we would normally push "
|
|
||||||
f"to: {image}:{tag}")
|
|
||||||
|
|
||||||
def get_new_tag(old_tag, new_tag):
|
|
||||||
return old_tag.replace("nightly", new_tag)
|
|
||||||
|
|
||||||
date_tag = datetime.datetime.now().strftime("%Y-%m-%d")
|
date_tag = datetime.datetime.now().strftime("%Y-%m-%d")
|
||||||
sha_tag = _get_commit_sha()
|
sha_tag = _get_commit_sha()
|
||||||
|
@ -316,61 +406,97 @@ def push_and_tag_images(push_base_images: bool, merge_build: bool = False):
|
||||||
date_tag = release_name
|
date_tag = release_name
|
||||||
sha_tag = release_name
|
sha_tag = release_name
|
||||||
|
|
||||||
image_list = ["ray", "ray-ml", "autoscaler"]
|
image_list = ["ray", "ray-ml"]
|
||||||
if push_base_images:
|
if push_base_images:
|
||||||
image_list.extend(["base-deps", "ray-deps"])
|
image_list.extend(["base-deps", "ray-deps"])
|
||||||
|
|
||||||
for image in image_list:
|
for image_name in image_list:
|
||||||
for py_name, py_version in PY_MATRIX.items():
|
full_image_name = f"rayproject/{image_name}"
|
||||||
# TODO(https://github.com/ray-project/ray/issues/16599):
|
|
||||||
# remove below after supporting ray-ml images with Python 3.9
|
|
||||||
if image in ["ray-ml", "autoscaler"
|
|
||||||
] and py_version.startswith("3.9"):
|
|
||||||
print(
|
|
||||||
f"{image} image is currently unsupported with Python 3.9")
|
|
||||||
continue
|
|
||||||
|
|
||||||
full_image = f"rayproject/{image}"
|
# Mapping from old tags to new tags.
|
||||||
|
# These are the tags we will push.
|
||||||
|
# The key is the full image name, and the values are all the tags
|
||||||
|
# for that image.
|
||||||
|
tag_mapping = defaultdict(list)
|
||||||
|
for py_name in py_versions:
|
||||||
|
for image_type in image_types:
|
||||||
|
if image_name == "ray-ml" and image_type != ML_CUDA_VERSION:
|
||||||
|
print("ML Docker image is not built for the following "
|
||||||
|
f"device type: {image_type}")
|
||||||
|
continue
|
||||||
|
|
||||||
# Tag "nightly-py3x" from "nightly-py3x-cpu"
|
# TODO(https://github.com/ray-project/ray/issues/16599):
|
||||||
DOCKER_CLIENT.api.tag(
|
# remove below after supporting ray-ml images with Python 3.9
|
||||||
image=f"{full_image}:nightly{py_name}-cpu",
|
if image_name in ["ray-ml"
|
||||||
repository=full_image,
|
] and PY_MATRIX[py_name].startswith("3.9"):
|
||||||
tag=f"nightly{py_name}")
|
print(f"{image_name} image is currently "
|
||||||
|
f"unsupported with Python 3.9")
|
||||||
|
continue
|
||||||
|
|
||||||
for arch_tag in ["-cpu", "-gpu", ""]:
|
tag = f"nightly-{py_name}-{image_type}"
|
||||||
full_arch_tag = f"nightly{py_name}{arch_tag}"
|
tag_mapping[tag].append(tag)
|
||||||
|
|
||||||
# Tag and push rayproject/<image>:nightly<py_tag><arch_tag>
|
# If no device is specified, it should map to CPU image.
|
||||||
docker_push(full_image, full_arch_tag)
|
# "-gpu" tag should refer to the ML_CUDA_VERSION
|
||||||
|
for old_tag in tag_mapping.keys():
|
||||||
|
if "cpu" in old_tag:
|
||||||
|
new_tags = _create_new_tags(
|
||||||
|
tag_mapping[old_tag], old_str="-cpu", new_str="")
|
||||||
|
tag_mapping[old_tag].extend(new_tags)
|
||||||
|
elif ML_CUDA_VERSION in old_tag:
|
||||||
|
new_tags = _create_new_tags(
|
||||||
|
tag_mapping[old_tag],
|
||||||
|
old_str=f"-{ML_CUDA_VERSION}",
|
||||||
|
new_str="-gpu")
|
||||||
|
tag_mapping[old_tag].extend(new_tags)
|
||||||
|
|
||||||
# Ex: specific_tag == "1.0.1" or "<sha>" or "<date>"
|
# No Python version specified should refer to DEFAULT_PYTHON_VERSION
|
||||||
specific_tag = get_new_tag(
|
for old_tag in tag_mapping.keys():
|
||||||
full_arch_tag, date_tag if "-deps" in image else sha_tag)
|
if DEFAULT_PYTHON_VERSION in old_tag:
|
||||||
|
new_tags = _create_new_tags(
|
||||||
|
tag_mapping[old_tag],
|
||||||
|
old_str=f"-{DEFAULT_PYTHON_VERSION}",
|
||||||
|
new_str="")
|
||||||
|
tag_mapping[old_tag].extend(new_tags)
|
||||||
|
|
||||||
# Tag and push rayproject/<image>:<sha/date><py_tag><arch_tag>
|
# For all tags, create Date/Sha tags
|
||||||
DOCKER_CLIENT.api.tag(
|
for old_tag in tag_mapping.keys():
|
||||||
image=f"{full_image}:{full_arch_tag}",
|
new_tags = _create_new_tags(
|
||||||
repository=full_image,
|
tag_mapping[old_tag],
|
||||||
tag=specific_tag)
|
old_str="nightly",
|
||||||
docker_push(full_image, specific_tag)
|
new_str=date_tag if "-deps" in image_name else sha_tag)
|
||||||
|
tag_mapping[old_tag].extend(new_tags)
|
||||||
|
|
||||||
if "-py37" in py_name:
|
# Sanity checking.
|
||||||
non_python_specific_tag = specific_tag.replace("-py37", "")
|
for old_tag in tag_mapping.keys():
|
||||||
DOCKER_CLIENT.api.tag(
|
if DEFAULT_PYTHON_VERSION in old_tag:
|
||||||
image=f"{full_image}:{full_arch_tag}",
|
if "-cpu" in old_tag:
|
||||||
repository=full_image,
|
assert "nightly-cpu" in tag_mapping[old_tag]
|
||||||
tag=non_python_specific_tag)
|
assert "nightly" in tag_mapping[old_tag]
|
||||||
# Tag and push rayproject/<image>:<sha/date><arch_tag>
|
if "-deps" in image_name:
|
||||||
docker_push(full_image, non_python_specific_tag)
|
assert f"{date_tag}-cpu" in tag_mapping[old_tag]
|
||||||
|
assert f"{date_tag}" in tag_mapping[old_tag]
|
||||||
|
else:
|
||||||
|
assert f"{sha_tag}-cpu" in tag_mapping[old_tag]
|
||||||
|
assert f"{sha_tag}" in tag_mapping[old_tag]
|
||||||
|
|
||||||
non_python_nightly_tag = full_arch_tag.replace("-py37", "")
|
elif ML_CUDA_VERSION in old_tag:
|
||||||
DOCKER_CLIENT.api.tag(
|
assert "nightly-gpu" in tag_mapping[old_tag]
|
||||||
image=f"{full_image}:{full_arch_tag}",
|
if "-deps" in image_name:
|
||||||
repository=full_image,
|
assert f"{date_tag}-gpu" in tag_mapping[old_tag]
|
||||||
tag=non_python_nightly_tag)
|
else:
|
||||||
# Tag and push rayproject/<image>:nightly<arch_tag>
|
assert f"{sha_tag}-gpu" in tag_mapping[old_tag]
|
||||||
docker_push(full_image, non_python_nightly_tag)
|
|
||||||
|
print(f"These tags will be created for {image_name}: ", tag_mapping)
|
||||||
|
|
||||||
|
# Tag and push all images.
|
||||||
|
for old_tag in tag_mapping.keys():
|
||||||
|
for new_tag in tag_mapping[old_tag]:
|
||||||
|
_tag_and_push(
|
||||||
|
full_image_name,
|
||||||
|
old_tag=old_tag,
|
||||||
|
new_tag=new_tag,
|
||||||
|
merge_build=merge_build)
|
||||||
|
|
||||||
|
|
||||||
# Push infra here:
|
# Push infra here:
|
||||||
|
@ -409,22 +535,30 @@ def push_readmes(merge_build: bool):
|
||||||
|
|
||||||
|
|
||||||
# Build base-deps/ray-deps only on file change, 2 weeks, per release
|
# Build base-deps/ray-deps only on file change, 2 weeks, per release
|
||||||
# Build ray, ray-ml, autoscaler every time
|
# Build ray, ray-ml every time
|
||||||
# build-docker-images.py --py-versions PY37 --build-type PR --rebuild-all
|
# build-docker-images.py --py-versions PY37 --build-type PR --rebuild-all
|
||||||
MERGE = "MERGE"
|
MERGE = "MERGE"
|
||||||
HUMAN = "HUMAN"
|
HUMAN = "HUMAN"
|
||||||
PR = "PR"
|
PR = "PR"
|
||||||
BUILDKITE = "BUILDKITE"
|
BUILDKITE = "BUILDKITE"
|
||||||
BUILD_TYPES = [MERGE, HUMAN, PR, BUILDKITE]
|
BUILD_TYPES = [MERGE, HUMAN, PR, BUILDKITE]
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--py-versions",
|
"--py-versions",
|
||||||
choices=["PY36", "PY37", "PY38", "PY39"],
|
choices=list(PY_MATRIX.keys()),
|
||||||
default="PY37",
|
default="py37",
|
||||||
nargs="*",
|
nargs="*",
|
||||||
help="Which python versions to build. "
|
help="Which python versions to build. "
|
||||||
"Must be in (PY36, PY37, PY38, PY39)")
|
"Must be in (py36, py37, py38, py39)")
|
||||||
|
parser.add_argument(
|
||||||
|
"--device-types",
|
||||||
|
choices=list(BASE_IMAGES.keys()),
|
||||||
|
default=None,
|
||||||
|
nargs="*",
|
||||||
|
help="Which device types (CPU/CUDA versions) to build images for. "
|
||||||
|
"If not specified, images will be built for all device types.")
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--build-type",
|
"--build-type",
|
||||||
choices=BUILD_TYPES,
|
choices=BUILD_TYPES,
|
||||||
|
@ -448,26 +582,47 @@ if __name__ == "__main__":
|
||||||
py_versions = args.py_versions
|
py_versions = args.py_versions
|
||||||
py_versions = py_versions if isinstance(py_versions,
|
py_versions = py_versions if isinstance(py_versions,
|
||||||
list) else [py_versions]
|
list) else [py_versions]
|
||||||
for key in set(PY_MATRIX.keys()):
|
|
||||||
if key[1:].upper() not in py_versions:
|
|
||||||
PY_MATRIX.pop(key)
|
|
||||||
assert len(PY_MATRIX) == len(
|
|
||||||
py_versions
|
|
||||||
), f"Length of PY_MATRIX != args {PY_MATRIX} : {args.py_versions}"
|
|
||||||
|
|
||||||
print("Building the following python versions: ", PY_MATRIX)
|
image_types = args.device_types if args.device_types else list(
|
||||||
|
BASE_IMAGES.keys())
|
||||||
|
|
||||||
|
assert set(list(CUDA_FULL.keys()) + ["cpu"]) == set(BASE_IMAGES.keys())
|
||||||
|
|
||||||
|
# Make sure the python images and cuda versions we build here are
|
||||||
|
# consistent with the ones used with fix-latest-docker.sh script.
|
||||||
|
py_version_file = os.path.join(_get_root_dir(), "docker/retag-lambda",
|
||||||
|
"python_versions.txt")
|
||||||
|
with open(py_version_file) as f:
|
||||||
|
py_file_versions = f.read().splitlines()
|
||||||
|
assert set(PY_MATRIX.keys()) == set(py_file_versions), \
|
||||||
|
(PY_MATRIX.keys(), py_file_versions)
|
||||||
|
|
||||||
|
cuda_version_file = os.path.join(_get_root_dir(), "docker/retag-lambda",
|
||||||
|
"cuda_versions.txt")
|
||||||
|
|
||||||
|
with open(cuda_version_file) as f:
|
||||||
|
cuda_file_versions = f.read().splitlines()
|
||||||
|
assert set(BASE_IMAGES.keys()) == set(cuda_file_versions + ["cpu"]),\
|
||||||
|
(BASE_IMAGES.keys(), cuda_file_versions + ["cpu"])
|
||||||
|
|
||||||
|
print("Building the following python versions: ",
|
||||||
|
[PY_MATRIX[py_version] for py_version in py_versions])
|
||||||
|
print("Building images for the following devices: ", image_types)
|
||||||
print("Building base images: ", args.base)
|
print("Building base images: ", args.base)
|
||||||
|
|
||||||
build_type = args.build_type
|
build_type = args.build_type
|
||||||
is_buildkite = build_type == BUILDKITE
|
is_buildkite = build_type == BUILDKITE
|
||||||
|
|
||||||
if build_type == BUILDKITE:
|
if build_type == BUILDKITE:
|
||||||
if os.environ.get("BUILDKITE_PULL_REQUEST", "") == "false":
|
if os.environ.get("BUILDKITE_PULL_REQUEST", "") == "false":
|
||||||
build_type = MERGE
|
build_type = MERGE
|
||||||
else:
|
else:
|
||||||
build_type = PR
|
build_type = PR
|
||||||
|
|
||||||
if build_type == HUMAN:
|
if build_type == HUMAN:
|
||||||
|
# If manually triggered, request user for branch and SHA value to use.
|
||||||
_configure_human_version()
|
_configure_human_version()
|
||||||
if (build_type in {HUMAN, MERGE} or is_buildkite
|
if (build_type in {HUMAN, MERGE, BUILDKITE}
|
||||||
or _check_if_docker_files_modified()):
|
or _check_if_docker_files_modified()):
|
||||||
DOCKER_CLIENT = docker.from_env()
|
DOCKER_CLIENT = docker.from_env()
|
||||||
is_merge = build_type == MERGE
|
is_merge = build_type == MERGE
|
||||||
|
@ -478,25 +633,31 @@ if __name__ == "__main__":
|
||||||
username, password = _get_docker_creds()
|
username, password = _get_docker_creds()
|
||||||
DOCKER_CLIENT.api.login(username=username, password=password)
|
DOCKER_CLIENT.api.login(username=username, password=password)
|
||||||
copy_wheels(build_type == HUMAN)
|
copy_wheels(build_type == HUMAN)
|
||||||
base_images_built = build_or_pull_base_images(args.base)
|
is_base_images_built = build_or_pull_base_images(
|
||||||
|
py_versions, image_types, args.base)
|
||||||
|
|
||||||
if args.only_build_worker_container:
|
if args.only_build_worker_container:
|
||||||
build_ray_worker_container()
|
build_for_all_versions("ray-worker-container", py_versions,
|
||||||
|
image_types)
|
||||||
# TODO Currently don't push ray_worker_container
|
# TODO Currently don't push ray_worker_container
|
||||||
else:
|
else:
|
||||||
build_ray()
|
# Build Ray Docker images.
|
||||||
build_ray_ml()
|
build_for_all_versions("ray", py_versions, image_types)
|
||||||
if build_type in {MERGE, PR}:
|
|
||||||
valid_branch = _valid_branch()
|
if ML_CUDA_VERSION in image_types:
|
||||||
if (not valid_branch) and is_merge:
|
# Build Ray ML Docker images only if ML_CUDA_VERSION is
|
||||||
print(f"Invalid Branch found: {_get_branch()}")
|
# specified.
|
||||||
push_and_tag_images(base_images_built, valid_branch
|
prep_ray_ml()
|
||||||
and is_merge)
|
# Only build ML Docker for the ML_CUDA_VERSION
|
||||||
|
build_for_all_versions(
|
||||||
|
"ray-ml", py_versions, image_types=[ML_CUDA_VERSION])
|
||||||
|
|
||||||
if build_type in {MERGE, PR}:
|
if build_type in {MERGE, PR}:
|
||||||
valid_branch = _valid_branch()
|
valid_branch = _valid_branch()
|
||||||
if (not valid_branch) and is_merge:
|
if (not valid_branch) and is_merge:
|
||||||
print(f"Invalid Branch found: {_get_branch()}")
|
print(f"Invalid Branch found: {_get_branch()}")
|
||||||
push_and_tag_images(base_images_built, valid_branch
|
push_and_tag_images(py_versions, image_types,
|
||||||
|
is_base_images_built, valid_branch
|
||||||
and is_merge)
|
and is_merge)
|
||||||
|
|
||||||
# TODO(ilr) Re-Enable Push READMEs by using a normal password
|
# TODO(ilr) Re-Enable Push READMEs by using a normal password
|
||||||
|
|
|
@ -290,7 +290,7 @@ Image releases are `tagged` using the following format:
|
||||||
- A specific nightly build (uses a SHA from the Github ``master``).
|
- A specific nightly build (uses a SHA from the Github ``master``).
|
||||||
|
|
||||||
|
|
||||||
Each tag has `variants` that add or change functionality:
|
Some tags also have `variants` that add or change functionality:
|
||||||
|
|
||||||
.. list-table::
|
.. list-table::
|
||||||
:widths: 16 40
|
:widths: 16 40
|
||||||
|
@ -298,10 +298,12 @@ Each tag has `variants` that add or change functionality:
|
||||||
|
|
||||||
* - Variant
|
* - Variant
|
||||||
- Description
|
- Description
|
||||||
* - -gpu
|
|
||||||
- These are based off of an NVIDIA CUDA image. They require the Nvidia Docker Runtime.
|
|
||||||
* - -cpu
|
* - -cpu
|
||||||
- These are based off of an Ubuntu image.
|
- These are based off of an Ubuntu image.
|
||||||
|
* - -cuXX
|
||||||
|
- These are based off of an NVIDIA CUDA image with the specified CUDA version. They require the Nvidia Docker Runtime.
|
||||||
|
* - -gpu
|
||||||
|
- Aliases to a specific ``-cuXX`` tagged image.
|
||||||
* - <no tag>
|
* - <no tag>
|
||||||
- Aliases to ``-cpu`` tagged images
|
- Aliases to ``-cpu`` tagged images
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
# DEPRECATED -- Please use [`rayproject/ray-ml`](https://hub.docker.com/repository/docker/rayproject/ray-ml)
|
# DEPRECATED -- Please use [`rayproject/ray-ml`](https://hub.docker.com/repository/docker/rayproject/ray-ml)
|
||||||
## About
|
## About
|
||||||
This image used to be the base image for the Ray autoscaler, but it has been replaced by [`rayproject/ray-ml`](https://hub.docker.com/repository/docker/rayproject/ray-ml).
|
This image used to be the base image for the Ray autoscaler, but it has been replaced by [`rayproject/ray-ml`](https://hub.docker.com/repository/docker/rayproject/ray-ml).
|
||||||
Please use that instead, *this image will be removed in the near future*.
|
Please use that instead, *this image is deprecated*.
|
||||||
|
|
||||||
|
|
||||||
## Tags
|
## Tags
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
# The base-deps Docker image installs main libraries needed to run Ray
|
# The base-deps Docker image installs main libraries needed to run Ray
|
||||||
|
|
||||||
# The GPU option is nvidia/cuda:11.2.0-cudnn8-devel-ubuntu18.04
|
# The GPU options are NVIDIA CUDA developer images.
|
||||||
ARG BASE_IMAGE="ubuntu:focal"
|
ARG BASE_IMAGE="ubuntu:focal"
|
||||||
FROM ${BASE_IMAGE}
|
FROM ${BASE_IMAGE}
|
||||||
# FROM directive resets ARG
|
# FROM directive resets ARG
|
||||||
|
|
|
@ -14,7 +14,7 @@ This image has the system-level dependencies for `Ray` and the `Ray Autoscaler`
|
||||||
* `:DATE` - A specific build.
|
* `:DATE` - A specific build.
|
||||||
|
|
||||||
### Suffixes
|
### Suffixes
|
||||||
* `-gpu` - These are based off of an `NVIDIA CUDA` image. They require the [Nvidia Docker Runtime](https://github.com/NVIDIA/nvidia-docker) to be installed on the host for the container to access GPUs.
|
* `-cuXXX` - These are based off of an `NVIDIA CUDA` image. They require the [Nvidia Docker Runtime](https://github.com/NVIDIA/nvidia-docker) to be installed on the host for the container to access GPUs.
|
||||||
* `-cpu` - These are based off of an `Ubuntu` image.
|
* `-cpu` - These are based off of an `Ubuntu` image.
|
||||||
* Tags without a suffix refer to `-cpu` images
|
* Tags without a suffix refer to `-cpu` images
|
||||||
|
|
||||||
|
|
|
@ -29,8 +29,6 @@ AWS_ACCESS_KEY_ID=$(echo "$ASSUME_ROLE_CREDENTIALS" | jq -r .Credentials.AccessK
|
||||||
AWS_SECRET_ACCESS_KEY=$(echo "$ASSUME_ROLE_CREDENTIALS" | jq -r .Credentials.SecretAccessKey)
|
AWS_SECRET_ACCESS_KEY=$(echo "$ASSUME_ROLE_CREDENTIALS" | jq -r .Credentials.SecretAccessKey)
|
||||||
AWS_SESSION_TOKEN=$(echo "$ASSUME_ROLE_CREDENTIALS" | jq -r .Credentials.SessionToken)
|
AWS_SESSION_TOKEN=$(echo "$ASSUME_ROLE_CREDENTIALS" | jq -r .Credentials.SessionToken)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
echo -e "Invoking this lambda!\nView logs at https://us-west-2.console.aws.amazon.com/cloudwatch/home?region=us-west-2#logsV2:log-groups"
|
echo -e "Invoking this lambda!\nView logs at https://us-west-2.console.aws.amazon.com/cloudwatch/home?region=us-west-2#logsV2:log-groups"
|
||||||
AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN AWS_SECURITY_TOKEN='' aws \
|
AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN AWS_SECURITY_TOKEN='' aws \
|
||||||
lambda invoke --function-name DockerTagLatest \
|
lambda invoke --function-name DockerTagLatest \
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
ARG GPU=""
|
ARG BASE_IMAGE=""
|
||||||
FROM rayproject/base-deps:nightly"$GPU"
|
FROM rayproject/base-deps:nightly"$BASE_IMAGE"
|
||||||
# If this arg is not "autoscaler" then no autoscaler requirements will be included
|
# If this arg is not "autoscaler" then no autoscaler requirements will be included
|
||||||
ARG AUTOSCALER="autoscaler"
|
ARG AUTOSCALER="autoscaler"
|
||||||
ARG WHEEL_PATH
|
ARG WHEEL_PATH
|
||||||
|
|
|
@ -13,7 +13,7 @@ This has the python-level dependencies for `Ray` and the `Ray Autoscaler`. The `
|
||||||
* `:DATE` - A specific build.
|
* `:DATE` - A specific build.
|
||||||
|
|
||||||
### Suffixes
|
### Suffixes
|
||||||
* `-gpu` - These are based off of an `NVIDIA CUDA` image. They require the [Nvidia Docker Runtime](https://github.com/NVIDIA/nvidia-docker) to be installed on the host for the container to access GPUs.
|
* `-cuXXX` - These are based off of an `NVIDIA CUDA` image. They require the [Nvidia Docker Runtime](https://github.com/NVIDIA/nvidia-docker) to be installed on the host for the container to access GPUs.
|
||||||
* `-cpu` - These are based off of an `Ubuntu` image.
|
* `-cpu` - These are based off of an `Ubuntu` image.
|
||||||
* Tags without a suffix refer to `-cpu` images
|
* Tags without a suffix refer to `-cpu` images
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
ARG GPU
|
ARG BASE_IMAGE
|
||||||
FROM rayproject/ray:nightly"$GPU"
|
FROM rayproject/ray:nightly"$BASE_IMAGE"
|
||||||
ARG PYTHON_MINOR_VERSION=7
|
ARG PYTHON_MINOR_VERSION=7
|
||||||
|
|
||||||
# We have to uninstall wrapt this way for Tensorflow compatibility
|
# We have to uninstall wrapt this way for Tensorflow compatibility
|
||||||
|
@ -8,6 +8,7 @@ COPY requirements_dl.txt ./
|
||||||
COPY requirements_ml_docker.txt ./
|
COPY requirements_ml_docker.txt ./
|
||||||
COPY requirements_rllib.txt ./
|
COPY requirements_rllib.txt ./
|
||||||
COPY requirements_tune.txt ./requirements_tune.txt
|
COPY requirements_tune.txt ./requirements_tune.txt
|
||||||
|
COPY requirements_upstream.txt ./
|
||||||
COPY install_atari_roms.sh ./install_atari_roms.sh
|
COPY install_atari_roms.sh ./install_atari_roms.sh
|
||||||
|
|
||||||
RUN sudo apt-get update \
|
RUN sudo apt-get update \
|
||||||
|
@ -23,11 +24,13 @@ RUN sudo apt-get update \
|
||||||
&& $HOME/anaconda3/bin/pip --no-cache-dir install -U -r requirements_rllib.txt \
|
&& $HOME/anaconda3/bin/pip --no-cache-dir install -U -r requirements_rllib.txt \
|
||||||
&& $HOME/anaconda3/bin/pip --no-cache-dir install -U -r requirements_tune.txt \
|
&& $HOME/anaconda3/bin/pip --no-cache-dir install -U -r requirements_tune.txt \
|
||||||
&& $HOME/anaconda3/bin/pip --no-cache-dir install -U -r requirements_ml_docker.txt \
|
&& $HOME/anaconda3/bin/pip --no-cache-dir install -U -r requirements_ml_docker.txt \
|
||||||
|
&& $HOME/anaconda3/bin/pip --no-cache-dir install -U -r requirements_upstream.txt \
|
||||||
# Remove dataclasses & typing because they are included in Python > 3.6
|
# Remove dataclasses & typing because they are included in Python > 3.6
|
||||||
&& if [ $(python -c 'import sys; print(sys.version_info.minor)') != "6" ]; then \
|
&& if [ $(python -c 'import sys; print(sys.version_info.minor)') != "6" ]; then \
|
||||||
$HOME/anaconda3/bin/pip uninstall dataclasses typing -y; fi \
|
$HOME/anaconda3/bin/pip uninstall dataclasses typing -y; fi \
|
||||||
&& sudo rm requirements.txt && sudo rm requirements_ml_docker.txt \
|
&& sudo rm requirements.txt && sudo rm requirements_ml_docker.txt \
|
||||||
&& sudo rm requirements_tune.txt && sudo rm requirements_rllib.txt \
|
&& sudo rm requirements_tune.txt && sudo rm requirements_rllib.txt \
|
||||||
|
&& sudo rm requirements_upstream.txt \
|
||||||
&& sudo apt-get clean
|
&& sudo apt-get clean
|
||||||
|
|
||||||
# Make sure tfp is installed correctly and matches tf version.
|
# Make sure tfp is installed correctly and matches tf version.
|
||||||
|
|
|
@ -11,9 +11,7 @@ This image is an extension of the [`rayproject/ray`](https://hub.docker.com/repo
|
||||||
* `:SHA` - A specific nightly build.
|
* `:SHA` - A specific nightly build.
|
||||||
|
|
||||||
### Suffixes
|
### Suffixes
|
||||||
* `-gpu` - These are based off of an `NVIDIA CUDA` image. They require the [Nvidia Docker Runtime](https://github.com/NVIDIA/nvidia-docker) to be installed on the host for the container to access GPUs.
|
* `-gpu` - These are based off of an `NVIDIA CUDA` image. They require the [Nvidia Docker Runtime](https://github.com/NVIDIA/nvidia-docker) to be installed on the host for the container to access GPUs.
|
||||||
* `-cpu` - These are based off of an `Ubuntu` image.
|
|
||||||
* Tags without a suffix refer to `-cpu` images
|
|
||||||
|
|
||||||
## Other Images
|
## Other Images
|
||||||
* [`rayproject/ray`](https://hub.docker.com/repository/docker/rayproject/ray) - Ray and all of its dependencies.
|
* [`rayproject/ray`](https://hub.docker.com/repository/docker/rayproject/ray) - Ray and all of its dependencies.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
ARG GPU
|
ARG BASE_IMAGE
|
||||||
FROM rayproject/ray-deps:nightly"$GPU"
|
FROM rayproject/ray-deps:nightly"$BASE_IMAGE"
|
||||||
ARG WHEEL_PATH
|
ARG WHEEL_PATH
|
||||||
ARG FIND_LINKS_PATH=".whl"
|
ARG FIND_LINKS_PATH=".whl"
|
||||||
# For Click
|
# For Click
|
||||||
|
|
|
@ -12,7 +12,7 @@ everything needed to get started with running Ray! They work for both local deve
|
||||||
* `:SHA` - A specific nightly build.
|
* `:SHA` - A specific nightly build.
|
||||||
|
|
||||||
### Suffixes
|
### Suffixes
|
||||||
* `-gpu` - These are based off of an `NVIDIA CUDA` image. They require the [Nvidia Docker Runtime](https://github.com/NVIDIA/nvidia-docker) to be installed on the host for the container to access GPUs.
|
* `-cuXXX` - These are based off of an `NVIDIA CUDA` image. They require the [Nvidia Docker Runtime](https://github.com/NVIDIA/nvidia-docker) to be installed on the host for the container to access GPUs.
|
||||||
* `-cpu` - These are based off of an `Ubuntu` image.
|
* `-cpu` - These are based off of an `Ubuntu` image.
|
||||||
* Tags without a suffix refer to `-cpu` images
|
* Tags without a suffix refer to `-cpu` images
|
||||||
|
|
||||||
|
|
5
docker/retag-lambda/cuda_versions.txt
Normal file
5
docker/retag-lambda/cuda_versions.txt
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
cu112
|
||||||
|
cu111
|
||||||
|
cu110
|
||||||
|
cu102
|
||||||
|
cu101
|
|
@ -1,4 +1,5 @@
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
||||||
import boto3
|
import boto3
|
||||||
|
@ -7,6 +8,10 @@ DOCKER_USER = None
|
||||||
DOCKER_PASS = None
|
DOCKER_PASS = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_curr_dir():
    """Return the directory that contains this module's file.

    ``os.path.realpath`` canonicalizes ``__file__`` (resolving symlinks)
    before the directory part is taken, so the result is an absolute path.
    """
    return os.path.dirname(os.path.realpath(__file__))
|
||||||
|
|
||||||
|
|
||||||
def get_secrets():
|
def get_secrets():
|
||||||
global DOCKER_PASS, DOCKER_USER
|
global DOCKER_PASS, DOCKER_USER
|
||||||
secret_name = "dockerRetagLatestCredentials"
|
secret_name = "dockerRetagLatestCredentials"
|
||||||
|
@ -36,15 +41,30 @@ def retag(repo: str, source: str, destination: str) -> str:
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
def parse_versions(version_file):
    """Read a versions file and return its entries as a list of strings.

    The file holds one entry per line. Surrounding whitespace is stripped
    and blank lines are skipped, so a stray empty or padded line cannot
    produce an empty or malformed entry.

    Args:
        version_file: Path of the text file to read.

    Returns:
        The non-blank, stripped lines of the file, in file order.
    """
    with open(version_file, encoding="utf-8") as f:
        stripped = (line.strip() for line in f.read().splitlines())
        # Materialize inside the ``with`` so the file is read before closing.
        return [entry for entry in stripped if entry]
|
||||||
|
|
||||||
|
|
||||||
def lambda_handler(event, context):
|
def lambda_handler(event, context):
|
||||||
source_image = event["source_tag"]
|
source_image = event["source_tag"]
|
||||||
destination_image = event["destination_tag"]
|
destination_image = event["destination_tag"]
|
||||||
total_results = []
|
total_results = []
|
||||||
for repo in ["ray", "ray-ml", "autoscaler"]:
|
python_versions = parse_versions(
|
||||||
|
os.path.join(_get_curr_dir(), "python_versions.txt"))
|
||||||
|
cuda_versions = parse_versions(
|
||||||
|
os.path.join(_get_curr_dir(), "cuda_versions.txt"))
|
||||||
|
for repo in ["ray", "ray-ml"]:
|
||||||
results = []
|
results = []
|
||||||
for pyversion in ["py36", "py37", "py38", "py39"]:
|
for pyversion in python_versions:
|
||||||
source_tag = f"{source_image}-{pyversion}"
|
source_tag = f"{source_image}-{pyversion}"
|
||||||
destination_tag = f"{destination_image}-{pyversion}"
|
destination_tag = f"{destination_image}-{pyversion}"
|
||||||
|
for cudaversion in cuda_versions:
|
||||||
|
cuda_source_tag = source_tag + f"-{cudaversion}"
|
||||||
|
cuda_destination_tag = destination_tag + f"-{cudaversion}"
|
||||||
|
results.append(
|
||||||
|
retag(repo, cuda_source_tag, cuda_destination_tag))
|
||||||
results.append(retag(repo, source_tag, destination_tag))
|
results.append(retag(repo, source_tag, destination_tag))
|
||||||
results.append(retag(repo, source_tag, destination_tag + "-cpu"))
|
results.append(retag(repo, source_tag, destination_tag + "-cpu"))
|
||||||
results.append(
|
results.append(
|
||||||
|
@ -54,7 +74,13 @@ def lambda_handler(event, context):
|
||||||
|
|
||||||
# Retag images without a python version specified (defaults to py37)
|
# Retag images without a python version specified (defaults to py37)
|
||||||
results = []
|
results = []
|
||||||
for repo in ["ray", "ray-ml", "autoscaler", "ray-deps", "base-deps"]:
|
for repo in ["ray", "ray-ml", "ray-deps", "base-deps"]:
|
||||||
|
for cudaversion in cuda_versions:
|
||||||
|
source_tag = f"{source_image}-{cudaversion}"
|
||||||
|
destination_tag = f"{destination_image}-{cudaversion}"
|
||||||
|
results.append(retag(repo, source_tag, destination_tag))
|
||||||
|
|
||||||
|
# ray:nightly -> ray:1.x
|
||||||
results.append(retag(repo, source_image, destination_image))
|
results.append(retag(repo, source_image, destination_image))
|
||||||
results.append(retag(repo, source_image, destination_image + "-cpu"))
|
results.append(retag(repo, source_image, destination_image + "-cpu"))
|
||||||
results.append(
|
results.append(
|
||||||
|
|
4
docker/retag-lambda/python_versions.txt
Normal file
4
docker/retag-lambda/python_versions.txt
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
py36
|
||||||
|
py37
|
||||||
|
py38
|
||||||
|
py39
|
Loading…
Add table
Reference in a new issue