[autoscaler] Add rsync_exclude and rsync_filter options to cluster config (#11512)

This commit is contained in:
Alan Guo 2020-10-21 14:28:33 -07:00 committed by GitHub
parent 9522918fa2
commit 8c82369cad
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 273 additions and 25 deletions

View file

@ -526,6 +526,10 @@ class StandardAutoscaler:
file_mounts_contents_hash=self.file_mounts_contents_hash,
is_head_node=False,
cluster_synced_files=self.config["cluster_synced_files"],
rsync_options={
"rsync_exclude": self.config.get("rsync_exclude"),
"rsync_filter": self.config.get("rsync_filter")
},
process_runner=self.process_runner,
use_internal_ip=True,
docker_config=docker_config,

View file

@ -24,6 +24,7 @@ from ray.autoscaler._private.subprocess_output_util import (
run_cmd_redirected, ProcessRunnerError, is_output_redirected)
from ray.autoscaler._private.cli_logger import cli_logger, cf
from ray.util.debug import log_once
logger = logging.getLogger(__name__)
@ -180,6 +181,15 @@ class KubernetesCommandRunner(CommandRunnerInterface):
raise
def run_rsync_up(self, source, target, options=None):
options = options or {}
if options.get("rsync_exclude"):
if log_once("autoscaler_k8s_rsync_exclude"):
logger.warning("'rsync_exclude' detected but is currently "
"unsupported for k8s.")
if options.get("rsync_filter"):
if log_once("autoscaler_k8s_rsync_filter"):
logger.warning("'rsync_filter' detected but is currently "
"unsupported for k8s.")
if target.startswith("~"):
target = "/root" + target[1:]
@ -479,14 +489,35 @@ class SSHCommandRunner(CommandRunnerInterface):
else:
return self._run_helper(final_cmd, with_output, exit_on_fail)
def _create_rsync_filter_args(self, options):
rsync_excludes = options.get("rsync_exclude") or []
rsync_filters = options.get("rsync_filter") or []
exclude_args = [["--exclude", rsync_exclude]
for rsync_exclude in rsync_excludes]
filter_args = [["--filter", "dir-merge,- {}".format(rsync_filter)]
for rsync_filter in rsync_filters]
# Combine and flatten the two lists
return [
arg for args_list in exclude_args + filter_args
for arg in args_list
]
def run_rsync_up(self, source, target, options=None):
    """Copy the local ``source`` to ``target`` on the remote node via rsync.

    Args:
        source: Local path handed to rsync as the source.
        target: Remote path; combined with the ssh user and ip into
            ``user@ip:target``.
        options: Optional mapping; its "rsync_exclude" and "rsync_filter"
            entries are turned into ``--exclude`` / ``--filter``
            arguments. ``None`` is treated as no options.
    """
    self._set_ssh_ip_if_required()
    options = options or {}

    command = ["rsync"]
    # Tell rsync to tunnel through ssh using this runner's ssh options.
    command += [
        "--rsh",
        subprocess.list2cmdline(
            ["ssh"] + self.ssh_options.to_ssh_options_list(timeout=120))
    ]
    command += ["-avz"]
    # Exclude/filter flags are placed before the source and destination.
    command += self._create_rsync_filter_args(options=options)
    command += [
        source, "{}@{}:{}".format(self.ssh_user, self.ssh_ip, target)
    ]
    cli_logger.verbose("Running `{}`", cf.bold(" ".join(command)))
    self._run_helper(command, silent=is_rsync_silent())
@ -494,12 +525,16 @@ class SSHCommandRunner(CommandRunnerInterface):
def run_rsync_down(self, source, target, options=None):
    """Copy ``source`` on the remote node to the local ``target`` via rsync.

    Args:
        source: Remote path; combined with the ssh user and ip into
            ``user@ip:source``.
        target: Local path handed to rsync as the destination.
        options: Optional mapping; its "rsync_exclude" and "rsync_filter"
            entries are turned into ``--exclude`` / ``--filter``
            arguments. ``None`` is treated as no options.
    """
    self._set_ssh_ip_if_required()
    # options defaults to None; normalize it (as run_rsync_up does) so the
    # filter-argument helper can safely call .get() on it.
    options = options or {}

    command = ["rsync"]
    # Tell rsync to tunnel through ssh using this runner's ssh options.
    command += [
        "--rsh",
        subprocess.list2cmdline(
            ["ssh"] + self.ssh_options.to_ssh_options_list(timeout=120))
    ]
    command += ["-avz"]
    # Exclude/filter flags are placed before the source and destination.
    command += self._create_rsync_filter_args(options=options)
    command += [
        "{}@{}:{}".format(self.ssh_user, self.ssh_ip, source), target
    ]
    cli_logger.verbose("Running `{}`", cf.bold(" ".join(command)))
    self._run_helper(command, silent=is_rsync_silent())
@ -569,9 +604,9 @@ class DockerCommandRunner(CommandRunnerInterface):
f"mkdir -p {os.path.dirname(host_destination.rstrip('/'))}")
self.ssh_command_runner.run_rsync_up(
source, host_destination, options=None)
source, host_destination, options=options)
if self._check_container_status() and not options.get(
"file_mount", False):
"docker_mount_if_possible", False):
if os.path.isdir(source):
# Adding a "." means that docker copies the *contents*
# Without it, docker copies the source *into* the target
@ -589,12 +624,12 @@ class DockerCommandRunner(CommandRunnerInterface):
source += "."
# Adding a "." means that docker copies the *contents*
# Without it, docker copies the source *into* the target
if not options.get("file_mount", False):
if not options.get("docker_mount_if_possible", False):
self.ssh_command_runner.run("docker cp {}:{} {}".format(
self.container_name, self._docker_expand_user(source),
host_source))
self.ssh_command_runner.run_rsync_down(
host_source, target, options=None)
host_source, target, options=options)
def remote_shell_command_str(self):
inner_str = self.ssh_command_runner.remote_shell_command_str().replace(

View file

@ -698,6 +698,10 @@ def get_or_create_head_node(config: Dict[str, Any],
runtime_hash=runtime_hash,
file_mounts_contents_hash=file_mounts_contents_hash,
is_head_node=True,
rsync_options={
"rsync_exclude": config.get("rsync_exclude"),
"rsync_filter": config.get("rsync_filter")
},
docker_config=config.get("docker"))
updater.start()
updater.join()
@ -867,6 +871,10 @@ def exec_cluster(config_file: str,
runtime_hash="",
file_mounts_contents_hash="",
is_head_node=True,
rsync_options={
"rsync_exclude": config.get("rsync_exclude"),
"rsync_filter": config.get("rsync_filter")
},
docker_config=config.get("docker"))
shutdown_after_run = False
if cmd and stop:
@ -1002,6 +1010,10 @@ def rsync(config_file: str,
process_runner=_runner,
file_mounts_contents_hash="",
is_head_node=is_head_node,
rsync_options={
"rsync_exclude": config.get("rsync_exclude"),
"rsync_filter": config.get("rsync_filter")
},
docker_config=config.get("docker"))
if down:
rsync = updater.rsync_down

View file

@ -39,6 +39,7 @@ class NodeUpdater:
runtime_hash: Used to check for config changes
file_mounts_contents_hash: Used to check for changes to file mounts
is_head_node: Whether to use head start/setup commands
rsync_options: Extra options related to the rsync command.
process_runner: the module to use to run the commands
in the CommandRunner. E.g., subprocess.
use_internal_ip: Whether the node_id belongs to an internal ip
@ -61,6 +62,7 @@ class NodeUpdater:
is_head_node,
node_resources=None,
cluster_synced_files=None,
rsync_options=None,
process_runner=subprocess,
use_internal_ip=False,
docker_config=None):
@ -98,6 +100,7 @@ class NodeUpdater:
self.cluster_synced_files = [
os.path.expanduser(path) for path in cluster_synced_files
]
self.rsync_options = rsync_options or {}
self.auth_config = auth_config
self.is_head_node = is_head_node
self.docker_config = docker_config
@ -200,7 +203,8 @@ class NodeUpdater:
self.cmd_runner.run(
"mkdir -p {}".format(os.path.dirname(remote_path)),
run_env="host")
sync_cmd(local_path, remote_path, file_mount=True)
sync_cmd(
local_path, remote_path, docker_mount_if_possible=True)
if remote_path not in nolog_paths:
# todo: timed here?
@ -431,22 +435,26 @@ class NodeUpdater:
raise click.ClickException("Start command failed.")
def rsync_up(self, source, target, docker_mount_if_possible=False):
    """Sync the local ``source`` to ``target`` on the node.

    Args:
        source: Local path passed to the command runner.
        target: Remote path passed to the command runner.
        docker_mount_if_possible: Forwarded to the command runner under
            the "docker_mount_if_possible" option key.
    """
    cli_logger.old_info(logger, "{}Syncing {} to {}...", self.log_prefix,
                        source, target)

    # Per-call flag plus the cluster-wide rsync settings given to the
    # updater at construction time.
    options = {
        "docker_mount_if_possible": docker_mount_if_possible,
        "rsync_exclude": self.rsync_options.get("rsync_exclude"),
        "rsync_filter": self.rsync_options.get("rsync_filter"),
    }
    self.cmd_runner.run_rsync_up(source, target, options=options)
    cli_logger.verbose("`rsync`ed {} (local) to {} (remote)",
                       cf.bold(source), cf.bold(target))
def rsync_down(self, source, target, docker_mount_if_possible=False):
    """Sync ``source`` on the node down to the local ``target``.

    Args:
        source: Remote path passed to the command runner.
        target: Local path passed to the command runner.
        docker_mount_if_possible: Forwarded to the command runner under
            the "docker_mount_if_possible" option key.
    """
    cli_logger.old_info(logger, "{}Syncing {} from {}...", self.log_prefix,
                        source, target)

    # Per-call flag plus the cluster-wide rsync settings given to the
    # updater at construction time.
    options = {
        "docker_mount_if_possible": docker_mount_if_possible,
        "rsync_exclude": self.rsync_options.get("rsync_exclude"),
        "rsync_filter": self.rsync_options.get("rsync_filter"),
    }
    self.cmd_runner.run_rsync_down(source, target, options=options)
    cli_logger.verbose("`rsync`ed {} (remote) to {} (local)",
                       cf.bold(source), cf.bold(target))

View file

@ -104,6 +104,14 @@ cluster_synced_files: []
# should sync to the worker node continuously
file_mounts_sync_continuously: False
# Patterns for files to exclude when running rsync up or rsync down
rsync_exclude: []
# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
rsync_filter: []
# List of commands that will be run before `setup_commands`. If docker is
# enabled, these commands will run outside the container and before docker
# is setup.

View file

@ -117,6 +117,17 @@ cluster_synced_files: []
# should sync to the worker node continuously
file_mounts_sync_continuously: False
# Patterns for files to exclude when running rsync up or rsync down
rsync_exclude:
- "**/.git"
- "**/.git/**"
# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
rsync_filter:
- ".gitignore"
# List of commands that will be run before `setup_commands`. If docker is
# enabled, these commands will run outside the container and before docker
# is setup.

View file

@ -98,6 +98,14 @@ cluster_synced_files: []
# should sync to the worker node continuously
file_mounts_sync_continuously: False
# Patterns for files to exclude when running rsync up or rsync down
rsync_exclude: []
# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
rsync_filter: []
# List of commands that will be run before `setup_commands`. If docker is
# enabled, these commands will run outside the container and before docker
# is setup.

View file

@ -111,6 +111,17 @@ cluster_synced_files: []
# should sync to the worker node continuously
file_mounts_sync_continuously: False
# Patterns for files to exclude when running rsync up or rsync down
rsync_exclude:
- "**/.git"
- "**/.git/**"
# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
rsync_filter:
- ".gitignore"
# List of commands that will be run before `setup_commands`. If docker is
# enabled, these commands will run outside the container and before docker
# is setup.

View file

@ -113,6 +113,14 @@ cluster_synced_files: []
# should sync to the worker node continuously
file_mounts_sync_continuously: False
# Patterns for files to exclude when running rsync up or rsync down
rsync_exclude: []
# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
rsync_filter: []
# List of commands that will be run before `setup_commands`. If docker is
# enabled, these commands will run outside the container and before docker
# is setup.

View file

@ -126,6 +126,17 @@ cluster_synced_files: []
# should sync to the worker node continuously
file_mounts_sync_continuously: False
# Patterns for files to exclude when running rsync up or rsync down
rsync_exclude:
- "**/.git"
- "**/.git/**"
# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
rsync_filter:
- ".gitignore"
# List of commands that will be run before `setup_commands`. If docker is
# enabled, these commands will run outside the container and before docker
# is setup.

View file

@ -275,6 +275,17 @@ cluster_synced_files: []
# should sync to the worker node continuously
file_mounts_sync_continuously: False
# Patterns for files to exclude when running rsync up or rsync down.
# This is not supported on kubernetes.
rsync_exclude: []
# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
# This is not supported on kubernetes.
rsync_filter: []
# List of commands that will be run before `setup_commands`. If docker is
# enabled, these commands will run outside the container and before docker
# is setup.

View file

@ -275,6 +275,16 @@ cluster_synced_files: []
# should sync to the worker node continuously
file_mounts_sync_continuously: False
# Patterns for files to exclude when running rsync up or rsync down.
# This is not supported on kubernetes.
rsync_exclude: []
# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
# This is not supported on kubernetes.
rsync_filter: []
# List of commands that will be run before `setup_commands`. If docker is
# enabled, these commands will run outside the container and before docker
# is setup.

View file

@ -66,6 +66,14 @@ cluster_synced_files: []
# should sync to the worker node continuously
file_mounts_sync_continuously: False
# Patterns for files to exclude when running rsync up or rsync down
rsync_exclude: []
# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
rsync_filter: []
# List of commands that will be run before `setup_commands`. If docker is
# enabled, these commands will run outside the container and before docker
# is setup.

View file

@ -72,6 +72,17 @@ cluster_synced_files: []
# should sync to the worker node continuously
file_mounts_sync_continuously: False
# Patterns for files to exclude when running rsync up or rsync down
rsync_exclude:
- "**/.git"
- "**/.git/**"
# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
rsync_filter:
- ".gitignore"
# List of commands that will be run before `setup_commands`. If docker is
# enabled, these commands will run outside the container and before docker
# is setup.

View file

@ -253,6 +253,14 @@
"type": "boolean",
"description": "If enabled, file mounts will sync continuously between the head node and the worker nodes. The nodes will not re-run setup commands if only the contents of the file mounts folders change."
},
"rsync_exclude": {
"type": "array",
"description": "File patterns to exclude when syncing up or down with the rsync command. Each entry uses the format of rsync's --exclude parameter."
},
"rsync_filter": {
"type": "array",
"description": "Names of pattern files from which to read exclude patterns when using rsync up or rsync down. Each file is looked up recursively in all directories. For example, if .gitignore is provided here, the behavior will match git's .gitignore behavior."
},
"metadata": {
"type": "object",
"description": "Metadata field that can be used to store user-defined data in the cluster config. Ray does not interpret these fields."

View file

@ -79,7 +79,7 @@ provider:
# 4-2. Select your branch in 'Release' tab
# 4-3. After build success, switch to 'Production'
# 4-4. Switch Launch permission to 'Public' if required
# 5. Change 'project' field to point your
# 5. Change 'project' field to point to your
# repository and branch in this file
project: "GITHUB/open-datastudio/ray:master-staroid"
@ -283,6 +283,14 @@ cluster_synced_files: []
# is setup.
initialization_commands: []
# Patterns for files to exclude when running rsync up or rsync down
rsync_exclude: []
# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
rsync_filter: []
# List of shell commands to run to set up nodes.
setup_commands: []

View file

@ -79,7 +79,7 @@ provider:
# 4-2. Select your branch in 'Release' tab
# 4-3. After build success, switch to 'Production'
# 4-4. Switch Launch permission to 'Public' if required
# 5. Change 'project' field to point your
# 5. Change 'project' field to point to your
# repository and branch in this file
project: "GITHUB/open-datastudio/ray-cluster:master"
@ -304,6 +304,17 @@ initialization_commands: []
# List of shell commands to run to set up nodes.
setup_commands: []
# Patterns for files to exclude when running rsync up or rsync down
rsync_exclude:
- "**/.git"
- "**/.git/**"
# Pattern files to use for filtering out files when running rsync up or rsync down. The file is searched for
# in the source directory and recursively through all subdirectories. For example, if .gitignore is provided
# as a value, the behavior will match git's behavior for finding and using .gitignore files.
rsync_filter:
- ".gitignore"
# Custom commands that will be run on the head node after common setup.
head_setup_commands:
# install staroid and kubernetes packages. Staroid node provider depends on them which autoscaler will use.

View file

@ -250,7 +250,7 @@ def test_docker_rsync():
process_runner.respond_to_call("docker inspect -f", ["true"])
cmd_runner.run_rsync_up(
local_mount, remote_mount, options={"file_mount": True})
local_mount, remote_mount, options={"docker_mount_if_possible": True})
# Make sure we do not copy directly to raw destination
process_runner.assert_not_has_call(
@ -267,7 +267,7 @@ def test_docker_rsync():
process_runner.respond_to_call("docker inspect -f", ["true"])
cmd_runner.run_rsync_up(
local_file, remote_file, options={"file_mount": False})
local_file, remote_file, options={"docker_mount_if_possible": False})
# Make sure we do not copy directly to raw destination
process_runner.assert_not_has_call(
@ -282,7 +282,7 @@ def test_docker_rsync():
##############################
cmd_runner.run_rsync_down(
remote_mount, local_mount, options={"file_mount": True})
remote_mount, local_mount, options={"docker_mount_if_possible": True})
process_runner.assert_not_has_call("1.2.3.4", pattern=f"docker cp")
process_runner.assert_not_has_call(
@ -295,7 +295,7 @@ def test_docker_rsync():
##############################
cmd_runner.run_rsync_down(
remote_file, local_file, options={"file_mount": False})
remote_file, local_file, options={"docker_mount_if_possible": False})
process_runner.assert_has_call("1.2.3.4", pattern=f"docker cp")
process_runner.assert_not_has_call(
@ -304,6 +304,71 @@ def test_docker_rsync():
"1.2.3.4", pattern=f"-avz ray@1.2.3.4:{remote_host_file} {local_file}")
def test_rsync_exclude_and_filter():
    """rsync_exclude / rsync_filter options show up as rsync CLI flags."""
    process_runner = MockProcessRunner()
    provider = MockProvider()
    provider.create_node({}, {}, 1)
    cluster_name = "cluster"
    args = {
        "log_prefix": "prefix",
        "node_id": 0,
        "provider": provider,
        "auth_config": auth_config,
        "cluster_name": cluster_name,
        "process_runner": process_runner,
        "use_internal_ip": False,
    }
    cmd_runner = SSHCommandRunner(**args)

    local_mount = "/home/ubuntu/base/mount/"
    remote_mount = "/root/protected_mount/"

    process_runner.respond_to_call("docker inspect -f", ["true"])
    cmd_runner.run_rsync_up(
        local_mount,
        remote_mount,
        options={
            "docker_mount_if_possible": True,
            "rsync_exclude": ["test"],
            "rsync_filter": [".ignore"]
        })

    # Excludes are emitted before filters, each as a flag/value pair.
    process_runner.assert_has_call(
        "1.2.3.4", pattern="--exclude test --filter dir-merge,- .ignore")
def test_rsync_without_exclude_and_filter():
    """Without rsync_exclude / rsync_filter, no such flags are emitted."""
    process_runner = MockProcessRunner()
    provider = MockProvider()
    provider.create_node({}, {}, 1)
    cluster_name = "cluster"
    args = {
        "log_prefix": "prefix",
        "node_id": 0,
        "provider": provider,
        "auth_config": auth_config,
        "cluster_name": cluster_name,
        "process_runner": process_runner,
        "use_internal_ip": False,
    }
    cmd_runner = SSHCommandRunner(**args)

    local_mount = "/home/ubuntu/base/mount/"
    remote_mount = "/root/protected_mount/"

    process_runner.respond_to_call("docker inspect -f", ["true"])
    cmd_runner.run_rsync_up(
        local_mount,
        remote_mount,
        options={
            "docker_mount_if_possible": True,
        })

    # Match on the bare flags so the test also fails if a flag is emitted
    # with an unexpected value (e.g. "--exclude None").
    # NOTE(review): assumes the mock matches `pattern` against the recorded
    # command text, as the partial patterns elsewhere in this file suggest.
    process_runner.assert_not_has_call("1.2.3.4", pattern="--exclude")
    process_runner.assert_not_has_call("1.2.3.4", pattern="--filter")
if __name__ == "__main__":
import sys
sys.exit(pytest.main(["-v", __file__]))