[Projects] Refactor cluster specification (#6488)

This commit is contained in:
Philipp Moritz 2019-12-14 22:43:06 -08:00 committed by GitHub
parent 9cc0ecc6ff
commit f5d10eea0b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
23 changed files with 130 additions and 52 deletions

View file

@ -1,7 +1,8 @@
name: long-running-tests
description: "Ray's long running stress tests"
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
commands:
- name: run

View file

@ -6,7 +6,8 @@ description: "Example of how to use Cython with ray"
tags: ["ray-example", "cython"]
documentation: https://ray.readthedocs.io/en/latest/advanced.html#cython-code-in-ray
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
environment:
requirements: ray-project/requirements.txt

View file

@ -6,7 +6,8 @@ description: "Parallelizing the L-BFGS algorithm in ray"
tags: ["ray-example", "optimization", "lbfgs"]
documentation: https://ray.readthedocs.io/en/latest/auto_examples/plot_lbfgs.html
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
environment:
requirements: ray-project/requirements.txt

View file

@ -6,7 +6,8 @@ description: "A simple news reader example that uses ray actors to serve request
tags: ["ray-example", "flask", "rss", "newsreader"]
documentation: https://ray.readthedocs.io/en/latest/auto_examples/plot_newsreader.html
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
environment:
requirements: ray-project/requirements.txt

View file

@ -6,7 +6,8 @@ description: "A simple parameter server example implemented with ray actors"
tags: ["ray-example", "parameter-server", "machine-learning"]
documentation: https://ray.readthedocs.io/en/latest/auto_examples/plot_parameter_server.html
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
environment:
requirements: ray-project/requirements.txt

View file

@ -5,7 +5,8 @@ name: ray-example-streaming
description: "A simple ray example for a streaming wordcount"
tags: ["ray-example", "streaming", "wordcount", "data-processing"]
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
environment:
requirements: ray-project/requirements.txt

View file

@ -54,7 +54,8 @@ Here is an example for a minimal project format:
repo: https://github.com/ray-project/ray
# Cluster to be instantiated by default when starting the project.
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
# Commands/information to build the environment, once the cluster is
# instantiated. This can include the versions of python libraries etc.

View file

@ -4,7 +4,8 @@ name: open-tacotron
description: "A TensorFlow implementation of Google's Tacotron speech synthesis with pre-trained model (unofficial)"
repo: https://github.com/keithito/tacotron
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
environment:
requirements: ray-project/requirements.txt

View file

@ -4,7 +4,8 @@ name: pytorch-transformers
description: "A library of state-of-the-art pretrained models for Natural Language Processing (NLP)"
repo: https://github.com/huggingface/pytorch-transformers
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
environment:
requirements: ray-project/requirements.txt

View file

@ -10,6 +10,45 @@ import os
import yaml
def make_argument_parser(name, params, wildcards):
    """Build argument parser dynamically to parse parameter arguments.

    Args:
        name (str): Name of the command to parse. Used as the ``prog``
            of the generated parser.
        params (list): Parameter specifications used to construct the
            argparse parser. Each entry is a dict with a mandatory
            "name" key; every other key is forwarded as a keyword
            argument to ``parser.add_argument``.
        wildcards (bool): Whether wildcards are allowed as arguments.
            If true, "*" is accepted in addition to the listed choices
            of every parameter that declares "choices".

    Returns:
        The argparse parser.
        A dictionary from argument name to list of valid choices
        (only populated when ``wildcards`` is true).

    Raises:
        ValueError: If a parameter declares a "type" that is not one of
            "int", "str" or "float".
    """
    parser = argparse.ArgumentParser(prog=name)

    # Mapping from the type names allowed in the project file to the
    # callables argparse should use for conversion. Loop-invariant, so
    # it is built once.
    supported_types = {"int": int, "str": str, "float": float}

    # For argparse arguments that have a 'choices' list associated
    # with them, save it in the following dictionary.
    choices = {}

    for param in params:
        # Construct arguments to pass into argparse's parser.add_argument.
        # Work on a copy so the caller's specification is never mutated.
        argparse_kwargs = copy.deepcopy(param)
        # Use a dedicated local instead of rebinding the ``name``
        # parameter, which holds the command name.
        param_name = argparse_kwargs.pop("name")

        if wildcards and "choices" in param:
            choices[param_name] = param["choices"]
            argparse_kwargs["choices"] = param["choices"] + ["*"]

        if "type" in param:
            type_name = param["type"]
            if type_name not in supported_types:
                raise ValueError(
                    "Parameter {} has type {} which is not supported. "
                    "Type must be one of {}".format(
                        param_name, type_name,
                        list(supported_types.keys())))
            argparse_kwargs["type"] = supported_types[type_name]

        parser.add_argument(
            "--" + param_name, dest=param_name, **argparse_kwargs)

    return parser, choices
class ProjectDefinition:
def __init__(self, current_dir):
"""Finds ray-project folder for current project, parse and validates it.
@ -41,7 +80,7 @@ class ProjectDefinition:
def cluster_yaml(self):
"""Return the project's cluster configuration filename."""
return self.config["cluster"]
return self.config["cluster"]["config"]
def working_directory(self):
"""Return the project's working directory on a cluster session."""
@ -86,29 +125,7 @@ class ProjectDefinition:
"Cannot find the command named '{}' in commmands section "
"of the project file.".format(command_name))
# Build argument parser dynamically to parse parameter arguments.
parser = argparse.ArgumentParser(prog=command_name)
# For argparse arguments that have a 'choices' list associated
# with them, save it in the following dictionary.
choices = {}
for param in params:
# Construct arguments to pass into argparse's parser.add_argument.
argparse_kwargs = copy.deepcopy(param)
name = argparse_kwargs.pop("name")
if wildcards and "choices" in param:
choices[name] = param["choices"]
argparse_kwargs["choices"] = param["choices"] + ["*"]
if "type" in param:
types = {"int": int, "str": str, "float": float}
if param["type"] in types:
argparse_kwargs["type"] = types[param["type"]]
else:
raise ValueError(
"Parameter {} has type {} which is not supported. "
"Type must be one of {}".format(
name, param["type"], list(types.keys())))
parser.add_argument("--" + name, dest=name, **argparse_kwargs)
parser, choices = make_argument_parser(command_name, params, wildcards)
parsed_args = vars(parser.parse_args(list(args)))
if wildcards:
@ -174,11 +191,11 @@ def check_project_config(project_root, project_config):
validate_project_schema(project_config)
# Make sure the cluster yaml file exists
if "cluster" in project_config:
cluster_file = os.path.join(project_root, project_config["cluster"])
if not os.path.exists(cluster_file):
raise ValueError("'cluster' file does not exist "
"in {}".format(project_root))
cluster_file = os.path.join(project_root,
project_config["cluster"]["config"])
if not os.path.exists(cluster_file):
raise ValueError("'cluster' file does not exist "
"in {}".format(project_root))
if "environment" in project_config:
env = project_config["environment"]

View file

@ -25,8 +25,48 @@
"type": "string"
},
"cluster": {
"description": "Path to a .yaml cluster configuration file (relative to the project root)",
"type": "string"
"type": "object",
"properties": {
"config": {
"type": "string",
"description": "Path to a .yaml cluster configuration file (relative to the project root)"
},
"params": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"help": {
"type": "string"
},
"choices": {
"type": "array"
},
"default": {
},
"type": {
"type": "string",
"enum": [
"int",
"float",
"str"
]
}
},
"required": [
"name"
],
"additionalProperties": false
}
}
},
"required": [
"config"
],
"additionalProperties": false
},
"environment": {
"description": "The environment that needs to be set up to run the project",

View file

@ -6,7 +6,8 @@ name: {{name}}
# The URL of the repo this project is part of.
{{repo_string}}
cluster: {{cluster}}
cluster:
config: {{cluster}}
environment:
# dockerfile: The dockerfile to build and run the commands with.

View file

@ -4,4 +4,5 @@ description: "Test project for docker environment"
environment:
docker: "Dockerfile"
cluster: "cluster.yaml"
cluster:
config: cluster.yaml

View file

@ -1,3 +1,4 @@
name: testmissingyaml
cluster: "cluster.yaml"
cluster:
config: cluster.yaml

View file

@ -5,4 +5,5 @@ environment:
dockerimage: "some docker image"
cluster: "cluster.yaml"
cluster:
config: cluster.yaml

View file

@ -1,6 +1,7 @@
name: "project1"
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
environment:
requirements: requirements.txt

View file

@ -3,4 +3,5 @@ name: testproject2
environment:
requirements: "requirements.txt"
cluster: "cluster.yaml"
cluster:
config: cluster.yaml

View file

@ -5,7 +5,8 @@ name: commands-test
# description: A short description of the project.
repo: https://github.com/ray-project/not-exist
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
environment:
shell:

View file

@ -5,7 +5,8 @@ name: git-repo-pass
# description: A short description of the project.
repo: https://github.com/ray-project/not-exist
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
environment:
# dockerfile: The dockerfile to build and run the commands with.

View file

@ -5,7 +5,8 @@ name: invalid-config-fail
# description: A short description of the project.
# repo: The URL of the repo this project is part of.
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
environment:
# NOTE: The following is invalid because you can't have both dockerfile

View file

@ -5,7 +5,8 @@ name: project-pass
# description: A short description of the project.
# repo: The URL of the repo this project is part of.
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
environment:
# dockerfile: The dockerfile to build and run the commands with.

View file

@ -5,7 +5,8 @@ name: with-docker-fail
# description: A short description of the project.
# repo: The URL of the repo this project is part of.
cluster: ray-project/cluster.yaml
cluster:
config: ray-project/cluster.yaml
environment:
# dockerfile: The dockerfile to build and run the commands with.

View file

@ -7,4 +7,5 @@ environment:
- second command
- third command
cluster: "cluster.yaml"
cluster:
config: cluster.yaml