From f1dcce5a472fba1c77c4aa023589689efbfeb4f6 Mon Sep 17 00:00:00 2001 From: Philipp Moritz Date: Mon, 26 Aug 2019 14:16:17 -0700 Subject: [PATCH] [projects] Add named commands to sessions (#5525) --- doc/source/projects.rst | 15 ++- .../.rayproject/project.yaml | 14 ++- python/ray/projects/schema.json | 21 +++++ python/ray/projects/scripts.py | 52 ++++++++-- .../commands-test/.rayproject/cluster.yaml | 0 .../commands-test/.rayproject/project.yaml | 26 +++++ .../git-repo-pass/.rayproject/project.yaml | 2 +- .../project-pass/.rayproject/project.yaml | 2 +- .../with-docker-fail/.rayproject/project.yaml | 2 +- python/ray/tests/test_projects.py | 94 +++++++++---------- 10 files changed, 159 insertions(+), 69 deletions(-) create mode 100644 python/ray/tests/project_files/session-tests/commands-test/.rayproject/cluster.yaml create mode 100644 python/ray/tests/project_files/session-tests/commands-test/.rayproject/project.yaml diff --git a/doc/source/projects.rst b/doc/source/projects.rst index 9f16de580..1cd9a0435 100644 --- a/doc/source/projects.rst +++ b/doc/source/projects.rst @@ -18,7 +18,7 @@ Quick start (CLI) # Create a new session from the given project. # Launch a cluster and run the appropriate command. - $ ray session start + $ ray session start [arguments] # Open a console for the given session. $ ray session attach @@ -63,8 +63,19 @@ Here is an example for a minimal project format: # and the environment is set up. # A command can also specify a cluster that overwrites the default cluster. commands: + - name: default + command: python default.py + help: "The command that will be executed if no command name is specified" - name: test - command: python test.py + command: python test.py --param1={{param1}} --param2={{param2}} + help: "A test command" + params: + - name: "param1" + help: "The first parameter" + # The following line indicates possible values this parameter can take. + choices: ["1", "2"] + - name: "param2" + help: "The second parameter" Project files have to adhere to the following schema: diff --git a/python/ray/projects/examples/pytorch-transformers/.rayproject/project.yaml b/python/ray/projects/examples/pytorch-transformers/.rayproject/project.yaml index 0d548819c..e62b98157 100644 --- a/python/ray/projects/examples/pytorch-transformers/.rayproject/project.yaml +++ b/python/ray/projects/examples/pytorch-transformers/.rayproject/project.yaml @@ -10,21 +10,25 @@ environment: requirements: requirements.txt commands: - - name: train_sst_2 + - name: train command: | - wget https://raw.githubusercontent.com/nyu-mll/GLUE-baselines/master/download_glue_data.py && \ - python download_glue_data.py -d /tmp -t SST && \ + wget https://raw.githubusercontent.com/ray-project/project-data/master/download_glue_data.py && \ + python download_glue_data.py -d /tmp -t {{dataset}} && \ python ./examples/run_glue.py \ --model_type bert \ --model_name_or_path bert-base-uncased \ - --task_name SST-2 \ + --task_name {{dataset}} \ --do_train \ --do_eval \ --do_lower_case \ - --data_dir /tmp/SST-2 \ + --data_dir /tmp/{{dataset}} \ --max_seq_length 128 \ --per_gpu_eval_batch_size=8 \ --per_gpu_train_batch_size=8 \ --learning_rate 2e-5 \ --num_train_epochs 3.0 \ --output_dir /tmp/output/ + params: + - name: "dataset" + help: "The GLUE dataset to fine-tune on" + choices: ["CoLA", "SST-2", "MRPC", "STS-B", "QQP", "MNLI", "QNLI", "RTE", "WNLI"] diff --git a/python/ray/projects/schema.json b/python/ray/projects/schema.json index 0f05179e6..8f37a5c9c 100644 --- a/python/ray/projects/schema.json +++ b/python/ray/projects/schema.json @@ -55,6 +55,27 @@ "command": { "description": "Shell command to run on the cluster", "type": "string" + }, + "params" : { + "type": "array", + "items": { + "description": "Possible parameters in the command", + "type": "object", + "properties": { + "name": { + "description": "Name of the parameter", + "type": "string" + }, + "help": { + "description": "Help string for the parameter", + "type": "string" + }, + "choices": { + "description": "Possible values the parameter can take", + "type": "array" + } + } + } } } } diff --git a/python/ray/projects/scripts.py b/python/ray/projects/scripts.py index 6a47a434a..ef8a63a52 100644 --- a/python/ray/projects/scripts.py +++ b/python/ray/projects/scripts.py @@ -2,6 +2,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import argparse import logging import os import sys @@ -153,10 +154,20 @@ def stop(): override_cluster_name=None) -@session_cli.command(help="Start a session based on current project config") -def start(): +@session_cli.command( + context_settings=dict(ignore_unknown_options=True, ), + help="Start a session based on current project config") +@click.argument("command", required=False) +@click.argument("args", nargs=-1, type=click.UNPROCESSED) +def start(command, args): project_definition = load_project_or_throw() + if command: + command_to_run = _get_command_to_run(command, project_definition, args) + else: + command_to_run = _get_command_to_run("default", project_definition, + args) + # Check for features we don't support right now project_environment = project_definition["environment"] need_docker = ("dockerfile" in project_environment @@ -202,9 +213,9 @@ def start(): _setup_environment( cluster_yaml, project_definition["environment"], cwd=working_directory) - logger.info("[4/4] Running commands") - _run_commands( - cluster_yaml, project_definition["commands"], cwd=working_directory) + logger.info("[4/4] Running command") + logger.debug("Running {}".format(command)) + session_exec_cluster(cluster_yaml, command_to_run, cwd=working_directory) def session_exec_cluster(cluster_yaml, cmd, cwd=None): @@ -249,7 +260,30 @@ def _setup_environment(cluster_yaml, project_environment, cwd): session_exec_cluster(cluster_yaml, cmd, cwd=cwd) -def _run_commands(cluster_yaml, commands, cwd): - for cmd in commands: - logger.debug("Running {}".format(cmd["name"])) - session_exec_cluster(cluster_yaml, cmd["command"], cwd=cwd) +def _get_command_to_run(command, project_definition, args): + command_to_run = None + params = None + + for command_definition in project_definition["commands"]: + if command_definition["name"] == command: + command_to_run = command_definition["command"] + params = command_definition.get("params", []) + if not command_to_run: + raise click.ClickException( + "Cannot find the command '" + command + + "' in commmands section of the project file.") + + # Build argument parser dynamically to parse parameter arguments. + parser = argparse.ArgumentParser(prog=command) + for param in params: + parser.add_argument( + "--" + param["name"], + required=True, + help=param.get("help"), + choices=param.get("choices")) + + result = parser.parse_args(list(args)) + for key, val in result.__dict__.items(): + command_to_run = command_to_run.replace("{{" + key + "}}", val) + + return command_to_run diff --git a/python/ray/tests/project_files/session-tests/commands-test/.rayproject/cluster.yaml b/python/ray/tests/project_files/session-tests/commands-test/.rayproject/cluster.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/python/ray/tests/project_files/session-tests/commands-test/.rayproject/project.yaml b/python/ray/tests/project_files/session-tests/commands-test/.rayproject/project.yaml new file mode 100644 index 000000000..c000ea81b --- /dev/null +++ b/python/ray/tests/project_files/session-tests/commands-test/.rayproject/project.yaml @@ -0,0 +1,26 @@ +# This file is generated by `ray project create`. + +name: commands-test + +# description: A short description of the project. +repo: https://github.com/ray-project/not-exist + +cluster: .rayproject/cluster.yaml + +environment: + shell: + - echo "Setting up" + +commands: + - name: first + command: echo "Starting ray job with {{a}} and {{b}}" + params: + - name: a + help: "This is the first parameter" + choices: ["1", "2"] + - name: b + help: "This is the second parameter" + choices: ["1", "2"] + + - name: second + command: echo "Some command" diff --git a/python/ray/tests/project_files/session-tests/git-repo-pass/.rayproject/project.yaml b/python/ray/tests/project_files/session-tests/git-repo-pass/.rayproject/project.yaml index e3dcc8f03..bbb8a39dc 100644 --- a/python/ray/tests/project_files/session-tests/git-repo-pass/.rayproject/project.yaml +++ b/python/ray/tests/project_files/session-tests/git-repo-pass/.rayproject/project.yaml @@ -16,5 +16,5 @@ environment: - echo "Setting up the environment" commands: - - name: first-command + - name: default command: echo "Starting ray job" diff --git a/python/ray/tests/project_files/session-tests/project-pass/.rayproject/project.yaml b/python/ray/tests/project_files/session-tests/project-pass/.rayproject/project.yaml index 853096da8..dd64916e5 100644 --- a/python/ray/tests/project_files/session-tests/project-pass/.rayproject/project.yaml +++ b/python/ray/tests/project_files/session-tests/project-pass/.rayproject/project.yaml @@ -16,5 +16,5 @@ environment: - echo "Setting up the environment" commands: - - name: first-command + - name: default command: echo "Starting ray job" diff --git a/python/ray/tests/project_files/session-tests/with-docker-fail/.rayproject/project.yaml b/python/ray/tests/project_files/session-tests/with-docker-fail/.rayproject/project.yaml index fee651853..38b5e6509 100644 --- a/python/ray/tests/project_files/session-tests/with-docker-fail/.rayproject/project.yaml +++ b/python/ray/tests/project_files/session-tests/with-docker-fail/.rayproject/project.yaml @@ -15,5 +15,5 @@ environment: - echo "Setting up the environment" commands: - - name: first-command + - name: default command: echo "Starting ray job" diff --git a/python/ray/tests/test_projects.py b/python/ray/tests/test_projects.py index 9ea319a60..cd285b0f0 100644 --- a/python/ray/tests/test_projects.py +++ b/python/ray/tests/test_projects.py @@ -87,10 +87,9 @@ def _chdir_and_back(d): os.chdir(old_dir) -def test_session_start_default_project(): +def run_test_project(project_dir, command, args): # Run the CLI commands with patching - test_dir = os.path.join(TEST_DIR, - "project_files/session-tests/project-pass") + test_dir = os.path.join(TEST_DIR, "project_files", project_dir) with _chdir_and_back(test_dir): runner = CliRunner() with patch.multiple( @@ -99,11 +98,17 @@ def test_session_start_default_project(): rsync=DEFAULT, exec_cluster=DEFAULT, ) as mock_calls: - result = runner.invoke(start, []) - assert result.exit_code == 0 + result = runner.invoke(command, args) + + return result, mock_calls, test_dir + + +def test_session_start_default_project(): + result, mock_calls, test_dir = run_test_project( + "session-tests/project-pass", start, []) - # Check we are calling autoscaler correctly loaded_project = ray.projects.load_project(test_dir) + assert result.exit_code == 0 # Part 1/3: Cluster Launching Call create_or_update_cluster_call = mock_calls["create_or_update_cluster"] @@ -150,39 +155,20 @@ def test_session_start_default_project(): def test_session_start_docker_fail(): - # Run the CLI commands with patching - test_dir = os.path.join(TEST_DIR, - "project_files/session-tests/with-docker-fail") - with _chdir_and_back(test_dir): - runner = CliRunner() - with patch.multiple( - "ray.projects.scripts", - create_or_update_cluster=DEFAULT, - rsync=DEFAULT, - exec_cluster=DEFAULT, - ) as _: - result = runner.invoke(start, []) - assert result.exit_code == 1 - assert ("Docker support in session is currently " - "not implemented") in result.output + result, _, _ = run_test_project("session-tests/with-docker-fail", start, + []) + + assert result.exit_code == 1 + assert ("Docker support in session is currently " + "not implemented") in result.output def test_session_git_repo_cloned(): - # Run the CLI commands with patching - test_dir = os.path.join(TEST_DIR, - "project_files/session-tests/git-repo-pass") - with _chdir_and_back(test_dir): - runner = CliRunner() - with patch.multiple( - "ray.projects.scripts", - create_or_update_cluster=DEFAULT, - rsync=DEFAULT, - exec_cluster=DEFAULT, - ) as mock_calls: - result = runner.invoke(start, []) - assert result.exit_code == 0 + result, mock_calls, test_dir = run_test_project( + "session-tests/git-repo-pass", start, []) loaded_project = ray.projects.load_project(test_dir) + assert result.exit_code == 0 exec_cluster_call = mock_calls["exec_cluster"] commands_executed = [] @@ -197,19 +183,27 @@ def test_session_git_repo_cloned(): def test_session_invalid_config_errored(): - # Run the CLI commands with patching - test_dir = os.path.join(TEST_DIR, - "project_files/session-tests/invalid-config-fail") - with _chdir_and_back(test_dir): - runner = CliRunner() - with patch.multiple( - "ray.projects.scripts", - create_or_update_cluster=DEFAULT, - rsync=DEFAULT, - exec_cluster=DEFAULT, - ) as _: - result = runner.invoke(start, []) - assert result.exit_code == 1 - assert "validation failed" in result.output - # check that we are displaying actional error message - assert "ray project validate" in result.output + result, _, _ = run_test_project("session-tests/invalid-config-fail", start, + []) + + assert result.exit_code == 1 + assert "validation failed" in result.output + # check that we are displaying actional error message + assert "ray project validate" in result.output + + +def test_session_create_command(): + result, mock_calls, test_dir = run_test_project( + "session-tests/commands-test", start, + ["first", "--a", "1", "--b", "2"]) + + # Verify the project can be loaded. + ray.projects.load_project(test_dir) + assert result.exit_code == 0 + + exec_cluster_call = mock_calls["exec_cluster"] + found_command = False + for _, kwargs in exec_cluster_call.call_args_list: + if "Starting ray job with 1 and 2" in kwargs["cmd"]: + found_command = True + assert found_command