diff --git a/ci/jenkins_tests/run_tune_tests.sh b/ci/jenkins_tests/run_tune_tests.sh index 1b1cd5a8a..6931aa900 100755 --- a/ci/jenkins_tests/run_tune_tests.sh +++ b/ci/jenkins_tests/run_tune_tests.sh @@ -74,6 +74,9 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} python /ray/python/ray/tune/examples/logging_example.py \ --smoke-test +$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ + python /ray/python/ray/tune/examples/mlflow_example.py + $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ python /ray/python/ray/tune/examples/bayesopt_example.py \ --smoke-test diff --git a/doc/source/tune-usage.rst b/doc/source/tune-usage.rst index a24e9c2bf..f6953d162 100644 --- a/doc/source/tune-usage.rst +++ b/doc/source/tune-usage.rst @@ -486,7 +486,12 @@ You can pass in your own logging mechanisms to output logs in custom formats as loggers=DEFAULT_LOGGERS + (CustomLogger1, CustomLogger2) ) -These loggers will be called along with the default Tune loggers. All loggers must inherit the `Logger interface `__. Tune has default loggers for Tensorboard, CSV, and JSON formats. You can also check out `logger.py `__ for implementation details. An example can be found in `logging_example.py `__. +These loggers will be called along with the default Tune loggers. All loggers must inherit the `Logger interface `__. Tune enables default loggers for Tensorboard, CSV, and JSON formats. You can also check out `logger.py `__ for implementation details. An example can be found in `logging_example.py `__. + +MLFlow +~~~~~~ + +Tune also provides a default logger for `MLFlow `_. You can install MLFlow via ``pip install mlflow``. An example can be found in `mlflow_example.py `__. Note that this currently does not include artifact logging support. For this, you can use the native MLFlow APIs inside your Trainable definition. 
Uploading/Syncing ----------------- diff --git a/docker/examples/Dockerfile b/docker/examples/Dockerfile index 9cbc3ae78..a4358eec7 100644 --- a/docker/examples/Dockerfile +++ b/docker/examples/Dockerfile @@ -12,5 +12,6 @@ RUN pip install -U h5py # Mutes FutureWarnings RUN pip install --upgrade bayesian-optimization RUN pip install --upgrade git+git://github.com/hyperopt/hyperopt.git RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster ConfigSpace +RUN pip install -U mlflow RUN pip install -U pytest-remotedata>=0.3.1 RUN conda install pytorch-cpu torchvision-cpu -c pytorch diff --git a/docker/tune_test/Dockerfile b/docker/tune_test/Dockerfile index f30e156a1..dbd12ed8b 100644 --- a/docker/tune_test/Dockerfile +++ b/docker/tune_test/Dockerfile @@ -14,6 +14,7 @@ RUN pip install gym[atari]==0.10.11 opencv-python-headless tensorflow lz4 keras RUN pip install --upgrade bayesian-optimization RUN pip install --upgrade git+git://github.com/hyperopt/hyperopt.git RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster ConfigSpace +RUN pip install -U mlflow RUN pip install -U pytest-remotedata>=0.3.1 RUN conda install pytorch-cpu torchvision-cpu -c pytorch diff --git a/python/ray/tune/examples/mlflow_example.py b/python/ray/tune/examples/mlflow_example.py new file mode 100644 index 000000000..5eb77fd08 --- /dev/null +++ b/python/ray/tune/examples/mlflow_example.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +"""Simple MLFlow Logger example. + +This uses a simple MLFlow logger. One limitation of this is that there is +no artifact support; to save artifacts with Tune and MLFlow, you will need to +start an MLFlow run inside the Trainable function/class. 
+ +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import mlflow +from mlflow.tracking import MlflowClient +import time +import random + +from ray import tune +from ray.tune.logger import MLFLowLogger, DEFAULT_LOGGERS + + +def easy_objective(config): + for i in range(20): + result = dict( + timesteps_total=i, + mean_loss=(config["height"] - 14)**2 - abs(config["width"] - 3)) + tune.track.log(**result) + time.sleep(0.02) + tune.track.log(done=True) + + +if __name__ == "__main__": + client = MlflowClient() + experiment_id = client.create_experiment("test") + + trials = tune.run( + easy_objective, + name="mlflow", + num_samples=5, + loggers=DEFAULT_LOGGERS + (MLFLowLogger, ), + config={ + "mlflow_experiment_id": experiment_id, + "width": tune.sample_from( + lambda spec: 10 + int(90 * random.random())), + "height": tune.sample_from(lambda spec: int(100 * random.random())) + }) + + df = mlflow.search_runs([experiment_id]) + print(df) diff --git a/python/ray/tune/integration/keras.py b/python/ray/tune/integration/keras.py index 197a7eef9..587728ef6 100644 --- a/python/ray/tune/integration/keras.py +++ b/python/ray/tune/integration/keras.py @@ -7,7 +7,17 @@ from ray.tune import track class TuneReporterCallback(keras.callbacks.Callback): + """Tune Callback for Keras.""" + def __init__(self, reporter=None, freq="batch", logs={}): + """Initializer. + + Args: + reporter (StatusReporter|tune.track.log|None): Tune object for + returning results. + freq (str): Sets the frequency of reporting intermediate results. + One of ["batch", "epoch"]. + """ self.reporter = reporter or track.log self.iteration = 0 if freq not in ["batch", "epoch"]: diff --git a/python/ray/tune/logger.py b/python/ray/tune/logger.py index 54a79f43c..c14936d29 100644 --- a/python/ray/tune/logger.py +++ b/python/ray/tune/logger.py @@ -72,6 +72,34 @@ class NoopLogger(Logger): pass +class MLFLowLogger(Logger): + """MLFlow logger. 
+ + + Requires the experiment configuration to have an MLFlow Experiment ID + or manually set the proper environment variables. + + """ + + def _init(self): + from mlflow.tracking import MlflowClient + client = MlflowClient() + run = client.create_run(self.config.get("mlflow_experiment_id")) + self._run_id = run.info.run_id + for key, value in self.config.items(): + client.log_param(self._run_id, key, value) + self.client = client + + def on_result(self, result): + for key, value in result.items(): + if not isinstance(value, float): + continue + self.client.log_metric( + self._run_id, key, value, step=result.get(TRAINING_ITERATION)) + + def close(self): + self.client.set_terminated(self._run_id) + + class JsonLogger(Logger): def _init(self): self.update_config(self.config)