mirror of
https://github.com/vale981/ray
synced 2025-03-05 10:01:43 -05:00
[tune] MLFlow Logger (#5438)
This commit is contained in:
parent
16acd18ce6
commit
d7b309223b
7 changed files with 98 additions and 1 deletions
|
@ -74,6 +74,9 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE}
|
|||
python /ray/python/ray/tune/examples/logging_example.py \
|
||||
--smoke-test
|
||||
|
||||
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
python /ray/python/ray/tune/examples/mlflow_example.py
|
||||
|
||||
$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
|
||||
python /ray/python/ray/tune/examples/bayesopt_example.py \
|
||||
--smoke-test
|
||||
|
|
|
@ -486,7 +486,12 @@ You can pass in your own logging mechanisms to output logs in custom formats as
|
|||
loggers=DEFAULT_LOGGERS + (CustomLogger1, CustomLogger2)
|
||||
)
|
||||
|
||||
These loggers will be called along with the default Tune loggers. All loggers must inherit the `Logger interface <tune-package-ref.html#ray.tune.logger.Logger>`__. Tune has default loggers for Tensorboard, CSV, and JSON formats. You can also check out `logger.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/logger.py>`__ for implementation details. An example can be found in `logging_example.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/logging_example.py>`__.
|
||||
These loggers will be called along with the default Tune loggers. All loggers must inherit the `Logger interface <tune-package-ref.html#ray.tune.logger.Logger>`__. Tune enables default loggers for Tensorboard, CSV, and JSON formats. You can also check out `logger.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/logger.py>`__ for implementation details. An example can be found in `logging_example.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/logging_example.py>`__.
|
||||
|
||||
MLFlow
|
||||
~~~~~~
|
||||
|
||||
Tune also provides a default logger for `MLFlow <https://mlflow.org>`_. You can install MLFlow via ``pip install mlflow``. An example can be found in `mlflow_example.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/mlflow_example.py>`__. Note that this currently does not include artifact logging support. For this, you can use the native MLFlow APIs inside your Trainable definition.
|
||||
|
||||
Uploading/Syncing
|
||||
-----------------
|
||||
|
|
|
@ -12,5 +12,6 @@ RUN pip install -U h5py # Mutes FutureWarnings
|
|||
RUN pip install --upgrade bayesian-optimization
|
||||
RUN pip install --upgrade git+git://github.com/hyperopt/hyperopt.git
|
||||
RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster ConfigSpace
|
||||
RUN pip install -U mlflow
|
||||
RUN pip install -U pytest-remotedata>=0.3.1
|
||||
RUN conda install pytorch-cpu torchvision-cpu -c pytorch
|
||||
|
|
|
@ -14,6 +14,7 @@ RUN pip install gym[atari]==0.10.11 opencv-python-headless tensorflow lz4 keras
|
|||
RUN pip install --upgrade bayesian-optimization
|
||||
RUN pip install --upgrade git+git://github.com/hyperopt/hyperopt.git
|
||||
RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster ConfigSpace
|
||||
RUN pip install -U mlflow
|
||||
RUN pip install -U pytest-remotedata>=0.3.1
|
||||
RUN conda install pytorch-cpu torchvision-cpu -c pytorch
|
||||
|
||||
|
|
49
python/ray/tune/examples/mlflow_example.py
Normal file
49
python/ray/tune/examples/mlflow_example.py
Normal file
|
@ -0,0 +1,49 @@
|
|||
#!/usr/bin/env python
|
||||
"""Simple MLFLow Logger example.
|
||||
|
||||
This uses a simple MLFlow logger. One limitation of this is that there is
|
||||
no artifact support; to save artifacts with Tune and MLFlow, you will need to
|
||||
start a MLFlow run inside the Trainable function/class.
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import mlflow
|
||||
from mlflow.tracking import MlflowClient
|
||||
import time
|
||||
import random
|
||||
|
||||
from ray import tune
|
||||
from ray.tune.logger import MLFLowLogger, DEFAULT_LOGGERS
|
||||
|
||||
|
||||
def easy_objective(config):
    """Toy trainable used to exercise the MLFlow logger.

    Reports the same (config-determined) loss for 20 timesteps via
    ``tune.track.log`` and then signals completion with ``done=True``.
    """
    # The loss is a pure function of the sampled hyperparameters, so it is
    # identical on every iteration of a given trial.
    loss = (config["height"] - 14) ** 2 - abs(config["width"] - 3)
    for step in range(20):
        tune.track.log(timesteps_total=step, mean_loss=loss)
        # Small pause so the example produces a visible stream of results.
        time.sleep(0.02)
    tune.track.log(done=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
client = MlflowClient()
|
||||
experiment_id = client.create_experiment("test")
|
||||
|
||||
trials = tune.run(
|
||||
easy_objective,
|
||||
name="mlflow",
|
||||
num_samples=5,
|
||||
loggers=DEFAULT_LOGGERS + (MLFLowLogger, ),
|
||||
config={
|
||||
"mlflow_experiment_id": experiment_id,
|
||||
"width": tune.sample_from(
|
||||
lambda spec: 10 + int(90 * random.random())),
|
||||
"height": tune.sample_from(lambda spec: int(100 * random.random()))
|
||||
})
|
||||
|
||||
df = mlflow.search_runs([experiment_id])
|
||||
print(df)
|
|
@ -7,7 +7,17 @@ from ray.tune import track
|
|||
|
||||
|
||||
class TuneReporterCallback(keras.callbacks.Callback):
|
||||
"""Tune Callback for Keras."""
|
||||
|
||||
def __init__(self, reporter=None, freq="batch", logs={}):
|
||||
"""Initializer.
|
||||
|
||||
Args:
|
||||
reporter (StatusReporter|tune.track.log|None): Tune object for
|
||||
returning results.
|
||||
freq (str): Sets the frequency of reporting intermediate results.
|
||||
One of ["batch", "epoch"].
|
||||
"""
|
||||
self.reporter = reporter or track.log
|
||||
self.iteration = 0
|
||||
if freq not in ["batch", "epoch"]:
|
||||
|
|
|
@ -72,6 +72,34 @@ class NoopLogger(Logger):
|
|||
pass
|
||||
|
||||
|
||||
class MLFLowLogger(Logger):
    """Tune logger that mirrors trial results into MLFlow.

    Expects an ``mlflow_experiment_id`` entry in the trial config (or the
    standard MLFlow environment variables to be set). Each Tune trial is
    recorded as one MLFlow run.
    """

    def _init(self):
        # Imported lazily so mlflow remains an optional dependency of Tune.
        from mlflow.tracking import MlflowClient
        self.client = MlflowClient()
        run = self.client.create_run(self.config.get("mlflow_experiment_id"))
        self._run_id = run.info.run_id
        # Record the full trial configuration as run parameters up front.
        for param, value in self.config.items():
            self.client.log_param(self._run_id, param, value)

    def on_result(self, result):
        # MLFlow metrics must be numeric; only float-valued entries of the
        # result dict are forwarded, keyed by the current training iteration.
        step = result.get(TRAINING_ITERATION)
        for metric, value in result.items():
            if isinstance(value, float):
                self.client.log_metric(self._run_id, metric, value, step=step)

    def close(self):
        # Mark the MLFlow run as finished when the trial completes.
        self.client.set_terminated(self._run_id)
|
||||
|
||||
|
||||
class JsonLogger(Logger):
|
||||
def _init(self):
|
||||
self.update_config(self.config)
|
||||
|
|
Loading…
Add table
Reference in a new issue