[tune] MLFlow Logger (#5438)

This commit is contained in:
parent 16acd18ce6
commit d7b309223b

7 changed files with 98 additions and 1 deletion
@@ -74,6 +74,9 @@ $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE}
     python /ray/python/ray/tune/examples/logging_example.py \
     --smoke-test

+$SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
+    python /ray/python/ray/tune/examples/mlflow_example.py
+
 $SUPPRESS_OUTPUT docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \
     python /ray/python/ray/tune/examples/bayesopt_example.py \
     --smoke-test
@@ -486,7 +486,12 @@ You can pass in your own logging mechanisms to output logs in custom formats as
         loggers=DEFAULT_LOGGERS + (CustomLogger1, CustomLogger2)
     )

-These loggers will be called along with the default Tune loggers. All loggers must inherit the `Logger interface <tune-package-ref.html#ray.tune.logger.Logger>`__. Tune has default loggers for Tensorboard, CSV, and JSON formats. You can also check out `logger.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/logger.py>`__ for implementation details. An example can be found in `logging_example.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/logging_example.py>`__.
+These loggers will be called along with the default Tune loggers. All loggers must inherit the `Logger interface <tune-package-ref.html#ray.tune.logger.Logger>`__. Tune enables default loggers for Tensorboard, CSV, and JSON formats. You can also check out `logger.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/logger.py>`__ for implementation details. An example can be found in `logging_example.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/logging_example.py>`__.
+
+MLFlow
+~~~~~~
+
+Tune also provides a default logger for `MLFlow <https://mlflow.org>`_. You can install MLFlow via ``pip install mlflow``. An example can be found in `mlflow_example.py <https://github.com/ray-project/ray/blob/master/python/ray/tune/examples/mlflow_example.py>`__. Note that this currently does not include artifact logging support; for that, you can use the native MLFlow APIs inside your Trainable definition.
+
 Uploading/Syncing
 -----------------
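
Since the MLFLowLogger added in this commit does not log artifacts, here is a minimal hedged sketch, not part of the diff, of what artifact logging with the native MLFlow APIs inside a trainable could look like; the trainable name, the loss formula, and the summary file are illustrative:

#!/usr/bin/env python
"""Sketch: artifact logging via native MLFlow APIs inside a trainable."""
import os
import tempfile

import mlflow
from ray import tune


def trainable_with_artifacts(config):
    # Open an MLFlow run manually so artifacts can be attached to it;
    # MLFLowLogger itself only logs params and float metrics.
    with mlflow.start_run(experiment_id=config.get("mlflow_experiment_id")):
        loss = (config["height"] - 14)**2
        for step in range(10):
            mlflow.log_metric("mean_loss", loss, step=step)
            tune.track.log(mean_loss=loss)
        # Write a file and register it as a run artifact.
        with tempfile.TemporaryDirectory() as tmpdir:
            path = os.path.join(tmpdir, "summary.txt")
            with open(path, "w") as f:
                f.write("final loss: {}\n".format(loss))
            mlflow.log_artifact(path)
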
@@ -12,5 +12,6 @@ RUN pip install -U h5py  # Mutes FutureWarnings
 RUN pip install --upgrade bayesian-optimization
 RUN pip install --upgrade git+git://github.com/hyperopt/hyperopt.git
 RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster ConfigSpace
+RUN pip install -U mlflow
 RUN pip install -U pytest-remotedata>=0.3.1
 RUN conda install pytorch-cpu torchvision-cpu -c pytorch
@@ -14,6 +14,7 @@ RUN pip install gym[atari]==0.10.11 opencv-python-headless tensorflow lz4 keras
 RUN pip install --upgrade bayesian-optimization
 RUN pip install --upgrade git+git://github.com/hyperopt/hyperopt.git
 RUN pip install --upgrade sigopt nevergrad scikit-optimize hpbandster ConfigSpace
+RUN pip install -U mlflow
 RUN pip install -U pytest-remotedata>=0.3.1
 RUN conda install pytorch-cpu torchvision-cpu -c pytorch

python/ray/tune/examples/mlflow_example.py (new file, 49 lines)
@ -0,0 +1,49 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
"""Simple MLFLow Logger example.
|
||||||
|
|
||||||
|
This uses a simple MLFlow logger. One limitation of this is that there is
|
||||||
|
no artifact support; to save artifacts with Tune and MLFlow, you will need to
|
||||||
|
start a MLFlow run inside the Trainable function/class.
|
||||||
|
|
||||||
|
"""
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import print_function
|
||||||
|
|
||||||
|
import mlflow
|
||||||
|
from mlflow.tracking import MlflowClient
|
||||||
|
import time
|
||||||
|
import random
|
||||||
|
|
||||||
|
from ray import tune
|
||||||
|
from ray.tune.logger import MLFLowLogger, DEFAULT_LOGGERS
|
||||||
|
|
||||||
|
|
||||||
|
def easy_objective(config):
|
||||||
|
for i in range(20):
|
||||||
|
result = dict(
|
||||||
|
timesteps_total=i,
|
||||||
|
mean_loss=(config["height"] - 14)**2 - abs(config["width"] - 3))
|
||||||
|
tune.track.log(**result)
|
||||||
|
time.sleep(0.02)
|
||||||
|
tune.track.log(done=True)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
client = MlflowClient()
|
||||||
|
experiment_id = client.create_experiment("test")
|
||||||
|
|
||||||
|
trials = tune.run(
|
||||||
|
easy_objective,
|
||||||
|
name="mlflow",
|
||||||
|
num_samples=5,
|
||||||
|
loggers=DEFAULT_LOGGERS + (MLFLowLogger, ),
|
||||||
|
config={
|
||||||
|
"mlflow_experiment_id": experiment_id,
|
||||||
|
"width": tune.sample_from(
|
||||||
|
lambda spec: 10 + int(90 * random.random())),
|
||||||
|
"height": tune.sample_from(lambda spec: int(100 * random.random()))
|
||||||
|
})
|
||||||
|
|
||||||
|
df = mlflow.search_runs([experiment_id])
|
||||||
|
print(df)
|
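
After the script finishes, the runs recorded under the "test" experiment can be queried programmatically, as the final ``mlflow.search_runs`` call above shows; in a typical MLFlow setup they can also be browsed by starting the tracking UI with ``mlflow ui``.
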
@@ -7,7 +7,17 @@ from ray.tune import track


 class TuneReporterCallback(keras.callbacks.Callback):
+    """Tune Callback for Keras."""
+
     def __init__(self, reporter=None, freq="batch", logs={}):
+        """Initializer.
+
+        Args:
+            reporter (StatusReporter|tune.track.log|None): Tune object for
+                returning results.
+            freq (str): Sets the frequency of reporting intermediate results.
+                One of ["batch", "epoch"].
+        """
         self.reporter = reporter or track.log
         self.iteration = 0
         if freq not in ["batch", "epoch"]:
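
For context, a hedged usage sketch of how the callback above could plug into a Keras ``fit()`` call inside a Tune trainable; the tiny model and random data are placeholders, not part of this commit, and the sketch assumes TuneReporterCallback from the hunk above is in scope:

import numpy as np
from tensorflow import keras


def train_example(config):
    # Placeholder model and data standing in for a real training setup.
    model = keras.Sequential([
        keras.layers.Dense(8, activation="relu", input_shape=(4, )),
        keras.layers.Dense(1)
    ])
    model.compile(optimizer="sgd", loss="mse")
    x = np.random.rand(64, 4)
    y = np.random.rand(64, 1)
    model.fit(
        x, y,
        epochs=2,
        verbose=0,
        # Report metrics back to Tune at the end of every epoch.
        callbacks=[TuneReporterCallback(freq="epoch")])
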
@@ -72,6 +72,34 @@ class NoopLogger(Logger):
         pass


+class MLFLowLogger(Logger):
+    """MLFlow logger.
+
+    Requires the experiment configuration to have an MLFlow Experiment ID,
+    or the proper environment variables to be set manually.
+
+    """
+
+    def _init(self):
+        from mlflow.tracking import MlflowClient
+        client = MlflowClient()
+        run = client.create_run(self.config.get("mlflow_experiment_id"))
+        self._run_id = run.info.run_id
+        for key, value in self.config.items():
+            client.log_param(self._run_id, key, value)
+        self.client = client
+
+    def on_result(self, result):
+        for key, value in result.items():
+            if not isinstance(value, float):
+                continue
+            self.client.log_metric(
+                self._run_id, key, value, step=result.get(TRAINING_ITERATION))
+
+    def close(self):
+        self.client.set_terminated(self._run_id)
+
+
 class JsonLogger(Logger):
     def _init(self):
         self.update_config(self.config)
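
To illustrate the Logger interface that MLFLowLogger implements (``_init``, ``on_result``, ``close``), here is a minimal hedged sketch of another custom logger; JsonLinesLogger and its output file name are illustrative, not part of this commit:

import json
import os

from ray.tune.logger import Logger


class JsonLinesLogger(Logger):
    """Appends each reported result as one line of JSON."""

    def _init(self):
        # self.logdir and self.config are provided by the Logger base class.
        self._file = open(os.path.join(self.logdir, "results.jsonl"), "a")

    def on_result(self, result):
        # Keep only plain scalar values, mirroring the float filter
        # used by MLFLowLogger above.
        row = {k: v for k, v in result.items()
               if isinstance(v, (int, float, str))}
        self._file.write(json.dumps(row) + "\n")
        self._file.flush()

    def close(self):
        self._file.close()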