From 20a155d03d7b4101ea7e3acf111bc1951b40ebe2 Mon Sep 17 00:00:00 2001
From: markgoodhead <goodhead@gmail.com>
Date: Sun, 17 Mar 2019 06:11:30 +0000
Subject: [PATCH] [Tune] Support initial parameters for SkOpt search algorithm
 (#4341)

Similar to the recent change to HyperOpt (https://github.com/ray-project/ray/pull/3944), this implements both:
1. The ability to pass in initial parameter suggestion(s) to be run through Tune first, before using the Optimiser's suggestions. This is for when you already know good parameters and want the Optimiser to be aware of these when it makes future parameter suggestions.
2. The same as 1., but if you already know the reward values for those parameters, you can pass them in as well to avoid having to re-run the experiments. In the future it would be nice for Tune to support this functionality directly by loading previously run Tune experiments and initialising the Optimiser with them (a kind of top-level checkpointing functionality); for now, this feature allows users to do this manually.
---
 python/ray/tune/examples/skopt_example.py | 16 +++++-
 python/ray/tune/suggest/skopt.py          | 64 +++++++++++++++++++++--
 2 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/python/ray/tune/examples/skopt_example.py b/python/ray/tune/examples/skopt_example.py
index 71ac60951..a120a329d 100644
--- a/python/ray/tune/examples/skopt_example.py
+++ b/python/ray/tune/examples/skopt_example.py
@@ -48,9 +48,23 @@ if __name__ == "__main__":
         }
     }
     optimizer = Optimizer([(0, 20), (-100, 100)])
+    previously_run_params = [[10, 0], [15, -20]]
+    known_rewards = [-189, -1144]
     algo = SkOptSearch(
         optimizer, ["width", "height"],
         max_concurrent=4,
-        reward_attr="neg_mean_loss")
+        reward_attr="neg_mean_loss",
+        points_to_evaluate=previously_run_params,
+        evaluated_rewards=known_rewards)
+    scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
+    run_experiments(config, search_alg=algo, scheduler=scheduler)
+
+    # Now run the experiment without known rewards
+
+    algo = SkOptSearch(
+        optimizer, ["width", "height"],
+        max_concurrent=4,
+        reward_attr="neg_mean_loss",
+        points_to_evaluate=previously_run_params)
     scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
     run_experiments(config, search_alg=algo, scheduler=scheduler)
diff --git a/python/ray/tune/suggest/skopt.py b/python/ray/tune/suggest/skopt.py
index 039c9d015..cff340f41 100644
--- a/python/ray/tune/suggest/skopt.py
+++ b/python/ray/tune/suggest/skopt.py
@@ -10,6 +10,36 @@ except Exception:
 from ray.tune.suggest.suggestion import SuggestionAlgorithm
 
 
+def _validate_warmstart(parameter_names, points_to_evaluate,
+                        evaluated_rewards):
+    """Raise TypeError/ValueError on malformed warm-start arguments."""
+    if not points_to_evaluate:
+        return
+    if not isinstance(points_to_evaluate, list):
+        raise TypeError(
+            "points_to_evaluate expected to be a list, got {}.".format(
+                type(points_to_evaluate)))
+    for candidate in points_to_evaluate:
+        if not isinstance(candidate, list):
+            raise TypeError(
+                "points_to_evaluate expected to include list, got {}.".
+                format(candidate))
+        if len(candidate) != len(parameter_names):
+            raise ValueError(
+                "Dim of point {} and parameter_names {} do not match.".format(
+                    candidate, parameter_names))
+    if not evaluated_rewards:
+        return
+    if not isinstance(evaluated_rewards, list):
+        raise TypeError(
+            "evaluated_rewards expected to be a list, got {}.".format(
+                type(evaluated_rewards)))
+    if len(evaluated_rewards) != len(points_to_evaluate):
+        raise ValueError(
+            "Dim of evaluated_rewards {} and points_to_evaluate {} "
+            "do not match.".format(evaluated_rewards, points_to_evaluate))
+
+
 class SkOptSearch(SuggestionAlgorithm):
     """A wrapper around skopt to provide trial suggestions.
 
@@ -24,10 +54,22 @@ class SkOptSearch(SuggestionAlgorithm):
             to 10.
         reward_attr (str): The training result objective value attribute.
             This refers to an increasing value.
+        points_to_evaluate (list of lists): A list of points you'd like to run
+            first before sampling from the optimiser, e.g. these could be
+            parameter configurations you already know work well to help
+            the optimiser select good values. Each point is a list of the
+            parameters using the order definition given by parameter_names.
+        evaluated_rewards (list): If you have previously evaluated the
+            parameters passed in as points_to_evaluate you can avoid
+            re-running those trials by passing in the reward attributes
+            as a list so the optimiser can be told the results without
+            needing to re-compute the trial. Must be the same length as
+            points_to_evaluate. (See tune/examples/skopt_example.py)
 
     Example:
         >>> from skopt import Optimizer
         >>> optimizer = Optimizer([(0,20),(-100,100)])
+        >>> current_best_params = [[10, 0], [15, -20]]
         >>> config = {
         >>>     "my_exp": {
         >>>         "run": "exp",
@@ -38,8 +80,10 @@ class SkOptSearch(SuggestionAlgorithm):
         >>>     }
         >>> }
         >>> algo = SkOptSearch(optimizer,
-        >>>     ["width", "height"], max_concurrent=4,
-        >>>     reward_attr="neg_mean_loss")
+        >>>     ["width", "height"],
+        >>>     max_concurrent=4,
+        >>>     reward_attr="neg_mean_loss",
+        >>>     points_to_evaluate=current_best_params)
     """
 
     def __init__(self,
@@ -47,11 +91,21 @@ class SkOptSearch(SuggestionAlgorithm):
                  parameter_names,
                  max_concurrent=10,
                  reward_attr="episode_reward_mean",
+                 points_to_evaluate=None,
+                 evaluated_rewards=None,
                  **kwargs):
         assert skopt is not None, """skopt must be installed!
             You can install Skopt with the command:
             `pip install scikit-optimize`."""
         assert type(max_concurrent) is int and max_concurrent > 0
+        _validate_warmstart(parameter_names, points_to_evaluate,
+                            evaluated_rewards)
+
+        self._initial_points = []
+        if points_to_evaluate and evaluated_rewards:
+            optimizer.tell(points_to_evaluate, evaluated_rewards)
+        elif points_to_evaluate:
+            self._initial_points = points_to_evaluate
         self._max_concurrent = max_concurrent
         self._parameters = parameter_names
         self._reward_attr = reward_attr
@@ -62,7 +116,11 @@ class SkOptSearch(SuggestionAlgorithm):
     def _suggest(self, trial_id):
         if self._num_live_trials() >= self._max_concurrent:
             return None
-        suggested_config = self._skopt_opt.ask()
+        if self._initial_points:
+            # Serve queued warm-start points first, in the order given.
+            suggested_config = self._initial_points.pop(0)
+        else:
+            suggested_config = self._skopt_opt.ask()
         self._live_trial_mapping[trial_id] = suggested_config
         return dict(zip(self._parameters, suggested_config))