[tune] last-n-avg

Co-authored-by: Kai Fricke <kai@anyscale.com>
Author: krfricke (committed via GitHub)
Date:   2020-06-03 05:06:04 +02:00
Parent: 7c43991100
Commit: f4ee3e76d8
3 changed files with 85 additions and 36 deletions


@@ -220,34 +220,35 @@ class ExperimentAnalysis(Analysis):
         Args:
             metric (str): Key for trial info to order on.
             mode (str): One of [min, max].
-            scope (str): One of [all, last, avg]. If `scope=last`, only look at
-                each trial's final step for `metric`, and compare across
-                trials based on `mode=[min,max]`. If `scope=avg`, consider the
-                simple average over all steps for `metric` and compare across
-                trials based on `mode=[min,max]`. If `scope=all`, find each
-                trial's min/max score for `metric` based on `mode`, and
-                compare trials based on `mode=[min,max]`.
+            scope (str): One of [all, last, avg, last-5-avg, last-10-avg].
+                If `scope=last`, only look at each trial's final step for
+                `metric`, and compare across trials based on `mode=[min,max]`.
+                If `scope=avg`, consider the simple average over all steps
+                for `metric` and compare across trials based on
+                `mode=[min,max]`. If `scope=last-5-avg` or `scope=last-10-avg`,
+                consider the simple average over the last 5 or 10 steps for
+                `metric` and compare across trials based on `mode=[min,max]`.
+                If `scope=all`, find each trial's min/max score for `metric`
+                based on `mode`, and compare trials based on `mode=[min,max]`.
         """
         if mode not in ["max", "min"]:
             raise ValueError(
                 "ExperimentAnalysis: attempting to get best trial for "
                 "metric {} for mode {} not in [\"max\", \"min\"]".format(
                     metric, mode))
-        if scope not in ["all", "last", "avg"]:
+        if scope not in ["all", "last", "avg", "last-5-avg", "last-10-avg"]:
             raise ValueError(
                 "ExperimentAnalysis: attempting to get best trial for "
-                "metric {} for scope {} not in [\"all\", \"last\", \"avg\"]".
-                format(metric, scope))
+                "metric {} for scope {} not in [\"all\", \"last\", \"avg\", "
+                "\"last-5-avg\", \"last-10-avg\"]".format(metric, scope))
         best_trial = None
         best_metric_score = None
         for trial in self.trials:
             if metric not in trial.metric_analysis:
                 continue
-            if scope == "last":
-                metric_score = trial.metric_analysis[metric]["last"]
-            elif scope == "avg":
-                metric_score = trial.metric_analysis[metric]["avg"]
+            if scope in ["last", "avg", "last-5-avg", "last-10-avg"]:
+                metric_score = trial.metric_analysis[metric][scope]
             else:
                 metric_score = trial.metric_analysis[metric][mode]
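
For orientation, here is a minimal sketch of how the new scopes are meant to be used from the analysis API. It mirrors the MockTrainable pattern from the test file further down in this diff; the trainable class, the metric name "score", and the id-indexed series are illustrative, not part of the change:

from ray import tune
from ray.tune import Trainable, grid_search

class SketchTrainable(Trainable):
    # Replays a fixed series of "score" values, one per training step.
    def _setup(self, config):
        self.series = {0: [5, 4, 0], 1: [9, 7, 6]}[config["id"]]
        self.idx = 0

    def _train(self):
        value = self.series[self.idx]
        self.idx += 1
        return {"score": value}

analysis = tune.run(
    SketchTrainable,
    stop={"training_iteration": 3},
    config={"id": grid_search([0, 1])})

# Rank trials by the mean of their last 5 reported "score" values,
# instead of the single best ("all") or the final ("last") value.
best_trial = analysis.get_best_trial("score", mode="max", scope="last-5-avg")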
@@ -273,13 +274,16 @@ class ExperimentAnalysis(Analysis):
         Args:
             metric (str): Key for trial info to order on.
             mode (str): One of [min, max].
-            scope (str): One of [all, last, avg]. If `scope=last`, only look at
-                each trial's final step for `metric`, and compare across
-                trials based on `mode=[min,max]`. If `scope=avg`, consider the
-                simple average over all steps for `metric` and compare across
-                trials based on `mode=[min,max]`. If `scope=all`, find each
-                trial's min/max score for `metric` based on `mode`, and
-                compare trials based on `mode=[min,max]`.
+            scope (str): One of [all, last, avg, last-5-avg, last-10-avg].
+                If `scope=last`, only look at each trial's final step for
+                `metric`, and compare across trials based on `mode=[min,max]`.
+                If `scope=avg`, consider the simple average over all steps
+                for `metric` and compare across trials based on
+                `mode=[min,max]`. If `scope=last-5-avg` or `scope=last-10-avg`,
+                consider the simple average over the last 5 or 10 steps for
+                `metric` and compare across trials based on `mode=[min,max]`.
+                If `scope=all`, find each trial's min/max score for `metric`
+                based on `mode`, and compare trials based on `mode=[min,max]`.
         """
         best_trial = self.get_best_trial(metric, mode, scope)
         return best_trial.config if best_trial else None
@@ -292,13 +296,16 @@ class ExperimentAnalysis(Analysis):
         Args:
             metric (str): Key for trial info to order on.
             mode (str): One of [min, max].
-            scope (str): One of [all, last, avg]. If `scope=last`, only look at
-                each trial's final step for `metric`, and compare across
-                trials based on `mode=[min,max]`. If `scope=avg`, consider the
-                simple average over all steps for `metric` and compare across
-                trials based on `mode=[min,max]`. If `scope=all`, find each
-                trial's min/max score for `metric` based on `mode`, and
-                compare trials based on `mode=[min,max]`.
+            scope (str): One of [all, last, avg, last-5-avg, last-10-avg].
+                If `scope=last`, only look at each trial's final step for
+                `metric`, and compare across trials based on `mode=[min,max]`.
+                If `scope=avg`, consider the simple average over all steps
+                for `metric` and compare across trials based on
+                `mode=[min,max]`. If `scope=last-5-avg` or `scope=last-10-avg`,
+                consider the simple average over the last 5 or 10 steps for
+                `metric` and compare across trials based on `mode=[min,max]`.
+                If `scope=all`, find each trial's min/max score for `metric`
+                based on `mode`, and compare trials based on `mode=[min,max]`.
         """
         best_trial = self.get_best_trial(metric, mode, scope)
         return best_trial.logdir if best_trial else None
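
Since get_best_config and get_best_logdir both defer to get_best_trial, they accept the same scope values. Continuing the illustrative sketch above:

# Config and logdir of the trial with the lowest mean "score"
# over its last 10 reported steps.
best_config = analysis.get_best_config("score", mode="min", scope="last-10-avg")
best_logdir = analysis.get_best_logdir("score", mode="min", scope="last-10-avg")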


@@ -14,11 +14,11 @@ class ExperimentAnalysisInMemorySuite(unittest.TestCase):
     def setUp(self):
        class MockTrainable(Trainable):
            scores_dict = {
-                0: [5, 4, 0],
-                1: [4, 3, 1],
-                2: [2, 1, 8],
-                3: [9, 7, 6],
-                4: [7, 5, 3]
+                0: [5, 4, 4, 4, 4, 4, 4, 4, 0],
+                1: [4, 3, 3, 3, 3, 3, 3, 3, 1],
+                2: [2, 1, 1, 1, 1, 1, 1, 1, 8],
+                3: [9, 7, 7, 7, 7, 7, 7, 7, 6],
+                4: [7, 5, 5, 5, 5, 5, 5, 5, 3]
            }

            def _setup(self, config):
@@ -53,7 +53,7 @@ class ExperimentAnalysisInMemorySuite(unittest.TestCase):
            self.MockTrainable,
            name="analysis_exp",
            local_dir=self.test_dir,
-            stop={"training_iteration": 3},
+            stop={"training_iteration": len(scores[0])},
            num_samples=1,
            config={"id": grid_search(list(range(5)))})
@@ -67,12 +67,33 @@ class ExperimentAnalysisInMemorySuite(unittest.TestCase):
                                  "avg").metric_analysis["score"]["avg"]
        min_avg = ea.get_best_trial("score", "min",
                                    "avg").metric_analysis["score"]["avg"]
+        max_avg_5 = ea.get_best_trial(
+            "score", "max",
+            "last-5-avg").metric_analysis["score"]["last-5-avg"]
+        min_avg_5 = ea.get_best_trial(
+            "score", "min",
+            "last-5-avg").metric_analysis["score"]["last-5-avg"]
+        max_avg_10 = ea.get_best_trial(
+            "score", "max",
+            "last-10-avg").metric_analysis["score"]["last-10-avg"]
+        min_avg_10 = ea.get_best_trial(
+            "score", "min",
+            "last-10-avg").metric_analysis["score"]["last-10-avg"]
        self.assertEqual(max_all, max(scores_all))
        self.assertEqual(min_all, min(scores_all))
        self.assertEqual(max_last, max(scores_last))
-        self.assertNotEqual(max_last, max(scores_all))
        self.assertAlmostEqual(max_avg, max(np.mean(scores, axis=1)))
        self.assertAlmostEqual(min_avg, min(np.mean(scores, axis=1)))
+        self.assertNotEqual(max_last, max(scores_all))
+        self.assertAlmostEqual(max_avg_5, max(np.mean(scores[:, -5:], axis=1)))
+        self.assertAlmostEqual(min_avg_5, min(np.mean(scores[:, -5:], axis=1)))
+        self.assertAlmostEqual(max_avg_10, max(
+            np.mean(scores[:, -10:], axis=1)))
+        self.assertAlmostEqual(min_avg_10, min(
+            np.mean(scores[:, -10:], axis=1)))


 class AnalysisSuite(unittest.TestCase):
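
To see what the new assertions check, the expected last-5-avg values can be computed by hand from scores_dict above. This small standalone snippet reproduces them:

import numpy as np

# Row i is the series reported by the trial with id i (from scores_dict).
scores = np.array([
    [5, 4, 4, 4, 4, 4, 4, 4, 0],
    [4, 3, 3, 3, 3, 3, 3, 3, 1],
    [2, 1, 1, 1, 1, 1, 1, 1, 8],
    [9, 7, 7, 7, 7, 7, 7, 7, 6],
    [7, 5, 5, 5, 5, 5, 5, 5, 3],
])

print(np.mean(scores[:, -5:], axis=1))  # [3.2 2.6 2.4 6.8 4.6]
# So max_avg_5 should be 6.8 (trial id 3) and min_avg_5 should be
# 2.4 (trial id 2), which is what the assertions above verify.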


@@ -1,4 +1,5 @@
 import ray.cloudpickle as cloudpickle
+from collections import deque
 import copy
 from datetime import datetime
 import logging
@@ -214,9 +215,14 @@ class Trial:
        self.last_result = {}
        self.last_update_time = -float("inf")

-        # stores in memory max/min/avg/last result for each metric by trial
+        # stores in memory max/min/avg/last-n-avg/last result for each
+        # metric by trial
        self.metric_analysis = {}

+        # keep a moving average over these last n steps
+        self.n_steps = [5, 10]
+        self.metric_n_steps = {}
+
        self.export_formats = export_formats
        self.status = Trial.PENDING
        self.start_time = None
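
For clarity, the shape this bookkeeping produces after a few results looks roughly as follows. This is an illustration only, assuming a single reported metric named "score" replaying trial id 3's series from the test above:

# Illustration; "score" is an assumed metric name.
# trial.metric_analysis == {
#     "score": {"max": 9, "min": 6, "avg": 7.11..., "last": 6,
#               "last-5-avg": 6.8, "last-10-avg": 7.11...},
# }
# trial.metric_n_steps == {
#     "score": {"5": deque([7, 7, 7, 7, 6], maxlen=5),
#               "10": deque([9, 7, 7, 7, 7, 7, 7, 7, 6], maxlen=10)},
# }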
@@ -470,6 +476,7 @@ class Trial:
        self.last_result = result
        self.last_update_time = time.time()
        self.result_logger.on_result(self.last_result)
+
        for metric, value in flatten_dict(result).items():
            if isinstance(value, Number):
                if metric not in self.metric_analysis:
@@ -479,6 +486,13 @@ class Trial:
                        "avg": value,
                        "last": value
                    }
+                    self.metric_n_steps[metric] = {}
+                    for n in self.n_steps:
+                        key = "last-{:d}-avg".format(n)
+                        self.metric_analysis[metric][key] = value
+                        # Store n as string for correct restore.
+                        self.metric_n_steps[metric][str(n)] = deque(
+                            [value], maxlen=n)
                else:
                    step = result["training_iteration"] or 1
                    self.metric_analysis[metric]["max"] = max(
@@ -490,6 +504,13 @@ class Trial:
                        (step - 1) * self.metric_analysis[metric]["avg"])
                    self.metric_analysis[metric]["last"] = value
+                    for n in self.n_steps:
+                        key = "last-{:d}-avg".format(n)
+                        self.metric_n_steps[metric][str(n)].append(value)
+                        self.metric_analysis[metric][key] = sum(
+                            self.metric_n_steps[metric][str(n)]) / len(
+                                self.metric_n_steps[metric][str(n)])

    def get_trainable_cls(self):
        return get_trainable_cls(self.trainable_name)
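
The update above is a standard fixed-window running average: a deque with maxlen=n evicts its oldest value automatically, so the mean of its contents is always the last-n average. A self-contained sketch of the same technique (function and variable names are mine, not Tune's):

from collections import deque

def make_window_averager(n):
    """Return a push(value) function tracking the mean of the last n values."""
    window = deque(maxlen=n)  # the oldest value is dropped once full

    def push(value):
        window.append(value)
        return sum(window) / len(window)

    return push

push = make_window_averager(5)
for value in [9, 7, 7, 7, 7, 7, 7, 7, 6]:
    last_5_avg = push(value)
print(last_5_avg)  # 6.8, matching metric_analysis["score"]["last-5-avg"]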