[RLlib] Cleanup some deprecated metric keys and classes. (#26036)
commit 59a967a3a0 (parent 33b30aed15)
20 changed files with 146 additions and 105 deletions
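Most of the hunks below follow one pattern: algorithms stop writing the legacy STEPS_TRAINED_COUNTER / STEPS_TRAINED_THIS_ITER_COUNTER keys from ray.rllib.execution.common and count via the canonical keys in ray.rllib.utils.metrics, while ray.util.timer._Timer replaces the deprecated TimerStat. A minimal, self-contained sketch of the counter side of that pattern (the plain defaultdict and the helper function are illustrative stand-ins, not part of the diff):

from collections import defaultdict

# Canonical metric keys, with values as in ray.rllib.utils.metrics.
NUM_ENV_STEPS_TRAINED = "num_env_steps_trained"
NUM_AGENT_STEPS_TRAINED = "num_agent_steps_trained"

counters = defaultdict(int)  # stand-in for Algorithm._counters


def record_trained_batch(env_steps: int, agent_steps: int) -> None:
    # Count trained steps under the new keys only; no legacy counters.
    counters[NUM_ENV_STEPS_TRAINED] += env_steps
    counters[NUM_AGENT_STEPS_TRAINED] += agent_steps


record_trained_batch(env_steps=200, agent_steps=400)
assert counters[NUM_ENV_STEPS_TRAINED] == 200
assert counters[NUM_AGENT_STEPS_TRAINED] == 400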

@@ -4,10 +4,6 @@ from typing import Optional
from ray.rllib.algorithms.algorithm import Algorithm
from ray.rllib.algorithms.a3c.a3c import A3CConfig, A3C
from ray.rllib.execution.common import (
STEPS_TRAINED_COUNTER,
STEPS_TRAINED_THIS_ITER_COUNTER,
)
from ray.rllib.execution.rollout_ops import (
synchronous_parallel_sample,
)

@@ -18,8 +14,10 @@ from ray.rllib.utils.metrics import (
APPLY_GRADS_TIMER,
COMPUTE_GRADS_TIMER,
NUM_AGENT_STEPS_SAMPLED,
NUM_AGENT_STEPS_TRAINED,
NUM_ENV_STEPS_SAMPLED,
WORKER_UPDATE_TIMER,
NUM_ENV_STEPS_TRAINED,
SYNCH_WORKER_WEIGHTS_TIMER,
)
from ray.rllib.utils.typing import (
PartialAlgorithmConfigDict,

@@ -188,8 +186,8 @@ class A2C(A3C):
)
if self._num_microbatches >= num_microbatches:
# Update counters.
self._counters[STEPS_TRAINED_COUNTER] += self._microbatches_counts
self._counters[STEPS_TRAINED_THIS_ITER_COUNTER] = self._microbatches_counts
self._counters[NUM_ENV_STEPS_TRAINED] += self._microbatches_counts
self._counters[NUM_AGENT_STEPS_TRAINED] += self._microbatches_counts

# Apply gradients.
apply_timer = self._timers[APPLY_GRADS_TIMER]

@@ -206,7 +204,7 @@ class A2C(A3C):
global_vars = {
"timestep": self._counters[NUM_AGENT_STEPS_SAMPLED],
}
with self._timers[WORKER_UPDATE_TIMER]:
with self._timers[SYNCH_WORKER_WEIGHTS_TIMER]:
self.workers.sync_weights(
policies=self.workers.local_worker().get_policies_to_train(),
global_vars=global_vars,

@@ -42,7 +42,9 @@ from ray.rllib.evaluation.metrics import (
)
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.evaluation.worker_set import WorkerSet
from ray.rllib.execution.common import WORKER_UPDATE_TIMER
from ray.rllib.execution.common import (
STEPS_TRAINED_THIS_ITER_COUNTER, # TODO: Backward compatibility.
)
from ray.rllib.execution.rollout_ops import synchronous_parallel_sample
from ray.rllib.execution.train_ops import multi_gpu_train_one_step, train_one_step
from ray.rllib.offline import get_offline_io_resource_bundles

@@ -73,6 +75,7 @@ from ray.rllib.utils.metrics import (
NUM_ENV_STEPS_SAMPLED,
NUM_ENV_STEPS_SAMPLED_THIS_ITER,
NUM_ENV_STEPS_TRAINED,
SYNCH_WORKER_WEIGHTS_TIMER,
TRAINING_ITERATION_TIMER,
)
from ray.rllib.utils.metrics.learner_info import LEARNER_INFO

@@ -569,7 +572,6 @@ class Algorithm(Trainable):
# Results dict for training (and if applicable: evaluation).
results: ResultDict = {}

self._rollout_worker_metrics = []
local_worker = (
self.workers.local_worker()
if hasattr(self.workers, "local_worker")

@@ -593,13 +595,12 @@ class Algorithm(Trainable):
results.update(self._run_one_evaluation(train_future=None))

# Collect rollout worker metrics.
episodes, self._episodes_to_be_collected = collect_episodes(
episodes_this_iter, self._episodes_to_be_collected = collect_episodes(
local_worker,
self._remote_workers_for_metrics,
self._episodes_to_be_collected,
timeout_seconds=self.config["metrics_episode_collection_timeout_s"],
)
self._rollout_worker_metrics.extend(episodes)

# Attach latest available evaluation results to train results,
# if necessary.

@@ -613,9 +614,10 @@ class Algorithm(Trainable):
# Sync filters on workers.
self._sync_filters_if_needed(self.workers)

# Collect worker metrics.
# Collect worker metrics and combine them with `results`.
if self.config["_disable_execution_plan_api"]:
results = self._compile_iteration_results(
episodes_this_iter=episodes_this_iter,
step_ctx=train_iter_ctx,
iteration_results=results,
)

@@ -780,19 +782,20 @@ class Algorithm(Trainable):
< units_left_to_do
]
)
agent_steps_this_iter = sum(b.agent_steps() for b in batches)
env_steps_this_iter = sum(b.env_steps() for b in batches)
_agent_steps = sum(b.agent_steps() for b in batches)
_env_steps = sum(b.env_steps() for b in batches)
# 1 episode per returned batch.
if unit == "episodes":
num_units_done += len(batches)
# n timesteps per returned batch.
else:
num_units_done += (
agent_steps_this_iter
if self._by_agent_steps
else env_steps_this_iter
_agent_steps if self._by_agent_steps else _env_steps
)

agent_steps_this_iter += _agent_steps
env_steps_this_iter += _env_steps

logger.info(
f"Ran round {round_} of parallel evaluation "
f"({num_units_done}/{duration if not auto else '?'} "

@@ -862,7 +865,7 @@ class Algorithm(Trainable):
global_vars = {
"timestep": self._counters[NUM_ENV_STEPS_SAMPLED],
}
with self._timers[WORKER_UPDATE_TIMER]:
with self._timers[SYNCH_WORKER_WEIGHTS_TIMER]:
self.workers.sync_weights(global_vars=global_vars)

return train_results

@@ -2366,7 +2369,9 @@ class Algorithm(Trainable):
* eval_cfg["num_envs_per_worker"]
)

def _compile_iteration_results(self, *, step_ctx, iteration_results=None):
def _compile_iteration_results(
self, *, episodes_this_iter, step_ctx, iteration_results=None
):
# Return dict.
results: ResultDict = {}
iteration_results = iteration_results or {}

@@ -2382,18 +2387,33 @@ class Algorithm(Trainable):
# Learner info.
results["info"] = {LEARNER_INFO: iteration_results}

episodes = self._rollout_worker_metrics
orig_episodes = list(episodes)
missing = self.config["metrics_num_episodes_for_smoothing"] - len(episodes)
# Calculate how many (if any) of older, historical episodes we have to add to
# `episodes_this_iter` in order to reach the required smoothing window.
episodes_for_metrics = episodes_this_iter[:]
missing = self.config["metrics_num_episodes_for_smoothing"] - len(
episodes_this_iter
)
# We have to add some older episodes to reach the smoothing window size.
if missing > 0:
episodes = self._episode_history[-missing:] + episodes
assert len(episodes) <= self.config["metrics_num_episodes_for_smoothing"]
self._episode_history.extend(orig_episodes)
episodes_for_metrics = self._episode_history[-missing:] + episodes_this_iter
assert (
len(episodes_for_metrics)
<= self.config["metrics_num_episodes_for_smoothing"]
)
# Note that when there are more than `metrics_num_episodes_for_smoothing`
# episodes in `episodes_for_metrics`, leave them as-is. In this case, we'll
# compute the stats over that larger number.

# Add new episodes to our history and make sure it doesn't grow larger than
# needed.
self._episode_history.extend(episodes_this_iter)
self._episode_history = self._episode_history[
-self.config["metrics_num_episodes_for_smoothing"] :
]
results["sampler_results"] = summarize_episodes(
episodes, orig_episodes, self.config["keep_per_episode_custom_metrics"]
episodes_for_metrics,
episodes_this_iter,
self.config["keep_per_episode_custom_metrics"],
)
# TODO: Don't dump sampler results into top-level.
results.update(results["sampler_results"])

@@ -2413,11 +2433,15 @@ class Algorithm(Trainable):
results[NUM_AGENT_STEPS_TRAINED + "_this_iter"] = step_ctx.trained
# TODO: For CQL and other algos, count by trained steps.
results["timesteps_total"] = self._counters[NUM_AGENT_STEPS_SAMPLED]
# TODO: Backward compatibility.
results[STEPS_TRAINED_THIS_ITER_COUNTER] = step_ctx.trained
else:
results[NUM_ENV_STEPS_SAMPLED + "_this_iter"] = step_ctx.sampled
results[NUM_ENV_STEPS_TRAINED + "_this_iter"] = step_ctx.trained
# TODO: For CQL and other algos, count by trained steps.
results["timesteps_total"] = self._counters[NUM_ENV_STEPS_SAMPLED]
# TODO: Backward compatibility.
results[STEPS_TRAINED_THIS_ITER_COUNTER] = step_ctx.trained
# TODO: Backward compatibility.
results["agent_timesteps_total"] = self._counters[NUM_AGENT_STEPS_SAMPLED]
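
A hedged, standalone sketch of the episode-smoothing bookkeeping in the _compile_iteration_results hunk above (the names and plain dicts are illustrative; the real code operates on Algorithm._episode_history and RolloutMetrics objects):

from typing import List, Tuple


def smooth_episodes(
    episodes_this_iter: List[dict],
    episode_history: List[dict],
    smoothing_window: int,
) -> Tuple[List[dict], List[dict]]:
    # Return (episodes to compute stats over, updated history).
    episodes_for_metrics = episodes_this_iter[:]
    missing = smoothing_window - len(episodes_this_iter)
    if missing > 0:
        # Pad with the newest historical episodes to reach the window size.
        episodes_for_metrics = episode_history[-missing:] + episodes_this_iter
    # If more episodes than the window were collected this iteration, keep
    # them all; stats are then computed over the larger set.
    episode_history = (episode_history + episodes_this_iter)[-smoothing_window:]
    return episodes_for_metrics, episode_history


eps, hist = smooth_episodes([{"r": 1.0}], [{"r": 0.0}] * 5, smoothing_window=3)
assert len(eps) == 3 and len(hist) == 3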

@@ -1111,7 +1111,14 @@ class AlgorithmConfig:
metrics_episode_collection_timeout_s: Wait for metric batches for at most
this many seconds. Those that have not returned in time will be
collected in the next train iteration.
metrics_num_episodes_for_smoothing: Smooth metrics over this many episodes.
metrics_num_episodes_for_smoothing: Smooth rollout metrics over this many
episodes, if possible.
In case rollouts (sample collection) just started, there may be fewer
than this many episodes in the buffer and we'll compute metrics
over this smaller number of available episodes.
In case there are more than this many episodes collected in a single
training iteration, use all of these episodes for metrics computation,
meaning don't ever cut any "excess" episodes.
min_time_s_per_iteration: Minimum time to accumulate within a single
`train()` call. This value does not affect learning,
only the number of times `Algorithm.training_step()` is called by

@@ -25,10 +25,6 @@ from ray.rllib.algorithms import Algorithm
from ray.rllib.algorithms.dqn.dqn import DQN, DQNConfig
from ray.rllib.algorithms.dqn.learner_thread import LearnerThread
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.execution.common import (
STEPS_TRAINED_COUNTER,
STEPS_TRAINED_THIS_ITER_COUNTER,
)
from ray.rllib.execution.parallel_requests import AsyncRequestsManager
from ray.rllib.policy.sample_batch import MultiAgentBatch
from ray.rllib.utils.actors import create_colocated_actors

@@ -507,6 +503,7 @@ class ApexDQN(DQN):
Args:
_num_samples_ready: A mapping from ActorHandle (RolloutWorker) to
the number of samples returned by the remote worker.

Returns:
The number of remote workers whose weights were updated.
"""

@@ -517,6 +514,9 @@ class ApexDQN(DQN):
self.learner_thread.weights_updated = False
weights = self.workers.local_worker().get_weights()
self.curr_learner_weights = ray.put(weights)

num_workers_updated = 0

with self._timers[SYNCH_WORKER_WEIGHTS_TIMER]:
for (
remote_sampler_worker,

@@ -529,11 +529,21 @@ class ApexDQN(DQN):
):
remote_sampler_worker.set_weights.remote(
self.curr_learner_weights,
{"timestep": self._counters[STEPS_TRAINED_COUNTER]},
{
"timestep": self._counters[
NUM_AGENT_STEPS_TRAINED
if self._by_agent_steps
else NUM_ENV_STEPS_TRAINED
]
},
)
self.steps_since_update[remote_sampler_worker] = 0
num_workers_updated += 1

self._counters["num_weight_syncs"] += 1

return num_workers_updated

def sample_from_replay_buffer_place_on_learner_queue_non_blocking(
self, num_samples_collected: Dict[ActorHandle, int]
) -> None:

@@ -617,7 +627,6 @@ class ApexDQN(DQN):
else:
raise RuntimeError("The learner thread died while training")

self._counters[STEPS_TRAINED_THIS_ITER_COUNTER] = num_samples_trained_this_itr
self._timers["learner_dequeue"] = self.learner_thread.queue_timer
self._timers["learner_grad"] = self.learner_thread.grad_timer
self._timers["learner_overall"] = self.learner_thread.overall_timer

@@ -637,7 +646,9 @@ class ApexDQN(DQN):
)
self._counters[NUM_TARGET_UPDATES] += 1
self._counters[LAST_TARGET_UPDATE_TS] = self._counters[
STEPS_TRAINED_COUNTER
NUM_AGENT_STEPS_TRAINED
if self._by_agent_steps
else NUM_ENV_STEPS_TRAINED
]

@override(Algorithm)

@@ -657,10 +668,8 @@ class ApexDQN(DQN):
self._sampling_actor_manager.add_workers(new_workers)

@override(Algorithm)
def _compile_iteration_results(self, *, step_ctx, iteration_results=None):
result = super()._compile_iteration_results(
step_ctx=step_ctx, iteration_results=iteration_results
)
def _compile_iteration_results(self, *args, **kwargs):
result = super()._compile_iteration_results(*args, **kwargs)
replay_stats = ray.get(
self._replay_actors[0].stats.remote(self.config["optimizer"].get("debug"))
)
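
The ApexDQN hunks above read the "timestep" sent to remote workers from the agent- or env-step counter instead of the removed STEPS_TRAINED_COUNTER. A minimal sketch of that selection (plain dict instead of Algorithm._counters; the helper is illustrative):

NUM_AGENT_STEPS_TRAINED = "num_agent_steps_trained"
NUM_ENV_STEPS_TRAINED = "num_env_steps_trained"


def current_train_timestep(counters: dict, by_agent_steps: bool) -> int:
    # Pick the counter that the algorithm counts training progress by.
    key = NUM_AGENT_STEPS_TRAINED if by_agent_steps else NUM_ENV_STEPS_TRAINED
    return counters[key]


counters = {NUM_AGENT_STEPS_TRAINED: 4000, NUM_ENV_STEPS_TRAINED: 1000}
assert current_train_timestep(counters, by_agent_steps=True) == 4000
assert current_train_timestep(counters, by_agent_steps=False) == 1000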

@@ -24,9 +24,6 @@ import ray
from ray.rllib.algorithms.ppo import PPOConfig, PPO
from ray.rllib.evaluation.postprocessing import Postprocessing
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.execution.common import (
STEPS_TRAINED_THIS_ITER_COUNTER,
)
from ray.rllib.execution.parallel_requests import AsyncRequestsManager
from ray.rllib.utils.annotations import override
from ray.rllib.utils.deprecation import Deprecated

@@ -297,10 +294,8 @@ class DDPPO(PPO):
# - Update the worker's global_vars.
# - Build info dict using a LearnerInfoBuilder object.
learner_info_builder = LearnerInfoBuilder(num_devices=1)
steps_this_iter = 0
for worker, results in sample_and_update_results.items():
for result in results:
steps_this_iter += result["env_steps"]
self._counters[NUM_AGENT_STEPS_SAMPLED] += result["agent_steps"]
self._counters[NUM_AGENT_STEPS_TRAINED] += result["agent_steps"]
self._counters[NUM_ENV_STEPS_SAMPLED] += result["env_steps"]

@@ -315,8 +310,6 @@ class DDPPO(PPO):
for worker in self.workers.remote_workers():
worker.set_global_vars.remote(global_vars)

self._counters[STEPS_TRAINED_THIS_ITER_COUNTER] = steps_this_iter

# Sync down the weights from 1st remote worker (only if we have received
# some results from it).
# As with the sync up, this is not really needed unless the user is

@@ -1,10 +1,10 @@
import queue
import threading

from ray.util.timer import _Timer
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.metrics.learner_info import LearnerInfoBuilder
from ray.rllib.utils.metrics.window_stat import WindowStat
from ray.rllib.utils.timer import TimerStat

LEARNER_QUEUE_MAX_SIZE = 16

@@ -26,9 +26,9 @@ class LearnerThread(threading.Thread):
self.local_worker = local_worker
self.inqueue = queue.Queue(maxsize=LEARNER_QUEUE_MAX_SIZE)
self.outqueue = queue.Queue()
self.queue_timer = TimerStat()
self.grad_timer = TimerStat()
self.overall_timer = TimerStat()
self.queue_timer = _Timer()
self.grad_timer = _Timer()
self.overall_timer = _Timer()
self.daemon = True
self.weights_updated = False
self.stopped = False

@@ -16,6 +16,10 @@ from ray.rllib.execution.rollout_ops import (
)
from ray.rllib.utils.annotations import override
from ray.rllib.utils.deprecation import Deprecated
from ray.rllib.utils.metrics import (
NUM_AGENT_STEPS_SAMPLED,
NUM_ENV_STEPS_SAMPLED,
)
from ray.rllib.utils.metrics.learner_info import LEARNER_INFO
from ray.rllib.utils.typing import (
PartialAlgorithmConfigDict,

@@ -383,10 +387,11 @@ class Dreamer(Algorithm):
# Number of sub-iterations for Dreamer
dreamer_train_iters = self.config["dreamer_train_iters"]
batch_size = self.config["batch_size"]
action_repeat = self.config["action_repeat"]

# Collect SampleBatches from rollout workers.
batch = synchronous_parallel_sample(worker_set=self.workers)
self._counters[NUM_AGENT_STEPS_SAMPLED] += batch.agent_steps()
self._counters[NUM_ENV_STEPS_SAMPLED] += batch.env_steps()

fetches = {}

@@ -398,25 +403,16 @@ class Dreamer(Algorithm):
fetches = local_worker.learn_on_batch(batch)

if fetches:
# Custom Logging
# Custom logging.
policy_fetches = fetches[DEFAULT_POLICY_ID]["learner_stats"]
if "log_gif" in policy_fetches:
gif = policy_fetches["log_gif"]
policy_fetches["log_gif"] = self._postprocess_gif(gif)

self._counters[STEPS_SAMPLED_COUNTER] = (
self.local_replay_buffer.timesteps * action_repeat
)

self.local_replay_buffer.add(batch)

return fetches

def _compile_iteration_results(self, *args, **kwargs):
results = super()._compile_iteration_results(*args, **kwargs)
results["timesteps_total"] = self._counters[STEPS_SAMPLED_COUNTER]
return results


# Deprecated: Use ray.rllib.algorithms.dreamer.DreamerConfig instead!
class _deprecated_default_config(dict):

@@ -37,6 +37,8 @@ from ray.rllib.utils.metrics import (
NUM_AGENT_STEPS_TRAINED,
NUM_ENV_STEPS_SAMPLED,
NUM_ENV_STEPS_TRAINED,
NUM_SYNCH_WORKER_WEIGHTS,
NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS,
)
from ray.rllib.utils.replay_buffers.multi_agent_replay_buffer import ReplayMode
from ray.rllib.utils.replay_buffers.replay_buffer import _ALL_POLICIES

@@ -822,7 +824,6 @@ class Impala(Algorithm):
final_learner_info = builder.finalize()

# Update the steps trained counters.
self._counters[STEPS_TRAINED_THIS_ITER_COUNTER] = num_agent_steps_trained
self._counters[NUM_ENV_STEPS_TRAINED] += num_env_steps_trained
self._counters[NUM_AGENT_STEPS_TRAINED] += num_agent_steps_trained

@@ -874,17 +875,17 @@ class Impala(Algorithm):
def update_workers_if_necessary(self) -> None:
# Only need to update workers if there are remote workers.
global_vars = {"timestep": self._counters[NUM_AGENT_STEPS_TRAINED]}
self._counters["steps_since_broadcast"] += 1
self._counters[NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS] += 1
if (
self.workers.remote_workers()
and self._counters["steps_since_broadcast"]
and self._counters[NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS]
>= self.config["broadcast_interval"]
and self.workers_that_need_updates
):
weights = ray.put(self.workers.local_worker().get_weights())
self._counters["steps_since_broadcast"] = 0
self._counters[NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS] = 0
self._learner_thread.weights_updated = False
self._counters["num_weight_broadcasts"] += 1
self._counters[NUM_SYNCH_WORKER_WEIGHTS] += 1

for worker in self.workers_that_need_updates:
worker.set_weights.remote(weights, global_vars)

@@ -910,10 +911,8 @@ class Impala(Algorithm):
self._sampling_actor_manager.add_workers(new_workers)

@override(Algorithm)
def _compile_iteration_results(self, *, step_ctx, iteration_results=None):
result = super()._compile_iteration_results(
step_ctx=step_ctx, iteration_results=iteration_results
)
def _compile_iteration_results(self, *args, **kwargs):
result = super()._compile_iteration_results(*args, **kwargs)
result = self._learner_thread.add_learner_metrics(
result, overwrite_learner_info=False
)
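
The Impala hunk above renames the ad-hoc "steps_since_broadcast" / "num_weight_broadcasts" counters to the new public keys. A standalone sketch of the broadcast gating, assuming the key values defined in the utils/metrics hunk further below (the function itself is illustrative, not RLlib code):

from collections import defaultdict

NUM_SYNCH_WORKER_WEIGHTS = "num_weight_broadcasts"
NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS = (
    "num_training_step_calls_since_last_synch_worker_weights"
)


def maybe_broadcast(counters, broadcast_interval, has_remote_workers):
    # Count training_step() calls since the last weight broadcast; once the
    # configured interval is reached, reset the counter and record one sync.
    counters[NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS] += 1
    if (
        has_remote_workers
        and counters[NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS]
        >= broadcast_interval
    ):
        counters[NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS] = 0
        counters[NUM_SYNCH_WORKER_WEIGHTS] += 1
        return True  # Caller would now ray.put() the weights and push them out.
    return False


c = defaultdict(int)
assert [maybe_broadcast(c, 2, True) for _ in range(4)] == [False, True, False, True]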

@@ -17,7 +17,7 @@ from ray.rllib.utils.deprecation import Deprecated, DEPRECATED_VALUE
from ray.rllib.utils.metrics import (
NUM_AGENT_STEPS_SAMPLED,
NUM_ENV_STEPS_SAMPLED,
WORKER_UPDATE_TIMER,
SYNCH_WORKER_WEIGHTS_TIMER,
)
from ray.rllib.utils.typing import (
ResultDict,

@@ -284,7 +284,7 @@ class MARWIL(Algorithm):
# Update weights - after learning on the local worker - on all remote
# workers.
if self.workers.remote_workers():
with self._timers[WORKER_UPDATE_TIMER]:
with self._timers[SYNCH_WORKER_WEIGHTS_TIMER]:
self.workers.sync_weights(global_vars=global_vars)

# Update global vars on local worker as well.

@@ -38,7 +38,7 @@ from ray.rllib.execution.rollout_ops import synchronous_parallel_sample
from ray.rllib.utils.metrics import (
NUM_AGENT_STEPS_SAMPLED,
NUM_ENV_STEPS_SAMPLED,
WORKER_UPDATE_TIMER,
SYNCH_WORKER_WEIGHTS_TIMER,
)

logger = logging.getLogger(__name__)

@@ -426,7 +426,7 @@ class PPO(Algorithm):
# Update weights - after learning on the local worker - on all remote
# workers.
if self.workers.remote_workers():
with self._timers[WORKER_UPDATE_TIMER]:
with self._timers[SYNCH_WORKER_WEIGHTS_TIMER]:
self.workers.sync_weights(global_vars=global_vars)

# For each policy: update KL scale and warn about possible issues

@@ -139,9 +139,10 @@ def summarize_episodes(
"""Summarizes a set of episode metrics tuples.

Args:
episodes: smoothed set of episodes including historical ones
new_episodes: just the new episodes in this iteration. This must be
a subset of `episodes`. If None, assumes all episodes are new.
episodes: List of most recent n episodes. This may include historical ones
(not newly collected in this iteration) in order to achieve the size of
the smoothing window.
new_episodes: All the episodes that were completed in this iteration.
"""

if new_episodes is None:

@@ -3,11 +3,11 @@ import platform
import random
from typing import Optional

from ray.util.timer import _Timer
from ray.rllib.execution.replay_ops import SimpleReplayBuffer
from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID, SampleBatch
from ray.rllib.utils.replay_buffers.multi_agent_replay_buffer import ReplayMode
from ray.rllib.utils.replay_buffers.replay_buffer import _ALL_POLICIES
from ray.rllib.utils.timer import TimerStat
from ray.rllib.utils.typing import PolicyID, SampleBatchType

@@ -87,9 +87,9 @@ class MixInMultiAgentReplayBuffer:
self.replay_buffers = collections.defaultdict(new_buffer)

# Metrics.
self.add_batch_timer = TimerStat()
self.replay_timer = TimerStat()
self.update_priorities_timer = TimerStat()
self.add_batch_timer = _Timer()
self.replay_timer = _Timer()
self.update_priorities_timer = _Timer()

# Added timesteps over lifetime.
self.num_added = 0

@@ -9,7 +9,7 @@ from ray.rllib.utils.metrics import ( # noqa: F401
NUM_TARGET_UPDATES,
APPLY_GRADS_TIMER,
COMPUTE_GRADS_TIMER,
WORKER_UPDATE_TIMER,
SYNCH_WORKER_WEIGHTS_TIMER as WORKER_UPDATE_TIMER,
GRAD_WAIT_TIMER,
SAMPLE_TIMER,
LEARN_ON_BATCH_TIMER,

@@ -3,11 +3,11 @@ from six.moves import queue
import threading
from typing import Dict, Optional

from ray.util.timer import _Timer
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.execution.minibatch_buffer import MinibatchBuffer
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.metrics.learner_info import LearnerInfoBuilder, LEARNER_INFO
from ray.rllib.utils.timer import TimerStat
from ray.rllib.utils.metrics.window_stat import WindowStat
from ray.util.iter import _NextValueNotReady

@@ -56,10 +56,10 @@ class LearnerThread(threading.Thread):
num_passes=num_sgd_iter,
init_num_passes=num_sgd_iter,
)
self.queue_timer = TimerStat()
self.grad_timer = TimerStat()
self.load_timer = TimerStat()
self.load_wait_timer = TimerStat()
self.queue_timer = _Timer()
self.grad_timer = _Timer()
self.load_timer = _Timer()
self.load_wait_timer = _Timer()
self.daemon = True
self.weights_updated = False
self.learner_info = {}

@@ -2,6 +2,7 @@ import logging
from six.moves import queue
import threading

from ray.util.timer import _Timer
from ray.rllib.execution.learner_thread import LearnerThread
from ray.rllib.execution.minibatch_buffer import MinibatchBuffer
from ray.rllib.policy.sample_batch import SampleBatch

@@ -9,7 +10,6 @@ from ray.rllib.utils.annotations import override
from ray.rllib.utils.deprecation import deprecation_warning
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.metrics.learner_info import LearnerInfoBuilder
from ray.rllib.utils.timer import TimerStat
from ray.rllib.evaluation.rollout_worker import RolloutWorker

tf1, tf, tfv = try_import_tf()

@@ -192,8 +192,8 @@ class _MultiGPULoaderThread(threading.Thread):
self.queue_timer = multi_gpu_learner_thread.queue_timer
self.load_timer = multi_gpu_learner_thread.load_timer
else:
self.queue_timer = TimerStat()
self.load_timer = TimerStat()
self.queue_timer = _Timer()
self.load_timer = _Timer()

def run(self) -> None:
while True:

@@ -16,7 +16,6 @@ from ray.rllib.execution.common import (
STEPS_SAMPLED_COUNTER,
STEPS_TRAINED_COUNTER,
STEPS_TRAINED_THIS_ITER_COUNTER,
WORKER_UPDATE_TIMER,
_check_sample_batch_type,
_get_global_vars,
_get_shared_metrics,

@@ -25,7 +24,11 @@ from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID, MultiAgentBatch
from ray.rllib.utils.annotations import DeveloperAPI
from ray.rllib.utils.deprecation import DEPRECATED_VALUE, deprecation_warning
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.metrics import NUM_ENV_STEPS_TRAINED, NUM_AGENT_STEPS_TRAINED
from ray.rllib.utils.metrics import (
NUM_ENV_STEPS_TRAINED,
NUM_AGENT_STEPS_TRAINED,
SYNCH_WORKER_WEIGHTS_TIMER,
)
from ray.rllib.utils.metrics.learner_info import LearnerInfoBuilder, LEARNER_INFO
from ray.rllib.utils.sgd import do_minibatch_sgd
from ray.rllib.utils.typing import PolicyID, SampleBatchType, ModelGradients

@@ -231,7 +234,7 @@ class TrainOneStep:
# Update weights - after learning on the local worker - on all remote
# workers.
if self.workers.remote_workers():
with metrics.timers[WORKER_UPDATE_TIMER]:
with metrics.timers[SYNCH_WORKER_WEIGHTS_TIMER]:
weights = ray.put(
lw.get_weights(self.policies or lw.get_policies_to_train(batch))
)

@@ -354,7 +357,7 @@ class MultiGPUTrainOneStep:
metrics.info[LEARNER_INFO] = learner_info

if self.workers.remote_workers():
with metrics.timers[WORKER_UPDATE_TIMER]:
with metrics.timers[SYNCH_WORKER_WEIGHTS_TIMER]:
weights = ray.put(
self.workers.local_worker().get_weights(
self.local_worker.get_policies_to_train()

@@ -453,7 +456,7 @@ class ApplyGradients:

if self.update_all:
if self.workers.remote_workers():
with metrics.timers[WORKER_UPDATE_TIMER]:
with metrics.timers[SYNCH_WORKER_WEIGHTS_TIMER]:
weights = ray.put(
self.local_worker.get_weights(
self.policies or self.local_worker.get_policies_to_train()

@@ -468,7 +471,7 @@ class ApplyGradients:
"update_all=False, `current_actor` must be set "
"in the iterator context."
)
with metrics.timers[WORKER_UPDATE_TIMER]:
with metrics.timers[SYNCH_WORKER_WEIGHTS_TIMER]:
weights = self.local_worker.get_weights(
self.policies or self.local_worker.get_policies_to_train()
)

@@ -8,11 +8,18 @@ NUM_AGENT_STEPS_TRAINED = "num_agent_steps_trained"
NUM_ENV_STEPS_TRAINED_THIS_ITER = "num_env_steps_trained_this_iter"
NUM_AGENT_STEPS_TRAINED_THIS_ITER = "num_agent_steps_trained_this_iter"

# Counters for keeping track of worker weight updates (synchronization
# between local worker and remote workers).
NUM_SYNCH_WORKER_WEIGHTS = "num_weight_broadcasts"
NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS = (
"num_training_step_calls_since_last_synch_worker_weights"
)

# Counters to track target network updates.
LAST_TARGET_UPDATE_TS = "last_target_update_ts"
NUM_TARGET_UPDATES = "num_target_updates"

# Performance timers (keys for Algorithm._timers or metrics.timers).
# Performance timers (keys for Algorithm._timers).
TRAINING_ITERATION_TIMER = "training_iteration"
APPLY_GRADS_TIMER = "apply_grad"
COMPUTE_GRADS_TIMER = "compute_grads"

@@ -22,6 +29,3 @@ SAMPLE_TIMER = "sample"
LEARN_ON_BATCH_TIMER = "learn"
LOAD_BATCH_TIMER = "load"
TARGET_NET_UPDATE_TIMER = "target_net_update"

# Deprecated: Use `SYNCH_WORKER_WEIGHTS_TIMER` instead.
WORKER_UPDATE_TIMER = "update"
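
A hedged sketch of how these timer keys are typically used together with ray.util.timer._Timer, which this commit substitutes for the deprecated TimerStat (the string value of SYNCH_WORKER_WEIGHTS_TIMER below is illustrative, and the defaultdict stands in for Algorithm._timers):

import time
from collections import defaultdict

from ray.util.timer import _Timer

SYNCH_WORKER_WEIGHTS_TIMER = "synch_weights"  # illustrative value

_timers = defaultdict(_Timer)  # stand-in for Algorithm._timers

with _timers[SYNCH_WORKER_WEIGHTS_TIMER]:
    time.sleep(0.01)  # ... weight sync to remote workers would happen here ...

print(_timers[SYNCH_WORKER_WEIGHTS_TIMER].mean)  # average seconds per timed block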

@@ -2,6 +2,7 @@ from typing import Dict
import logging
import numpy as np

from ray.util.timer import _Timer
from ray.rllib.utils.annotations import override
from ray.rllib.utils.replay_buffers.multi_agent_replay_buffer import (
MultiAgentReplayBuffer,

@@ -16,7 +17,6 @@ from ray.rllib.utils.replay_buffers.replay_buffer import (
)
from ray.rllib.utils.typing import PolicyID, SampleBatchType
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.timer import TimerStat
from ray.util.debug import log_once
from ray.util.annotations import DeveloperAPI
from ray.rllib.policy.rnn_sequencing import timeslice_along_seq_lens_with_overlap

@@ -137,7 +137,7 @@ class MultiAgentPrioritizedReplayBuffer(
)

self.prioritized_replay_eps = prioritized_replay_eps
self.update_priorities_timer = TimerStat()
self.update_priorities_timer = _Timer()

@DeveloperAPI
@override(MultiAgentReplayBuffer)

@@ -3,6 +3,7 @@ import logging
from enum import Enum
from typing import Any, Dict, Optional

from ray.util.timer import _Timer
from ray.rllib.policy.rnn_sequencing import timeslice_along_seq_lens_with_overlap
from ray.rllib.policy.sample_batch import MultiAgentBatch, SampleBatch
from ray.rllib.utils.annotations import override

@@ -13,7 +14,6 @@ from ray.rllib.utils.replay_buffers.replay_buffer import (
ReplayBuffer,
StorageUnit,
)
from ray.rllib.utils.timer import TimerStat
from ray.rllib.utils.typing import PolicyID, SampleBatchType
from ray.util.annotations import DeveloperAPI
from ray.util.debug import log_once

@@ -184,8 +184,8 @@ class MultiAgentReplayBuffer(ReplayBuffer):
self.replay_buffers = collections.defaultdict(new_buffer)

# Metrics.
self.add_batch_timer = TimerStat()
self.replay_timer = TimerStat()
self.add_batch_timer = _Timer()
self.replay_timer = _Timer()
self._num_added = 0

def __len__(self) -> int:

@@ -1,3 +1,10 @@
from ray.util.timer import _Timer
from ray.rllib.utils.deprecation import deprecation_warning

deprecation_warning(
old="ray.rllib.utils.timer::TimerStat",
new="ray.util.timer::_Timer",
error=False,
)

TimerStat = _Timer # backwards compatibility alias
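
What the shim above means for downstream code: the old import path keeps working (it now emits a deprecation warning at import time) and resolves to the new class.

from ray.rllib.utils.timer import TimerStat  # triggers deprecation_warning(...)
from ray.util.timer import _Timer

assert TimerStat is _Timer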