diff --git a/rllib/agents/maml/maml.py b/rllib/agents/maml/maml.py
index 4d7885cf1..5f41e345d 100644
--- a/rllib/agents/maml/maml.py
+++ b/rllib/agents/maml/maml.py
@@ -213,7 +213,7 @@ class MAMLTrainer(Trainer):
         # Metric Collector
         metric_collect = CollectMetrics(
             workers,
-            min_history=config["metrics_smoothing_episodes"],
+            min_history=config["metrics_num_episodes_for_smoothing"],
             timeout_seconds=config["collect_metrics_timeout"])
 
         # Iterator for Inner Adaptation Data gathering (from pre->post
diff --git a/rllib/agents/trainer.py b/rllib/agents/trainer.py
index af367ba0a..b4b8736c6 100644
--- a/rllib/agents/trainer.py
+++ b/rllib/agents/trainer.py
@@ -364,7 +364,7 @@ COMMON_CONFIG: TrainerConfigDict = {
     # have not returned in time will be collected in the next train iteration.
     "collect_metrics_timeout": 180,
     # Smooth metrics over this many episodes.
-    "metrics_smoothing_episodes": 100,
+    "metrics_num_episodes_for_smoothing": 100,
     # Minimum time per train iteration (frequency of metrics reporting).
     "min_iter_time_s": 0,
     # Minimum env steps to optimize for per train call. This value does
@@ -539,6 +539,8 @@ COMMON_CONFIG: TrainerConfigDict = {
     # Replaced by `evaluation_duration=10` and
     # `evaluation_duration_unit=episodes`.
     "evaluation_num_episodes": DEPRECATED_VALUE,
+    # Use `metrics_num_episodes_for_smoothing` instead.
+    "metrics_smoothing_episodes": DEPRECATED_VALUE,
 }
 # __sphinx_doc_end__
 # yapf: enable
@@ -1701,7 +1703,7 @@ class Trainer(Trainable):
         """
         return self.optimizer.collect_metrics(
             self.config["collect_metrics_timeout"],
-            min_history=self.config["metrics_smoothing_episodes"],
+            min_history=self.config["metrics_num_episodes_for_smoothing"],
             selected_workers=selected_workers)
 
     @override(Trainable)
@@ -2096,6 +2098,16 @@ class Trainer(Trainable):
                 "`count_steps_by` must be one of [env_steps|agent_steps]! "
                 "Got {}".format(config["multiagent"]["count_steps_by"]))
 
+        # Metrics settings.
+        if config["metrics_smoothing_episodes"] != DEPRECATED_VALUE:
+            deprecation_warning(
+                old="metrics_smoothing_episodes",
+                new="metrics_num_episodes_for_smoothing",
+                error=False,
+            )
+            config["metrics_num_episodes_for_smoothing"] = \
+                config["metrics_smoothing_episodes"]
+
         # Evaluation settings.
 
         # Deprecated setting: `evaluation_num_episodes`.
diff --git a/rllib/examples/custom_input_api.py b/rllib/examples/custom_input_api.py
index e7f71ee15..72d65ee88 100644
--- a/rllib/examples/custom_input_api.py
+++ b/rllib/examples/custom_input_api.py
@@ -98,7 +98,7 @@ if __name__ == "__main__":
         "train_batch_size": 2000,
         "learning_starts": 0,
         "bc_iters": 100,
-        "metrics_smoothing_episodes": 5,
+        "metrics_num_episodes_for_smoothing": 5,
         "evaluation_interval": 1,
         "evaluation_num_workers": 2,
         "evaluation_duration": 10,
diff --git a/rllib/examples/rock_paper_scissors_multiagent.py b/rllib/examples/rock_paper_scissors_multiagent.py
index 2d115a59a..68e1b6689 100644
--- a/rllib/examples/rock_paper_scissors_multiagent.py
+++ b/rllib/examples/rock_paper_scissors_multiagent.py
@@ -101,7 +101,7 @@ def run_heuristic_vs_learned(args, use_lstm=False, trainer="PG"):
         "num_envs_per_worker": 4,
         "rollout_fragment_length": 10,
         "train_batch_size": 200,
-        "metrics_smoothing_episodes": 200,
+        "metrics_num_episodes_for_smoothing": 200,
         "multiagent": {
             "policies_to_train": ["learned"],
             "policies": {
diff --git a/rllib/execution/metric_ops.py b/rllib/execution/metric_ops.py
index 082fd6aa3..730f6e8e9 100644
--- a/rllib/execution/metric_ops.py
+++ b/rllib/execution/metric_ops.py
@@ -46,7 +46,7 @@
         .filter(OncePerTimeInterval(config["min_iter_time_s"])) \
         .for_each(CollectMetrics(
             workers,
-            min_history=config["metrics_smoothing_episodes"],
+            min_history=config["metrics_num_episodes_for_smoothing"],
             timeout_seconds=config["collect_metrics_timeout"],
             selected_workers=selected_workers,
             by_steps_trained=by_steps_trained))
diff --git a/rllib/tests/test_execution.py b/rllib/tests/test_execution.py
index a59588272..be336b655 100644
--- a/rllib/tests/test_execution.py
+++ b/rllib/tests/test_execution.py
@@ -106,7 +106,7 @@ def test_metrics(ray_start_regular_shared):
         a, workers, {
             "min_iter_time_s": 2.5,
             "timesteps_per_iteration": 0,
-            "metrics_smoothing_episodes": 10,
+            "metrics_num_episodes_for_smoothing": 10,
             "collect_metrics_timeout": 10,
         })
 