[RLlib] Rename metrics_smoothing_episodes into metrics_num_episodes_for_smoothing for clarity. (#20983)

Sven Mika 2021-12-11 20:33:35 +01:00 committed by GitHub
parent 596c8e2772
commit db058d0fb3
6 changed files with 19 additions and 7 deletions
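
For users, this is purely a config key rename: the value still means "smooth episode-based metrics such as episode_reward_mean over this many most recent episodes." A minimal before/after sketch (the PPOTrainer usage, env, and the value 20 are illustrative, not part of this commit; the old key keeps working via the deprecation handling added below in class Trainer):

# Old key (now deprecated; still accepted with a deprecation warning):
config = {"metrics_smoothing_episodes": 20}

# New key:
config = {"metrics_num_episodes_for_smoothing": 20}

# Illustrative use with an RLlib trainer of this era:
# from ray.rllib.agents.ppo import PPOTrainer
# trainer = PPOTrainer(env="CartPole-v0", config=config)
# result = trainer.train()
# result["episode_reward_mean"] is then averaged over (at most) the
# last 20 completed episodes.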

View file

@@ -213,7 +213,7 @@ class MAMLTrainer(Trainer):
         # Metric Collector
         metric_collect = CollectMetrics(
             workers,
-            min_history=config["metrics_smoothing_episodes"],
+            min_history=config["metrics_num_episodes_for_smoothing"],
             timeout_seconds=config["collect_metrics_timeout"])
         # Iterator for Inner Adaptation Data gathering (from pre->post

View file

@@ -364,7 +364,7 @@ COMMON_CONFIG: TrainerConfigDict = {
     # have not returned in time will be collected in the next train iteration.
     "collect_metrics_timeout": 180,
     # Smooth metrics over this many episodes.
-    "metrics_smoothing_episodes": 100,
+    "metrics_num_episodes_for_smoothing": 100,
     # Minimum time per train iteration (frequency of metrics reporting).
     "min_iter_time_s": 0,
     # Minimum env steps to optimize for per train call. This value does
@@ -539,6 +539,8 @@ COMMON_CONFIG: TrainerConfigDict = {
     # Replaced by `evaluation_duration=10` and
     # `evaluation_duration_unit=episodes`.
     "evaluation_num_episodes": DEPRECATED_VALUE,
+    # Use `metrics_num_episodes_for_smoothing` instead.
+    "metrics_smoothing_episodes": DEPRECATED_VALUE,
 }
 # __sphinx_doc_end__
 # yapf: enable
@@ -1701,7 +1703,7 @@ class Trainer(Trainable):
         """
         return self.optimizer.collect_metrics(
             self.config["collect_metrics_timeout"],
-            min_history=self.config["metrics_smoothing_episodes"],
+            min_history=self.config["metrics_num_episodes_for_smoothing"],
             selected_workers=selected_workers)

     @override(Trainable)
@@ -2096,6 +2098,16 @@ class Trainer(Trainable):
                 "`count_steps_by` must be one of [env_steps|agent_steps]! "
                 "Got {}".format(config["multiagent"]["count_steps_by"]))

+        # Metrics settings.
+        if config["metrics_smoothing_episodes"] != DEPRECATED_VALUE:
+            deprecation_warning(
+                old="metrics_smoothing_episodes",
+                new="metrics_num_episodes_for_smoothing",
+                error=False,
+            )
+            config["metrics_num_episodes_for_smoothing"] = \
+                config["metrics_smoothing_episodes"]
+
         # Evaluation settings.

         # Deprecated setting: `evaluation_num_episodes`.
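
The block added above follows the same pattern as the existing `evaluation_num_episodes` handling: the old key stays in COMMON_CONFIG only as a DEPRECATED_VALUE sentinel, and config validation warns and copies a user-supplied value over to the new key. A standalone, runnable sketch of that sentinel-and-translate pattern (the names _DEPRECATED and translate_deprecated_key are illustrative, not RLlib API):

# Minimal sketch of the sentinel-plus-translation pattern used above.
# `_DEPRECATED` stands in for RLlib's DEPRECATED_VALUE sentinel.
_DEPRECATED = -1

defaults = {
    "metrics_num_episodes_for_smoothing": 100,
    "metrics_smoothing_episodes": _DEPRECATED,  # kept only for back-compat
}

def translate_deprecated_key(config: dict) -> None:
    """If the old key was set by the user, warn and move its value over."""
    if config["metrics_smoothing_episodes"] != _DEPRECATED:
        print("DeprecationWarning: `metrics_smoothing_episodes` has been "
              "renamed to `metrics_num_episodes_for_smoothing`.")
        config["metrics_num_episodes_for_smoothing"] = \
            config["metrics_smoothing_episodes"]

cfg = {**defaults, "metrics_smoothing_episodes": 5}
translate_deprecated_key(cfg)
assert cfg["metrics_num_episodes_for_smoothing"] == 5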

View file

@@ -98,7 +98,7 @@ if __name__ == "__main__":
         "train_batch_size": 2000,
         "learning_starts": 0,
         "bc_iters": 100,
-        "metrics_smoothing_episodes": 5,
+        "metrics_num_episodes_for_smoothing": 5,
         "evaluation_interval": 1,
         "evaluation_num_workers": 2,
         "evaluation_duration": 10,

View file

@@ -101,7 +101,7 @@ def run_heuristic_vs_learned(args, use_lstm=False, trainer="PG"):
         "num_envs_per_worker": 4,
         "rollout_fragment_length": 10,
         "train_batch_size": 200,
-        "metrics_smoothing_episodes": 200,
+        "metrics_num_episodes_for_smoothing": 200,
         "multiagent": {
             "policies_to_train": ["learned"],
             "policies": {

View file

@@ -46,7 +46,7 @@ def StandardMetricsReporting(
         .filter(OncePerTimeInterval(config["min_iter_time_s"])) \
         .for_each(CollectMetrics(
             workers,
-            min_history=config["metrics_smoothing_episodes"],
+            min_history=config["metrics_num_episodes_for_smoothing"],
             timeout_seconds=config["collect_metrics_timeout"],
             selected_workers=selected_workers,
             by_steps_trained=by_steps_trained))

View file

@@ -106,7 +106,7 @@ def test_metrics(ray_start_regular_shared):
         a, workers, {
             "min_iter_time_s": 2.5,
             "timesteps_per_iteration": 0,
-            "metrics_smoothing_episodes": 10,
+            "metrics_num_episodes_for_smoothing": 10,
             "collect_metrics_timeout": 10,
         })
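
For completeness, an end-to-end usage sketch with the renamed key (tune.run with the "PG" trainer string is standard Ray/RLlib usage around this release; the env choice and stopping criterion are illustrative):

import ray
from ray import tune

ray.init()
tune.run(
    "PG",
    config={
        "env": "CartPole-v0",
        "num_workers": 1,
        # Renamed key: reported metrics such as `episode_reward_mean`
        # are averaged over the last 10 completed episodes.
        "metrics_num_episodes_for_smoothing": 10,
    },
    stop={"training_iteration": 2},
)
ray.shutdown()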