mirror of
https://github.com/vale981/ray
synced 2025-03-05 18:11:42 -05:00
[RLlib] Rename `metrics_smoothing_episodes` to `metrics_num_episodes_for_smoothing` for clarity. (#20983)
This commit is contained in:
parent
596c8e2772
commit
db058d0fb3
6 changed files with 19 additions and 7 deletions
|
@ -213,7 +213,7 @@ class MAMLTrainer(Trainer):
|
|||
# Metric Collector
|
||||
metric_collect = CollectMetrics(
|
||||
workers,
|
||||
- min_history=config["metrics_smoothing_episodes"],
|
||||
+ min_history=config["metrics_num_episodes_for_smoothing"],
|
||||
timeout_seconds=config["collect_metrics_timeout"])
|
||||
|
||||
# Iterator for Inner Adaptation Data gathering (from pre->post
|
||||
|
|
|
@ -364,7 +364,7 @@ COMMON_CONFIG: TrainerConfigDict = {
|
|||
# have not returned in time will be collected in the next train iteration.
|
||||
"collect_metrics_timeout": 180,
|
||||
# Smooth metrics over this many episodes.
|
||||
- "metrics_smoothing_episodes": 100,
|
||||
+ "metrics_num_episodes_for_smoothing": 100,
|
||||
# Minimum time per train iteration (frequency of metrics reporting).
|
||||
"min_iter_time_s": 0,
|
||||
# Minimum env steps to optimize for per train call. This value does
|
||||
|
@ -539,6 +539,8 @@ COMMON_CONFIG: TrainerConfigDict = {
|
|||
# Replaced by `evaluation_duration=10` and
|
||||
# `evaluation_duration_unit=episodes`.
|
||||
"evaluation_num_episodes": DEPRECATED_VALUE,
|
||||
+ # Use `metrics_num_episodes_for_smoothing` instead.
|
||||
+ "metrics_smoothing_episodes": DEPRECATED_VALUE,
|
||||
}
|
||||
# __sphinx_doc_end__
|
||||
# yapf: enable
|
||||
|
@ -1701,7 +1703,7 @@ class Trainer(Trainable):
|
|||
"""
|
||||
return self.optimizer.collect_metrics(
|
||||
self.config["collect_metrics_timeout"],
|
||||
- min_history=self.config["metrics_smoothing_episodes"],
|
||||
+ min_history=self.config["metrics_num_episodes_for_smoothing"],
|
||||
selected_workers=selected_workers)
|
||||
|
||||
@override(Trainable)
|
||||
|
@ -2096,6 +2098,16 @@ class Trainer(Trainable):
|
|||
"`count_steps_by` must be one of [env_steps|agent_steps]! "
|
||||
"Got {}".format(config["multiagent"]["count_steps_by"]))
|
||||
|
||||
+ # Metrics settings.
|
||||
+ if config["metrics_smoothing_episodes"] != DEPRECATED_VALUE:
|
||||
+ deprecation_warning(
|
||||
+ old="metrics_smoothing_episodes",
|
||||
+ new="metrics_num_episodes_for_smoothing",
|
||||
+ error=False,
|
||||
+ )
|
||||
+ config["metrics_num_episodes_for_smoothing"] = \
|
||||
+ config["metrics_smoothing_episodes"]
|
||||
|
||||
# Evaluation settings.
|
||||
|
||||
# Deprecated setting: `evaluation_num_episodes`.
|
||||
|
|
|
@ -98,7 +98,7 @@ if __name__ == "__main__":
|
|||
"train_batch_size": 2000,
|
||||
"learning_starts": 0,
|
||||
"bc_iters": 100,
|
||||
- "metrics_smoothing_episodes": 5,
|
||||
+ "metrics_num_episodes_for_smoothing": 5,
|
||||
"evaluation_interval": 1,
|
||||
"evaluation_num_workers": 2,
|
||||
"evaluation_duration": 10,
|
||||
|
|
|
@ -101,7 +101,7 @@ def run_heuristic_vs_learned(args, use_lstm=False, trainer="PG"):
|
|||
"num_envs_per_worker": 4,
|
||||
"rollout_fragment_length": 10,
|
||||
"train_batch_size": 200,
|
||||
- "metrics_smoothing_episodes": 200,
|
||||
+ "metrics_num_episodes_for_smoothing": 200,
|
||||
"multiagent": {
|
||||
"policies_to_train": ["learned"],
|
||||
"policies": {
|
||||
|
|
|
@ -46,7 +46,7 @@ def StandardMetricsReporting(
|
|||
.filter(OncePerTimeInterval(config["min_iter_time_s"])) \
|
||||
.for_each(CollectMetrics(
|
||||
workers,
|
||||
- min_history=config["metrics_smoothing_episodes"],
|
||||
+ min_history=config["metrics_num_episodes_for_smoothing"],
|
||||
timeout_seconds=config["collect_metrics_timeout"],
|
||||
selected_workers=selected_workers,
|
||||
by_steps_trained=by_steps_trained))
|
||||
|
|
|
@ -106,7 +106,7 @@ def test_metrics(ray_start_regular_shared):
|
|||
a, workers, {
|
||||
"min_iter_time_s": 2.5,
|
||||
"timesteps_per_iteration": 0,
|
||||
- "metrics_smoothing_episodes": 10,
|
||||
+ "metrics_num_episodes_for_smoothing": 10,
|
||||
"collect_metrics_timeout": 10,
|
||||
})
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue