[RLlib] Cleanup some deprecated metric keys and classes. (#26036)
commit 59a967a3a0 (parent 33b30aed15)
20 changed files with 146 additions and 105 deletions
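Most of the hunks below follow one pattern: algorithms stop writing the legacy STEPS_TRAINED_COUNTER / STEPS_TRAINED_THIS_ITER_COUNTER keys from ray.rllib.execution.common and count via the canonical keys in ray.rllib.utils.metrics, while ray.util.timer._Timer replaces the deprecated TimerStat. A minimal, self-contained sketch of the counter side of that pattern (the plain defaultdict and the helper function are illustrative stand-ins, not part of the diff):

from collections import defaultdict

# Canonical metric keys, with values as in ray.rllib.utils.metrics.
NUM_ENV_STEPS_TRAINED = "num_env_steps_trained"
NUM_AGENT_STEPS_TRAINED = "num_agent_steps_trained"

counters = defaultdict(int)  # stand-in for Algorithm._counters


def record_trained_batch(env_steps: int, agent_steps: int) -> None:
    # Count trained steps under the new keys only; no legacy counters.
    counters[NUM_ENV_STEPS_TRAINED] += env_steps
    counters[NUM_AGENT_STEPS_TRAINED] += agent_steps


record_trained_batch(env_steps=200, agent_steps=400)
assert counters[NUM_ENV_STEPS_TRAINED] == 200
assert counters[NUM_AGENT_STEPS_TRAINED] == 400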

@@ -4,10 +4,6 @@ from typing import Optional
from ray.rllib.algorithms.algorithm import Algorithm
from ray.rllib.algorithms.a3c.a3c import A3CConfig, A3C
from ray.rllib.execution.common import (
STEPS_TRAINED_COUNTER,
STEPS_TRAINED_THIS_ITER_COUNTER,
)
from ray.rllib.execution.rollout_ops import (
synchronous_parallel_sample,
)

@@ -18,8 +14,10 @@ from ray.rllib.utils.metrics import (
APPLY_GRADS_TIMER,
COMPUTE_GRADS_TIMER,
NUM_AGENT_STEPS_SAMPLED,
NUM_AGENT_STEPS_TRAINED,
NUM_ENV_STEPS_SAMPLED,
WORKER_UPDATE_TIMER,
NUM_ENV_STEPS_TRAINED,
SYNCH_WORKER_WEIGHTS_TIMER,
)
from ray.rllib.utils.typing import (
PartialAlgorithmConfigDict,

@@ -188,8 +186,8 @@ class A2C(A3C):
)
if self._num_microbatches >= num_microbatches:
# Update counters.
self._counters[STEPS_TRAINED_COUNTER] += self._microbatches_counts
self._counters[STEPS_TRAINED_THIS_ITER_COUNTER] = self._microbatches_counts
self._counters[NUM_ENV_STEPS_TRAINED] += self._microbatches_counts
self._counters[NUM_AGENT_STEPS_TRAINED] += self._microbatches_counts

# Apply gradients.
apply_timer = self._timers[APPLY_GRADS_TIMER]

@@ -206,7 +204,7 @@ class A2C(A3C):
global_vars = {
"timestep": self._counters[NUM_AGENT_STEPS_SAMPLED],
}
with self._timers[WORKER_UPDATE_TIMER]:
with self._timers[SYNCH_WORKER_WEIGHTS_TIMER]:
self.workers.sync_weights(
policies=self.workers.local_worker().get_policies_to_train(),
global_vars=global_vars,

@@ -42,7 +42,9 @@ from ray.rllib.evaluation.metrics import (
)
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.evaluation.worker_set import WorkerSet
from ray.rllib.execution.common import WORKER_UPDATE_TIMER
from ray.rllib.execution.common import (
STEPS_TRAINED_THIS_ITER_COUNTER, # TODO: Backward compatibility.
)
from ray.rllib.execution.rollout_ops import synchronous_parallel_sample
from ray.rllib.execution.train_ops import multi_gpu_train_one_step, train_one_step
from ray.rllib.offline import get_offline_io_resource_bundles

@@ -73,6 +75,7 @@ from ray.rllib.utils.metrics import (
NUM_ENV_STEPS_SAMPLED,
NUM_ENV_STEPS_SAMPLED_THIS_ITER,
NUM_ENV_STEPS_TRAINED,
SYNCH_WORKER_WEIGHTS_TIMER,
TRAINING_ITERATION_TIMER,
)
from ray.rllib.utils.metrics.learner_info import LEARNER_INFO

@@ -569,7 +572,6 @@ class Algorithm(Trainable):
# Results dict for training (and if applicable: evaluation).
results: ResultDict = {}

self._rollout_worker_metrics = []
local_worker = (
self.workers.local_worker()
if hasattr(self.workers, "local_worker")

@@ -593,13 +595,12 @@ class Algorithm(Trainable):
results.update(self._run_one_evaluation(train_future=None))

# Collect rollout worker metrics.
episodes, self._episodes_to_be_collected = collect_episodes(
episodes_this_iter, self._episodes_to_be_collected = collect_episodes(
local_worker,
self._remote_workers_for_metrics,
self._episodes_to_be_collected,
timeout_seconds=self.config["metrics_episode_collection_timeout_s"],
)
self._rollout_worker_metrics.extend(episodes)

# Attach latest available evaluation results to train results,
# if necessary.

@@ -613,9 +614,10 @@ class Algorithm(Trainable):
# Sync filters on workers.
self._sync_filters_if_needed(self.workers)

# Collect worker metrics.
# Collect worker metrics and combine them with `results`.
if self.config["_disable_execution_plan_api"]:
results = self._compile_iteration_results(
episodes_this_iter=episodes_this_iter,
step_ctx=train_iter_ctx,
iteration_results=results,
)

@@ -780,19 +782,20 @@ class Algorithm(Trainable):
< units_left_to_do
]
)
agent_steps_this_iter = sum(b.agent_steps() for b in batches)
env_steps_this_iter = sum(b.env_steps() for b in batches)
_agent_steps = sum(b.agent_steps() for b in batches)
_env_steps = sum(b.env_steps() for b in batches)
# 1 episode per returned batch.
if unit == "episodes":
num_units_done += len(batches)
# n timesteps per returned batch.
else:
num_units_done += (
agent_steps_this_iter
if self._by_agent_steps
else env_steps_this_iter
_agent_steps if self._by_agent_steps else _env_steps
)

agent_steps_this_iter += _agent_steps
env_steps_this_iter += _env_steps

logger.info(
f"Ran round {round_} of parallel evaluation "
f"({num_units_done}/{duration if not auto else '?'} "

@@ -862,7 +865,7 @@ class Algorithm(Trainable):
global_vars = {
"timestep": self._counters[NUM_ENV_STEPS_SAMPLED],
}
with self._timers[WORKER_UPDATE_TIMER]:
with self._timers[SYNCH_WORKER_WEIGHTS_TIMER]:
self.workers.sync_weights(global_vars=global_vars)

return train_results

@@ -2366,7 +2369,9 @@ class Algorithm(Trainable):
* eval_cfg["num_envs_per_worker"]
)

def _compile_iteration_results(self, *, step_ctx, iteration_results=None):
def _compile_iteration_results(
self, *, episodes_this_iter, step_ctx, iteration_results=None
):
# Return dict.
results: ResultDict = {}
iteration_results = iteration_results or {}

@@ -2382,18 +2387,33 @@ class Algorithm(Trainable):
# Learner info.
results["info"] = {LEARNER_INFO: iteration_results}

episodes = self._rollout_worker_metrics
orig_episodes = list(episodes)
missing = self.config["metrics_num_episodes_for_smoothing"] - len(episodes)
# Calculate how many (if any) of older, historical episodes we have to add to
# `episodes_this_iter` in order to reach the required smoothing window.
episodes_for_metrics = episodes_this_iter[:]
missing = self.config["metrics_num_episodes_for_smoothing"] - len(
episodes_this_iter
)
# We have to add some older episodes to reach the smoothing window size.
if missing > 0:
episodes = self._episode_history[-missing:] + episodes
assert len(episodes) <= self.config["metrics_num_episodes_for_smoothing"]
self._episode_history.extend(orig_episodes)
episodes_for_metrics = self._episode_history[-missing:] + episodes_this_iter
assert (
len(episodes_for_metrics)
<= self.config["metrics_num_episodes_for_smoothing"]
)
# Note that when there are more than `metrics_num_episodes_for_smoothing`
# episodes in `episodes_for_metrics`, leave them as-is. In this case, we'll
# compute the stats over that larger number.

# Add new episodes to our history and make sure it doesn't grow larger than
# needed.
self._episode_history.extend(episodes_this_iter)
self._episode_history = self._episode_history[
-self.config["metrics_num_episodes_for_smoothing"] :
]
results["sampler_results"] = summarize_episodes(
episodes, orig_episodes, self.config["keep_per_episode_custom_metrics"]
episodes_for_metrics,
episodes_this_iter,
self.config["keep_per_episode_custom_metrics"],
)
# TODO: Don't dump sampler results into top-level.
results.update(results["sampler_results"])

@@ -2413,11 +2433,15 @@ class Algorithm(Trainable):
results[NUM_AGENT_STEPS_TRAINED + "_this_iter"] = step_ctx.trained
# TODO: For CQL and other algos, count by trained steps.
results["timesteps_total"] = self._counters[NUM_AGENT_STEPS_SAMPLED]
# TODO: Backward compatibility.
results[STEPS_TRAINED_THIS_ITER_COUNTER] = step_ctx.trained
else:
results[NUM_ENV_STEPS_SAMPLED + "_this_iter"] = step_ctx.sampled
results[NUM_ENV_STEPS_TRAINED + "_this_iter"] = step_ctx.trained
# TODO: For CQL and other algos, count by trained steps.
results["timesteps_total"] = self._counters[NUM_ENV_STEPS_SAMPLED]
# TODO: Backward compatibility.
results[STEPS_TRAINED_THIS_ITER_COUNTER] = step_ctx.trained
# TODO: Backward compatibility.
results["agent_timesteps_total"] = self._counters[NUM_AGENT_STEPS_SAMPLED]
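
A hedged, standalone sketch of the episode-smoothing bookkeeping in the _compile_iteration_results hunk above (the names and plain dicts are illustrative; the real code operates on Algorithm._episode_history and RolloutMetrics objects):

from typing import List, Tuple


def smooth_episodes(
    episodes_this_iter: List[dict],
    episode_history: List[dict],
    smoothing_window: int,
) -> Tuple[List[dict], List[dict]]:
    # Return (episodes to compute stats over, updated history).
    episodes_for_metrics = episodes_this_iter[:]
    missing = smoothing_window - len(episodes_this_iter)
    if missing > 0:
        # Pad with the newest historical episodes to reach the window size.
        episodes_for_metrics = episode_history[-missing:] + episodes_this_iter
    # If more episodes than the window were collected this iteration, keep
    # them all; stats are then computed over the larger set.
    episode_history = (episode_history + episodes_this_iter)[-smoothing_window:]
    return episodes_for_metrics, episode_history


eps, hist = smooth_episodes([{"r": 1.0}], [{"r": 0.0}] * 5, smoothing_window=3)
assert len(eps) == 3 and len(hist) == 3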

@@ -1111,7 +1111,14 @@ class AlgorithmConfig:
metrics_episode_collection_timeout_s: Wait for metric batches for at most
this many seconds. Those that have not returned in time will be
collected in the next train iteration.
metrics_num_episodes_for_smoothing: Smooth metrics over this many episodes.
metrics_num_episodes_for_smoothing: Smooth rollout metrics over this many
episodes, if possible.
In case rollouts (sample collection) just started, there may be fewer
than this many episodes in the buffer and we'll compute metrics
over this smaller number of available episodes.
In case there are more than this many episodes collected in a single
training iteration, use all of these episodes for metrics computation,
meaning don't ever cut any "excess" episodes.
min_time_s_per_iteration: Minimum time to accumulate within a single
`train()` call. This value does not affect learning,
only the number of times `Algorithm.training_step()` is called by

@@ -25,10 +25,6 @@ from ray.rllib.algorithms import Algorithm
from ray.rllib.algorithms.dqn.dqn import DQN, DQNConfig
from ray.rllib.algorithms.dqn.learner_thread import LearnerThread
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.execution.common import (
STEPS_TRAINED_COUNTER,
STEPS_TRAINED_THIS_ITER_COUNTER,
)
from ray.rllib.execution.parallel_requests import AsyncRequestsManager
from ray.rllib.policy.sample_batch import MultiAgentBatch
from ray.rllib.utils.actors import create_colocated_actors

@@ -507,6 +503,7 @@ class ApexDQN(DQN):
Args:
_num_samples_ready: A mapping from ActorHandle (RolloutWorker) to
the number of samples returned by the remote worker.

Returns:
The number of remote workers whose weights were updated.
"""

@@ -517,6 +514,9 @@ class ApexDQN(DQN):
self.learner_thread.weights_updated = False
weights = self.workers.local_worker().get_weights()
self.curr_learner_weights = ray.put(weights)

num_workers_updated = 0

with self._timers[SYNCH_WORKER_WEIGHTS_TIMER]:
for (
remote_sampler_worker,

@@ -529,11 +529,21 @@ class ApexDQN(DQN):
):
remote_sampler_worker.set_weights.remote(
self.curr_learner_weights,
{"timestep": self._counters[STEPS_TRAINED_COUNTER]},
{
"timestep": self._counters[
NUM_AGENT_STEPS_TRAINED
if self._by_agent_steps
else NUM_ENV_STEPS_TRAINED
]
},
)
self.steps_since_update[remote_sampler_worker] = 0
num_workers_updated += 1

self._counters["num_weight_syncs"] += 1

return num_workers_updated

def sample_from_replay_buffer_place_on_learner_queue_non_blocking(
self, num_samples_collected: Dict[ActorHandle, int]
) -> None:

@@ -617,7 +627,6 @@ class ApexDQN(DQN):
else:
raise RuntimeError("The learner thread died while training")

self._counters[STEPS_TRAINED_THIS_ITER_COUNTER] = num_samples_trained_this_itr
self._timers["learner_dequeue"] = self.learner_thread.queue_timer
self._timers["learner_grad"] = self.learner_thread.grad_timer
self._timers["learner_overall"] = self.learner_thread.overall_timer

@@ -637,7 +646,9 @@ class ApexDQN(DQN):
)
self._counters[NUM_TARGET_UPDATES] += 1
self._counters[LAST_TARGET_UPDATE_TS] = self._counters[
STEPS_TRAINED_COUNTER
NUM_AGENT_STEPS_TRAINED
if self._by_agent_steps
else NUM_ENV_STEPS_TRAINED
]

@override(Algorithm)

@@ -657,10 +668,8 @@ class ApexDQN(DQN):
self._sampling_actor_manager.add_workers(new_workers)

@override(Algorithm)
def _compile_iteration_results(self, *, step_ctx, iteration_results=None):
result = super()._compile_iteration_results(
step_ctx=step_ctx, iteration_results=iteration_results
)
def _compile_iteration_results(self, *args, **kwargs):
result = super()._compile_iteration_results(*args, **kwargs)
replay_stats = ray.get(
self._replay_actors[0].stats.remote(self.config["optimizer"].get("debug"))
)
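
The ApexDQN hunks above read the "timestep" sent to remote workers from the agent- or env-step counter instead of the removed STEPS_TRAINED_COUNTER. A minimal sketch of that selection (plain dict instead of Algorithm._counters; the helper is illustrative):

NUM_AGENT_STEPS_TRAINED = "num_agent_steps_trained"
NUM_ENV_STEPS_TRAINED = "num_env_steps_trained"


def current_train_timestep(counters: dict, by_agent_steps: bool) -> int:
    # Pick the counter that the algorithm counts training progress by.
    key = NUM_AGENT_STEPS_TRAINED if by_agent_steps else NUM_ENV_STEPS_TRAINED
    return counters[key]


counters = {NUM_AGENT_STEPS_TRAINED: 4000, NUM_ENV_STEPS_TRAINED: 1000}
assert current_train_timestep(counters, by_agent_steps=True) == 4000
assert current_train_timestep(counters, by_agent_steps=False) == 1000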

@@ -24,9 +24,6 @@ import ray
from ray.rllib.algorithms.ppo import PPOConfig, PPO
from ray.rllib.evaluation.postprocessing import Postprocessing
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.execution.common import (
STEPS_TRAINED_THIS_ITER_COUNTER,
)
from ray.rllib.execution.parallel_requests import AsyncRequestsManager
from ray.rllib.utils.annotations import override
from ray.rllib.utils.deprecation import Deprecated

@@ -297,10 +294,8 @@ class DDPPO(PPO):
# - Update the worker's global_vars.
# - Build info dict using a LearnerInfoBuilder object.
learner_info_builder = LearnerInfoBuilder(num_devices=1)
steps_this_iter = 0
for worker, results in sample_and_update_results.items():
for result in results:
steps_this_iter += result["env_steps"]
self._counters[NUM_AGENT_STEPS_SAMPLED] += result["agent_steps"]
self._counters[NUM_AGENT_STEPS_TRAINED] += result["agent_steps"]
self._counters[NUM_ENV_STEPS_SAMPLED] += result["env_steps"]

@@ -315,8 +310,6 @@ class DDPPO(PPO):
for worker in self.workers.remote_workers():
worker.set_global_vars.remote(global_vars)

self._counters[STEPS_TRAINED_THIS_ITER_COUNTER] = steps_this_iter

# Sync down the weights from 1st remote worker (only if we have received
# some results from it).
# As with the sync up, this is not really needed unless the user is

@@ -1,10 +1,10 @@
import queue
import threading

from ray.util.timer import _Timer
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.metrics.learner_info import LearnerInfoBuilder
from ray.rllib.utils.metrics.window_stat import WindowStat
from ray.rllib.utils.timer import TimerStat

LEARNER_QUEUE_MAX_SIZE = 16

@@ -26,9 +26,9 @@ class LearnerThread(threading.Thread):
self.local_worker = local_worker
self.inqueue = queue.Queue(maxsize=LEARNER_QUEUE_MAX_SIZE)
self.outqueue = queue.Queue()
self.queue_timer = TimerStat()
self.grad_timer = TimerStat()
self.overall_timer = TimerStat()
self.queue_timer = _Timer()
self.grad_timer = _Timer()
self.overall_timer = _Timer()
self.daemon = True
self.weights_updated = False
self.stopped = False

@@ -16,6 +16,10 @@ from ray.rllib.execution.rollout_ops import (
)
from ray.rllib.utils.annotations import override
from ray.rllib.utils.deprecation import Deprecated
from ray.rllib.utils.metrics import (
NUM_AGENT_STEPS_SAMPLED,
NUM_ENV_STEPS_SAMPLED,
)
from ray.rllib.utils.metrics.learner_info import LEARNER_INFO
from ray.rllib.utils.typing import (
PartialAlgorithmConfigDict,

@@ -383,10 +387,11 @@ class Dreamer(Algorithm):
# Number of sub-iterations for Dreamer
dreamer_train_iters = self.config["dreamer_train_iters"]
batch_size = self.config["batch_size"]
action_repeat = self.config["action_repeat"]

# Collect SampleBatches from rollout workers.
batch = synchronous_parallel_sample(worker_set=self.workers)
self._counters[NUM_AGENT_STEPS_SAMPLED] += batch.agent_steps()
self._counters[NUM_ENV_STEPS_SAMPLED] += batch.env_steps()

fetches = {}

@@ -398,25 +403,16 @@ class Dreamer(Algorithm):
fetches = local_worker.learn_on_batch(batch)

if fetches:
# Custom Logging
# Custom logging.
policy_fetches = fetches[DEFAULT_POLICY_ID]["learner_stats"]
if "log_gif" in policy_fetches:
gif = policy_fetches["log_gif"]
policy_fetches["log_gif"] = self._postprocess_gif(gif)

self._counters[STEPS_SAMPLED_COUNTER] = (
self.local_replay_buffer.timesteps * action_repeat
)

self.local_replay_buffer.add(batch)

return fetches

def _compile_iteration_results(self, *args, **kwargs):
results = super()._compile_iteration_results(*args, **kwargs)
results["timesteps_total"] = self._counters[STEPS_SAMPLED_COUNTER]
return results


# Deprecated: Use ray.rllib.algorithms.dreamer.DreamerConfig instead!
class _deprecated_default_config(dict):

@@ -37,6 +37,8 @@ from ray.rllib.utils.metrics import (
NUM_AGENT_STEPS_TRAINED,
NUM_ENV_STEPS_SAMPLED,
NUM_ENV_STEPS_TRAINED,
NUM_SYNCH_WORKER_WEIGHTS,
NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS,
)
from ray.rllib.utils.replay_buffers.multi_agent_replay_buffer import ReplayMode
from ray.rllib.utils.replay_buffers.replay_buffer import _ALL_POLICIES

@@ -822,7 +824,6 @@ class Impala(Algorithm):
final_learner_info = builder.finalize()

# Update the steps trained counters.
self._counters[STEPS_TRAINED_THIS_ITER_COUNTER] = num_agent_steps_trained
self._counters[NUM_ENV_STEPS_TRAINED] += num_env_steps_trained
self._counters[NUM_AGENT_STEPS_TRAINED] += num_agent_steps_trained

@@ -874,17 +875,17 @@ class Impala(Algorithm):
def update_workers_if_necessary(self) -> None:
# Only need to update workers if there are remote workers.
global_vars = {"timestep": self._counters[NUM_AGENT_STEPS_TRAINED]}
self._counters["steps_since_broadcast"] += 1
self._counters[NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS] += 1
if (
self.workers.remote_workers()
and self._counters["steps_since_broadcast"]
and self._counters[NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS]
>= self.config["broadcast_interval"]
and self.workers_that_need_updates
):
weights = ray.put(self.workers.local_worker().get_weights())
self._counters["steps_since_broadcast"] = 0
self._counters[NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS] = 0
self._learner_thread.weights_updated = False
self._counters["num_weight_broadcasts"] += 1
self._counters[NUM_SYNCH_WORKER_WEIGHTS] += 1

for worker in self.workers_that_need_updates:
worker.set_weights.remote(weights, global_vars)

@@ -910,10 +911,8 @@ class Impala(Algorithm):
self._sampling_actor_manager.add_workers(new_workers)

@override(Algorithm)
def _compile_iteration_results(self, *, step_ctx, iteration_results=None):
result = super()._compile_iteration_results(
step_ctx=step_ctx, iteration_results=iteration_results
)
def _compile_iteration_results(self, *args, **kwargs):
result = super()._compile_iteration_results(*args, **kwargs)
result = self._learner_thread.add_learner_metrics(
result, overwrite_learner_info=False
)
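
The Impala hunk above renames the ad-hoc "steps_since_broadcast" / "num_weight_broadcasts" counters to the new public keys. A standalone sketch of the broadcast gating, assuming the key values defined in the utils/metrics hunk further below (the function itself is illustrative, not RLlib code):

from collections import defaultdict

NUM_SYNCH_WORKER_WEIGHTS = "num_weight_broadcasts"
NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS = (
    "num_training_step_calls_since_last_synch_worker_weights"
)


def maybe_broadcast(counters, broadcast_interval, has_remote_workers):
    # Count training_step() calls since the last weight broadcast; once the
    # configured interval is reached, reset the counter and record one sync.
    counters[NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS] += 1
    if (
        has_remote_workers
        and counters[NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS]
        >= broadcast_interval
    ):
        counters[NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS] = 0
        counters[NUM_SYNCH_WORKER_WEIGHTS] += 1
        return True  # Caller would now ray.put() the weights and push them out.
    return False


c = defaultdict(int)
assert [maybe_broadcast(c, 2, True) for _ in range(4)] == [False, True, False, True]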

@@ -17,7 +17,7 @@ from ray.rllib.utils.deprecation import Deprecated, DEPRECATED_VALUE
from ray.rllib.utils.metrics import (
NUM_AGENT_STEPS_SAMPLED,
NUM_ENV_STEPS_SAMPLED,
WORKER_UPDATE_TIMER,
SYNCH_WORKER_WEIGHTS_TIMER,
)
from ray.rllib.utils.typing import (
ResultDict,

@@ -284,7 +284,7 @@ class MARWIL(Algorithm):
# Update weights - after learning on the local worker - on all remote
# workers.
if self.workers.remote_workers():
with self._timers[WORKER_UPDATE_TIMER]:
with self._timers[SYNCH_WORKER_WEIGHTS_TIMER]:
self.workers.sync_weights(global_vars=global_vars)

# Update global vars on local worker as well.

@@ -38,7 +38,7 @@ from ray.rllib.execution.rollout_ops import synchronous_parallel_sample
from ray.rllib.utils.metrics import (
NUM_AGENT_STEPS_SAMPLED,
NUM_ENV_STEPS_SAMPLED,
WORKER_UPDATE_TIMER,
SYNCH_WORKER_WEIGHTS_TIMER,
)

logger = logging.getLogger(__name__)

@@ -426,7 +426,7 @@ class PPO(Algorithm):
# Update weights - after learning on the local worker - on all remote
# workers.
if self.workers.remote_workers():
with self._timers[WORKER_UPDATE_TIMER]:
with self._timers[SYNCH_WORKER_WEIGHTS_TIMER]:
self.workers.sync_weights(global_vars=global_vars)

# For each policy: update KL scale and warn about possible issues

@@ -139,9 +139,10 @@ def summarize_episodes(
"""Summarizes a set of episode metrics tuples.

Args:
episodes: smoothed set of episodes including historical ones
new_episodes: just the new episodes in this iteration. This must be
a subset of `episodes`. If None, assumes all episodes are new.
episodes: List of most recent n episodes. This may include historical ones
(not newly collected in this iteration) in order to achieve the size of
the smoothing window.
new_episodes: All the episodes that were completed in this iteration.
"""

if new_episodes is None:

@@ -3,11 +3,11 @@ import platform
import random
from typing import Optional

from ray.util.timer import _Timer
from ray.rllib.execution.replay_ops import SimpleReplayBuffer
from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID, SampleBatch
from ray.rllib.utils.replay_buffers.multi_agent_replay_buffer import ReplayMode
from ray.rllib.utils.replay_buffers.replay_buffer import _ALL_POLICIES
from ray.rllib.utils.timer import TimerStat
from ray.rllib.utils.typing import PolicyID, SampleBatchType

@@ -87,9 +87,9 @@ class MixInMultiAgentReplayBuffer:
self.replay_buffers = collections.defaultdict(new_buffer)

# Metrics.
self.add_batch_timer = TimerStat()
self.replay_timer = TimerStat()
self.update_priorities_timer = TimerStat()
self.add_batch_timer = _Timer()
self.replay_timer = _Timer()
self.update_priorities_timer = _Timer()

# Added timesteps over lifetime.
self.num_added = 0

@@ -9,7 +9,7 @@ from ray.rllib.utils.metrics import ( # noqa: F401
NUM_TARGET_UPDATES,
APPLY_GRADS_TIMER,
COMPUTE_GRADS_TIMER,
WORKER_UPDATE_TIMER,
SYNCH_WORKER_WEIGHTS_TIMER as WORKER_UPDATE_TIMER,
GRAD_WAIT_TIMER,
SAMPLE_TIMER,
LEARN_ON_BATCH_TIMER,

@@ -3,11 +3,11 @@ from six.moves import queue
import threading
from typing import Dict, Optional

from ray.util.timer import _Timer
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.execution.minibatch_buffer import MinibatchBuffer
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.metrics.learner_info import LearnerInfoBuilder, LEARNER_INFO
from ray.rllib.utils.timer import TimerStat
from ray.rllib.utils.metrics.window_stat import WindowStat
from ray.util.iter import _NextValueNotReady

@@ -56,10 +56,10 @@ class LearnerThread(threading.Thread):
num_passes=num_sgd_iter,
init_num_passes=num_sgd_iter,
)
self.queue_timer = TimerStat()
self.grad_timer = TimerStat()
self.load_timer = TimerStat()
self.load_wait_timer = TimerStat()
self.queue_timer = _Timer()
self.grad_timer = _Timer()
self.load_timer = _Timer()
self.load_wait_timer = _Timer()
self.daemon = True
self.weights_updated = False
self.learner_info = {}

@@ -2,6 +2,7 @@ import logging
from six.moves import queue
import threading

from ray.util.timer import _Timer
from ray.rllib.execution.learner_thread import LearnerThread
from ray.rllib.execution.minibatch_buffer import MinibatchBuffer
from ray.rllib.policy.sample_batch import SampleBatch

@@ -9,7 +10,6 @@ from ray.rllib.utils.annotations import override
from ray.rllib.utils.deprecation import deprecation_warning
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.metrics.learner_info import LearnerInfoBuilder
from ray.rllib.utils.timer import TimerStat
from ray.rllib.evaluation.rollout_worker import RolloutWorker

tf1, tf, tfv = try_import_tf()

@@ -192,8 +192,8 @@ class _MultiGPULoaderThread(threading.Thread):
self.queue_timer = multi_gpu_learner_thread.queue_timer
self.load_timer = multi_gpu_learner_thread.load_timer
else:
self.queue_timer = TimerStat()
self.load_timer = TimerStat()
self.queue_timer = _Timer()
self.load_timer = _Timer()

def run(self) -> None:
while True:

@@ -16,7 +16,6 @@ from ray.rllib.execution.common import (
STEPS_SAMPLED_COUNTER,
STEPS_TRAINED_COUNTER,
STEPS_TRAINED_THIS_ITER_COUNTER,
WORKER_UPDATE_TIMER,
_check_sample_batch_type,
_get_global_vars,
_get_shared_metrics,

@@ -25,7 +24,11 @@ from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID, MultiAgentBatch
from ray.rllib.utils.annotations import DeveloperAPI
from ray.rllib.utils.deprecation import DEPRECATED_VALUE, deprecation_warning
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.metrics import NUM_ENV_STEPS_TRAINED, NUM_AGENT_STEPS_TRAINED
from ray.rllib.utils.metrics import (
NUM_ENV_STEPS_TRAINED,
NUM_AGENT_STEPS_TRAINED,
SYNCH_WORKER_WEIGHTS_TIMER,
)
from ray.rllib.utils.metrics.learner_info import LearnerInfoBuilder, LEARNER_INFO
from ray.rllib.utils.sgd import do_minibatch_sgd
from ray.rllib.utils.typing import PolicyID, SampleBatchType, ModelGradients

@@ -231,7 +234,7 @@ class TrainOneStep:
# Update weights - after learning on the local worker - on all remote
# workers.
if self.workers.remote_workers():
with metrics.timers[WORKER_UPDATE_TIMER]:
with metrics.timers[SYNCH_WORKER_WEIGHTS_TIMER]:
weights = ray.put(
lw.get_weights(self.policies or lw.get_policies_to_train(batch))
)

@@ -354,7 +357,7 @@ class MultiGPUTrainOneStep:
metrics.info[LEARNER_INFO] = learner_info

if self.workers.remote_workers():
with metrics.timers[WORKER_UPDATE_TIMER]:
with metrics.timers[SYNCH_WORKER_WEIGHTS_TIMER]:
weights = ray.put(
self.workers.local_worker().get_weights(
self.local_worker.get_policies_to_train()

@@ -453,7 +456,7 @@ class ApplyGradients:

if self.update_all:
if self.workers.remote_workers():
with metrics.timers[WORKER_UPDATE_TIMER]:
with metrics.timers[SYNCH_WORKER_WEIGHTS_TIMER]:
weights = ray.put(
self.local_worker.get_weights(
self.policies or self.local_worker.get_policies_to_train()

@@ -468,7 +471,7 @@ class ApplyGradients:
"update_all=False, `current_actor` must be set "
"in the iterator context."
)
with metrics.timers[WORKER_UPDATE_TIMER]:
with metrics.timers[SYNCH_WORKER_WEIGHTS_TIMER]:
weights = self.local_worker.get_weights(
self.policies or self.local_worker.get_policies_to_train()
)

@@ -8,11 +8,18 @@ NUM_AGENT_STEPS_TRAINED = "num_agent_steps_trained"
NUM_ENV_STEPS_TRAINED_THIS_ITER = "num_env_steps_trained_this_iter"
NUM_AGENT_STEPS_TRAINED_THIS_ITER = "num_agent_steps_trained_this_iter"

# Counters for keeping track of worker weight updates (synchronization
# between local worker and remote workers).
NUM_SYNCH_WORKER_WEIGHTS = "num_weight_broadcasts"
NUM_TRAINING_STEP_CALLS_SINCE_LAST_SYNCH_WORKER_WEIGHTS = (
"num_training_step_calls_since_last_synch_worker_weights"
)

# Counters to track target network updates.
LAST_TARGET_UPDATE_TS = "last_target_update_ts"
NUM_TARGET_UPDATES = "num_target_updates"

# Performance timers (keys for Algorithm._timers or metrics.timers).
# Performance timers (keys for Algorithm._timers).
TRAINING_ITERATION_TIMER = "training_iteration"
APPLY_GRADS_TIMER = "apply_grad"
COMPUTE_GRADS_TIMER = "compute_grads"

@@ -22,6 +29,3 @@ SAMPLE_TIMER = "sample"
LEARN_ON_BATCH_TIMER = "learn"
LOAD_BATCH_TIMER = "load"
TARGET_NET_UPDATE_TIMER = "target_net_update"

# Deprecated: Use `SYNCH_WORKER_WEIGHTS_TIMER` instead.
WORKER_UPDATE_TIMER = "update"
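
A hedged sketch of how these timer keys are typically used together with ray.util.timer._Timer, which this commit substitutes for the deprecated TimerStat (the string value of SYNCH_WORKER_WEIGHTS_TIMER below is illustrative, and the defaultdict stands in for Algorithm._timers):

import time
from collections import defaultdict

from ray.util.timer import _Timer

SYNCH_WORKER_WEIGHTS_TIMER = "synch_weights"  # illustrative value

_timers = defaultdict(_Timer)  # stand-in for Algorithm._timers

with _timers[SYNCH_WORKER_WEIGHTS_TIMER]:
    time.sleep(0.01)  # ... weight sync to remote workers would happen here ...

print(_timers[SYNCH_WORKER_WEIGHTS_TIMER].mean)  # average seconds per timed block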

@@ -2,6 +2,7 @@ from typing import Dict
import logging
import numpy as np

from ray.util.timer import _Timer
from ray.rllib.utils.annotations import override
from ray.rllib.utils.replay_buffers.multi_agent_replay_buffer import (
MultiAgentReplayBuffer,

@@ -16,7 +17,6 @@ from ray.rllib.utils.replay_buffers.replay_buffer import (
)
from ray.rllib.utils.typing import PolicyID, SampleBatchType
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.utils.timer import TimerStat
from ray.util.debug import log_once
from ray.util.annotations import DeveloperAPI
from ray.rllib.policy.rnn_sequencing import timeslice_along_seq_lens_with_overlap

@@ -137,7 +137,7 @@ class MultiAgentPrioritizedReplayBuffer(
)

self.prioritized_replay_eps = prioritized_replay_eps
self.update_priorities_timer = TimerStat()
self.update_priorities_timer = _Timer()

@DeveloperAPI
@override(MultiAgentReplayBuffer)

@@ -3,6 +3,7 @@ import logging
from enum import Enum
from typing import Any, Dict, Optional

from ray.util.timer import _Timer
from ray.rllib.policy.rnn_sequencing import timeslice_along_seq_lens_with_overlap
from ray.rllib.policy.sample_batch import MultiAgentBatch, SampleBatch
from ray.rllib.utils.annotations import override

@@ -13,7 +14,6 @@ from ray.rllib.utils.replay_buffers.replay_buffer import (
ReplayBuffer,
StorageUnit,
)
from ray.rllib.utils.timer import TimerStat
from ray.rllib.utils.typing import PolicyID, SampleBatchType
from ray.util.annotations import DeveloperAPI
from ray.util.debug import log_once

@@ -184,8 +184,8 @@ class MultiAgentReplayBuffer(ReplayBuffer):
self.replay_buffers = collections.defaultdict(new_buffer)

# Metrics.
self.add_batch_timer = TimerStat()
self.replay_timer = TimerStat()
self.add_batch_timer = _Timer()
self.replay_timer = _Timer()
self._num_added = 0

def __len__(self) -> int:

@@ -1,3 +1,10 @@
from ray.util.timer import _Timer
from ray.rllib.utils.deprecation import deprecation_warning

deprecation_warning(
old="ray.rllib.utils.timer::TimerStat",
new="ray.util.timer::_Timer",
error=False,
)

TimerStat = _Timer # backwards compatibility alias
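
What the shim above means for downstream code: the old import path keeps working (it now emits a deprecation warning at import time) and resolves to the new class.

from ray.rllib.utils.timer import TimerStat  # triggers deprecation_warning(...)
from ray.util.timer import _Timer

assert TimerStat is _Timer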