Checkpoint Adam momenta for DDPG (#7449)

2025-03-06 10:31:39 -05:00 · 2020-03-04 10:03:41 -08:00 · 2020-03-04 10:03:41 -08:00 · aa4861c2a0
commit aa4861c2a0
parent fe7820fec9
1 changed file with 2 additions and 1 deletion
--- a/rllib/agents/ddpg/ddpg_policy.py
+++ b/rllib/agents/ddpg/ddpg_policy.py
@@ -295,7 +295,8 @@ class DDPGTFPolicy(DDPGPostprocessing, TFPolicy):
        # Note that this encompasses both the policy and Q-value networks and
        # their corresponding target networks
        self.variables = ray.experimental.tf_utils.TensorFlowVariables(
-            tf.group(q_t_det_policy, q_tp1), self.sess)
+            tf.group(q_t_det_policy, q_tp1, self._actor_optimizer.variables(),
+                     self._critic_optimizer.variables()), self.sess)

        # Hard initial update
        self.update_target(tau=1.0)