[RLlib] Update sac_tf_policy.py (add tf.cast to float32 for rewards) (#14843)

This commit is contained in:
astronauti 2021-03-24 16:12:55 +01:00 committed by GitHub
parent 6708211b59
commit 8874ccec2d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -323,7 +323,7 @@ def sac_actor_critic_loss(
# Compute RHS of bellman equation for the Q-loss (critic(s)).
q_t_selected_target = tf.stop_gradient(
-train_batch[SampleBatch.REWARDS] +
+tf.cast(train_batch[SampleBatch.REWARDS], tf.float32) +
policy.config["gamma"]**policy.config["n_step"] * q_tp1_best_masked)
# Compute the TD-error (potentially clipped).