[RLlib] Fix "Cannot convert a symbolic Tensor (default_policy/strided_slice_3:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported" (#17587)

2025-03-06 02:21:39 -05:00 · 2021-08-05 11:39:15 -04:00 · 2021-08-05 11:39:15 -04:00 · 3013d9b341
commit 3013d9b341
parent 3ae5229b44
3 changed files with 3 additions and 6 deletions
--- a/rllib/utils/exploration/gaussian_noise.py
+++ b/rllib/utils/exploration/gaussian_noise.py
@@ -125,14 +125,13 @@ class GaussianNoise(Exploration):
        )

        # Chose by `explore` (main exploration switch).
-        batch_size = tf.shape(deterministic_actions)[0]
        action = tf.cond(
            pred=tf.constant(explore, dtype=tf.bool)
            if isinstance(explore, bool) else explore,
            true_fn=lambda: stochastic_actions,
            false_fn=lambda: deterministic_actions)
        # Logp=always zero.
-        logp = tf.zeros(shape=(batch_size, ), dtype=tf.float32)
+        logp = tf.zeros_like(deterministic_actions, dtype=tf.float32)[:, 0]

        # Increment `last_timestep` by 1 (or set to `timestep`).
        if self.framework in ["tf2", "tfe"]:
--- a/rllib/utils/exploration/ornstein_uhlenbeck_noise.py
+++ b/rllib/utils/exploration/ornstein_uhlenbeck_noise.py
@@ -134,8 +134,7 @@ class OrnsteinUhlenbeckNoise(GaussianNoise):
            true_fn=lambda: exploration_actions,
            false_fn=lambda: deterministic_actions)
        # Logp=always zero.
-        batch_size = tf.shape(deterministic_actions)[0]
-        logp = tf.zeros(shape=(batch_size, ), dtype=tf.float32)
+        logp = tf.zeros_like(deterministic_actions, dtype=tf.float32)[:, 0]

        # Increment `last_timestep` by 1 (or set to `timestep`).
        if self.framework in ["tf2", "tfe"]:
--- a/rllib/utils/exploration/random.py
+++ b/rllib/utils/exploration/random.py
@@ -130,8 +130,7 @@ class Random(Exploration):
            false_fn=false_fn)

        # TODO(sven): Move into (deterministic_)sample(logp=True|False)
-        batch_size = tf.shape(tree.flatten(action)[0])[0]
-        logp = tf.zeros(shape=(batch_size, ), dtype=tf.float32)
+        logp = tf.zeros_like(tree.flatten(action)[0], dtype=tf.float32)[:1]
        return action, logp

    def get_torch_exploration_action(self, action_dist: ActionDistribution,