[RLlib] Fix "Cannot convert a symbolic Tensor (default_policy/strided_slice_3:0) to a numpy array. This error may indicate that you're trying to pass a Tensor to a NumPy call, which is not supported" (#17587)

This commit is contained in:
Sven Mika 2021-08-05 11:39:15 -04:00 committed by GitHub
parent 3ae5229b44
commit 3013d9b341
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 3 additions and 6 deletions

View file

@ -125,14 +125,13 @@ class GaussianNoise(Exploration):
)
# Chosen by `explore` (main exploration switch).
batch_size = tf.shape(deterministic_actions)[0]
action = tf.cond(
pred=tf.constant(explore, dtype=tf.bool)
if isinstance(explore, bool) else explore,
true_fn=lambda: stochastic_actions,
false_fn=lambda: deterministic_actions)
# Logp=always zero.
logp = tf.zeros(shape=(batch_size, ), dtype=tf.float32)
logp = tf.zeros_like(deterministic_actions, dtype=tf.float32)[:, 0]
# Increment `last_timestep` by 1 (or set to `timestep`).
if self.framework in ["tf2", "tfe"]:

View file

@ -134,8 +134,7 @@ class OrnsteinUhlenbeckNoise(GaussianNoise):
true_fn=lambda: exploration_actions,
false_fn=lambda: deterministic_actions)
# Logp=always zero.
batch_size = tf.shape(deterministic_actions)[0]
logp = tf.zeros(shape=(batch_size, ), dtype=tf.float32)
logp = tf.zeros_like(deterministic_actions, dtype=tf.float32)[:, 0]
# Increment `last_timestep` by 1 (or set to `timestep`).
if self.framework in ["tf2", "tfe"]:

View file

@ -130,8 +130,7 @@ class Random(Exploration):
false_fn=false_fn)
# TODO(sven): Move into (deterministic_)sample(logp=True|False)
batch_size = tf.shape(tree.flatten(action)[0])[0]
logp = tf.zeros(shape=(batch_size, ), dtype=tf.float32)
logp = tf.zeros_like(tree.flatten(action)[0], dtype=tf.float32)[:1]
return action, logp
def get_torch_exploration_action(self, action_dist: ActionDistribution,