import numpy as np

from ray.rllib.utils.framework import get_activation_fn, get_variable, \
    try_import_tf

tf = try_import_tf()


class NoisyLayer(tf.keras.layers.Layer):
    """A Layer that adds learnable noise to a regular dense layer.

    A regular dense layer computes: y = w^{T}x + b.
    A noisy layer computes:
        y = (w + \\epsilon_w * \\sigma_w)^{T}x + (b + \\epsilon_b * \\sigma_b),
    where the \\epsilon are random variables sampled from factorized normal
    distributions and the \\sigma are trainable variables, which are expected
    to vanish over the course of training.
    """

    def __init__(self, prefix, out_size, sigma0, activation="relu"):
        """Initializes a NoisyLayer object.

        Args:
            prefix (str): Name prefix used for this layer's variables.
            out_size (int): Output size of the layer.
            sigma0 (float): Initialization value for the sigma_b bias-noise
                variables.
            activation (str): Name of the activation function to apply to
                the layer's output (e.g. "relu").
        """
        super().__init__()
        self.prefix = prefix
        self.out_size = out_size
        # TF noise generation can be unreliable on GPU.
        # If generating the noise on the CPU, lowering sigma0 to 0.1 may be
        # helpful.
        self.sigma0 = sigma0  # 0.5~GPU, 0.1~CPU
        self.activation = activation

        # Variables.
        self.w = None  # Weight matrix.
        self.b = None  # Biases.
        self.sigma_w = None  # Noise scales for the weight matrix.
        self.sigma_b = None  # Noise scales for the biases.

    def build(self, input_shape):
        in_size = int(input_shape[1])

        self.sigma_w = get_variable(
            value=tf.keras.initializers.RandomUniform(
                minval=-1.0 / np.sqrt(float(in_size)),
                maxval=1.0 / np.sqrt(float(in_size))),
            trainable=True,
            tf_name=self.prefix + "_sigma_w",
            shape=[in_size, self.out_size],
            dtype=tf.float32)
        self.sigma_b = get_variable(
            value=tf.keras.initializers.Constant(
                self.sigma0 / np.sqrt(float(in_size))),
            trainable=True,
            tf_name=self.prefix + "_sigma_b",
            shape=[self.out_size],
            dtype=tf.float32)
        self.w = get_variable(
            value=tf.keras.initializers.GlorotUniform(),
            tf_name=self.prefix + "_fc_w",
            trainable=True,
            shape=[in_size, self.out_size],
            dtype=tf.float32)
        self.b = get_variable(
            value=tf.keras.initializers.Zeros(),
            tf_name=self.prefix + "_fc_b",
            trainable=True,
            shape=[self.out_size],
            dtype=tf.float32)

    def call(self, inputs):
        in_size = int(inputs.shape[1])
        # Factorized Gaussian noise: sample one noise vector per input and
        # one per output, then build the weight noise as their outer product.
        epsilon_in = tf.random.normal(shape=[in_size])
        epsilon_out = tf.random.normal(shape=[self.out_size])
        epsilon_in = self._f_epsilon(epsilon_in)
        epsilon_out = self._f_epsilon(epsilon_out)
        epsilon_w = tf.matmul(
            a=tf.expand_dims(epsilon_in, -1),
            b=tf.expand_dims(epsilon_out, 0))
        epsilon_b = epsilon_out

        action_activation = tf.matmul(
            inputs, self.w + self.sigma_w * epsilon_w) + \
            self.b + self.sigma_b * epsilon_b

        fn = get_activation_fn(self.activation, framework="tf")
        if fn is not None:
            action_activation = fn(action_activation)
        return action_activation

    def _f_epsilon(self, x):
        # Noise-scaling function f(x) = sign(x) * sqrt(|x|) used for
        # factorized Gaussian noise.
        return tf.math.sign(x) * tf.math.sqrt(tf.math.abs(x))
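

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the original module):
# builds a NoisyLayer and runs a single forward pass on a random batch.
# Assumes an eager-mode TF2 setup; the prefix, sizes, and sigma0 value below
# are arbitrary. Because fresh epsilon noise is sampled inside call(), two
# consecutive calls on the same input generally yield different outputs.
if __name__ == "__main__":
    layer = NoisyLayer(prefix="noisy_fc1", out_size=64, sigma0=0.5)
    x = tf.random.normal(shape=[32, 128])  # Batch of 32, 128 input features.
    y = layer(x)  # build() runs on the first call; call() adds the noise.
    print(y.shape)  # -> (32, 64)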