mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
105 lines
3.4 KiB
Python
105 lines
3.4 KiB
Python
import numpy as np
|
|
|
|
from ray.rllib.utils.framework import get_activation_fn, get_variable, \
|
|
try_import_tf
|
|
|
|
tf1, tf, tfv = try_import_tf()
|
|
|
|
|
|
class NoisyLayer(tf.keras.layers.Layer if tf else object):
    r"""A Layer that adds learnable Noise.

    a common dense layer: y = w^{T}x + b
    a noisy layer: y = (w + \epsilon_w*\sigma_w)^{T}x +
        (b+\epsilon_b*\sigma_b)
    where \epsilon are random variables sampled from factorized normal
    distributions and \sigma are trainable variables which are expected to
    vanish along the training procedure
    """

    def __init__(self,
                 prefix,
                 out_size,
                 sigma0,
                 activation="relu"):
        """Initializes a NoisyLayer object.

        Args:
            prefix: String prepended to the names of the four variables
                created in `build` ("_sigma_w", "_sigma_b", "_fc_w",
                "_fc_b").
            out_size: Size of the layer's output (second dimension of the
                weight matrix, length of the bias vector).
            sigma0: Scale used for the constant initial value of
                `sigma_b`, which is `sigma0 / sqrt(in_size)`.
            activation: Activation identifier handed to
                `get_activation_fn(..., framework="tf")` on every call.
                If that lookup returns None, no activation is applied.
        """
        super().__init__()
        self.prefix = prefix
        self.out_size = out_size
        # TF noise generation can be unreliable on GPU
        # If generating the noise on the CPU,
        # lowering sigma0 to 0.1 may be helpful
        self.sigma0 = sigma0  # 0.5~GPU, 0.1~CPU
        self.activation = activation
        # Variables (created lazily in `build`).
        self.w = None  # Weight matrix.
        self.b = None  # Biases.
        self.sigma_w = None  # Trainable noise scale for the weight matrix.
        self.sigma_b = None  # Trainable noise scale for the biases.

    def build(self, input_shape):
        """Creates the weight, bias and noise-scale variables.

        Args:
            input_shape: Shape of the layer input. Only index 1 is read and
                used as the input size, i.e. inputs are assumed to be
                [batch, in_size] -- TODO confirm for other ranks.
        """
        in_size = int(input_shape[1])
        inv_sqrt_in = 1.0 / np.sqrt(float(in_size))

        # NOTE(review): sigma_w is drawn uniformly from
        # [-1/sqrt(in_size), 1/sqrt(in_size)] whereas sigma_b below starts
        # at the constant sigma0/sqrt(in_size). Kept as-is to preserve
        # behavior -- confirm that this asymmetry is intentional.
        self.sigma_w = get_variable(
            value=tf.keras.initializers.RandomUniform(
                minval=-inv_sqrt_in,
                maxval=inv_sqrt_in),
            trainable=True,
            tf_name=self.prefix + "_sigma_w",
            shape=[in_size, self.out_size],
            dtype=tf.float32,
        )

        self.sigma_b = get_variable(
            value=tf.keras.initializers.Constant(
                self.sigma0 / np.sqrt(float(in_size))),
            trainable=True,
            tf_name=self.prefix + "_sigma_b",
            shape=[self.out_size],
            dtype=tf.float32,
        )

        self.w = get_variable(
            value=tf.keras.initializers.GlorotUniform(),
            tf_name=self.prefix + "_fc_w",
            trainable=True,
            shape=[in_size, self.out_size],
            dtype=tf.float32,
        )

        self.b = get_variable(
            value=tf.keras.initializers.Zeros(),
            tf_name=self.prefix + "_fc_b",
            trainable=True,
            shape=[self.out_size],
            dtype=tf.float32,
        )

    def call(self, inputs):
        """Applies the noisy dense transformation to `inputs`.

        Fresh noise is sampled on every invocation.

        Args:
            inputs: Input tensor; `inputs.shape[1]` is taken as the input
                size and `inputs` is matrix-multiplied with the
                [in_size, out_size] noisy weight matrix.

        Returns:
            The (optionally activated) layer output tensor.
        """
        in_size = int(inputs.shape[1])
        # Factorized noise: one noise vector per input unit and one per
        # output unit, each passed through f(x) = sign(x) * sqrt(|x|).
        epsilon_in = self._f_epsilon(tf.random.normal(shape=[in_size]))
        epsilon_out = self._f_epsilon(
            tf.random.normal(shape=[self.out_size]))
        # Outer product ([in_size, 1] x [1, out_size]) yields the weight
        # noise matrix; the bias noise reuses the output noise vector.
        epsilon_w = tf.matmul(
            a=tf.expand_dims(epsilon_in, -1),
            b=tf.expand_dims(epsilon_out, 0))
        epsilon_b = epsilon_out

        action_activation = tf.matmul(
            inputs,
            self.w + self.sigma_w * epsilon_w) + \
            self.b + self.sigma_b * epsilon_b

        fn = get_activation_fn(self.activation, framework="tf")
        if fn is not None:
            action_activation = fn(action_activation)
        return action_activation

    def _f_epsilon(self, x):
        """Returns sign(x) * sqrt(|x|), computed element-wise with TF ops."""
        return tf.math.sign(x) * tf.math.sqrt(tf.math.abs(x))
|