ray/rllib/models/tf/tf_modelv2.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.utils.annotations import PublicAPI
from ray.rllib.utils import try_import_tf

tf = try_import_tf()


@PublicAPI
class TFModelV2(ModelV2):
    """TensorFlow flavor of ModelV2.

    On its own this class is not a usable model: forward() and
    value_function() both raise NotImplementedError until a subclass
    overrides them. What the base class does provide is a simple registry
    of the variables belonging to the model (see register_variables()).
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        """Set up the TF model base.

        The arguments are handed through unchanged to ModelV2.__init__,
        with the framework pinned to "tf", and an empty variable registry
        is created.

        A subclass ``MyModelClass(TFModelV2)`` could be built like this::

            def __init__(self, *args, **kwargs):
                super(MyModelClass, self).__init__(*args, **kwargs)
                input_layer = tf.keras.layers.Input(...)
                hidden_layer = tf.keras.layers.Dense(...)(input_layer)
                output_layer = tf.keras.layers.Dense(...)(hidden_layer)
                value_layer = tf.keras.layers.Dense(...)(hidden_layer)
                self.base_model = tf.keras.Model(
                    input_layer, [output_layer, value_layer])
                self.register_variables(self.base_model.variables)
        """
        # Call the parent initializer explicitly (not via super()) so the
        # exact same initializer runs regardless of a subclass's MRO.
        base_args = (obs_space, action_space, num_outputs, model_config, name)
        ModelV2.__init__(self, *base_args, framework="tf")

        # Filled by register_variables(); read by variables().
        self.var_list = list()

    def forward(self, input_dict, state, seq_lens):
        """Run the model on the given input tensors and state.

        This is the method custom models should override (rather than
        __call__). Complex observations (dicts, tuples, etc.) are unpacked
        by __call__ before they reach forward(); the flattened observation
        tensor is available as input_dict["obs_flat"].

        forward() may be invoked any number of times. Under eager
        execution every call evaluates the model eagerly. Under symbolic
        execution every call builds a computation graph over this model's
        variables, i.e. the weights are shared between calls.

        Arguments:
            input_dict (dict): input tensors, including "obs", "obs_flat",
                "prev_action", "prev_reward", "is_training"
            state (list): state tensors whose sizes match those returned
                by get_initial_state, plus the batch dimension
            seq_lens (Tensor): 1d tensor with the input sequence lengths

        Returns:
            (outputs, state): the model output tensor of size
                [BATCH, num_outputs], together with the state

        Raises:
            NotImplementedError: always, in this base class.

        A matching implementation for the ``MyModelClass`` example::

            def forward(self, input_dict, state, seq_lens):
                model_out, self._value_out = self.base_model(input_dict["obs"])
                return model_out, state
        """
        raise NotImplementedError

    def value_function(self):
        """Give the value estimate belonging to the latest forward pass.

        Returns:
            value estimate tensor of shape [BATCH].

        Raises:
            NotImplementedError: always, in this base class.

        A matching implementation for the ``MyModelClass`` example::

            def value_function(self):
                return self._value_out
        """
        raise NotImplementedError

    def update_ops(self):
        """List the update ops of this model.

        Any BatchNorm update ops, for instance, belong in this list. The
        base implementation reports none."""
        return list()

    def register_variables(self, variables):
        """Add the given variables to this model's registry."""
        registry = self.var_list
        registry.extend(variables)

    def variables(self):
        """Give the registered variables of this model as a new list."""
        snapshot = list(self.var_list)
        return snapshot

    def trainable_variables(self):
        """Give the registered variables that are flagged as trainable."""
        selected = []
        for var in self.variables():
            if var.trainable:
                selected.append(var)
        return selected
[rllib] ModelV2 API (#4926) 2019-07-03 15:59:47 -07:00			`from __future__ import absolute_import`
			`from __future__ import division`
			`from __future__ import print_function`

			`from ray.rllib.models.modelv2 import ModelV2`
[rllib] ModelV2 support for pytorch (#5249) 2019-07-25 11:02:53 -07:00			`from ray.rllib.utils.annotations import PublicAPI`
[rllib] ModelV2 API (#4926) 2019-07-03 15:59:47 -07:00			`from ray.rllib.utils import try_import_tf`

			`tf = try_import_tf()`


[rllib] ModelV2 support for pytorch (#5249) 2019-07-25 11:02:53 -07:00			`@PublicAPI`
[rllib] ModelV2 API (#4926) 2019-07-03 15:59:47 -07:00			`class TFModelV2(ModelV2):`
[rllib] ModelV2 support for pytorch (#5249) 2019-07-25 11:02:53 -07:00			`"""TF version of ModelV2.`

			`Note that this class by itself is not a valid model unless you`
			`implement forward() in a subclass."""`
[rllib] ModelV2 API (#4926) 2019-07-03 15:59:47 -07:00
[rllib] Add Keras LSTM example with ModelV2 (#5258) 2019-07-24 13:09:41 -07:00			`def __init__(self, obs_space, action_space, num_outputs, model_config,`
[rllib] ModelV2 API (#4926) 2019-07-03 15:59:47 -07:00			`name):`
[rllib] Document ModelV2 and clean up the models/ directory (#5277) 2019-07-27 02:08:16 -07:00			`"""Initialize a TFModelV2.`

			`Here is an example implementation for a subclass`
			``MyModelClass(TFModelV2)``::

			`def __init__(self, *args, **kwargs):`
			`super(MyModelClass, self).__init__(*args, **kwargs)`
			`input_layer = tf.keras.layers.Input(...)`
			`hidden_layer = tf.keras.layers.Dense(...)(input_layer)`
			`output_layer = tf.keras.layers.Dense(...)(hidden_layer)`
			`value_layer = tf.keras.layers.Dense(...)(hidden_layer)`
			`self.base_model = tf.keras.Model(`
			`input_layer, [output_layer, value_layer])`
			`self.register_variables(self.base_model.variables)`
			`"""`

[rllib] ModelV2 API (#4926) 2019-07-03 15:59:47 -07:00			`ModelV2.__init__(`
			`self,`
			`obs_space,`
			`action_space,`
[rllib] Add Keras LSTM example with ModelV2 (#5258) 2019-07-24 13:09:41 -07:00			`num_outputs,`
[rllib] ModelV2 API (#4926) 2019-07-03 15:59:47 -07:00			`model_config,`
			`name,`
			`framework="tf")`
[rllib] ModelV2 support for pytorch (#5249) 2019-07-25 11:02:53 -07:00			`self.var_list = []`
[rllib] Port DDPG to the build_tf_policy pattern (#5242) 2019-07-24 13:55:55 -07:00
[rllib] Document ModelV2 and clean up the models/ directory (#5277) 2019-07-27 02:08:16 -07:00			`def forward(self, input_dict, state, seq_lens):`
			`"""Call the model with the given input tensors and state.`

			`Any complex observations (dicts, tuples, etc.) will be unpacked by`
			`__call__ before being passed to forward(). To access the flattened`
			`observation tensor, refer to input_dict["obs_flat"].`

			`This method can be called any number of times. In eager execution,`
			`each call to forward() will eagerly evaluate the model. In symbolic`
			`execution, each call to forward creates a computation graph that`
			`operates over the variables of this model (i.e., shares weights).`

			`Custom models should override this instead of __call__.`

			`Arguments:`
			`input_dict (dict): dictionary of input tensors, including "obs",`
			`"obs_flat", "prev_action", "prev_reward", "is_training"`
			`state (list): list of state tensors with sizes matching those`
			`returned by get_initial_state + the batch dimension`
			`seq_lens (Tensor): 1d tensor holding input sequence lengths`

			`Returns:`
			`(outputs, state): The model output tensor of size`
			`[BATCH, num_outputs]`

			Sample implementation for the ``MyModelClass`` example::

			`def forward(self, input_dict, state, seq_lens):`
			`model_out, self._value_out = self.base_model(input_dict["obs"])`
			`return model_out, state`
			`"""`
			`raise NotImplementedError`

			`def value_function(self):`
			`"""Return the value function estimate for the most recent forward pass.`

			`Returns:`
			`value estimate tensor of shape [BATCH].`

			Sample implementation for the ``MyModelClass`` example::

			`def value_function(self):`
			`return self._value_out`
			`"""`
			`raise NotImplementedError`

[rllib] Port DDPG to the build_tf_policy pattern (#5242) 2019-07-24 13:55:55 -07:00			`def update_ops(self):`
			`"""Return the list of update ops for this model.`

			`For example, this should include any BatchNorm update ops."""`
			`return []`
[rllib] ModelV2 support for pytorch (#5249) 2019-07-25 11:02:53 -07:00
			`def register_variables(self, variables):`
			`"""Register the given list of variables with this model."""`
			`self.var_list.extend(variables)`

			`def variables(self):`
			`"""Returns the list of variables for this model."""`
			`return list(self.var_list)`

			`def trainable_variables(self):`
			`"""Returns the list of trainable variables for this model."""`
			`return [v for v in self.variables() if v.trainable]`