ray/rllib/examples/models/centralized_critic_models.py

from gym.spaces import Box

from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.models.tf.tf_modelv2 import TFModelV2
from ray.rllib.models.tf.fcnet import FullyConnectedNetwork
from ray.rllib.models.torch.misc import SlimFC
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC
from ray.rllib.utils.annotations import override
from ray.rllib.utils.framework import try_import_tf, try_import_torch

tf1, tf, tfv = try_import_tf()
torch, nn = try_import_torch()


class CentralizedCriticModel(TFModelV2):
    """TF multi-agent model with a separate, centralized value branch.

    Policy outputs come from a wrapped `FullyConnectedNetwork`. The central
    value function is a small Keras model over (obs, opp_obs, opp_act) that
    is evaluated on demand through `central_value_function()`.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        super().__init__(obs_space, action_space, num_outputs, model_config, name)

        # Wrapped network used by `forward()` and `value_function()`.
        self.model = FullyConnectedNetwork(
            obs_space, action_space, num_outputs, model_config, name
        )

        # Central VF maps (obs, opp_obs, opp_act) -> vf_pred.
        obs_width = 6
        opp_act_width = 2
        keras_layers = tf.keras.layers

        own_obs_in = keras_layers.Input(shape=(obs_width,), name="obs")
        opp_obs_in = keras_layers.Input(shape=(obs_width,), name="opp_obs")
        opp_act_in = keras_layers.Input(shape=(opp_act_width,), name="opp_act")
        vf_inputs = [own_obs_in, opp_obs_in, opp_act_in]

        # Join the three inputs along the feature axis, then apply one tanh
        # hidden layer and a linear single-unit output.
        joined = keras_layers.Concatenate(axis=1)(vf_inputs)
        hidden_layer = keras_layers.Dense(
            16, activation=tf.nn.tanh, name="c_vf_dense"
        )
        hidden = hidden_layer(joined)
        output_layer = keras_layers.Dense(1, activation=None, name="c_vf_out")
        vf_pred = output_layer(hidden)

        self.central_vf = tf.keras.Model(inputs=vf_inputs, outputs=vf_pred)

    @override(ModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Delegate the forward pass to the wrapped network."""
        return self.model.forward(input_dict, state, seq_lens)

    def central_value_function(self, obs, opponent_obs, opponent_actions):
        """Evaluate the central value model.

        `opponent_actions` is cast to int32 and one-hot encoded with depth 2
        before being fed to the Keras model together with `obs` and
        `opponent_obs`. The model output is reshaped to 1-D.
        """
        opp_act_one_hot = tf.one_hot(tf.cast(opponent_actions, tf.int32), 2)
        vf_pred = self.central_vf([obs, opponent_obs, opp_act_one_hot])
        return tf.reshape(vf_pred, [-1])

    @override(ModelV2)
    def value_function(self):
        """Return the wrapped network's own value output."""
        # not used
        return self.model.value_function()

class YetAnotherCentralizedCriticModel(TFModelV2):
    """Multi-agent TF model with a centralized value function.

    The observation is expected to be a dict containing 'own_obs' and
    'opponent_obs'. 'own_obs' is what actions are computed from
    (decentralized execution); the opponent information is only used for
    optimization (centralized learning).

    Two sub-models are held:
    - an action model that is fed only 'own_obs';
    - a value model that is fed the 'obs_flat' tensor, and therefore also
      sees the 'opponent_obs' / 'opponent_action' parts.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        super().__init__(obs_space, action_space, num_outputs, model_config, name)

        # one-hot encoded Discrete(6)
        own_obs_space = Box(low=0, high=1, shape=(6,))
        self.action_model = FullyConnectedNetwork(
            own_obs_space,
            action_space,
            num_outputs,
            model_config,
            name + "_action",
        )

        # The value model works on the full observation space and has a
        # single output.
        self.value_model = FullyConnectedNetwork(
            obs_space, action_space, 1, model_config, name + "_vf"
        )

    def forward(self, input_dict, state, seq_lens):
        """Compute action-model outputs and stash the value-model output.

        The value model runs first on `input_dict["obs_flat"]`; its output is
        kept on `self._value_out` for `value_function()`. The return value is
        whatever the action model returns for `input_dict["obs"]["own_obs"]`.
        """
        value_inputs = {"obs": input_dict["obs_flat"]}
        self._value_out, _ = self.value_model(value_inputs, state, seq_lens)

        action_inputs = {"obs": input_dict["obs"]["own_obs"]}
        return self.action_model(action_inputs, state, seq_lens)

    def value_function(self):
        """Return the value output stored by `forward()`, reshaped to 1-D."""
        stored_value = self._value_out
        return tf.reshape(stored_value, [-1])


class TorchCentralizedCriticModel(TorchModelV2, nn.Module):
    """Torch multi-agent model with a separate, centralized value branch.

    Policy outputs come from a wrapped `TorchFC`. The central value function
    is a two-layer network over the concatenation (obs, opp_obs, opp_act),
    evaluated on demand through `central_value_function()`.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(
            self, obs_space, action_space, num_outputs, model_config, name
        )
        nn.Module.__init__(self)

        # Wrapped network used by `forward()` and `value_function()`.
        self.model = TorchFC(obs_space, action_space, num_outputs, model_config, name)

        # Central VF maps (obs, opp_obs, opp_act) -> vf_pred.
        obs_width, opp_obs_width, opp_act_width = 6, 6, 2
        vf_in_features = obs_width + opp_obs_width + opp_act_width
        hidden_layer = SlimFC(vf_in_features, 16, activation_fn=nn.Tanh)
        output_layer = SlimFC(16, 1)
        self.central_vf = nn.Sequential(hidden_layer, output_layer)

    @override(ModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Run the wrapped network and return its output with an empty state."""
        policy_out, _ = self.model(input_dict, state, seq_lens)
        return policy_out, []

    def central_value_function(self, obs, opponent_obs, opponent_actions):
        """Evaluate the central value network.

        `opponent_actions` is converted to long, one-hot encoded with two
        classes and cast to float, then concatenated with `obs` and
        `opponent_obs` along dim 1. The network output is reshaped to 1-D.
        """
        opp_act_one_hot = torch.nn.functional.one_hot(
            opponent_actions.long(), 2
        ).float()
        vf_input = torch.cat([obs, opponent_obs, opp_act_one_hot], 1)
        vf_pred = self.central_vf(vf_input)
        return torch.reshape(vf_pred, [-1])

    @override(ModelV2)
    def value_function(self):
        """Return the wrapped network's own value output."""
        # not used
        return self.model.value_function()


class YetAnotherTorchCentralizedCriticModel(TorchModelV2, nn.Module):
    """Multi-agent model that implements a centralized value function.

    It assumes the observation is a dict with 'own_obs' and 'opponent_obs', the
    former of which can be used for computing actions (i.e., decentralized
    execution), and the latter for optimization (i.e., centralized learning).

    This model has two parts:
    - An action model that looks at just 'own_obs' to compute actions
    - A value model that also looks at the 'opponent_obs' / 'opponent_action'
      to compute the value (it does this by using the 'obs_flat' tensor).

    The value model is evaluated lazily: `forward()` only caches its inputs
    and `value_function()` runs the value model on them.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config, name):
        TorchModelV2.__init__(
            self, obs_space, action_space, num_outputs, model_config, name
        )
        nn.Module.__init__(self)

        self.action_model = TorchFC(
            Box(low=0, high=1, shape=(6,)),  # one-hot encoded Discrete(6)
            action_space,
            num_outputs,
            model_config,
            name + "_action",
        )

        # The value model works on the full observation space and has a
        # single output.
        self.value_model = TorchFC(
            obs_space, action_space, 1, model_config, name + "_vf"
        )
        # Inputs cached by `forward()`; stays None until the first call.
        self._model_in = None

    @override(ModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Compute action-model outputs from 'own_obs'.

        The flattened observation, `state` and `seq_lens` are cached so that
        `value_function()` can evaluate the value model afterwards.

        Returns:
            Whatever the action model returns for
            `input_dict["obs"]["own_obs"]`.
        """
        # Store model-input for possible `value_function()` call.
        self._model_in = [input_dict["obs_flat"], state, seq_lens]
        return self.action_model({"obs": input_dict["obs"]["own_obs"]}, state, seq_lens)

    @override(ModelV2)
    def value_function(self):
        """Run the value model on the inputs cached by the last `forward()`.

        Returns:
            The value model output reshaped to a 1-D tensor.

        Raises:
            RuntimeError: If `forward()` has not been called yet, so there
                are no cached inputs to evaluate.
        """
        # Fail with a clear message instead of an opaque
        # "'NoneType' object is not subscriptable" TypeError.
        if self._model_in is None:
            raise RuntimeError("must call forward() first")

        obs_flat, state, seq_lens = self._model_in
        value_out, _ = self.value_model({"obs": obs_flat}, state, seq_lens)
        return torch.reshape(value_out, [-1])
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00			`from gym.spaces import Box`

			`from ray.rllib.models.modelv2 import ModelV2`
			`from ray.rllib.models.tf.tf_modelv2 import TFModelV2`
[RLlib] Attention Net integration into ModelV2 and learning RL example. (#8371) 2020-05-18 17:26:40 +02:00			`from ray.rllib.models.tf.fcnet import FullyConnectedNetwork`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00			`from ray.rllib.models.torch.misc import SlimFC`
			`from ray.rllib.models.torch.torch_modelv2 import TorchModelV2`
			`from ray.rllib.models.torch.fcnet import FullyConnectedNetwork as TorchFC`
			`from ray.rllib.utils.annotations import override`
			`from ray.rllib.utils.framework import try_import_tf, try_import_torch`

[RLlib] Tf2x preparation; part 2 (upgrading `try_import_tf()`). (#9136) * WIP. * Fixes. * LINT. * WIP. * WIP. * Fixes. * Fixes. * Fixes. * Fixes. * WIP. * Fixes. * Test * Fix. * Fixes and LINT. * Fixes and LINT. * LINT. 2020-06-30 10:13:20 +02:00			`tf1, tf, tfv = try_import_tf()`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00			`torch, nn = try_import_torch()`


			`class CentralizedCriticModel(TFModelV2):`
			`"""Multi-agent model that implements a centralized value function."""`

[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`def __init__(self, obs_space, action_space, num_outputs, model_config, name):`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00			`super(CentralizedCriticModel, self).__init__(`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`obs_space, action_space, num_outputs, model_config, name`
			`)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00			`# Base of the model`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`self.model = FullyConnectedNetwork(`
			`obs_space, action_space, num_outputs, model_config, name`
			`)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
			`# Central VF maps (obs, opp_obs, opp_act) -> vf_pred`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`obs = tf.keras.layers.Input(shape=(6,), name="obs")`
			`opp_obs = tf.keras.layers.Input(shape=(6,), name="opp_obs")`
			`opp_act = tf.keras.layers.Input(shape=(2,), name="opp_act")`
			`concat_obs = tf.keras.layers.Concatenate(axis=1)([obs, opp_obs, opp_act])`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00			`central_vf_dense = tf.keras.layers.Dense(`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`16, activation=tf.nn.tanh, name="c_vf_dense"`
			`)(concat_obs)`
			`central_vf_out = tf.keras.layers.Dense(1, activation=None, name="c_vf_out")(`
			`central_vf_dense`
			`)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00			`self.central_vf = tf.keras.Model(`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`inputs=[obs, opp_obs, opp_act], outputs=central_vf_out`
			`)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
			`@override(ModelV2)`
			`def forward(self, input_dict, state, seq_lens):`
			`return self.model.forward(input_dict, state, seq_lens)`

			`def central_value_function(self, obs, opponent_obs, opponent_actions):`
			`return tf.reshape(`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`self.central_vf(`
			`[obs, opponent_obs, tf.one_hot(tf.cast(opponent_actions, tf.int32), 2)]`
			`),`
			`[-1],`
			`)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
			`@override(ModelV2)`
			`def value_function(self):`
			`return self.model.value_function() # not used`


			`class YetAnotherCentralizedCriticModel(TFModelV2):`
			`"""Multi-agent model that implements a centralized value function.`

			`It assumes the observation is a dict with 'own_obs' and 'opponent_obs', the`
			`former of which can be used for computing actions (i.e., decentralized`
			`execution), and the latter for optimization (i.e., centralized learning).`

			`This model has two parts:`
			`- An action model that looks at just 'own_obs' to compute actions`
			`- A value model that also looks at the 'opponent_obs' / 'opponent_action'`
			`to compute the value (it does this by using the 'obs_flat' tensor).`
			`"""`

[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`def __init__(self, obs_space, action_space, num_outputs, model_config, name):`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00			`super(YetAnotherCentralizedCriticModel, self).__init__(`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`obs_space, action_space, num_outputs, model_config, name`
			`)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
			`self.action_model = FullyConnectedNetwork(`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`Box(low=0, high=1, shape=(6,)), # one-hot encoded Discrete(6)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00			`action_space,`
			`num_outputs,`
			`model_config,`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`name + "_action",`
			`)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`self.value_model = FullyConnectedNetwork(`
			`obs_space, action_space, 1, model_config, name + "_vf"`
			`)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
			`def forward(self, input_dict, state, seq_lens):`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`self._value_out, _ = self.value_model(`
			`{"obs": input_dict["obs_flat"]}, state, seq_lens`
			`)`
			`return self.action_model({"obs": input_dict["obs"]["own_obs"]}, state, seq_lens)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
			`def value_function(self):`
			`return tf.reshape(self._value_out, [-1])`


			`class TorchCentralizedCriticModel(TorchModelV2, nn.Module):`
			`"""Multi-agent model that implements a centralized VF."""`

[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`def __init__(self, obs_space, action_space, num_outputs, model_config, name):`
			`TorchModelV2.__init__(`
			`self, obs_space, action_space, num_outputs, model_config, name`
			`)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00			`nn.Module.__init__(self)`

			`# Base of the model`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`self.model = TorchFC(obs_space, action_space, num_outputs, model_config, name)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
			`# Central VF maps (obs, opp_obs, opp_act) -> vf_pred`
			`input_size = 6 + 6 + 2 # obs + opp_obs + opp_act`
[RLlib] Examples folder restructuring (Model examples; final part). (#8278) - This PR completes any previously missing PyTorch Model counterparts to TFModels in examples/models. - It also makes sure, all example scripts in the rllib/examples folder are tested for both frameworks and learn the given task (this is often currently not checked) using a --as-test flag in connection with a --stop-reward. 2020-05-12 08:23:10 +02:00			`self.central_vf = nn.Sequential(`
			`SlimFC(input_size, 16, activation_fn=nn.Tanh),`
			`SlimFC(16, 1),`
			`)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
			`@override(ModelV2)`
			`def forward(self, input_dict, state, seq_lens):`
			`model_out, _ = self.model(input_dict, state, seq_lens)`
			`return model_out, []`

			`def central_value_function(self, obs, opponent_obs, opponent_actions):`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`input_ = torch.cat(`
			`[`
			`obs,`
			`opponent_obs,`
			`torch.nn.functional.one_hot(opponent_actions.long(), 2).float(),`
			`],`
			`1,`
			`)`
[RLlib] Examples folder restructuring (Model examples; final part). (#8278) - This PR completes any previously missing PyTorch Model counterparts to TFModels in examples/models. - It also makes sure, all example scripts in the rllib/examples folder are tested for both frameworks and learn the given task (this is often currently not checked) using a --as-test flag in connection with a --stop-reward. 2020-05-12 08:23:10 +02:00			`return torch.reshape(self.central_vf(input_), [-1])`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
			`@override(ModelV2)`
			`def value_function(self):`
			`return self.model.value_function() # not used`


			`class YetAnotherTorchCentralizedCriticModel(TorchModelV2, nn.Module):`
			`"""Multi-agent model that implements a centralized value function.`

			`It assumes the observation is a dict with 'own_obs' and 'opponent_obs', the`
			`former of which can be used for computing actions (i.e., decentralized`
			`execution), and the latter for optimization (i.e., centralized learning).`

			`This model has two parts:`
			`- An action model that looks at just 'own_obs' to compute actions`
			`- A value model that also looks at the 'opponent_obs' / 'opponent_action'`
			`to compute the value (it does this by using the 'obs_flat' tensor).`
			`"""`

[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`def __init__(self, obs_space, action_space, num_outputs, model_config, name):`
			`TorchModelV2.__init__(`
			`self, obs_space, action_space, num_outputs, model_config, name`
			`)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00			`nn.Module.__init__(self)`

			`self.action_model = TorchFC(`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`Box(low=0, high=1, shape=(6,)), # one-hot encoded Discrete(6)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00			`action_space,`
			`num_outputs,`
			`model_config,`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`name + "_action",`
			`)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`self.value_model = TorchFC(`
			`obs_space, action_space, 1, model_config, name + "_vf"`
			`)`
[RLlib] Make PyTorch Model forward pass faster in vf-case. (#8422) 2020-05-14 10:15:50 +02:00			`self._model_in = None`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
			`def forward(self, input_dict, state, seq_lens):`
[RLlib] Make PyTorch Model forward pass faster in vf-case. (#8422) 2020-05-14 10:15:50 +02:00			# Store model-input for possible `value_function()` call.
			`self._model_in = [input_dict["obs_flat"], state, seq_lens]`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`return self.action_model({"obs": input_dict["obs"]["own_obs"]}, state, seq_lens)`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
			`def value_function(self):`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`value_out, _ = self.value_model(`
			`{"obs": self._model_in[0]}, self._model_in[1], self._model_in[2]`
			`)`
[RLlib] Make PyTorch Model forward pass faster in vf-case. (#8422) 2020-05-14 10:15:50 +02:00			`return torch.reshape(value_out, [-1])`