ray/rllib/algorithms/mbmpo/mbmpo_torch_policy.py


from gym.spaces import Box, Discrete
import logging
from typing import Tuple, Type

import ray
from ray.rllib.algorithms.maml.maml_torch_policy import MAMLTorchPolicy
from ray.rllib.models.catalog import ModelCatalog
from ray.rllib.models.modelv2 import ModelV2
from ray.rllib.models.torch.torch_action_dist import TorchDistributionWrapper
from ray.rllib.utils.error import UnsupportedSpaceException
from ray.rllib.utils.framework import try_import_torch
from ray.rllib.utils.torch_utils import get_device

torch, nn = try_import_torch()

logger = logging.getLogger(__name__)

class MBMPOTorchPolicy(MAMLTorchPolicy):
    def __init__(self, observation_space, action_space, config):
        # Validate spaces.
        # Only support single Box or single Discrete spaces.
        if not isinstance(action_space, (Box, Discrete)):
            raise UnsupportedSpaceException(
                "Action space ({}) of {} is not supported for "
                "MB-MPO. Must be [Box|Discrete].".format(action_space, self)
            )
        # If Box, make sure it's a 1D vector space.
        elif isinstance(action_space, Box) and len(action_space.shape) > 1:
            raise UnsupportedSpaceException(
                "Action space ({}) of {} has multiple dimensions "
                "{}. ".format(action_space, self, action_space.shape)
                + "Consider reshaping this into a single dimension Box space "
                "or using the multi-agent API."
            )
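        # For example, Box(-1.0, 1.0, shape=(6,)) or Discrete(4) action spaces
        # pass these checks, while a multi-dimensional Box such as
        # Box(-1.0, 1.0, shape=(2, 3)) is rejected.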
        config = dict(ray.rllib.algorithms.mbmpo.mbmpo.DEFAULT_CONFIG, **config)
        super().__init__(observation_space, action_space, config)

    def make_model_and_action_dist(
        self,
    ) -> Tuple[ModelV2, Type[TorchDistributionWrapper]]:
        """Constructs the necessary ModelV2 and action dist class for the Policy.

        Builds the dynamics-model ensemble (assigned to `self.dynamics_model`)
        and the policy model (assigned to `self.pi`) from the Policy's own
        observation space, action space, and config.

        Returns:
            Tuple[ModelV2, Type[TorchDistributionWrapper]]: The policy ModelV2
                to be used by the Policy and its action distribution class.
        """
        # Get the output distribution class for predicting rewards and next-obs.
        self.distr_cls_next_obs, num_outputs = ModelCatalog.get_action_dist(
            self.observation_space,
            self.config,
            dist_type="deterministic",
            framework="torch",
        )
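        # Note (assumes a Box observation space): with dist_type="deterministic",
        # `num_outputs` equals the flattened observation size and is reused below
        # as the output dimension of the dynamics ensemble.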
        # Build one dynamics model if we are a Worker.
        # If we are the main MAML learner, build n (num_workers) dynamics models
        # so that checkpoints of the current training state can be created.
        device = get_device(self.config)
        self.dynamics_model = ModelCatalog.get_model_v2(
            self.observation_space,
            self.action_space,
            num_outputs=num_outputs,
            model_config=self.config["dynamics_model"],
            framework="torch",
            name="dynamics_ensemble",
        ).to(device)

        action_dist, num_outputs = ModelCatalog.get_action_dist(
            self.action_space, self.config, framework="torch"
        )
        # Create the pi-model and register it with the Policy.
        self.pi = ModelCatalog.get_model_v2(
            self.observation_space,
            self.action_space,
            num_outputs=num_outputs,
            model_config=self.config["model"],
            framework="torch",
            name="policy_model",
        )

        return self.pi, action_dist
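

# A minimal, hedged usage sketch: the MBMPO Trainer normally constructs this
# policy itself, so the guard below only demonstrates the action-space
# validation performed in `__init__`. The Box shapes are illustrative assumptions.
if __name__ == "__main__":
    # A multi-dimensional Box action space is rejected before any models
    # are built or the config is merged.
    try:
        MBMPOTorchPolicy(
            observation_space=Box(-1.0, 1.0, shape=(4,)),
            action_space=Box(-1.0, 1.0, shape=(2, 3)),
            config={},
        )
    except UnsupportedSpaceException as e:
        print("Rejected unsupported action space:", e)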