from collections import OrderedDict
import logging

import gym

from ray.rllib.models.tf.misc import linear, normc_initializer
from ray.rllib.models.preprocessors import get_preprocessor
from ray.rllib.utils.annotations import PublicAPI, DeveloperAPI
from ray.rllib.utils import try_import_tf, try_import_torch

tf = try_import_tf()
torch, _ = try_import_torch()

logger = logging.getLogger(__name__)


class Model:
    """This class is deprecated, please use TFModelV2 instead."""

    def __init__(self,
                 input_dict,
                 obs_space,
                 action_space,
                 num_outputs,
                 options,
                 state_in=None,
                 seq_lens=None):
        assert isinstance(input_dict, dict), input_dict

        # Default attribute values for the non-RNN case.
        self.state_init = []
        self.state_in = state_in or []
        self.state_out = []
        self.obs_space = obs_space
        self.action_space = action_space
        self.num_outputs = num_outputs
        self.options = options
        self.scope = tf.get_variable_scope()
        self.session = tf.get_default_session()
        self.input_dict = input_dict
        if seq_lens is not None:
            self.seq_lens = seq_lens
        else:
            self.seq_lens = tf.placeholder(
                dtype=tf.int32, shape=[None], name="seq_lens")

        self._num_outputs = num_outputs
        if options.get("free_log_std"):
            assert num_outputs % 2 == 0
            num_outputs = num_outputs // 2

        ok = True
        try:
            restored = input_dict.copy()
            restored["obs"] = restore_original_dimensions(
                input_dict["obs"], obs_space)
            self.outputs, self.last_layer = self._build_layers_v2(
                restored, num_outputs, options)
        except NotImplementedError:
            ok = False
        # In TF 1.14, you cannot construct variable scopes in exception
        # handlers, so we have to set the OK flag and check it here:
        if not ok:
            self.outputs, self.last_layer = self._build_layers(
                input_dict["obs"], num_outputs, options)

        if options.get("free_log_std", False):
            log_std = tf.get_variable(
                name="log_std",
                shape=[num_outputs],
                initializer=tf.zeros_initializer)
            self.outputs = tf.concat(
                [self.outputs, 0.0 * self.outputs + log_std], 1)

    def _build_layers(self, inputs, num_outputs, options):
        """Builds and returns the output and last layer of the network.

        Deprecated: use _build_layers_v2 instead, which has better support
        for dict and tuple spaces.
        """
        raise NotImplementedError

    @PublicAPI
    def _build_layers_v2(self, input_dict, num_outputs, options):
        """Define the layers of a custom model.

        Arguments:
            input_dict (dict): Dictionary of input tensors, including "obs",
                "prev_action", "prev_reward", "is_training".
            num_outputs (int): Output tensor must be of size
                [BATCH_SIZE, num_outputs].
            options (dict): Model options.

        Returns:
            (outputs, feature_layer): Tensors of size [BATCH_SIZE, num_outputs]
                and [BATCH_SIZE, desired_feature_size].

        When using dict or tuple observation spaces, you can access
        the nested sub-observation batches here as well:

        Examples:
            >>> print(input_dict)
            {'prev_actions': <tf.Tensor ...>,
             'prev_rewards': <tf.Tensor ...>,
             'is_training': <tf.Tensor ...>,
             'obs': OrderedDict([
                ('sensors', OrderedDict([
                    ('front_cam', [<tf.Tensor ...>, <tf.Tensor ...>]),
                    ('position', <tf.Tensor ...>),
                    ('velocity', <tf.Tensor ...>)]))])}
        """
        raise NotImplementedError

    @PublicAPI
    def value_function(self):
        """Builds the value function output.

        This method can be overridden to customize the implementation of the
        value function (e.g., not sharing hidden layers).

        Returns:
            Tensor of size [BATCH_SIZE] for the value function.
        """
        return tf.reshape(
            linear(self.last_layer, 1, "value", normc_initializer(1.0)), [-1])

    @PublicAPI
    def custom_loss(self, policy_loss, loss_inputs):
        """Override to customize the loss function used to optimize this model.

        This can be used to incorporate self-supervised losses (by defining
        a loss over existing input and output tensors of this model), and
        supervised losses (by defining losses over a variable-sharing copy
        of this model's layers).

        You can find a runnable example in examples/custom_loss.py.

        Arguments:
            policy_loss (Tensor): scalar policy loss from the policy.
            loss_inputs (dict): map of input placeholders for rollout data.

        Returns:
            Scalar tensor for the customized loss for this model.
        """
        if self.loss() is not None:
            raise DeprecationWarning(
                "self.loss() is deprecated, use self.custom_loss() instead.")
        return policy_loss

    @PublicAPI
    def custom_stats(self):
        """Override to return custom metrics from your model.

        The stats will be reported as part of the learner stats, i.e.:

            info:
                learner:
                    model:
                        key1: metric1
                        key2: metric2

        Returns:
            Dict of string keys to scalar tensors.
        """
        return {}

    def loss(self):
        """Deprecated: use self.custom_loss()."""
        return None

    @classmethod
    def get_initial_state(cls, obs_space, action_space, num_outputs, options):
        raise NotImplementedError(
            "In order to use recurrent models with ModelV2, you should define "
            "the get_initial_state @classmethod on your custom model class.")

    def _validate_output_shape(self):
        """Checks that the model has the correct number of outputs."""
        try:
            out = tf.convert_to_tensor(self.outputs)
            shape = out.shape.as_list()
        except Exception:
            raise ValueError("Output is not a tensor: {}".format(self.outputs))
        else:
            if len(shape) != 2 or shape[1] != self._num_outputs:
                raise ValueError(
                    "Expected output shape of [None, {}], got {}".format(
                        self._num_outputs, shape))
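

# Illustrative sketch of the (deprecated) custom-model interface above: a
# minimal fully connected model implementing `_build_layers_v2`. The class
# name, layer sizes, and stat key are arbitrary example choices, and a flat
# (Box) observation space is assumed so that `input_dict["obs"]` is a single
# [BATCH_SIZE, obs_dim] tensor.
class _ExampleFCModel(Model):
    """Toy fully connected model, provided purely as a usage illustration."""

    def _build_layers_v2(self, input_dict, num_outputs, options):
        # By the time this is called, "obs" has already been restored to its
        # original (possibly nested) structure by restore_original_dimensions.
        obs = input_dict["obs"]
        hidden = tf.layers.dense(obs, 64, activation=tf.nn.relu, name="fc1")
        output = tf.layers.dense(
            hidden, num_outputs, activation=None, name="fc_out")
        # Return both the output layer and the last hidden (feature) layer;
        # the default value_function() builds its value branch on the latter.
        return output, hidden

    def custom_stats(self):
        # Optional: report extra scalars under the learner stats.
        return {"example_hidden_mean": tf.reduce_mean(self.last_layer)}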


@DeveloperAPI
def flatten(obs, framework):
    """Flatten the given tensor."""
    if framework == "tf":
        return tf.layers.flatten(obs)
    elif framework == "torch":
        assert torch is not None
        return torch.flatten(obs, start_dim=1)
    else:
        raise NotImplementedError("flatten", framework)


@DeveloperAPI
def restore_original_dimensions(obs, obs_space, tensorlib=tf):
    """Unpacks Dict and Tuple space observations into their original form.

    This is needed since we flatten Dict and Tuple observations in transit.
    Before sending them to the model though, we should unflatten them into
    Dicts or Tuples of tensors.

    Arguments:
        obs: The flattened observation tensor.
        obs_space: The flattened obs space. If this has the `original_space`
            attribute, we will unflatten the tensor to that shape.
        tensorlib: The library used to unflatten (reshape) the array/tensor.

    Returns:
        Single tensor or dict / tuple of tensors matching the original
        observation space.
    """
    if hasattr(obs_space, "original_space"):
        if tensorlib == "tf":
            tensorlib = tf
        elif tensorlib == "torch":
            assert torch is not None
            tensorlib = torch
        return _unpack_obs(obs, obs_space.original_space, tensorlib=tensorlib)
    else:
        return obs
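

# Typical usage sketch for restore_original_dimensions (assumes a Dict
# observation space and that the RLlib preprocessor attaches `original_space`
# to the flattened space it produces; the space and placeholder below are
# example values only):
#
#   space = gym.spaces.Dict({
#       "position": gym.spaces.Box(-1.0, 1.0, shape=(3,)),
#       "velocity": gym.spaces.Box(-1.0, 1.0, shape=(2,)),
#   })
#   prep = get_preprocessor(space)(space)  # flattens obs to a size-5 vector
#   flat_obs = tf.placeholder(tf.float32, [None, 5])
#   restored = restore_original_dimensions(flat_obs, prep.observation_space)
#   # restored["position"] -> [None, 3], restored["velocity"] -> [None, 2]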


# Cache of preprocessors, in case the user is calling unpack obs often.
_cache = {}


def _unpack_obs(obs, space, tensorlib=tf):
    """Unpack a flattened Dict or Tuple observation array/tensor.

    Arguments:
        obs: The flattened observation tensor.
        space: The original space prior to flattening.
        tensorlib: The library used to unflatten (reshape) the array/tensor.
    """
    if (isinstance(space, gym.spaces.Dict)
            or isinstance(space, gym.spaces.Tuple)):
        if id(space) in _cache:
            prep = _cache[id(space)]
        else:
            prep = get_preprocessor(space)(space)
            # Make an attempt to cache the result, if there is enough space
            # left in the cache.
            if len(_cache) < 999:
                _cache[id(space)] = prep
        if len(obs.shape) != 2 or obs.shape[1] != prep.shape[0]:
            raise ValueError(
                "Expected flattened obs shape of [None, {}], got {}".format(
                    prep.shape[0], obs.shape))
        assert len(prep.preprocessors) == len(space.spaces), \
            (len(prep.preprocessors), len(space.spaces))
        offset = 0
        if isinstance(space, gym.spaces.Tuple):
            u = []
            for p, v in zip(prep.preprocessors, space.spaces):
                obs_slice = obs[:, offset:offset + p.size]
                offset += p.size
                u.append(
                    _unpack_obs(
                        tensorlib.reshape(obs_slice, [-1] + list(p.shape)),
                        v,
                        tensorlib=tensorlib))
        else:
            u = OrderedDict()
            for p, (k, v) in zip(prep.preprocessors, space.spaces.items()):
                obs_slice = obs[:, offset:offset + p.size]
                offset += p.size
                u[k] = _unpack_obs(
                    tensorlib.reshape(obs_slice, [-1] + list(p.shape)),
                    v,
                    tensorlib=tensorlib)
        return u
    else:
        return obs
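

if __name__ == "__main__":
    # Illustrative, self-contained sketch of how _unpack_obs recovers a Dict
    # observation batch from its flattened form. Assumes numpy is available
    # and uses it as the tensorlib, since only `reshape` is needed from it;
    # the space and batch below are arbitrary example values.
    import numpy as np

    example_space = gym.spaces.Dict({
        "position": gym.spaces.Box(-1.0, 1.0, shape=(3,)),
        "velocity": gym.spaces.Box(-1.0, 1.0, shape=(2,)),
    })
    # A batch of 4 observations, flattened to shape [batch, 3 + 2].
    flat_batch = np.arange(20, dtype=np.float32).reshape(4, 5)
    unpacked = _unpack_obs(flat_batch, example_space, tensorlib=np)
    print(unpacked["position"].shape)  # -> (4, 3)
    print(unpacked["velocity"].shape)  # -> (4, 2)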