# ray/rllib/policy/trajectory_view.py

import numpy as np
from typing import Dict, Optional

from ray.rllib.utils.types import TensorType


class ViewRequirement:
    """Single view requirement (for one column in a ModelV2 input_dict).

    Note: This is an experimental class.

    ModelV2 returns a Dict[str, ViewRequirement] upon calling
    `ModelV2.get_view_requirements()`, where the str key represents the
    column name under which the view is available in the `input_dict`, and
    the ViewRequirement specifies the actual underlying column name (in the
    original data buffer), the timestep(s), and other options needed to
    build the view for that column.

    This class is a plain container: the constructor stores its arguments
    as attributes of the same name and does not validate them.

    Examples:
        >>> # The default ViewRequirement for a Model is:
        >>> req = [ModelV2].get_view_requirements(is_training=False)
        >>> print(req["obs"].timesteps)
        0
        >>> ViewRequirement(timesteps=-1).fill_mode
        'zeros'
    """

    def __init__(self,
                 data_col: Optional[str] = None,
                 timesteps: int = 0,
                 fill_mode: str = "zeros",
                 repeat_mode: str = "all"):
        """Initializes a ViewRequirement object.

        Args:
            data_col (Optional[str]): The data column name from the
                SampleBatch (str key). If None, use the dict key under which
                this ViewRequirement resides.
            timesteps (int): Relative (or absolute) timestep to be present
                in the input_dict.
                NOTE(review): earlier documentation also described a list
                of timesteps (Union[List[int], int]); the annotation only
                admits a single int - confirm before passing a list.
            fill_mode (str): The fill mode in case t<0 or t>H.
                One of "zeros", "tile".
            repeat_mode (str): The repeat-mode (one of "all" or
                "only_first"). E.g. for training, we only want the first
                internal state timestep (the NN will calculate all others
                again anyways).
        """
        self.data_col = data_col
        self.timesteps = timesteps

        # Switch on absolute timestep mode. Default: False.
        # TODO: (sven)
        # "absolute_timesteps",

        self.fill_mode = fill_mode
        self.repeat_mode = repeat_mode

        # Provide all data as time major (default: False).
        # TODO: (sven)
        # "time_major",

    def __repr__(self) -> str:
        """Returns a constructor-like string listing all four settings.

        Makes printed view-requirement dicts readable instead of showing
        the default `<... object at 0x...>` representation.
        """
        return (
            "{}(data_col={!r}, timesteps={!r}, fill_mode={!r}, "
            "repeat_mode={!r})".format(
                type(self).__name__, self.data_col, self.timesteps,
                self.fill_mode, self.repeat_mode))


def get_trajectory_view(
        model,
        trajectories,
        is_training: bool = False) -> Dict[str, TensorType]:
    """Returns an input_dict for a Model's forward pass given some data.

    For every view column requested by the model, one value is picked from
    each trajectory's buffer at index `cursor + timesteps`, and the picked
    values are stacked into a single numpy array (one entry per
    trajectory).

    Note: Only `data_col` and `timesteps` of each view requirement are
    consulted here; `fill_mode` and `repeat_mode` are not applied. The
    index `cursor + timesteps` is passed to the buffer as-is, so
    out-of-range or negative indices follow the buffer's own indexing
    rules.

    Args:
        model (ModelV2): The ModelV2 object for which to generate the view
            (input_dict) from `trajectories`. Must provide
            `get_view_requirements(is_training=...)`.
        trajectories (List[Trajectory]): The data from which to generate
            an input_dict. Each item must expose `buffers` (indexable by
            column name) and `cursor`.
        is_training (bool): Whether the view should be generated for training
            purposes or inference (default).

    Returns:
        Dict[str, TensorType]: The input_dict to be passed into the ModelV2
            for inference/training.
    """
    # Get ModelV2's view requirements.
    view_reqs = model.get_view_requirements(is_training=is_training)
    # Construct the view dict.
    view = {}
    for view_col, view_req in view_reqs.items():
        # A requirement without an explicit `data_col` refers to the buffer
        # named like the view column itself (`data_col=None` is the
        # default), so fall back to the dict key instead of looking up
        # `buffers[None]`.
        data_col = view_req.data_col
        if data_col is None:
            data_col = view_col
        offset = view_req.timesteps
        # Create the batch of data from the different trajectory buffers.
        # TODO: (sven): Here, we actually do create a copy of the data (from a
        #   list). The only way to avoid this entirely would be to keep a
        #   single(!) np buffer per column across all currently ongoing
        #   agents + episodes (which seems very hard to realize).
        view[view_col] = np.array([
            t.buffers[data_col][t.cursor + offset] for t in trajectories
        ])
    return view