# ray/rllib/utils/typing.py

import gym
from typing import (
    Any,
    Callable,
    Dict,
    List,
    Optional,
    Tuple,
    Union,
    TypeVar,
    TYPE_CHECKING,
)

# The imports below run only under static type checking (TYPE_CHECKING is
# False at runtime), so none of these names are bound when this module is
# actually imported. The aliases in this file therefore refer to them only
# through quoted forward references (e.g. "EnvContext").
if TYPE_CHECKING:
    from ray.rllib.env.env_context import EnvContext
    from ray.rllib.policy.policy import PolicySpec
    from ray.rllib.policy.sample_batch import SampleBatch, MultiAgentBatch
    from ray.rllib.policy.view_requirement import ViewRequirement
    from ray.rllib.utils import try_import_tf, try_import_torch

    # Keep only the second element of try_import_tf()'s 3-tuple (as `tf`) and
    # the first element of try_import_torch()'s 2-tuple (as `torch`).
    _, tf, _ = try_import_tf()
    torch, _ = try_import_torch()

# Represents a generic tensor type.
# This could be an np.ndarray, tf.Tensor, or a torch.Tensor.
TensorType = Any

# Either a plain tensor, or a dict or tuple of tensors (or of further nested
# TensorStructTypes).
TensorStructType = Union[TensorType, dict, tuple]

# A shape of a tensor: a tuple or list of ints of arbitrary length.
# Note: `Tuple[int, ...]` is required for a variable-length tuple; a bare
# `Tuple[int]` would only describe 1-tuples such as `(3,)`.
TensorShape = Union[Tuple[int, ...], List[int]]

# Represents a fully filled out config of a Trainer class.
# Note: Policy config dicts are usually the same as TrainerConfigDict, but
# parts of it may sometimes be altered in e.g. a multi-agent setup,
# where we have >1 Policies in the same Trainer.
TrainerConfigDict = dict

# A trainer config dict that only has overrides. It needs to be combined with
# the default trainer config to be used.
PartialTrainerConfigDict = dict

# Represents the model config sub-dict of the trainer config that is passed to
# the model catalog.
ModelConfigDict = dict

# Objects that can be created through the `from_config()` util method
# need a config dict with a "type" key, a class path (str), or a type directly.
FromConfigSpec = Union[Dict[str, Any], type, str]

# Represents the env_config sub-dict of the trainer config that is passed to
# the env constructor.
EnvConfigDict = dict

# Represents an environment id. These could be:
# - An int index for a sub-env within a vectorized env.
# - An external env ID (str), which changes(!) each episode.
EnvID = Union[int, str]

# Represents a BaseEnv, MultiAgentEnv, ExternalEnv, ExternalMultiAgentEnv,
# VectorEnv, gym.Env, or ActorHandle.
EnvType = Any

# A callable, taking an EnvContext object
# (config dict + properties: `worker_index`, `vector_index`, `num_workers`,
# and `remote`) and returning an env object (or None if no env is used).
# "EnvContext" is a quoted forward reference: the class is imported only
# under TYPE_CHECKING.
EnvCreator = Callable[["EnvContext"], Optional[EnvType]]

# Represents a generic identifier for an agent (e.g., "agent1").
AgentID = Any

# Represents a generic identifier for a policy (e.g., "pol1").
PolicyID = str

# Type of the config["multiagent"]["policies"] dict for multi-agent training.
MultiAgentPolicyConfigDict = Dict[PolicyID, "PolicySpec"]

# State dict of a Policy, mapping strings (e.g. "weights") to some state
# data (TensorStructType).
PolicyState = Dict[str, TensorStructType]

# Represents an episode id.
EpisodeID = int

# Represents an "unroll" (maybe across different sub-envs in a vector env).
UnrollID = int

# A dict keyed by agent ids, e.g. {"agent-1": value}.
MultiAgentDict = Dict[AgentID, Any]

# A dict keyed by env ids that contain further nested dictionaries keyed by
# agent ids. e.g., {"env-1": {"agent-1": value}}.
MultiEnvDict = Dict[EnvID, MultiAgentDict]

# Represents an observation returned from the env.
EnvObsType = Any

# Represents an action passed to the env.
EnvActionType = Any

# Info dictionary returned by calling step() on gym envs. Commonly empty dict.
EnvInfoDict = dict

# Represents a File object
FileType = Any

# Represents a ViewRequirements dict mapping column names (str) to
# ViewRequirement objects.
ViewRequirementsDict = Dict[str, "ViewRequirement"]

# Represents the result dict returned by Trainer.train().
ResultDict = dict

# A tf or torch local optimizer object.
# Both members are quoted forward references: `tf` and `torch` are bound only
# under TYPE_CHECKING, so they must not be evaluated at import time.
LocalOptimizer = Union["tf.keras.optimizers.Optimizer", "torch.optim.Optimizer"]

# Dict of tensors returned by compute gradients on the policy, e.g.,
# {"td_error": [...], "learner_stats": {"vf_loss": ..., ...}}, for multi-agent,
# {"policy1": {"learner_stats": ..., }, "policy2": ...}.
GradInfoDict = dict

# Dict of learner stats returned by compute gradients on the policy, e.g.,
# {"vf_loss": ..., ...}. This will always be nested under the "learner_stats"
# key(s) of a GradInfoDict. In the multi-agent case, this will be keyed by
# policy id.
LearnerStatsDict = dict

# List of grads+var tuples (tf) or list of gradient tensors (torch)
# representing model gradients and returned by compute_gradients().
ModelGradients = Union[List[Tuple[TensorType, TensorType]], List[TensorType]]

# Type of dict returned by get_weights() representing model weights.
ModelWeights = dict

# An input dict used for direct ModelV2 calls.
ModelInputDict = Dict[str, TensorType]

# Some kind of sample batch.
SampleBatchType = Union["SampleBatch", "MultiAgentBatch"]

# A (possibly nested) space struct: Either a gym.spaces.Space or a
# (possibly nested) dict|tuple of gym.spaces.Space objects.
SpaceStruct = Union[gym.spaces.Space, dict, tuple]

# Generic type var.
T = TypeVar("T")
[RLlib] Better utils for flattening complex inputs and enable prev-actions for LSTM/attention for complex action spaces. (#21330) 2022-01-05 11:29:44 +01:00			`import gym`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`from typing import (`
			`Any,`
			`Callable,`
			`Dict,`
			`List,`
			`Optional,`
			`Tuple,`
			`Union,`
			`TypeVar,`
			`TYPE_CHECKING,`
			`)`
[rllib] Add type annotations for evaluation/, env/ packages (#9003) 2020-06-19 13:09:05 -07:00
[CI] Upgrade flake8 to 3.9.1 (#15527) * formatting * format util * format release * format rllib/agents * format rllib/env * format rllib/execution * format rllib/evaluation * format rllib/examples * format rllib/policy * format rllib utils and tests * format streaming * more formatting * update requirements files * fix rllib type checking * updates * update * fix circular import * Update python/ray/tests/test_runtime_env.py * noqa 2021-05-03 14:23:28 -07:00			`if TYPE_CHECKING:`
[RLlib] Preparatory PR for multi-agent multi-GPU learner (alpha-star style) #03 (#21652) 2022-01-25 14:16:58 +01:00			`from ray.rllib.env.env_context import EnvContext`
[RLlib] Redo simplify multi agent config dict: Reverted b/c seemed to break test_typing (non RLlib test). (#17046) 2021-07-15 05:51:24 -04:00			`from ray.rllib.policy.policy import PolicySpec`
[CI] Upgrade flake8 to 3.9.1 (#15527) * formatting * format util * format release * format rllib/agents * format rllib/env * format rllib/execution * format rllib/evaluation * format rllib/examples * format rllib/policy * format rllib utils and tests * format streaming * more formatting * update requirements files * fix rllib type checking * updates * update * fix circular import * Update python/ray/tests/test_runtime_env.py * noqa 2021-05-03 14:23:28 -07:00			`from ray.rllib.policy.sample_batch import SampleBatch, MultiAgentBatch`
			`from ray.rllib.policy.view_requirement import ViewRequirement`
[RLlib] Preparatory PR for multi-agent multi-GPU learner (alpha-star style) #03 (#21652) 2022-01-25 14:16:58 +01:00			`from ray.rllib.utils import try_import_tf, try_import_torch`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00
[RLlib] Preparatory PR for multi-agent multi-GPU learner (alpha-star style) #03 (#21652) 2022-01-25 14:16:58 +01:00			`_, tf, _ = try_import_tf()`
			`torch, _ = try_import_torch()`

			`# Represents a generic tensor type.`
			`# This could be an np.ndarray, tf.Tensor, or a torch.Tensor.`
			`TensorType = Any`

			`# Either a plain tensor, or a dict or tuple of tensors (or StructTensors).`
			`TensorStructType = Union[TensorType, dict, tuple]`

			`# A shape of a tensor.`
			`TensorShape = Union[Tuple[int], List[int]]`
[CI] Upgrade flake8 to 3.9.1 (#15527) * formatting * format util * format release * format rllib/agents * format rllib/env * format rllib/execution * format rllib/evaluation * format rllib/examples * format rllib/policy * format rllib utils and tests * format streaming * more formatting * update requirements files * fix rllib type checking * updates * update * fix circular import * Update python/ray/tests/test_runtime_env.py * noqa 2021-05-03 14:23:28 -07:00
[rllib] Add type annotations for evaluation/, env/ packages (#9003) 2020-06-19 13:09:05 -07:00			`# Represents a fully filled out config of a Trainer class.`
[RLlib] Type annotations for policy. (#9248) 2020-07-05 13:09:51 +02:00			`# Note: Policy config dicts are usually the same as TrainerConfigDict, but`
			`# parts of it may sometimes be altered in e.g. a multi-agent setup,`
			`# where we have >1 Policies in the same Trainer.`
[rllib] Add type annotations for evaluation/, env/ packages (#9003) 2020-06-19 13:09:05 -07:00			`TrainerConfigDict = dict`

			`# A trainer config dict that only has overrides. It needs to be combined with`
			`# the default trainer config to be used.`
			`PartialTrainerConfigDict = dict`

			`# Represents the model config sub-dict of the trainer config that is passed to`
			`# the model catalog.`
			`ModelConfigDict = dict`

[RLlib] Curiosity minor fixes, do-overs, and testing. (#10143) 2020-08-19 17:49:50 +02:00			# Objects that can be created through the `from_config()` util method
			`# need a config dict with a "type" key, a class path (str), or a type directly.`
			`FromConfigSpec = Union[Dict[str, Any], type, str]`

[RLlib] Preparatory PR for multi-agent multi-GPU learner (alpha-star style) #03 (#21652) 2022-01-25 14:16:58 +01:00			`# Represents the env_config sub-dict of the trainer config that is passed to`
			`# the env constructor.`
			`EnvConfigDict = dict`

			`# Represents an environment id. These could be:`
			`# - An int index for a sub-env within a vectorized env.`
			`# - An external env ID (str), which changes(!) each episode.`
			`EnvID = Union[int, str]`

[rllib] Add type annotations for evaluation/, env/ packages (#9003) 2020-06-19 13:09:05 -07:00			`# Represents a BaseEnv, MultiAgentEnv, ExternalEnv, ExternalMultiAgentEnv,`
[RLlib; Documentation] Some docstring cleanups; Rename RemoteVectorEnv into RemoteBaseEnv for clarity. (#20250) 2021-11-17 21:40:16 +01:00			`# VectorEnv, gym.Env, or ActorHandle.`
[rllib] Add type annotations for evaluation/, env/ packages (#9003) 2020-06-19 13:09:05 -07:00			`EnvType = Any`

[RLlib] Preparatory PR for multi-agent multi-GPU learner (alpha-star style) #03 (#21652) 2022-01-25 14:16:58 +01:00			`# A callable, taking a EnvContext object`
			# (config dict + properties: `worker_index`, `vector_index`, `num_workers`,
			# and `remote`) and returning an env object (or None if no env is used).
			`EnvCreator = Callable[["EnvContext"], Optional[EnvType]]`

[rllib] Add type annotations for evaluation/, env/ packages (#9003) 2020-06-19 13:09:05 -07:00			`# Represents a generic identifier for an agent (e.g., "agent1").`
			`AgentID = Any`

			`# Represents a generic identifier for a policy (e.g., "pol1").`
			`PolicyID = str`

			`# Type of the config["multiagent"]["policies"] dict for multi-agent training.`
[RLlib] Redo simplify multi agent config dict: Reverted b/c seemed to break test_typing (non RLlib test). (#17046) 2021-07-15 05:51:24 -04:00			`MultiAgentPolicyConfigDict = Dict[PolicyID, "PolicySpec"]`
[rllib] Add type annotations for evaluation/, env/ packages (#9003) 2020-06-19 13:09:05 -07:00
[RLlib] Preparatory PR for multi-agent multi-GPU learner (alpha-star style) #03 (#21652) 2022-01-25 14:16:58 +01:00			`# State dict of a Policy, mapping strings (e.g. "weights") to some state`
			`# data (TensorStructType).`
			`PolicyState = Dict[str, TensorStructType]`
[rllib] Add type annotations for evaluation/, env/ packages (#9003) 2020-06-19 13:09:05 -07:00
[RLlib] Trajectory View API (preparatory cleanup and enhancements). (#9678) 2020-07-29 21:15:09 +02:00			`# Represents an episode id.`
			`EpisodeID = int`

[RLlib] Trajectory view API - 03 Fast LSTM + prev actions/rewards (#9950) 2020-08-21 12:35:16 +02:00			`# Represents an "unroll" (maybe across different sub-envs in a vector env).`
			`UnrollID = int`

[rllib] Add type annotations for evaluation/, env/ packages (#9003) 2020-06-19 13:09:05 -07:00			`# A dict keyed by agent ids, e.g. {"agent-1": value}.`
			`MultiAgentDict = Dict[AgentID, Any]`

			`# A dict keyed by env ids that contain further nested dictionaries keyed by`
			`# agent ids. e.g., {"env-1": {"agent-1": value}}.`
			`MultiEnvDict = Dict[EnvID, MultiAgentDict]`

			`# Represents an observation returned from the env.`
			`EnvObsType = Any`

			`# Represents an action passed to the env.`
			`EnvActionType = Any`

			`# Info dictionary returned by calling step() on gym envs. Commonly empty dict.`
			`EnvInfoDict = dict`

[RLlib] Offline Type Annotations (#9676) * Offline Annotations * Modifications * Fixed circular dependencies * Linter fix 2020-07-27 14:01:17 -07:00			`# Represents a File object`
			`FileType = Any`

[RLlib] Attention Net prep PR #3. (#12450) 2020-12-07 13:08:17 +01:00			`# Represents a ViewRequirements dict mapping column names (str) to`
			`# ViewRequirement objects.`
			`ViewRequirementsDict = Dict[str, "ViewRequirement"]`

[rllib] Add type annotations for evaluation/, env/ packages (#9003) 2020-06-19 13:09:05 -07:00			`# Represents the result dict returned by Trainer.train().`
			`ResultDict = dict`

[RLlib] Curiosity minor fixes, do-overs, and testing. (#10143) 2020-08-19 17:49:50 +02:00			`# A tf or torch local optimizer object.`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`LocalOptimizer = Union["tf.keras.optimizers.Optimizer", "torch.optim.Optimizer"]`
[RLlib] Curiosity minor fixes, do-overs, and testing. (#10143) 2020-08-19 17:49:50 +02:00
[rllib] Add type annotations for evaluation/, env/ packages (#9003) 2020-06-19 13:09:05 -07:00			`# Dict of tensors returned by compute gradients on the policy, e.g.,`
			`# {"td_error": [...], "learner_stats": {"vf_loss": ..., ...}}, for multi-agent,`
			`# {"policy1": {"learner_stats": ..., }, "policy2": ...}.`
			`GradInfoDict = dict`

			`# Dict of learner stats returned by compute gradients on the policy, e.g.,`
			`# {"vf_loss": ..., ...}. This will always be nested under the "learner_stats"`
			`# key(s) of a GradInfoDict. In the multi-agent case, this will be keyed by`
			`# policy id.`
			`LearnerStatsDict = dict`

[RLlib] Type annotations for policy. (#9248) 2020-07-05 13:09:51 +02:00			`# List of grads+var tuples (tf) or list of gradient tensors (torch)`
			`# representing model gradients and returned by compute_gradients().`
			`ModelGradients = Union[List[Tuple[TensorType, TensorType]], List[TensorType]]`
[rllib] Add type annotations for evaluation/, env/ packages (#9003) 2020-06-19 13:09:05 -07:00
			`# Type of dict returned by get_weights() representing model weights.`
			`ModelWeights = dict`

[RLlib] Fix failing test cases: Soft-deprecate ModelV2.from_batch (in favor of ModelV2.__call__). (#19693) 2021-10-25 15:00:00 +02:00			`# An input dict used for direct ModelV2 calls.`
[RLlib] Attention Nets: tf (#12753) 2020-12-21 02:22:32 +01:00			`ModelInputDict = Dict[str, TensorType]`

[rllib] Add type annotations for evaluation/, env/ packages (#9003) 2020-06-19 13:09:05 -07:00			`# Some kind of sample batch.`
			`SampleBatchType = Union["SampleBatch", "MultiAgentBatch"]`

[RLlib] Better utils for flattening complex inputs and enable prev-actions for LSTM/attention for complex action spaces. (#21330) 2022-01-05 11:29:44 +01:00			`# A (possibly nested) space struct: Either a gym.spaces.Space or a`
			`# (possibly nested) dict\|tuple of gym.space.Spaces.`
			`SpaceStruct = Union[gym.spaces.Space, dict, tuple]`
[RLlib] Initial code/comment cleanups in preparation for decentralized multi-agent learner. (#21420) 2022-01-10 11:22:55 +01:00
			`# Generic type var.`
			`T = TypeVar("T")`