import logging
from typing import Callable, Tuple, Optional, List, Dict, Any, TYPE_CHECKING, Union, Set

import gym

import ray
from ray.rllib.utils.annotations import Deprecated, DeveloperAPI, PublicAPI
from ray.rllib.utils.typing import AgentID, EnvID, EnvType, MultiAgentDict, MultiEnvDict

if TYPE_CHECKING:
    from ray.rllib.evaluation.rollout_worker import RolloutWorker

ASYNC_RESET_RETURN = "async_reset_return"

logger = logging.getLogger(__name__)


@PublicAPI
class BaseEnv:
    """The lowest-level env interface used by RLlib for sampling.

    BaseEnv models multiple agents executing asynchronously in multiple
    vectorized sub-environments. A call to `poll()` returns observations from
    ready agents keyed by their sub-environment ID and agent IDs, and
    actions for those agents can be sent back via `send_actions()`.

    All other RLlib-supported env types can be converted to BaseEnv.
    RLlib handles these conversions internally in RolloutWorker, for example:

    gym.Env => rllib.VectorEnv => rllib.BaseEnv
    rllib.MultiAgentEnv (is-a gym.Env) => rllib.VectorEnv => rllib.BaseEnv
    rllib.ExternalEnv => rllib.BaseEnv

    Examples:
        >>> MyBaseEnv = ... # doctest: +SKIP
        >>> env = MyBaseEnv() # doctest: +SKIP
        >>> obs, rewards, dones, infos, off_policy_actions = env.poll() # doctest: +SKIP
        >>> print(obs) # doctest: +SKIP
        {
            "env_0": {
                "car_0": [2.4, 1.6],
                "car_1": [3.4, -3.2],
            },
            "env_1": {
                "car_0": [8.0, 4.1],
            },
            "env_2": {
                "car_0": [2.3, 3.3],
                "car_1": [1.4, -0.2],
                "car_3": [1.2, 0.1],
            },
        }
        >>> env.send_actions({ # doctest: +SKIP
        ...     "env_0": { # doctest: +SKIP
        ...         "car_0": 0, # doctest: +SKIP
        ...         "car_1": 1, # doctest: +SKIP
        ...     }, ... # doctest: +SKIP
        ... }) # doctest: +SKIP
        >>> obs, rewards, dones, infos, off_policy_actions = env.poll() # doctest: +SKIP
        >>> print(obs) # doctest: +SKIP
        {
            "env_0": {
                "car_0": [4.1, 1.7],
                "car_1": [3.2, -4.2],
            }, ...
        }
        >>> print(dones) # doctest: +SKIP
        {
            "env_0": {
                "__all__": False,
                "car_0": False,
                "car_1": True,
            }, ...
        }
    """

    def to_base_env(
        self,
        make_env: Optional[Callable[[int], EnvType]] = None,
        num_envs: int = 1,
        remote_envs: bool = False,
        remote_env_batch_wait_ms: int = 0,
        restart_failed_sub_environments: bool = False,
    ) -> "BaseEnv":
        """Converts an RLlib-supported env into a BaseEnv object.

        Supported types for the env being converted are gym.Env, BaseEnv,
        VectorEnv, MultiAgentEnv, ExternalEnv, or ExternalMultiAgentEnv.

        The resulting BaseEnv is always vectorized (contains n
        sub-environments) to support batched forward passes, where n may also
        be 1. BaseEnv also supports async execution via the `poll` and
        `send_actions` methods and thus supports external simulators.

        TODO: Support gym3 environments, which are already vectorized.

        Args:
            make_env: A callable taking an int as input (which indicates the
                number of individual sub-environments within the final
                vectorized BaseEnv) and returning one individual
                sub-environment.
            num_envs: The number of sub-environments to create in the
                resulting (vectorized) BaseEnv. The already existing `env`
                will be one of the `num_envs`.
            remote_envs: Whether each sub-env should be a @ray.remote actor.
                You can set this behavior in your config via the
                `remote_worker_envs=True` option.
            remote_env_batch_wait_ms: The wait time (in ms) to poll remote
                sub-environments for, if applicable. Only used if
                `remote_envs` is True.
            restart_failed_sub_environments: If True and any sub-environment
                throws an error during env stepping, try to restart the faulty
                sub-environment instead of crashing the RolloutWorker.

        Returns:
            The resulting BaseEnv object.
        """
        return self

    @PublicAPI
    def poll(
        self,
    ) -> Tuple[MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict]:
        """Returns observations from ready agents.

        All return values are two-level dicts mapping from EnvID to dicts
        mapping from AgentIDs to (observation/reward/etc..) values.
        The number of agents and sub-environments may vary over time.

        Returns:
            Tuple consisting of
            1) New observations for each ready agent.
            2) Reward values for each ready agent. If the episode has
                just started, the value will be None.
            3) Done values for each ready agent. The special key "__all__"
                is used to indicate env termination.
            4) Info values for each ready agent.
            5) Agents may take off-policy actions. When that
                happens, there will be an entry in this dict that contains the
                taken action. There is no need to send_actions() for agents that
                have already chosen off-policy actions.
        """
        raise NotImplementedError

    @PublicAPI
    def send_actions(self, action_dict: MultiEnvDict) -> None:
        """Called to send actions back to running agents in this env.

        Actions should be sent for each ready agent that returned observations
        in the previous poll() call.

        Args:
            action_dict: Action values keyed by env_id and agent_id.
        """
        raise NotImplementedError

    @PublicAPI
    def try_reset(
        self, env_id: Optional[EnvID] = None
    ) -> Optional[Union[MultiAgentDict, MultiEnvDict]]:
        """Attempt to reset the sub-env with the given id or all sub-envs.

        If the environment does not support synchronous reset, None can be
        returned here.

        Args:
            env_id: The sub-environment's ID if applicable. If None, reset
                the entire Env (i.e. all sub-environments).

        Note: A MultiAgentDict is returned when using the deprecated wrapper
        classes such as `ray.rllib.env.base_env._MultiAgentEnvToBaseEnv`,
        however for consistency with the poll() method, a `MultiEnvDict` is
        returned from the new wrapper classes, such as
        `ray.rllib.env.multi_agent_env.MultiAgentEnvWrapper`.
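
        Example (illustrative shapes only): a MultiAgentDict maps agent IDs to
        values, e.g. {"car_0": obs0, "car_1": obs1}, while the corresponding
        MultiEnvDict is keyed by env ID first, e.g.
        {0: {"car_0": obs0, "car_1": obs1}}.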

        Returns:
            The reset (multi-agent) observation dict. None if reset is not
            supported.
        """
        return None

    @DeveloperAPI
    def try_restart(self, env_id: Optional[EnvID] = None) -> None:
        """Attempt to restart the sub-env with the given id or all sub-envs.

        This could result in the sub-env being completely removed (gc'd) and recreated.

        Args:
            env_id: The sub-environment's ID, if applicable. If None, restart
                the entire Env (i.e. all sub-environments).
        """
        return None

    @PublicAPI
    def get_sub_environments(self, as_dict: bool = False) -> Union[List[EnvType], dict]:
        """Return a reference to the underlying sub environments, if any.

        Args:
            as_dict: If True, return a dict mapping from env_id to env.

        Returns:
            List or dictionary of the underlying sub environments or [] / {}.
        """
        if as_dict:
            return {}
        return []

    @PublicAPI
    def get_agent_ids(self) -> Set[AgentID]:
        """Return the agent ids for the sub-environments.

        Returns:
            All agent ids for each environment.
        """
        return set()

    @PublicAPI
    def try_render(self, env_id: Optional[EnvID] = None) -> None:
        """Tries to render the sub-environment with the given id or all.

        Args:
            env_id: The sub-environment's ID, if applicable.
                If None, renders the entire Env (i.e. all sub-environments).
        """
        # By default, do nothing.
        pass

    @PublicAPI
    def stop(self) -> None:
        """Releases all resources used."""

        # Try calling `close` on all sub-environments.
        for env in self.get_sub_environments():
            if hasattr(env, "close"):
                env.close()

    @Deprecated(new="get_sub_environments", error=False)
    def get_unwrapped(self) -> List[EnvType]:
        return self.get_sub_environments()

    @property
    @PublicAPI
    def observation_space(self) -> gym.Space:
        """Returns the observation space for each agent.

        Note: samples from the observation space need to be preprocessed into a
            `MultiEnvDict` before being used by a policy.

        Returns:
            The observation space for each environment.
        """
        raise NotImplementedError

    @property
    @PublicAPI
    def action_space(self) -> gym.Space:
        """Returns the action space for each agent.

        Note: samples from the action space need to be preprocessed into a
            `MultiEnvDict` before being passed to `send_actions`.

        Returns:
            The action space for each environment.
        """
        raise NotImplementedError

    @PublicAPI
    def action_space_sample(self, agent_id: list = None) -> MultiEnvDict:
        """Returns a random action for each environment, and potentially each
        agent in that environment.

        Args:
            agent_id: List of agent ids to sample actions for. If None or empty
                list, sample actions for all agents in the environment.

        Returns:
            A random action for each environment.
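
            Example return value (an illustrative sketch only; actual values
            depend on each sub-environment's action space):
            {0: {"agent0": 1}, 1: {"agent0": 0}}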
        """
        logger.warning("action_space_sample() has not been implemented")
        del agent_id
        return {}

    @PublicAPI
    def observation_space_sample(self, agent_id: list = None) -> MultiEnvDict:
        """Returns a random observation for each environment, and potentially
        each agent in that environment.

        Args:
            agent_id: List of agent ids to sample observations for. If None or
                empty list, sample observations for all agents in the environment.

        Returns:
            A random observation for each environment.
        """
        logger.warning("observation_space_sample() has not been implemented")
        del agent_id
        return {}

    @PublicAPI
    def last(
        self,
    ) -> Tuple[MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict, MultiEnvDict]:
        """Returns the last observations, rewards, and done flags that were
        returned by the environment.

        Returns:
            The last observations, rewards, and done flags for each environment.
        """
        logger.warning("last has not been implemented for this environment.")
        return {}, {}, {}, {}, {}

    @PublicAPI
    def observation_space_contains(self, x: MultiEnvDict) -> bool:
        """Checks if the given observation is valid for each environment.

        Args:
            x: Observations to check.

        Returns:
            True if the observations are contained within their respective
            spaces. False otherwise.
        """
        return self._space_contains(self.observation_space, x)

    @PublicAPI
    def action_space_contains(self, x: MultiEnvDict) -> bool:
        """Checks if the given actions are valid for each environment.

        Args:
            x: Actions to check.

        Returns:
            True if the actions are contained within their respective
            spaces. False otherwise.
        """
        return self._space_contains(self.action_space, x)

    def _space_contains(self, space: gym.Space, x: MultiEnvDict) -> bool:
        """Check if the given space contains the observations of x.

        Args:
            space: The space to check if x's observations are contained in.
            x: The observations to check.

        Returns:
            True if the observations of x are contained in space.
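
        Example (illustrative structure only): x is a MultiEnvDict such as
        {0: {"agent0": obs0}, 1: {"car_0": obs1, "car_1": obs2}}, i.e. keyed
        first by env ID, then by agent ID.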
        """
        agents = set(self.get_agent_ids())
        for multi_agent_dict in x.values():
            for agent_id, obs in multi_agent_dict.items():
                # This is for the case where we have a single agent
                # and we're checking a VectorEnv that's been converted to
                # a BaseEnv.
                if agent_id == _DUMMY_AGENT_ID:
                    if not space.contains(obs):
                        return False
                # For the MultiAgentEnv case.
                elif (agent_id not in agents) or (not space[agent_id].contains(obs)):
                    return False

        return True


# Fixed agent identifier when there is only the single agent in the env.
_DUMMY_AGENT_ID = "agent0"


@PublicAPI
def with_dummy_agent_id(
    env_id_to_values: Dict[EnvID, Any], dummy_id: "AgentID" = _DUMMY_AGENT_ID
) -> MultiEnvDict:
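    """Wraps each env's value under the fixed dummy agent ID.

    Illustrative sketch of the mapping (example values only):
    {0: [1.0, 2.0], 1: [3.0]} -> {0: {"agent0": [1.0, 2.0]}, 1: {"agent0": [3.0]}}.
    Exceptions are passed through unchanged, so callers of `poll()` can tell
    that an entire sub-environment has crashed.
    """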
    ret = {}
    for (env_id, value) in env_id_to_values.items():
        # If the value (e.g. the observation) is an Exception, publish this error
        # under the env ID so the caller of `poll()` knows that the entire episode
        # (sub-environment) has crashed.
        ret[env_id] = value if isinstance(value, Exception) else {dummy_id: value}
    return ret


@DeveloperAPI
def convert_to_base_env(
    env: EnvType,
    make_env: Callable[[int], EnvType] = None,
    num_envs: int = 1,
    remote_envs: bool = False,
    remote_env_batch_wait_ms: int = 0,
    worker: Optional["RolloutWorker"] = None,
    restart_failed_sub_environments: bool = False,
) -> "BaseEnv":
    """Converts an RLlib-supported env into a BaseEnv object.

    Supported types for the `env` arg are gym.Env, BaseEnv,
    VectorEnv, MultiAgentEnv, ExternalEnv, or ExternalMultiAgentEnv.

    The resulting BaseEnv is always vectorized (contains n
    sub-environments) to support batched forward passes, where n may also
    be 1. BaseEnv also supports async execution via the `poll` and
    `send_actions` methods and thus supports external simulators.

    TODO: Support gym3 environments, which are already vectorized.

    Args:
        env: An already existing environment of any supported env type
            to convert/wrap into a BaseEnv. Supported types are gym.Env,
            BaseEnv, VectorEnv, MultiAgentEnv, ExternalEnv, and
            ExternalMultiAgentEnv.
        make_env: A callable taking an int as input (which indicates the
            number of individual sub-environments within the final
            vectorized BaseEnv) and returning one individual
            sub-environment.
        num_envs: The number of sub-environments to create in the
            resulting (vectorized) BaseEnv. The already existing `env`
            will be one of the `num_envs`.
        remote_envs: Whether each sub-env should be a @ray.remote actor.
            You can set this behavior in your config via the
            `remote_worker_envs=True` option.
        remote_env_batch_wait_ms: The wait time (in ms) to poll remote
            sub-environments for, if applicable. Only used if
            `remote_envs` is True.
        worker: An optional RolloutWorker that owns the env. This is only
            used if `remote_worker_envs` is True in your config and the
            `on_sub_environment_created` custom callback needs to be called
            on each created actor.
        restart_failed_sub_environments: If True and any sub-environment (within
            a vectorized env) throws any error during env stepping, the
            Sampler will try to restart the faulty sub-environment. This is done
            without disturbing the other (still intact) sub-environments and without
            the RolloutWorker crashing.

    Returns:
        The resulting BaseEnv object.
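
    Examples:
        >>> # A minimal usage sketch (assumes gym and its CartPole env are
        >>> # available; the lambda-based make_env is illustrative only).
        >>> import gym # doctest: +SKIP
        >>> base_env = convert_to_base_env( # doctest: +SKIP
        ...     env=gym.make("CartPole-v0"), # doctest: +SKIP
        ...     make_env=lambda idx: gym.make("CartPole-v0"), # doctest: +SKIP
        ...     num_envs=2, # doctest: +SKIP
        ... ) # doctest: +SKIP
        >>> obs, rewards, dones, infos, off_policy_actions = base_env.poll() # doctest: +SKIP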
    """
    from ray.rllib.env.remote_base_env import RemoteBaseEnv
    from ray.rllib.env.external_env import ExternalEnv
    from ray.rllib.env.multi_agent_env import MultiAgentEnv
    from ray.rllib.env.vector_env import VectorEnv, VectorEnvWrapper

    if remote_envs and num_envs == 1:
        raise ValueError(
            "Remote envs only make sense to use if num_envs > 1 "
            "(i.e. environment vectorization is enabled)."
        )

    # Given `env` has a `to_base_env` method -> Call that to convert to a BaseEnv type.
    if isinstance(env, (BaseEnv, MultiAgentEnv, VectorEnv, ExternalEnv)):
        return env.to_base_env(
            make_env=make_env,
            num_envs=num_envs,
            remote_envs=remote_envs,
            remote_env_batch_wait_ms=remote_env_batch_wait_ms,
            restart_failed_sub_environments=restart_failed_sub_environments,
        )
    # `env` is not a BaseEnv yet -> Need to convert/vectorize.
    else:
        # Sub-environments are ray.remote actors:
        if remote_envs:
            # Determine whether the already existing sub-env (could
            # be a ray.actor) is multi-agent or not.
            multiagent = (
                ray.get(env._is_multi_agent.remote())
                if hasattr(env, "_is_multi_agent")
                else False
            )
            env = RemoteBaseEnv(
                make_env,
                num_envs,
                multiagent=multiagent,
                remote_env_batch_wait_ms=remote_env_batch_wait_ms,
                existing_envs=[env],
                worker=worker,
                restart_failed_sub_environments=restart_failed_sub_environments,
            )
        # Sub-environments are not ray.remote actors.
        else:
            # Convert gym.Env to VectorEnv ...
            env = VectorEnv.vectorize_gym_envs(
                make_env=make_env,
                existing_envs=[env],
                num_envs=num_envs,
                action_space=env.action_space,
                observation_space=env.observation_space,
                restart_failed_sub_environments=restart_failed_sub_environments,
            )
            # ... then the resulting VectorEnv to a BaseEnv.
            env = VectorEnvWrapper(env)

    # Make sure conversion went well.
    assert isinstance(env, BaseEnv), env

    return env


@Deprecated(
    old="ray.rllib.env.base_env._VectorEnvToBaseEnv",
    new="ray.rllib.env.vector_env.VectorEnvWrapper",
    error=True,
)
class _VectorEnvToBaseEnv(BaseEnv):
    pass


@Deprecated(
    old="ray.rllib.env.base_env._ExternalEnvToBaseEnv",
    new="ray.rllib.env.external.ExternalEnvWrapper",
    error=True,
)
class _ExternalEnvToBaseEnv(BaseEnv):
    pass


@Deprecated(
    old="ray.rllib.env.base_env._MultiAgentEnvToBaseEnv",
    new="ray.rllib.env.multi_agent_env.MultiAgentEnvWrapper",
    error=True,
)
class _MultiAgentEnvToBaseEnv(BaseEnv):
    pass


@Deprecated(
    old="ray.rllib.env.base_env._MultiAgentEnvState",
    new="ray.rllib.env.multi_agent_env._MultiAgentEnvState",
    error=True,
)
class _MultiAgentEnvState:
    pass


@Deprecated(new="with_dummy_agent_id()", error=False)
def _with_dummy_agent_id(
    env_id_to_values: Dict[EnvID, Any], dummy_id: "AgentID" = _DUMMY_AGENT_ID
) -> MultiEnvDict:
    return {k: {dummy_id: v} for (k, v) in env_id_to_values.items()}