from collections import Counter
import gym
import numpy as np
import os
import random
import time
import unittest

import ray
from ray.rllib.agents.pg import PGTrainer
from ray.rllib.agents.a3c import A2CTrainer
from ray.rllib.env.vector_env import VectorEnv
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.evaluation.metrics import collect_metrics
from ray.rllib.evaluation.postprocessing import compute_advantages
from ray.rllib.examples.policy.random_policy import RandomPolicy
from ray.rllib.policy.policy import Policy
from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID, SampleBatch
from ray.rllib.utils.annotations import override
from ray.rllib.utils.test_utils import check, framework_iterator
from ray.tune.registry import register_env


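# Policy used by most tests below: emits random 0/1 actions and computes
# (non-GAE) advantages during trajectory postprocessing.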
class MockPolicy(RandomPolicy):
    @override(RandomPolicy)
    def compute_actions(self,
                        obs_batch,
                        state_batches=None,
                        prev_action_batch=None,
                        prev_reward_batch=None,
                        episodes=None,
                        explore=None,
                        timestep=None,
                        **kwargs):
        return np.array([random.choice([0, 1])] * len(obs_batch)), [], {}

    @override(Policy)
    def postprocess_trajectory(self,
                               batch,
                               other_agent_batches=None,
                               episode=None):
        assert episode is not None
        super().postprocess_trajectory(batch, other_agent_batches, episode)
        return compute_advantages(
            batch, 100.0, 0.9, use_gae=False, use_critic=False)


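# Policy whose compute_actions always raises an intentional error.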
class BadPolicy(RandomPolicy):
    @override(RandomPolicy)
    def compute_actions(self,
                        obs_batch,
                        state_batches=None,
                        prev_action_batch=None,
                        prev_reward_batch=None,
                        episodes=None,
                        explore=None,
                        timestep=None,
                        **kwargs):
        raise Exception("intentional error")


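# Env that raises on both reset() and step(); used by test_no_step_on_init.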
class FailOnStepEnv(gym.Env):
    def __init__(self):
        self.observation_space = gym.spaces.Discrete(1)
        self.action_space = gym.spaces.Discrete(2)

    def reset(self):
        raise ValueError("kaboom")

    def step(self, action):
        raise ValueError("kaboom")


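# Minimal episodic env: observation is always 0, reward is 1 per step, and the
# episode ends after `episode_length` steps.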
class MockEnv(gym.Env):
    def __init__(self, episode_length, config=None):
        self.episode_length = episode_length
        self.config = config
        self.i = 0
        self.observation_space = gym.spaces.Discrete(1)
        self.action_space = gym.spaces.Discrete(2)

    def reset(self):
        self.i = 0
        return self.i

    def step(self, action):
        self.i += 1
        return 0, 1, self.i >= self.episode_length, {}


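# Like MockEnv, but the observation is the current step index and each step
# yields a reward of 100.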
class MockEnv2(gym.Env):
    def __init__(self, episode_length):
        self.episode_length = episode_length
        self.i = 0
        self.observation_space = gym.spaces.Discrete(100)
        self.action_space = gym.spaces.Discrete(2)

    def reset(self):
        self.i = 0
        return self.i

    def step(self, action):
        self.i += 1
        return self.i, 100, self.i >= self.episode_length, {}


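# Explicit VectorEnv implementation wrapping `num_envs` MockEnv instances.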
class MockVectorEnv(VectorEnv):
    def __init__(self, episode_length, num_envs):
        super().__init__(
            observation_space=gym.spaces.Discrete(1),
            action_space=gym.spaces.Discrete(2),
            num_envs=num_envs)
        self.envs = [MockEnv(episode_length) for _ in range(num_envs)]

    @override(VectorEnv)
    def vector_reset(self):
        return [e.reset() for e in self.envs]

    @override(VectorEnv)
    def reset_at(self, index):
        return self.envs[index].reset()

    @override(VectorEnv)
    def vector_step(self, actions):
        obs_batch, rew_batch, done_batch, info_batch = [], [], [], []
        for i in range(len(self.envs)):
            obs, rew, done, info = self.envs[i].step(actions[i])
            obs_batch.append(obs)
            rew_batch.append(rew)
            done_batch.append(done)
            info_batch.append(info)
        return obs_batch, rew_batch, done_batch, info_batch

    @override(VectorEnv)
    def get_unwrapped(self):
        return self.envs


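# Tests for RolloutWorker: sampling, batching, clipping, horizons, metrics,
# and auto-vectorization.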
class TestRolloutWorker(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        ray.init(num_cpus=5)

    @classmethod
    def tearDownClass(cls):
        ray.shutdown()

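    # Basic sampling on CartPole with MockPolicy: all expected SampleBatch
    # columns are present and prev_actions/prev_rewards are shifted correctly.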
    def test_basic(self):
        ev = RolloutWorker(
            env_creator=lambda _: gym.make("CartPole-v0"), policy=MockPolicy)
        batch = ev.sample()
        for key in [
                "obs", "actions", "rewards", "dones", "advantages",
                "prev_rewards", "prev_actions"
        ]:
            self.assertIn(key, batch)
            self.assertGreater(np.abs(np.mean(batch[key])), 0)

        def to_prev(vec):
            out = np.zeros_like(vec)
            for i, v in enumerate(vec):
                if i + 1 < len(out) and not batch["dones"][i]:
                    out[i + 1] = v
            return out.tolist()

        self.assertEqual(batch["prev_rewards"].tolist(),
                         to_prev(batch["rewards"]))
        self.assertEqual(batch["prev_actions"].tolist(),
                         to_prev(batch["actions"]))
        self.assertGreater(batch["advantages"][0], 1)
        ev.stop()

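    # Each sampled fragment carries exactly one distinct unroll_id.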
    def test_batch_ids(self):
        ev = RolloutWorker(
            env_creator=lambda _: gym.make("CartPole-v0"),
            policy=MockPolicy,
            rollout_fragment_length=1)
        batch1 = ev.sample()
        batch2 = ev.sample()
        self.assertEqual(len(set(batch1["unroll_id"])), 1)
        self.assertEqual(len(set(batch2["unroll_id"])), 1)
        self.assertEqual(
            len(set(SampleBatch.concat(batch1, batch2)["unroll_id"])), 2)
        ev.stop()

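    # The lr_schedule should decay the learning rate as training progresses.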
    def test_global_vars_update(self):
        # Allow for Unittest run.
        ray.init(num_cpus=5, ignore_reinit_error=True)
        for fw in framework_iterator(frameworks=()):
            agent = A2CTrainer(
                env="CartPole-v0",
                config={
                    "num_workers": 1,
                    "lr_schedule": [[0, 0.1], [100000, 0.000001]],
                    "framework": fw,
                })
            result = agent.train()
            for i in range(10):
                result = agent.train()
                print("num_steps_sampled={}".format(
                    result["info"]["num_steps_sampled"]))
                print("num_steps_trained={}".format(
                    result["info"]["num_steps_trained"]))
                if i == 0:
                    self.assertGreater(
                        result["info"]["learner"]["default_policy"]["cur_lr"],
                        0.01)
                if result["info"]["learner"]["default_policy"]["cur_lr"] < \
                        0.07:
                    break
            self.assertLess(
                result["info"]["learner"]["default_policy"]["cur_lr"], 0.07)
            agent.stop()

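    # Building the Trainer must not step the (failing) env; only train() fails.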
    def test_no_step_on_init(self):
        register_env("fail", lambda _: FailOnStepEnv())
        for fw in framework_iterator(frameworks=()):
            pg = PGTrainer(
                env="fail", config={
                    "num_workers": 1,
                    "framework": fw,
                })
            self.assertRaises(Exception, lambda: pg.train())
            pg.stop()

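    # The episode/sample callbacks should all fire at least once during
    # training.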
    def test_callbacks(self):
        for fw in framework_iterator(frameworks=("torch", "tf")):
            counts = Counter()
            pg = PGTrainer(
                env="CartPole-v0", config={
                    "num_workers": 0,
                    "rollout_fragment_length": 50,
                    "train_batch_size": 50,
                    "callbacks": {
                        "on_episode_start":
                        lambda x: counts.update({"start": 1}),
                        "on_episode_step":
                        lambda x: counts.update({"step": 1}),
                        "on_episode_end": lambda x: counts.update({"end": 1}),
                        "on_sample_end":
                        lambda x: counts.update({"sample": 1}),
                    },
                    "framework": fw,
                })
            pg.train()
            pg.train()
            self.assertGreater(counts["sample"], 0)
            self.assertGreater(counts["start"], 0)
            self.assertGreater(counts["end"], 0)
            self.assertGreater(counts["step"], 0)
            pg.stop()

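    # foreach_worker(_with_index) should return one result per worker
    # (local worker plus the two remote workers).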
    def test_query_evaluators(self):
        register_env("test", lambda _: gym.make("CartPole-v0"))
        for fw in framework_iterator(frameworks=("torch", "tf")):
            pg = PGTrainer(
                env="test",
                config={
                    "num_workers": 2,
                    "rollout_fragment_length": 5,
                    "num_envs_per_worker": 2,
                    "framework": fw,
                })
            results = pg.workers.foreach_worker(
                lambda ev: ev.rollout_fragment_length)
            results2 = pg.workers.foreach_worker_with_index(
                lambda ev, i: (i, ev.rollout_fragment_length))
            results3 = pg.workers.foreach_worker(
                lambda ev: ev.foreach_env(lambda env: 1))
            self.assertEqual(results, [10, 10, 10])
            self.assertEqual(results2, [(0, 10), (1, 10), (2, 10)])
            self.assertEqual(results3, [[1, 1], [1, 1], [1, 1]])
            pg.stop()

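    # Actions should only be clipped to the env's bounds if clip_actions=True.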
    def test_action_clipping(self):
        from ray.rllib.examples.env.random_env import RandomEnv
        action_space = gym.spaces.Box(-2.0, 1.0, (3, ))

        # Clipping: True (clip between Policy's action_space.low/high).
        ev = RolloutWorker(
            env_creator=lambda _: RandomEnv(config=dict(
                action_space=action_space,
                max_episode_len=10,
                p_done=0.0,
                check_action_bounds=True,
            )),
            policy=RandomPolicy,
            policy_config=dict(
                action_space=action_space,
                ignore_action_bounds=True,
            ),
            clip_actions=True,
            batch_mode="complete_episodes")
        sample = ev.sample()
        # Check, whether the action bounds have been breached (expected).
        # We still arrived here b/c we clipped according to the Env's action
        # space.
        self.assertGreater(np.max(sample["actions"]), action_space.high[0])
        self.assertLess(np.min(sample["actions"]), action_space.low[0])
        ev.stop()

        # Clipping: False and RandomPolicy produces invalid actions.
        # Expect Env to complain.
        ev2 = RolloutWorker(
            env_creator=lambda _: RandomEnv(config=dict(
                action_space=action_space,
                max_episode_len=10,
                p_done=0.0,
                check_action_bounds=True,
            )),
            policy=RandomPolicy,
            policy_config=dict(
                action_space=action_space,
                ignore_action_bounds=True,
            ),
            clip_actions=False,  # <- should lead to Env complaining
            batch_mode="complete_episodes")
        self.assertRaisesRegex(ValueError, r"Illegal action", ev2.sample)
        ev2.stop()

        # Clipping: False and RandomPolicy produces valid (bounded) actions.
        # Expect "actions" in SampleBatch to be unclipped.
        ev3 = RolloutWorker(
            env_creator=lambda _: RandomEnv(config=dict(
                action_space=action_space,
                max_episode_len=10,
                p_done=0.0,
                check_action_bounds=True,
            )),
            policy=RandomPolicy,
            policy_config=dict(action_space=action_space),
            # Should not be a problem as RandomPolicy abides to bounds.
            clip_actions=False,
            batch_mode="complete_episodes")
        sample = ev3.sample()
        self.assertGreater(np.min(sample["actions"]), action_space.low[0])
        self.assertLess(np.max(sample["actions"]), action_space.high[0])
        ev3.stop()

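    # clip_rewards=True clips to [-1, 1], a float clips to [-value, value],
    # and False leaves rewards untouched.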
    def test_reward_clipping(self):
        # Clipping: True (clip between -1.0 and 1.0).
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv2(episode_length=10),
            policy=MockPolicy,
            clip_rewards=True,
            batch_mode="complete_episodes")
        self.assertEqual(max(ev.sample()["rewards"]), 1)
        result = collect_metrics(ev, [])
        self.assertEqual(result["episode_reward_mean"], 1000)
        ev.stop()

        from ray.rllib.examples.env.random_env import RandomEnv

        # Clipping in certain range (-2.0, 2.0).
        ev2 = RolloutWorker(
            env_creator=lambda _: RandomEnv(
                dict(
                    reward_space=gym.spaces.Box(low=-10, high=10, shape=()),
                    p_done=0.0,
                    max_episode_len=10,
                )),
            policy=MockPolicy,
            clip_rewards=2.0,
            batch_mode="complete_episodes")
        sample = ev2.sample()
        self.assertEqual(max(sample["rewards"]), 2.0)
        self.assertEqual(min(sample["rewards"]), -2.0)
        self.assertLess(np.mean(sample["rewards"]), 0.5)
        self.assertGreater(np.mean(sample["rewards"]), -0.5)
        ev2.stop()

        # Clipping: Off.
        ev2 = RolloutWorker(
            env_creator=lambda _: MockEnv2(episode_length=10),
            policy=MockPolicy,
            clip_rewards=False,
            batch_mode="complete_episodes")
        self.assertEqual(max(ev2.sample()["rewards"]), 100)
        result2 = collect_metrics(ev2, [])
        self.assertEqual(result2["episode_reward_mean"], 1000)
        ev2.stop()

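    # With soft_horizon=False, hitting the horizon resets the env and emits a
    # done flag.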
    def test_hard_horizon(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv2(episode_length=10),
            policy=MockPolicy,
            batch_mode="complete_episodes",
            rollout_fragment_length=10,
            episode_horizon=4,
            soft_horizon=False)
        samples = ev.sample()
        # Three logical episodes and correct episode resets (always after 4
        # steps).
        self.assertEqual(len(set(samples["eps_id"])), 3)
        for i in range(4):
            self.assertEqual(np.argmax(samples["obs"][i]), i)
        self.assertEqual(np.argmax(samples["obs"][4]), 0)
        # 3 done values.
        self.assertEqual(sum(samples["dones"]), 3)
        ev.stop()

        # A gym env whose max_episode_steps is larger than the Trainer's
        # horizon: the horizon (6) should dictate the episode lengths.
        ev = RolloutWorker(
            env_creator=lambda _: gym.make("CartPole-v0"),
            policy=MockPolicy,
            batch_mode="complete_episodes",
            rollout_fragment_length=10,
            episode_horizon=6,
            soft_horizon=False)
        samples = ev.sample()
        # 12 steps due to `complete_episodes` batch_mode.
        self.assertEqual(len(samples["eps_id"]), 12)
        # Two logical episodes and correct episode resets (always after 6(!)
        # steps).
        self.assertEqual(len(set(samples["eps_id"])), 2)
        # 2 done values after 6 and 12 steps.
        check(samples["dones"], [
            False, False, False, False, False, True, False, False, False,
            False, False, True
        ])
        ev.stop()

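    # With soft_horizon=True, episodes are split at the horizon without
    # resetting the env (only one hard done).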
    def test_soft_horizon(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv(episode_length=10),
            policy=MockPolicy,
            batch_mode="complete_episodes",
            rollout_fragment_length=10,
            episode_horizon=4,
            soft_horizon=True)
        samples = ev.sample()
        # Three logical episodes.
        self.assertEqual(len(set(samples["eps_id"])), 3)
        # Only 1 hard done value.
        self.assertEqual(sum(samples["dones"]), 1)
        ev.stop()

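    # collect_metrics aggregates episodes from the local and a remote worker.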
    def test_metrics(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv(episode_length=10),
            policy=MockPolicy,
            batch_mode="complete_episodes")
        remote_ev = RolloutWorker.as_remote().remote(
            env_creator=lambda _: MockEnv(episode_length=10),
            policy=MockPolicy,
            batch_mode="complete_episodes")
        ev.sample()
        ray.get(remote_ev.sample.remote())
        result = collect_metrics(ev, [remote_ev])
        self.assertEqual(result["episodes_this_iter"], 20)
        self.assertEqual(result["episode_reward_mean"], 10)
        ev.stop()

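    # Async sampling should produce the same SampleBatch columns as sync.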
    def test_async(self):
        ev = RolloutWorker(
            env_creator=lambda _: gym.make("CartPole-v0"),
            sample_async=True,
            policy=MockPolicy)
        batch = ev.sample()
        for key in ["obs", "actions", "rewards", "dones", "advantages"]:
            self.assertIn(key, batch)
        self.assertGreater(batch["advantages"][0], 1)
        ev.stop()

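    # num_envs=8 with rollout_fragment_length=2 yields batches of 16 and
    # assigns each sub-env its own vector_index.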
def test_auto_vectorization(self):
|
2019-06-03 06:49:24 +08:00
|
|
|
ev = RolloutWorker(
|
2018-08-01 16:29:27 -07:00
|
|
|
env_creator=lambda cfg: MockEnv(episode_length=20, config=cfg),
|
2019-05-20 16:46:05 -07:00
|
|
|
policy=MockPolicy,
|
[rllib] Envs for vectorized execution, async execution, and policy serving (#2170)
## What do these changes do?
**Vectorized envs**: Users can either implement `VectorEnv`, or alternatively set `num_envs=N` to auto-vectorize gym envs (this vectorizes just the action computation part).
```
# CartPole-v0 on single core with 64x64 MLP:
# vector_width=1:
Actions per second 2720.1284458322966
# vector_width=8:
Actions per second 13773.035334888269
# vector_width=64:
Actions per second 37903.20472563333
```
**Async envs**: The more general form of `VectorEnv` is `AsyncVectorEnv`, which allows agents to execute out of lockstep. We use this as an adapter to support `ServingEnv`. Since we can convert any other form of env to `AsyncVectorEnv`, utils.sampler has been rewritten to run against this interface.
**Policy serving**: This provides an env which is not stepped. Rather, the env executes in its own thread, querying the policy for actions via `self.get_action(obs)`, and reporting results via `self.log_returns(rewards)`. We also support logging of off-policy actions via `self.log_action(obs, action)`. This is a more convenient API for some use cases, and also provides parallelizable support for policy serving (for example, if you start a HTTP server in the env) and ingest of offline logs (if the env reads from serving logs).
Any of these types of envs can be passed to RLlib agents. RLlib handles conversions internally in CommonPolicyEvaluator, for example:
```
gym.Env => rllib.VectorEnv => rllib.AsyncVectorEnv
rllib.ServingEnv => rllib.AsyncVectorEnv
```
2018-06-18 11:55:32 -07:00
|
|
|
batch_mode="truncate_episodes",
|
2020-03-14 12:05:04 -07:00
|
|
|
rollout_fragment_length=2,
|
2018-07-19 15:30:36 -07:00
|
|
|
num_envs=8)
|
[rllib] Envs for vectorized execution, async execution, and policy serving (#2170)
## What do these changes do?
**Vectorized envs**: Users can either implement `VectorEnv`, or alternatively set `num_envs=N` to auto-vectorize gym envs (this vectorizes just the action computation part).
```
# CartPole-v0 on single core with 64x64 MLP:
# vector_width=1:
Actions per second 2720.1284458322966
# vector_width=8:
Actions per second 13773.035334888269
# vector_width=64:
Actions per second 37903.20472563333
```
**Async envs**: The more general form of `VectorEnv` is `AsyncVectorEnv`, which allows agents to execute out of lockstep. We use this as an adapter to support `ServingEnv`. Since we can convert any other form of env to `AsyncVectorEnv`, utils.sampler has been rewritten to run against this interface.
**Policy serving**: This provides an env which is not stepped. Rather, the env executes in its own thread, querying the policy for actions via `self.get_action(obs)`, and reporting results via `self.log_returns(rewards)`. We also support logging of off-policy actions via `self.log_action(obs, action)`. This is a more convenient API for some use cases, and also provides parallelizable support for policy serving (for example, if you start a HTTP server in the env) and ingest of offline logs (if the env reads from serving logs).
Any of these types of envs can be passed to RLlib agents. RLlib handles conversions internally in CommonPolicyEvaluator, for example:
```
gym.Env => rllib.VectorEnv => rllib.AsyncVectorEnv
rllib.ServingEnv => rllib.AsyncVectorEnv
```
2018-06-18 11:55:32 -07:00
|
|
|
for _ in range(8):
|
|
|
|
batch = ev.sample()
|
|
|
|
self.assertEqual(batch.count, 16)
|
|
|
|
result = collect_metrics(ev, [])
|
2018-09-30 01:15:13 -07:00
|
|
|
self.assertEqual(result["episodes_this_iter"], 0)
|
[rllib] Envs for vectorized execution, async execution, and policy serving (#2170)
## What do these changes do?
**Vectorized envs**: Users can either implement `VectorEnv`, or alternatively set `num_envs=N` to auto-vectorize gym envs (this vectorizes just the action computation part).
```
# CartPole-v0 on single core with 64x64 MLP:
# vector_width=1:
Actions per second 2720.1284458322966
# vector_width=8:
Actions per second 13773.035334888269
# vector_width=64:
Actions per second 37903.20472563333
```
**Async envs**: The more general form of `VectorEnv` is `AsyncVectorEnv`, which allows agents to execute out of lockstep. We use this as an adapter to support `ServingEnv`. Since we can convert any other form of env to `AsyncVectorEnv`, utils.sampler has been rewritten to run against this interface.
**Policy serving**: This provides an env which is not stepped. Rather, the env executes in its own thread, querying the policy for actions via `self.get_action(obs)`, and reporting results via `self.log_returns(rewards)`. We also support logging of off-policy actions via `self.log_action(obs, action)`. This is a more convenient API for some use cases, and also provides parallelizable support for policy serving (for example, if you start a HTTP server in the env) and ingest of offline logs (if the env reads from serving logs).
Any of these types of envs can be passed to RLlib agents. RLlib handles conversions internally in CommonPolicyEvaluator, for example:
```
gym.Env => rllib.VectorEnv => rllib.AsyncVectorEnv
rllib.ServingEnv => rllib.AsyncVectorEnv
```
2018-06-18 11:55:32 -07:00
|
|
|
for _ in range(8):
|
|
|
|
batch = ev.sample()
|
|
|
|
self.assertEqual(batch.count, 16)
|
|
|
|
result = collect_metrics(ev, [])
|
2018-09-30 01:15:13 -07:00
|
|
|
self.assertEqual(result["episodes_this_iter"], 8)
|
2018-08-01 16:29:27 -07:00
|
|
|
indices = []
|
|
|
|
for env in ev.async_env.vector_env.envs:
|
|
|
|
self.assertEqual(env.unwrapped.config.worker_index, 0)
|
|
|
|
indices.append(env.unwrapped.config.vector_index)
|
|
|
|
self.assertEqual(indices, [0, 1, 2, 3, 4, 5, 6, 7])
|
2020-06-25 19:01:32 +02:00
|
|
|
ev.stop()
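
    # Sampling 4 envs at rollout_fragment_length=4 yields 4 * 4 = 16
    # timesteps per batch; the 8-step episodes only finish on the second
    # sample() call.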
    def test_batches_larger_when_vectorized(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv(episode_length=8),
            policy=MockPolicy,
            batch_mode="truncate_episodes",
            rollout_fragment_length=4,
            num_envs=4)
        batch = ev.sample()
        self.assertEqual(batch.count, 16)
        result = collect_metrics(ev, [])
        self.assertEqual(result["episodes_this_iter"], 0)
        batch = ev.sample()
        result = collect_metrics(ev, [])
        self.assertEqual(result["episodes_this_iter"], 4)
        ev.stop()
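
    # A user-provided VectorEnv (8 sub-envs, 20-step episodes) is sampled in
    # 10-step fragments; all 8 episodes are only reported complete after the
    # second round of sampling.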
    def test_vector_env_support(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockVectorEnv(episode_length=20, num_envs=8),
            policy=MockPolicy,
            batch_mode="truncate_episodes",
            rollout_fragment_length=10)
        for _ in range(8):
            batch = ev.sample()
            self.assertEqual(batch.count, 10)
        result = collect_metrics(ev, [])
        self.assertEqual(result["episodes_this_iter"], 0)
        for _ in range(8):
            batch = ev.sample()
            self.assertEqual(batch.count, 10)
        result = collect_metrics(ev, [])
        self.assertEqual(result["episodes_this_iter"], 8)
        ev.stop()
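
    # "truncate_episodes" returns exactly rollout_fragment_length (15) steps,
    # even though each underlying episode is only 10 steps long.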
    def test_truncate_episodes(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv(10),
            policy=MockPolicy,
            rollout_fragment_length=15,
            batch_mode="truncate_episodes")
        batch = ev.sample()
        self.assertEqual(batch.count, 15)
        ev.stop()
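
    # "complete_episodes" always rolls out to the episode boundary, so a
    # fragment length of 5 still produces a full 10-step episode.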
    def test_complete_episodes(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv(10),
            policy=MockPolicy,
            rollout_fragment_length=5,
            batch_mode="complete_episodes")
        batch = ev.sample()
        self.assertEqual(batch.count, 10)
        ev.stop()
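
    # With "complete_episodes" and a fragment length above the episode
    # length, two whole 10-step episodes get packed into a single batch.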
    def test_complete_episodes_packing(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv(10),
            policy=MockPolicy,
            rollout_fragment_length=15,
            batch_mode="complete_episodes")
        batch = ev.sample()
        self.assertEqual(batch.count, 20)
        self.assertEqual(
            batch["t"].tolist(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        ev.stop()
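
    # The async sampler feeds CartPole observations through the
    # ConcurrentMeanStdFilter, so the filter's stats should be non-empty.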
    def test_filter_sync(self):
        ev = RolloutWorker(
            env_creator=lambda _: gym.make("CartPole-v0"),
            policy=MockPolicy,
            sample_async=True,
            observation_filter="ConcurrentMeanStdFilter")
        time.sleep(2)
        ev.sample()
        filters = ev.get_filters(flush_after=True)
        obs_f = filters[DEFAULT_POLICY_ID]
        self.assertNotEqual(obs_f.rs.n, 0)
        self.assertNotEqual(obs_f.buffer.n, 0)
        ev.stop()
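
    # get_filters(flush_after=False) leaves the filter state in place, so a
    # later snapshot has at least as many recorded observations.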
    def test_get_filters(self):
        ev = RolloutWorker(
            env_creator=lambda _: gym.make("CartPole-v0"),
            policy=MockPolicy,
            sample_async=True,
            observation_filter="ConcurrentMeanStdFilter")
        self.sample_and_flush(ev)
        filters = ev.get_filters(flush_after=False)
        time.sleep(2)
        filters2 = ev.get_filters(flush_after=False)
        obs_f = filters[DEFAULT_POLICY_ID]
        obs_f2 = filters2[DEFAULT_POLICY_ID]
        self.assertGreaterEqual(obs_f2.rs.n, obs_f.rs.n)
        self.assertGreaterEqual(obs_f2.buffer.n, obs_f.buffer.n)
        ev.stop()
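
    # sync_filters() merges the given running stats into the worker's filter
    # without clearing the local buffer.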
    def test_sync_filter(self):
        ev = RolloutWorker(
            env_creator=lambda _: gym.make("CartPole-v0"),
            policy=MockPolicy,
            sample_async=True,
            observation_filter="ConcurrentMeanStdFilter")
        obs_f = self.sample_and_flush(ev)

        # Current State
        filters = ev.get_filters(flush_after=False)
        obs_f = filters[DEFAULT_POLICY_ID]
        self.assertLessEqual(obs_f.buffer.n, 20)

        new_obsf = obs_f.copy()
        new_obsf.rs._n = 100
        ev.sync_filters({DEFAULT_POLICY_ID: new_obsf})
        filters = ev.get_filters(flush_after=False)
        obs_f = filters[DEFAULT_POLICY_ID]
        self.assertGreaterEqual(obs_f.rs.n, 100)
        self.assertLessEqual(obs_f.buffer.n, 20)
        ev.stop()
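
    # extra_python_environs entries are exported into os.environ when the
    # worker is constructed.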
    def test_extra_python_envs(self):
        extra_envs = {"env_key_1": "env_value_1", "env_key_2": "env_value_2"}
        self.assertFalse("env_key_1" in os.environ)
        self.assertFalse("env_key_2" in os.environ)
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv(10),
            policy=MockPolicy,
            extra_python_environs=extra_envs)
        self.assertTrue("env_key_1" in os.environ)
        self.assertTrue("env_key_2" in os.environ)
        ev.stop()

        # reset to original
        del os.environ["env_key_1"]
        del os.environ["env_key_2"]
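
    # Passing seed=1 must not fail for an env that has no seed() method.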
    def test_no_env_seed(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockVectorEnv(episode_length=20, num_envs=8),
            policy=MockPolicy,
            seed=1)
        assert not hasattr(ev.env, "seed")
        ev.stop()
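
    # Helper: draw one async sample and return the flushed observation
    # filter for the default policy.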
    def sample_and_flush(self, ev):
        time.sleep(2)
        ev.sample()
        filters = ev.get_filters(flush_after=True)
        obs_f = filters[DEFAULT_POLICY_ID]
        self.assertNotEqual(obs_f.rs.n, 0)
        self.assertNotEqual(obs_f.buffer.n, 0)
        return obs_f


if __name__ == "__main__":
    import pytest
    import sys
    sys.exit(pytest.main(["-v", __file__]))