from collections import Counter
import gym
from gym.spaces import Box, Discrete
import numpy as np
import os
import random
import tempfile
import time
import unittest

import ray
from ray.rllib.agents.pg import PGTrainer
from ray.rllib.agents.a3c import A2CTrainer
from ray.rllib.env.multi_agent_env import MultiAgentEnv
from ray.rllib.env.utils import VideoMonitor
from ray.rllib.evaluation.rollout_worker import RolloutWorker
from ray.rllib.evaluation.metrics import collect_metrics
from ray.rllib.evaluation.postprocessing import compute_advantages
from ray.rllib.examples.env.mock_env import MockEnv, MockEnv2, MockVectorEnv, \
    VectorizedMockEnv
from ray.rllib.examples.env.multi_agent import BasicMultiAgent, \
    MultiAgentCartPole
from ray.rllib.examples.policy.random_policy import RandomPolicy
from ray.rllib.execution.common import STEPS_SAMPLED_COUNTER, \
    STEPS_TRAINED_COUNTER
from ray.rllib.policy.policy import Policy
from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID, MultiAgentBatch, \
    SampleBatch
from ray.rllib.utils.annotations import override
from ray.rllib.utils.test_utils import check, framework_iterator
from ray.tune.registry import register_env


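# Simple test policy: picks one random 0/1 action for the whole batch and
# computes (non-GAE) advantages in postprocessing.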
class MockPolicy(RandomPolicy):
    @override(RandomPolicy)
    def compute_actions(self,
                        obs_batch,
                        state_batches=None,
                        prev_action_batch=None,
                        prev_reward_batch=None,
                        episodes=None,
                        explore=None,
                        timestep=None,
                        **kwargs):
        return np.array([random.choice([0, 1])] * len(obs_batch)), [], {}

    @override(Policy)
    def postprocess_trajectory(self,
                               batch,
                               other_agent_batches=None,
                               episode=None):
        assert episode is not None
        super().postprocess_trajectory(batch, other_agent_batches, episode)
        return compute_advantages(
            batch, 100.0, 0.9, use_gae=False, use_critic=False)


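# Intentionally broken policy: compute_actions() always raises.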
class BadPolicy(RandomPolicy):
    @override(RandomPolicy)
    def compute_actions(self,
                        obs_batch,
                        state_batches=None,
                        prev_action_batch=None,
                        prev_reward_batch=None,
                        episodes=None,
                        explore=None,
                        timestep=None,
                        **kwargs):
        raise Exception("intentional error")


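# Env whose reset() and step() always raise; test_no_step_on_init expects
# Trainer construction on top of it to fail.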
class FailOnStepEnv(gym.Env):
    def __init__(self):
        self.observation_space = gym.spaces.Discrete(1)
        self.action_space = gym.spaces.Discrete(2)

    def reset(self):
        raise ValueError("kaboom")

    def step(self, action):
        raise ValueError("kaboom")


class TestRolloutWorker(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        ray.init(num_cpus=5)

    @classmethod
    def tearDownClass(cls):
        ray.shutdown()

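    # A local worker on CartPole-v0 should return a SampleBatch with all
    # standard columns; prev_actions/prev_rewards must be the one-step
    # shifted actions/rewards within each episode.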
    def test_basic(self):
        ev = RolloutWorker(
            env_creator=lambda _: gym.make("CartPole-v0"),
            policy_spec=MockPolicy)
        batch = ev.sample()
        for key in [
                "obs", "actions", "rewards", "dones", "advantages",
                "prev_rewards", "prev_actions"
        ]:
            self.assertIn(key, batch)
            self.assertGreater(np.abs(np.mean(batch[key])), 0)

        def to_prev(vec):
            out = np.zeros_like(vec)
            for i, v in enumerate(vec):
                if i + 1 < len(out) and not batch["dones"][i]:
                    out[i + 1] = v
            return out.tolist()

        self.assertEqual(batch["prev_rewards"].tolist(),
                         to_prev(batch["rewards"]))
        self.assertEqual(batch["prev_actions"].tolist(),
                         to_prev(batch["actions"]))
        self.assertGreater(batch["advantages"][0], 1)
        ev.stop()

    def test_batch_ids(self):
        fragment_len = 100
        ev = RolloutWorker(
            env_creator=lambda _: gym.make("CartPole-v0"),
            policy_spec=MockPolicy,
            rollout_fragment_length=fragment_len)
        batch1 = ev.sample()
        batch2 = ev.sample()
        unroll_ids_1 = set(batch1["unroll_id"])
        unroll_ids_2 = set(batch2["unroll_id"])
        # Assert no overlap of unroll IDs between sample() calls.
        self.assertTrue(not any(uid in unroll_ids_2 for uid in unroll_ids_1))
        # CartPole episodes should be short initially: Expect more than one
        # unroll ID in each batch.
        self.assertTrue(len(unroll_ids_1) > 1)
        self.assertTrue(len(unroll_ids_2) > 1)
        ev.stop()

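    # The linear lr_schedule should be reflected in the policy's cur_lr as
    # the global timestep advances over training iterations.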
    def test_global_vars_update(self):
        for fw in framework_iterator(frameworks=("tf2", "tf")):
            agent = A2CTrainer(
                env="CartPole-v0",
                config={
                    "num_workers": 1,
                    # lr = 0.1 - [(0.1 - 0.000001) / 100000] * ts
                    "lr_schedule": [[0, 0.1], [100000, 0.000001]],
                    "framework": fw,
                })
            policy = agent.get_policy()
            for i in range(3):
                result = agent.train()
                print("{}={}".format(STEPS_TRAINED_COUNTER,
                                     result["info"][STEPS_TRAINED_COUNTER]))
                print("{}={}".format(STEPS_SAMPLED_COUNTER,
                                     result["info"][STEPS_SAMPLED_COUNTER]))
                global_timesteps = policy.global_timestep
                print("global_timesteps={}".format(global_timesteps))
                expected_lr = \
                    0.1 - ((0.1 - 0.000001) / 100000) * global_timesteps
                lr = policy.cur_lr
                if fw == "tf":
                    lr = policy.get_session().run(lr)
                check(lr, expected_lr, rtol=0.05)
            agent.stop()

    def test_no_step_on_init(self):
        register_env("fail", lambda _: FailOnStepEnv())
        for fw in framework_iterator():
            # We expect this to fail already on Trainer init due
            # to the env sanity check right after env creation (inside
            # RolloutWorker).
            self.assertRaises(Exception, lambda: PGTrainer(
                env="fail", config={
                    "num_workers": 2,
                    "framework": fw,
                }))

    def test_callbacks(self):
        for fw in framework_iterator(frameworks=("torch", "tf")):
            counts = Counter()
            pg = PGTrainer(
                env="CartPole-v0", config={
                    "num_workers": 0,
                    "rollout_fragment_length": 50,
                    "train_batch_size": 50,
                    "callbacks": {
                        "on_episode_start":
                        lambda x: counts.update({"start": 1}),
                        "on_episode_step":
                        lambda x: counts.update({"step": 1}),
                        "on_episode_end": lambda x: counts.update({"end": 1}),
                        "on_sample_end":
                        lambda x: counts.update({"sample": 1}),
                    },
                    "framework": fw,
                })
            pg.train()
            pg.train()
            self.assertGreater(counts["sample"], 0)
            self.assertGreater(counts["start"], 0)
            self.assertGreater(counts["end"], 0)
            self.assertGreater(counts["step"], 0)
            pg.stop()

    def test_query_evaluators(self):
        register_env("test", lambda _: gym.make("CartPole-v0"))
        for fw in framework_iterator(frameworks=("torch", "tf")):
            pg = PGTrainer(
                env="test",
                config={
                    "num_workers": 2,
                    "rollout_fragment_length": 5,
                    "num_envs_per_worker": 2,
                    "framework": fw,
                    "create_env_on_driver": True,
                })
            results = pg.workers.foreach_worker(
                lambda ev: ev.rollout_fragment_length)
            results2 = pg.workers.foreach_worker_with_index(
                lambda ev, i: (i, ev.rollout_fragment_length))
            results3 = pg.workers.foreach_worker(
                lambda ev: ev.foreach_env(lambda env: 1))
            self.assertEqual(results, [10, 10, 10])
            self.assertEqual(results2, [(0, 10), (1, 10), (2, 10)])
            self.assertEqual(results3, [[1, 1], [1, 1], [1, 1]])
            pg.stop()

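    # Covers the clip_actions/normalize_actions worker options: out-of-bounds
    # actions are either clipped/unsquashed into the env's space or passed
    # through unchanged (in which case the env complains).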
    def test_action_clipping(self):
        from ray.rllib.examples.env.random_env import RandomEnv
        action_space = gym.spaces.Box(-2.0, 1.0, (3, ))

        # Clipping: True (clip between Policy's action_space.low/high).
        ev = RolloutWorker(
            env_creator=lambda _: RandomEnv(config=dict(
                action_space=action_space,
                max_episode_len=10,
                p_done=0.0,
                check_action_bounds=True,
            )),
            policy_spec=RandomPolicy,
            policy_config=dict(
                action_space=action_space,
                ignore_action_bounds=True,
            ),
            normalize_actions=False,
            clip_actions=True,
            batch_mode="complete_episodes")
        sample = ev.sample()
        # Check, whether the action bounds have been breached (expected).
        # We still arrived here b/c we clipped according to the Env's action
        # space.
        self.assertGreater(np.max(sample["actions"]), action_space.high[0])
        self.assertLess(np.min(sample["actions"]), action_space.low[0])
        ev.stop()

        # Clipping: False and RandomPolicy produces invalid actions.
        # Expect Env to complain.
        ev2 = RolloutWorker(
            env_creator=lambda _: RandomEnv(config=dict(
                action_space=action_space,
                max_episode_len=10,
                p_done=0.0,
                check_action_bounds=True,
            )),
            policy_spec=RandomPolicy,
            policy_config=dict(
                action_space=action_space,
                ignore_action_bounds=True,
            ),
            # No normalization (+clipping) and no clipping ->
            # Should lead to Env complaining.
            normalize_actions=False,
            clip_actions=False,
            batch_mode="complete_episodes")
        self.assertRaisesRegex(ValueError, r"Illegal action", ev2.sample)
        ev2.stop()

        # Clipping: False and RandomPolicy produces valid (bounded) actions.
        # Expect "actions" in SampleBatch to be unclipped.
        ev3 = RolloutWorker(
            env_creator=lambda _: RandomEnv(config=dict(
                action_space=action_space,
                max_episode_len=10,
                p_done=0.0,
                check_action_bounds=True,
            )),
            policy_spec=RandomPolicy,
            policy_config=dict(action_space=action_space),
            # Should not be a problem as RandomPolicy abides to bounds.
            normalize_actions=False,
            clip_actions=False,
            batch_mode="complete_episodes")
        sample = ev3.sample()
        self.assertGreater(np.min(sample["actions"]), action_space.low[0])
        self.assertLess(np.max(sample["actions"]), action_space.high[0])
        ev3.stop()

    def test_action_normalization(self):
        from ray.rllib.examples.env.random_env import RandomEnv
        action_space = gym.spaces.Box(0.0001, 0.0002, (5, ))

        # Normalize: True (unsquash between Policy's action_space.low/high).
        ev = RolloutWorker(
            env_creator=lambda _: RandomEnv(config=dict(
                action_space=action_space,
                max_episode_len=10,
                p_done=0.0,
                check_action_bounds=True,
            )),
            policy_spec=RandomPolicy,
            policy_config=dict(
                action_space=action_space,
                ignore_action_bounds=True,
            ),
            normalize_actions=True,
            clip_actions=False,
            batch_mode="complete_episodes")
        sample = ev.sample()
        # Check, whether the action bounds have been breached (expected).
        # We still arrived here b/c we unsquashed according to the Env's
        # action space.
        self.assertGreater(np.max(sample["actions"]), action_space.high[0])
        self.assertLess(np.min(sample["actions"]), action_space.low[0])
        ev.stop()

    def test_reward_clipping(self):
        # Clipping: True (clip between -1.0 and 1.0).
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv2(episode_length=10),
            policy_spec=MockPolicy,
            clip_rewards=True,
            batch_mode="complete_episodes")
        self.assertEqual(max(ev.sample()["rewards"]), 1)
        result = collect_metrics(ev, [])
        self.assertEqual(result["episode_reward_mean"], 1000)
        ev.stop()

        from ray.rllib.examples.env.random_env import RandomEnv

        # Clipping in certain range (-2.0, 2.0).
        ev2 = RolloutWorker(
            env_creator=lambda _: RandomEnv(
                dict(
                    reward_space=gym.spaces.Box(low=-10, high=10, shape=()),
                    p_done=0.0,
                    max_episode_len=10,
                )),
            policy_spec=MockPolicy,
            clip_rewards=2.0,
            batch_mode="complete_episodes")
        sample = ev2.sample()
        self.assertEqual(max(sample["rewards"]), 2.0)
        self.assertEqual(min(sample["rewards"]), -2.0)
        self.assertLess(np.mean(sample["rewards"]), 0.5)
        self.assertGreater(np.mean(sample["rewards"]), -0.5)
        ev2.stop()

        # Clipping: Off.
        ev2 = RolloutWorker(
            env_creator=lambda _: MockEnv2(episode_length=10),
            policy_spec=MockPolicy,
            clip_rewards=False,
            batch_mode="complete_episodes")
        self.assertEqual(max(ev2.sample()["rewards"]), 100)
        result2 = collect_metrics(ev2, [])
        self.assertEqual(result2["episode_reward_mean"], 1000)
        ev2.stop()

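    # With soft_horizon=False, hitting the horizon terminates the episode
    # (done=True) and resets the env.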
    def test_hard_horizon(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv2(episode_length=10),
            policy_spec=MockPolicy,
            batch_mode="complete_episodes",
            rollout_fragment_length=10,
            episode_horizon=4,
            soft_horizon=False)
        samples = ev.sample()
        # Three logical episodes and correct episode resets (always after 4
        # steps).
        self.assertEqual(len(set(samples["eps_id"])), 3)
        for i in range(4):
            self.assertEqual(np.argmax(samples["obs"][i]), i)
        self.assertEqual(np.argmax(samples["obs"][4]), 0)
        # 3 done values.
        self.assertEqual(sum(samples["dones"]), 3)
        ev.stop()

        # A gym env with its own time limit (CartPole-v0's max_episode_steps
        # of 200); the worker's hard horizon (6) is hit well before that
        # limit and must take precedence.
        ev = RolloutWorker(
            env_creator=lambda _: gym.make("CartPole-v0"),
            policy_spec=MockPolicy,
            batch_mode="complete_episodes",
            rollout_fragment_length=10,
            episode_horizon=6,
            soft_horizon=False)
        samples = ev.sample()
        # 12 steps due to `complete_episodes` batch_mode.
        self.assertEqual(len(samples["eps_id"]), 12)
        # Two logical episodes and correct episode resets (always after 6(!)
        # steps).
        self.assertEqual(len(set(samples["eps_id"])), 2)
        # 2 done values after 6 and 12 steps.
        check(samples["dones"], [
            False, False, False, False, False, True, False, False, False,
            False, False, True
        ])
        ev.stop()

    def test_soft_horizon(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv(episode_length=10),
            policy_spec=MockPolicy,
            batch_mode="complete_episodes",
            rollout_fragment_length=10,
            episode_horizon=4,
            soft_horizon=True)
        samples = ev.sample()
        # Three logical episodes.
        self.assertEqual(len(set(samples["eps_id"])), 3)
        # Only 1 hard done value.
        self.assertEqual(sum(samples["dones"]), 1)
        ev.stop()

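    # collect_metrics() should aggregate episodes from both the local and
    # the remote worker.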
    def test_metrics(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv(episode_length=10),
            policy_spec=MockPolicy,
            batch_mode="complete_episodes")
        remote_ev = RolloutWorker.as_remote().remote(
            env_creator=lambda _: MockEnv(episode_length=10),
            policy_spec=MockPolicy,
            batch_mode="complete_episodes")
        ev.sample()
        ray.get(remote_ev.sample.remote())
        result = collect_metrics(ev, [remote_ev])
        self.assertEqual(result["episodes_this_iter"], 20)
        self.assertEqual(result["episode_reward_mean"], 10)
        ev.stop()

    def test_async(self):
        ev = RolloutWorker(
            env_creator=lambda _: gym.make("CartPole-v0"),
            sample_async=True,
            policy_spec=MockPolicy)
        batch = ev.sample()
        for key in ["obs", "actions", "rewards", "dones", "advantages"]:
            self.assertIn(key, batch)
        self.assertGreater(batch["advantages"][0], 1)
        ev.stop()

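    # num_envs > 1 auto-vectorizes a plain gym env: each sample() returns
    # rollout_fragment_length * num_envs timesteps.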
    def test_auto_vectorization(self):
        ev = RolloutWorker(
            env_creator=lambda cfg: MockEnv(episode_length=20, config=cfg),
            policy_spec=MockPolicy,
            batch_mode="truncate_episodes",
            rollout_fragment_length=2,
            num_envs=8)
        for _ in range(8):
            batch = ev.sample()
            self.assertEqual(batch.count, 16)
        result = collect_metrics(ev, [])
        self.assertEqual(result["episodes_this_iter"], 0)
        for _ in range(8):
            batch = ev.sample()
            self.assertEqual(batch.count, 16)
        result = collect_metrics(ev, [])
        self.assertEqual(result["episodes_this_iter"], 8)
        indices = []
        for env in ev.async_env.vector_env.envs:
            self.assertEqual(env.unwrapped.config.worker_index, 0)
            indices.append(env.unwrapped.config.vector_index)
        self.assertEqual(indices, [0, 1, 2, 3, 4, 5, 6, 7])
        ev.stop()

    def test_batches_larger_when_vectorized(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv(episode_length=8),
            policy_spec=MockPolicy,
            batch_mode="truncate_episodes",
            rollout_fragment_length=4,
            num_envs=4)
        batch = ev.sample()
        self.assertEqual(batch.count, 16)
        result = collect_metrics(ev, [])
        self.assertEqual(result["episodes_this_iter"], 0)
        batch = ev.sample()
        result = collect_metrics(ev, [])
        self.assertEqual(result["episodes_this_iter"], 4)
        ev.stop()

    def test_vector_env_support(self):
        # Test a vector env that contains 8 actual envs
        # (MockEnv instances).
        ev = RolloutWorker(
            env_creator=(
                lambda _: VectorizedMockEnv(episode_length=20, num_envs=8)),
            policy_spec=MockPolicy,
            batch_mode="truncate_episodes",
            rollout_fragment_length=10)
        for _ in range(8):
            batch = ev.sample()
            self.assertEqual(batch.count, 10)
        result = collect_metrics(ev, [])
        self.assertEqual(result["episodes_this_iter"], 0)
        for _ in range(8):
            batch = ev.sample()
            self.assertEqual(batch.count, 10)
        result = collect_metrics(ev, [])
        self.assertEqual(result["episodes_this_iter"], 8)
        ev.stop()
|
[rllib] Envs for vectorized execution, async execution, and policy serving (#2170)
## What do these changes do?
**Vectorized envs**: Users can either implement `VectorEnv`, or alternatively set `num_envs=N` to auto-vectorize gym envs (this vectorizes just the action computation part).
```
# CartPole-v0 on single core with 64x64 MLP:
# vector_width=1:
Actions per second 2720.1284458322966
# vector_width=8:
Actions per second 13773.035334888269
# vector_width=64:
Actions per second 37903.20472563333
```
**Async envs**: The more general form of `VectorEnv` is `AsyncVectorEnv`, which allows agents to execute out of lockstep. We use this as an adapter to support `ServingEnv`. Since we can convert any other form of env to `AsyncVectorEnv`, utils.sampler has been rewritten to run against this interface.
**Policy serving**: This provides an env which is not stepped. Rather, the env executes in its own thread, querying the policy for actions via `self.get_action(obs)`, and reporting results via `self.log_returns(rewards)`. We also support logging of off-policy actions via `self.log_action(obs, action)`. This is a more convenient API for some use cases, and also provides parallelizable support for policy serving (for example, if you start a HTTP server in the env) and ingest of offline logs (if the env reads from serving logs).
Any of these types of envs can be passed to RLlib agents. RLlib handles conversions internally in CommonPolicyEvaluator, for example:
```
gym.Env => rllib.VectorEnv => rllib.AsyncVectorEnv
rllib.ServingEnv => rllib.AsyncVectorEnv
```
2018-06-18 11:55:32 -07:00
|
|
|
|
2021-07-28 10:40:04 -04:00
|
|
|
# Test a vector env that pretends(!) to contain 4 envs, but actually
|
|
|
|
# only has 1 (CartPole).
|
|
|
|
ev = RolloutWorker(
|
|
|
|
env_creator=(lambda _: MockVectorEnv(20, mocked_num_envs=4)),
|
|
|
|
policy_spec=MockPolicy,
|
|
|
|
batch_mode="truncate_episodes",
|
|
|
|
rollout_fragment_length=10)
|
|
|
|
for _ in range(8):
|
|
|
|
batch = ev.sample()
|
|
|
|
self.assertEqual(batch.count, 10)
|
|
|
|
result = collect_metrics(ev, [])
|
|
|
|
self.assertGreater(result["episodes_this_iter"], 3)
|
|
|
|
for _ in range(8):
|
|
|
|
batch = ev.sample()
|
|
|
|
self.assertEqual(batch.count, 10)
|
|
|
|
result = collect_metrics(ev, [])
|
|
|
|
self.assertGreater(result["episodes_this_iter"], 7)
|
|
|
|
ev.stop()
|
|
|
|
|
2020-01-18 07:26:28 +01:00
|
|
|
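    # A minimal usage sketch (not executed by this suite), assuming the
    # MockEnv/MockPolicy helpers defined elsewhere in this file: instead of
    # implementing VectorEnv by hand, a plain gym env can also be
    # auto-vectorized by passing `num_envs` to the RolloutWorker:
    #
    #   worker = RolloutWorker(
    #       env_creator=lambda _: MockEnv(20),
    #       policy_spec=MockPolicy,
    #       num_envs=8,
    #       batch_mode="truncate_episodes",
    #       rollout_fragment_length=10)
    #   worker.sample()  # samples across all 8 auto-created sub-envs
    #   assert len(worker.foreach_env(lambda env: env)) == 8
    #   worker.stop()
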
    def test_truncate_episodes(self):
        ev_env_steps = RolloutWorker(
            env_creator=lambda _: MockEnv(10),
            policy_spec=MockPolicy,
            rollout_fragment_length=15,
            batch_mode="truncate_episodes")
        batch = ev_env_steps.sample()
        self.assertEqual(batch.count, 15)
        self.assertTrue(isinstance(batch, SampleBatch))
        ev_env_steps.stop()

        action_space = Discrete(2)
        obs_space = Box(float("-inf"), float("inf"), (4, ), dtype=np.float32)
        ev_agent_steps = RolloutWorker(
            env_creator=lambda _: MultiAgentCartPole({"num_agents": 4}),
            policy_spec={
                "pol0": (MockPolicy, obs_space, action_space, {}),
                "pol1": (MockPolicy, obs_space, action_space, {}),
            },
            policy_mapping_fn=lambda agent_id, episode, **kwargs:
            "pol0" if agent_id == 0 else "pol1",
            rollout_fragment_length=301,
            count_steps_by="env_steps",
            batch_mode="truncate_episodes",
        )
        batch = ev_agent_steps.sample()
        self.assertTrue(isinstance(batch, MultiAgentBatch))
        self.assertGreater(batch.agent_steps(), 301)
        self.assertEqual(batch.env_steps(), 301)
        ev_agent_steps.stop()

        ev_agent_steps = RolloutWorker(
            env_creator=lambda _: MultiAgentCartPole({"num_agents": 4}),
            policy_spec={
                "pol0": (MockPolicy, obs_space, action_space, {}),
                "pol1": (MockPolicy, obs_space, action_space, {}),
            },
            policy_mapping_fn=lambda agent_id, episode, **kwargs:
            "pol0" if agent_id == 0 else "pol1",
            rollout_fragment_length=301,
            count_steps_by="agent_steps",
            batch_mode="truncate_episodes")
        batch = ev_agent_steps.sample()
        self.assertTrue(isinstance(batch, MultiAgentBatch))
        self.assertLess(batch.env_steps(), 301)
        # When counting agent steps, the count may be slightly larger than
        # rollout_fragment_length, because up to N agents step in each env
        # step and we only check whether the batch should be built after each
        # env step.
        self.assertGreaterEqual(batch.agent_steps(), 301)
        ev_agent_steps.stop()

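    # Note on the two counting modes exercised above: with 4 agents all
    # acting in every env step, 301 env steps yield roughly 4 * 301 = 1204
    # agent steps, whereas counting by agent steps ends the rollout once the
    # agent-step total reaches 301 (roughly 301 / 4 ~= 76 env steps).
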
    def test_complete_episodes(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv(10),
            policy_spec=MockPolicy,
            rollout_fragment_length=5,
            batch_mode="complete_episodes")
        batch = ev.sample()
        self.assertEqual(batch.count, 10)
        ev.stop()

    def test_complete_episodes_packing(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv(10),
            policy_spec=MockPolicy,
            rollout_fragment_length=15,
            batch_mode="complete_episodes")
        batch = ev.sample()
        self.assertEqual(batch.count, 20)
        self.assertEqual(
            batch["t"].tolist(),
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        ev.stop()

    def test_filter_sync(self):
        ev = RolloutWorker(
            env_creator=lambda _: gym.make("CartPole-v0"),
            policy_spec=MockPolicy,
            sample_async=True,
            observation_filter="ConcurrentMeanStdFilter")
        time.sleep(2)
        ev.sample()
        filters = ev.get_filters(flush_after=True)
        obs_f = filters[DEFAULT_POLICY_ID]
        self.assertNotEqual(obs_f.rs.n, 0)
        self.assertNotEqual(obs_f.buffer.n, 0)
        ev.stop()

    def test_get_filters(self):
        ev = RolloutWorker(
            env_creator=lambda _: gym.make("CartPole-v0"),
            policy_spec=MockPolicy,
            sample_async=True,
            observation_filter="ConcurrentMeanStdFilter")
        self.sample_and_flush(ev)
        filters = ev.get_filters(flush_after=False)
        time.sleep(2)
        filters2 = ev.get_filters(flush_after=False)
        obs_f = filters[DEFAULT_POLICY_ID]
        obs_f2 = filters2[DEFAULT_POLICY_ID]
        self.assertGreaterEqual(obs_f2.rs.n, obs_f.rs.n)
        self.assertGreaterEqual(obs_f2.buffer.n, obs_f.buffer.n)
        ev.stop()

    def test_sync_filter(self):
        ev = RolloutWorker(
            env_creator=lambda _: gym.make("CartPole-v0"),
            policy_spec=MockPolicy,
            sample_async=True,
            observation_filter="ConcurrentMeanStdFilter")
        obs_f = self.sample_and_flush(ev)

        # Current state of the filter.
        filters = ev.get_filters(flush_after=False)
        obs_f = filters[DEFAULT_POLICY_ID]

        self.assertLessEqual(obs_f.buffer.n, 20)

        new_obsf = obs_f.copy()
        new_obsf.rs._n = 100
        ev.sync_filters({DEFAULT_POLICY_ID: new_obsf})
        filters = ev.get_filters(flush_after=False)
        obs_f = filters[DEFAULT_POLICY_ID]
        self.assertGreaterEqual(obs_f.rs.n, 100)
        self.assertLessEqual(obs_f.buffer.n, 20)
        ev.stop()

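    # The filter tests above exercise the usual flush/sync round trip. A
    # rough sketch (cross-worker aggregation elided; `aggregated` is a
    # hypothetical, already-merged filter object):
    #
    #   deltas = ev.get_filters(flush_after=True)  # grab and reset local stats
    #   # ... merge deltas[DEFAULT_POLICY_ID] from all workers ...
    #   ev.sync_filters({DEFAULT_POLICY_ID: aggregated})  # push merged state
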
    def test_extra_python_envs(self):
        extra_envs = {"env_key_1": "env_value_1", "env_key_2": "env_value_2"}
        self.assertFalse("env_key_1" in os.environ)
        self.assertFalse("env_key_2" in os.environ)
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv(10),
            policy_spec=MockPolicy,
            extra_python_environs=extra_envs)
        self.assertTrue("env_key_1" in os.environ)
        self.assertTrue("env_key_2" in os.environ)
        ev.stop()

        # Reset os.environ to its original state.
        del os.environ["env_key_1"]
        del os.environ["env_key_2"]

    def test_no_env_seed(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockVectorEnv(20, mocked_num_envs=8),
            policy_spec=MockPolicy,
            seed=1)
        assert not hasattr(ev.env, "seed")
        ev.stop()

    def test_multi_env_seed(self):
        ev = RolloutWorker(
            env_creator=lambda _: MockEnv2(100),
            num_envs=3,
            policy_spec=MockPolicy,
            seed=1)
        # Make sure we can properly sample from the wrapped env.
        ev.sample()
        # Make sure all environments got a different deterministic seed.
        seeds = ev.foreach_env(lambda env: env.rng_seed)
        self.assertEqual(seeds, [1, 2, 3])
        ev.stop()

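    # As asserted above, `seed=1` with `num_envs=3` hands each sub-env its
    # own deterministic seed (1, 2, 3), so vectorized rollouts stay
    # reproducible without all sub-envs sharing one random stream.
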
    def test_wrap_multi_agent_env(self):
        ev = RolloutWorker(
            env_creator=lambda _: BasicMultiAgent(10),
            policy_spec=MockPolicy,
            policy_config={
                "in_evaluation": False,
            },
            record_env=tempfile.gettempdir())
        # Make sure we can properly sample from the wrapped env.
        ev.sample()
        # Make sure the resulting environment is indeed still an
        # instance of MultiAgentEnv and VideoMonitor.
        self.assertTrue(isinstance(ev.env.unwrapped, MultiAgentEnv))
        self.assertTrue(isinstance(ev.env, gym.Env))
        self.assertTrue(isinstance(ev.env, VideoMonitor))
        ev.stop()

    def test_no_training(self):
        class NoTrainingEnv(MockEnv):
            def __init__(self, episode_length, training_enabled):
                super(NoTrainingEnv, self).__init__(episode_length)
                self.training_enabled = training_enabled

            def step(self, action):
                obs, rew, done, info = super(NoTrainingEnv, self).step(action)
                return obs, rew, done, {
                    **info, "training_enabled": self.training_enabled
                }

        ev = RolloutWorker(
            env_creator=lambda _: NoTrainingEnv(10, True),
            policy_spec=MockPolicy,
            rollout_fragment_length=5,
            batch_mode="complete_episodes")
        batch = ev.sample()
        self.assertEqual(batch.count, 10)
        self.assertEqual(len(batch["obs"]), 10)
        ev.stop()

        ev = RolloutWorker(
            env_creator=lambda _: NoTrainingEnv(10, False),
            policy_spec=MockPolicy,
            rollout_fragment_length=5,
            batch_mode="complete_episodes")
        batch = ev.sample()
        self.assertTrue(isinstance(batch, MultiAgentBatch))
        self.assertEqual(len(batch.policy_batches), 0)
        ev.stop()

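    # As the two cases above show, an env can opt its transitions out of
    # training by returning `"training_enabled": False` in its info dict:
    # the sampled MultiAgentBatch then carries no policy batches at all.
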
    def sample_and_flush(self, ev):
        time.sleep(2)
        ev.sample()
        filters = ev.get_filters(flush_after=True)
        obs_f = filters[DEFAULT_POLICY_ID]
        self.assertNotEqual(obs_f.rs.n, 0)
        self.assertNotEqual(obs_f.buffer.n, 0)
        return obs_f


if __name__ == "__main__":
    import pytest
    import sys
    sys.exit(pytest.main(["-v", __file__]))