[rllib] Improve error message for bad envs, add remote env docs (#4044)

* commit

* fix up rew
This commit is contained in:
Eric Liang 2019-02-18 01:28:19 -08:00 committed by GitHub
parent b78d77257b
commit f8bef004da
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 2 deletions

View file

@ -115,6 +115,8 @@ Vectorized
RLlib will auto-vectorize Gym envs for batch evaluation if the ``num_envs_per_worker`` config is set, or you can define a custom environment class that subclasses `VectorEnv <https://github.com/ray-project/ray/blob/master/python/ray/rllib/env/vector_env.py>`__ to implement ``vector_step()`` and ``vector_reset()``.
Note that auto-vectorization only applies to policy inference by default. This means that policy inference will be batched, but your envs will still be stepped one at a time. If you would like your envs to be stepped in parallel, you can set ``"remote_worker_envs": True``. This will create env instances in Ray actors and step them in parallel. These remote processes introduce communication overheads, so this only helps if your env is very expensive to step.
Multi-Agent and Hierarchical
----------------------------

View file

@ -3,6 +3,7 @@ from __future__ import division
from __future__ import print_function
import logging
import numpy as np
import ray
from ray.rllib.utils.annotations import override, PublicAPI
@ -111,9 +112,16 @@ class _VectorizedGymEnv(VectorEnv):
def vector_step(self, actions):
    """Step all sub-envs with one action each and collect the results.

    Arguments:
        actions (list): One action per sub-env, indexed to match
            ``self.envs``; ``actions[i]`` is applied to ``self.envs[i]``.

    Returns:
        Tuple of four parallel lists of length ``self.num_envs``:
        (obs_batch, rew_batch, done_batch, info_batch).

    Raises:
        ValueError: If a sub-env returns a reward that is not a finite
            scalar, or an info that is not a dict. These checks give a
            clear error message for misbehaving user envs instead of a
            confusing failure later in the rollout pipeline.
    """
    obs_batch, rew_batch, done_batch, info_batch = [], [], [], []
    for i in range(self.num_envs):
        # Step each env exactly once; the diff artifact that stepped
        # twice (old + new line both present) is removed here.
        obs, r, done, info = self.envs[i].step(actions[i])
        # Reject non-scalar, complex, NaN, or infinite rewards early.
        if not np.isscalar(r) or not np.isreal(r) or not np.isfinite(r):
            raise ValueError(
                "Reward should be finite scalar, got {} ({})".format(
                    r, type(r)))
        # Info must be a plain dict per the Gym env API contract.
        if type(info) is not dict:
            raise ValueError("Info should be a dict, got {} ({})".format(
                info, type(info)))
        obs_batch.append(obs)
        rew_batch.append(r)
        done_batch.append(done)
        info_batch.append(info)
    return obs_batch, rew_batch, done_batch, info_batch