Mirror of https://github.com/vale981/ray, synced 2025-03-05 18:11:42 -05:00
[rllib] Improve error message for bad envs, add remote env docs (#4044)

* commit
* fix up rew

parent b78d77257b
commit f8bef004da

2 changed files with 12 additions and 2 deletions
@@ -115,6 +115,8 @@ Vectorized

 RLlib will auto-vectorize Gym envs for batch evaluation if the ``num_envs_per_worker`` config is set, or you can define a custom environment class that subclasses `VectorEnv <https://github.com/ray-project/ray/blob/master/python/ray/rllib/env/vector_env.py>`__ to implement ``vector_step()`` and ``vector_reset()``.

+Note that auto-vectorization only applies to policy inference by default. This means that policy inference will be batched, but your envs will still be stepped one at a time. If you would like your envs to be stepped in parallel, you can set ``"remote_worker_envs": True``. This will create env instances in Ray actors and step them in parallel. These remote processes introduce communication overheads, so this only helps if your env is very expensive to step.
+
 Multi-Agent and Hierarchical
 ----------------------------
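For context, the two settings described in the added documentation can be combined in a trainer config roughly as below. This is a minimal sketch, not part of the commit; the algorithm name "PPO", the "CartPole-v0" env, the stopping criterion, and the use of ``tune.run`` as the entry point are illustrative assumptions.

```python
# Minimal sketch (not part of this commit) of the two env options the docs describe.
import ray
from ray import tune

ray.init()
tune.run(
    "PPO",
    stop={"training_iteration": 1},
    config={
        "env": "CartPole-v0",
        # Auto-vectorization: create several env copies per rollout worker so
        # that policy inference is batched across them.
        "num_envs_per_worker": 4,
        # Step those env copies in parallel Ray actors instead of serially in
        # the worker process; only worthwhile if env.step() is expensive.
        "remote_worker_envs": True,
    },
)
```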
12  python/ray/rllib/env/vector_env.py  vendored
@@ -3,6 +3,7 @@ from __future__ import division
 from __future__ import print_function

 import logging
+import numpy as np

 import ray
 from ray.rllib.utils.annotations import override, PublicAPI
@@ -111,9 +112,16 @@ class _VectorizedGymEnv(VectorEnv):
     def vector_step(self, actions):
         obs_batch, rew_batch, done_batch, info_batch = [], [], [], []
         for i in range(self.num_envs):
-            obs, rew, done, info = self.envs[i].step(actions[i])
+            obs, r, done, info = self.envs[i].step(actions[i])
+            if not np.isscalar(r) or not np.isreal(r) or not np.isfinite(r):
+                raise ValueError(
+                    "Reward should be finite scalar, got {} ({})".format(
+                        r, type(r)))
+            if type(info) is not dict:
+                raise ValueError("Info should be a dict, got {} ({})".format(
+                    info, type(info)))
             obs_batch.append(obs)
-            rew_batch.append(rew)
+            rew_batch.append(r)
             done_batch.append(done)
             info_batch.append(info)
         return obs_batch, rew_batch, done_batch, info_batch
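To illustrate what the new checks catch, here is a small standalone sketch (not part of the commit) that reproduces the same validation on a single step result; ``check_step_result`` is a hypothetical helper name, not an RLlib API.

```python
# Standalone sketch of the validation added to _VectorizedGymEnv.vector_step();
# check_step_result is a hypothetical helper used only for illustration.
import numpy as np


def check_step_result(r, info):
    # Same checks as the diff above: reward must be a finite real scalar.
    if not np.isscalar(r) or not np.isreal(r) or not np.isfinite(r):
        raise ValueError(
            "Reward should be finite scalar, got {} ({})".format(r, type(r)))
    # ...and the info return value must be a plain dict.
    if type(info) is not dict:
        raise ValueError("Info should be a dict, got {} ({})".format(
            info, type(info)))


check_step_result(1.0, {})  # fine: finite scalar reward, dict info

try:
    check_step_result(float("nan"), {})  # NaN reward from a buggy env
except ValueError as e:
    print(e)  # Reward should be finite scalar, got nan (<class 'float'>)

try:
    check_step_result(0.0, None)  # info must be a dict, not None
except ValueError as e:
    print(e)  # Info should be a dict, got None (<class 'NoneType'>)
```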