ray/examples/a3c/vectorized/wrappers.py
Richard Liaw b463d9e5c7 Initial A3C Example - PongDeterministic-v3 (#331)
* Initializing A3C code

* Modifications for Ray usage

* cleanup

* removing universe dependency

* fixes (not yet working)

* hack

* documentation

* Cleanup

* Preliminary Portion

Make sure to change when merging

* RL part

* Cleaning up Driver and Worker code

* Updating driver code

* instructions...

* fixed

* Minor changes.

* Fixing cmake issues

* ray instruction

* updating port to new universe

* Fix for env.configure

* redundant commands

* Revert scipy.misc -> cv2 and raise exception for wrong gym version.
2017-03-11 00:57:53 -08:00

93 lines
2.8 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import gym
import weakref
import vectorized.vectorize_core as core
class Vectorize(gym.Wrapper):
    """
    Given an unvectorized environment (where, e.g., the output of .step() is an observation
    rather than a list of observations), turn it into a vectorized environment with a batch of size
    1.

    Every value crossing the wrapper boundary is placed in (or taken out of) a
    one-element list: actions and seeds are unwrapped on the way in, and
    observations, rewards, done flags and infos are wrapped on the way out.
    """

    metadata = {'runtime.vectorized': True}

    def __init__(self, env):
        """Wrap ``env``, which must not already be marked as vectorized.

        Raises:
            AssertionError: if ``env`` already advertises
                ``'runtime.vectorized'`` in its metadata, or if this wrapper's
                own metadata does not advertise it after base-class setup.
        """
        super(Vectorize, self).__init__(env)
        assert not env.metadata.get('runtime.vectorized'), \
            'Vectorize expects an unvectorized environment'
        # Checked after the base-class constructor has run, so whatever it did
        # to ``self.metadata`` must have left the vectorized flag in place.
        assert self.metadata.get('runtime.vectorized'), \
            'Vectorize must advertise runtime.vectorized in its metadata'
        # Batch size of the resulting vectorized environment.
        self.n = 1

    def _reset(self):
        """Reset the wrapped environment and return ``[observation]``."""
        observation = self.env.reset()
        return [observation]

    def _step(self, action):
        """Step the wrapped environment with the single action ``action[0]``.

        Returns:
            A tuple ``([observation], [reward], [done], {'n': [info]})``.
        """
        observation, reward, done, info = self.env.step(action[0])
        return [observation], [reward], [done], {'n': [info]}

    def _seed(self, seed=None):
        """Seed the wrapped environment from a one-element seed list.

        ``seed`` may be ``None`` (no explicit seed requested); in that case
        ``None`` is forwarded to the wrapped environment instead of failing on
        ``seed[0]``.

        Returns:
            A one-element list holding the wrapped environment's ``seed()``
            return value.
        """
        inner_seed = None if seed is None else seed[0]
        return [self.env.seed(inner_seed)]
class Unvectorize(core.Wrapper):
    """
    Expose a vectorized environment whose batch size is 1 through the plain,
    unvectorized interface: single actions go in, single results come out.
    """

    # NOTE(review): presumably read by core.Wrapper to skip automatic
    # vectorization of this wrapper -- confirm against vectorize_core.
    autovectorize = False
    metadata = {'runtime.vectorized': False}

    def _configure(self, **kwargs):
        """Run the parent configuration, then insist on a batch size of 1.

        Raises:
            Exception: if ``self.n`` is anything other than 1.
        """
        super(Unvectorize, self)._configure(**kwargs)
        if self.n == 1:
            return
        raise Exception('Can only disable vectorization with n=1, not n={}'.format(self.n))

    def _reset(self):
        """Reset the wrapped environment and return its first observation."""
        return self.env.reset()[0]

    def _step(self, action):
        """Step the wrapped environment with ``[action]`` and unwrap the results.

        Returns:
            A tuple ``(observation, reward, done, info)`` built from element 0
            of each returned list and of ``info['n']``.
        """
        batch_obs, batch_reward, batch_done, batch_info = self.env.step([action])
        return batch_obs[0], batch_reward[0], batch_done[0], batch_info['n'][0]

    def _seed(self, seed):
        """Seed the wrapped environment with ``[seed]``; return the first result."""
        seeded = self.env.seed([seed])
        return seeded[0]
class WeakUnvectorize(Unvectorize):
    """
    Unvectorize variant that reaches its wrapped environment only through a
    weak reference, so this wrapper does not keep the environment alive.
    """

    def __init__(self, env, i):
        """Record a weak reference to ``env`` and store the index ``i``."""
        # The weak reference is created before the parent constructor runs so
        # that the ``env`` property already resolves during parent setup.
        self._env_ref = weakref.ref(env)
        super(WeakUnvectorize, self).__init__(env)
        # WeakUnvectorize won't get configure called on it
        self.i = i

    def _check_for_duplicate_wrappers(self):
        # Intentionally a no-op: a vectorized env has to be wrapped in several
        # unvectorize wrappers, so the duplicate-wrapper check is disabled.
        pass

    @property
    def env(self):
        """Return the live environment behind the weak reference.

        Returns ``None`` when the weak reference has not been set yet.

        Raises:
            Exception: if the referenced environment has been garbage collected.
        """
        if hasattr(self, '_env_ref'):
            target = self._env_ref()
            if target is not None:
                return target
            raise Exception("env has been garbage collected. To keep using WeakUnvectorize, you must keep around a reference to the env object. (HINT: try assigning the env to a variable in your code.)")
        # Reached upon instantiation, before ``_env_ref`` exists.
        return None

    @env.setter
    def env(self, value):
        # Assignments are ignored on purpose; only the weak reference taken in
        # __init__ is ever used.
        pass

    def _seed(self, seed):
        # Seeding is handled by the vectorized Monitor, so just echo the seed
        # back wrapped in a list without touching the environment.
        return [seed]

    def close(self):
        # Closing through this wrapper is deliberately a no-op.
        pass