ray/examples/a3c/vectorized/monitoring.py
Richard Liaw b463d9e5c7 Initial A3C Example - PongDeterministic-v3 (#331)
* Initializing A3C code

* Modifications for Ray usage

* cleanup

* removing universe dependency

* fixes (not yet working)

* hack

* documentation

* Cleanup

* Preliminary Portion

Make sure to change when merging

* RL part

* Cleaning up Driver and Worker code

* Updating driver code

* instructions...

* fixed

* Minor changes.

* Fixing cmake issues

* ray instruction

* updating port to new universe

* Fix for env.configure

* redundant commands

* Revert scipy.misc -> cv2 and raise exception for wrong gym version.
2017-03-11 00:57:53 -08:00

56 lines
2.3 KiB
Python

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import weakref
from gym import monitoring
class Monitor(object):
    """Drive one ``gym`` monitor per sub-environment of a vectorized env.

    Every public and hook method simply fans the call out to the monitors
    held in ``self.monitor_n``, pairing the i-th element of each ``*_n``
    argument with the i-th monitor.
    """

    def __init__(self, env_n):
        """env_n is a collection of unvectorized envs"""
        self.monitor_n = [monitoring.Monitor(env) for env in env_n]

    @property
    def env(self):
        """Return the env reached through the first monitor's ``.env.env``."""
        # The real env is the first unwrapped env. Maybe we should
        # maintain our own weakref rather than doing this.
        return self.monitor_n[0].env.env

    def start(self, directory, video_callable=None, seed_n=None, force=False,
              resume=False, write_upon_reset=False, uid=None):
        """Seed the underlying env once, then start every monitor.

        Args:
            directory: Forwarded unchanged to each monitor's ``start``.
            video_callable: Forwarded to the first monitor only; every
                other monitor receives ``False`` so that only the first
                one may record video.
            seed_n: Per-environment seeds handed to ``self.env.seed``.
                Defaults to one ``None`` per monitor.
            force: Forwarded unchanged to each monitor's ``start``.
            resume: Forwarded unchanged to each monitor's ``start``.
            write_upon_reset: Forwarded unchanged to each monitor's
                ``start``.
            uid: Forwarded unchanged to each monitor's ``start``.
        """
        if seed_n is None:
            seed_n = [None] * len(self.monitor_n)

        # There's no way to seed just one of the vectorized environments,
        # so we have to do the seeding ourselves outside of the
        # underlying monitor instances.
        #
        # NOTE(review): the original comment says the monitor will call
        # .seed on a "WeakUnvectorized" env which just returns rather than
        # re-seeding; that wrapper is not visible here -- confirm.
        self.env.seed(seed_n)

        for index, monitor in enumerate(self.monitor_n):
            # Only allow recording of video in first monitor. Computed per
            # iteration instead of overwriting the caller's argument.
            monitor_video_callable = video_callable if index == 0 else False

            # The seeds are NOT forwarded to the individual monitors here;
            # seeding has already happened via self.env.seed above.
            monitor.start(
                directory=directory,
                video_callable=monitor_video_callable,
                force=force,
                resume=resume,
                write_upon_reset=write_upon_reset,
                uid=uid,
            )

    def close(self, *args, **kwargs):
        """Call ``close(*args, **kwargs)`` on every monitor, in order."""
        # Plain loop: this is done purely for its side effects, so there is
        # no point in building a list of the return values.
        for monitor in self.monitor_n:
            monitor.close(*args, **kwargs)

    def _before_reset(self):
        """Return the list of each monitor's ``_before_reset()`` result."""
        return [monitor._before_reset() for monitor in self.monitor_n]

    def _after_reset(self, observation_n):
        """Pass the i-th observation to the i-th monitor's ``_after_reset``.

        Returns the list of per-monitor results. ``observation_n`` must
        contain exactly one entry per monitor.
        """
        assert len(observation_n) == len(self.monitor_n)
        return [
            monitor._after_reset(observation)
            for monitor, observation in zip(self.monitor_n, observation_n)
        ]

    def _before_step(self, action_n):
        """Pass the i-th action to the i-th monitor's ``_before_step``.

        Returns the list of per-monitor results. ``action_n`` must contain
        exactly one entry per monitor.
        """
        assert len(action_n) == len(self.monitor_n)
        return [
            monitor._before_step(action)
            for monitor, action in zip(self.monitor_n, action_n)
        ]

    def _after_step(self, observation_n, reward_n, done_n, info):
        """Pass the i-th step result to the i-th monitor's ``_after_step``.

        ``observation_n``, ``reward_n``, ``done_n`` and ``info['n']`` must
        each contain exactly one entry per monitor. Returns the list of
        per-monitor results.
        """
        info_n = info['n']
        expected = len(self.monitor_n)
        # zip() stops at the shortest input, so without these checks a
        # short sequence would silently leave trailing monitors un-notified.
        assert len(observation_n) == expected
        assert len(reward_n) == expected
        assert len(done_n) == expected
        assert len(info_n) == expected
        return [
            monitor._after_step(observation, reward, done, monitor_info)
            for monitor, observation, reward, done, monitor_info
            in zip(self.monitor_n, observation_n, reward_n, done_n, info_n)
        ]