This commit is contained in:
Eric Liang 2019-06-09 19:20:55 -07:00 committed by Jones Wong
parent 671c0f769e
commit 4f8e100fe0

View file

@@ -7,6 +7,7 @@ from __future__ import print_function
import gym
import numpy as np
from ray.rllib.models.preprocessors import get_preprocessor
from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
from ray.rllib.offline.json_writer import JsonWriter
@@ -18,6 +19,12 @@ if __name__ == "__main__":
# simulator is available, but let's do it anyway for example purposes:
env = gym.make("CartPole-v0")
# RLlib uses preprocessors to implement transforms such as one-hot encoding
# and flattening of tuple and dict observations. For CartPole a no-op
# preprocessor is used, but this may be relevant for more complex envs.
prep = get_preprocessor(env.observation_space)(env.observation_space)
print("The preprocessor is", prep)
for eps_id in range(100):
obs = env.reset()
prev_action = np.zeros_like(env.action_space.sample())
@@ -31,7 +38,7 @@ if __name__ == "__main__":
t=t,
eps_id=eps_id,
agent_index=0,
obs=obs,
obs=prep.transform(obs),
actions=action,
action_prob=1.0, # put the true action probability here
rewards=rew,
@@ -39,7 +46,7 @@ if __name__ == "__main__":
prev_rewards=prev_reward,
dones=done,
infos=info,
new_obs=new_obs)
new_obs=prep.transform(new_obs))
obs = new_obs
prev_action = action
prev_reward = rew