mirror of
https://github.com/vale981/ray
synced 2025-03-06 10:31:39 -05:00
fix (#4950)
This commit is contained in:
parent
671c0f769e
commit
4f8e100fe0
1 changed file with 9 additions and 2 deletions
|
@@ -7,6 +7,7 @@ from __future__ import print_function
|
|||
import gym
|
||||
import numpy as np
|
||||
|
||||
from ray.rllib.models.preprocessors import get_preprocessor
|
||||
from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
|
||||
from ray.rllib.offline.json_writer import JsonWriter
|
||||
|
||||
|
@@ -18,6 +19,12 @@ if __name__ == "__main__":
|
|||
# simulator is available, but let's do it anyways for example purposes:
|
||||
env = gym.make("CartPole-v0")
|
||||
|
||||
# RLlib uses preprocessors to implement transforms such as one-hot encoding
|
||||
# and flattening of tuple and dict observations. For CartPole a no-op
|
||||
# preprocessor is used, but this may be relevant for more complex envs.
|
||||
prep = get_preprocessor(env.observation_space)(env.observation_space)
|
||||
print("The preprocessor is", prep)
|
||||
|
||||
for eps_id in range(100):
|
||||
obs = env.reset()
|
||||
prev_action = np.zeros_like(env.action_space.sample())
|
||||
|
@@ -31,7 +38,7 @@ if __name__ == "__main__":
|
|||
t=t,
|
||||
eps_id=eps_id,
|
||||
agent_index=0,
|
||||
obs=obs,
|
||||
obs=prep.transform(obs),
|
||||
actions=action,
|
||||
action_prob=1.0, # put the true action probability here
|
||||
rewards=rew,
|
||||
|
@@ -39,7 +46,7 @@ if __name__ == "__main__":
|
|||
prev_rewards=prev_reward,
|
||||
dones=done,
|
||||
infos=info,
|
||||
new_obs=new_obs)
|
||||
new_obs=prep.transform(new_obs))
|
||||
obs = new_obs
|
||||
prev_action = action
|
||||
prev_reward = rew
|
||||
|
|
Loading…
Add table
Reference in a new issue