fix (#4950)

2025-03-06 10:31:39 -05:00 · 2019-06-09 19:20:55 -07:00 · 2019-06-09 19:20:55 -07:00 · 4f8e100fe0
commit 4f8e100fe0
parent 671c0f769e
1 changed files with 9 additions and 2 deletions
--- a/python/ray/rllib/examples/saving_experiences.py
+++ b/python/ray/rllib/examples/saving_experiences.py
@@ -7,6 +7,7 @@ from __future__ import print_function
 import gym
 import numpy as np

+from ray.rllib.models.preprocessors import get_preprocessor
 from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
 from ray.rllib.offline.json_writer import JsonWriter

@@ -18,6 +19,12 @@ if __name__ == "__main__":
    # simulator is available, but let's do it anyways for example purposes:
    env = gym.make("CartPole-v0")

+    # RLlib uses preprocessors to implement transforms such as one-hot encoding
+    # and flattening of tuple and dict observations. For CartPole a no-op
+    # preprocessor is used, but this may be relevant for more complex envs.
+    prep = get_preprocessor(env.observation_space)(env.observation_space)
+    print("The preprocessor is", prep)
+
    for eps_id in range(100):
        obs = env.reset()
        prev_action = np.zeros_like(env.action_space.sample())
@@ -31,7 +38,7 @@ if __name__ == "__main__":
                t=t,
                eps_id=eps_id,
                agent_index=0,
-                obs=obs,
+                obs=prep.transform(obs),
                actions=action,
                action_prob=1.0,  # put the true action probability here
                rewards=rew,
@@ -39,7 +46,7 @@ if __name__ == "__main__":
                prev_rewards=prev_reward,
                dones=done,
                infos=info,
-                new_obs=new_obs)
+                new_obs=prep.transform(new_obs))
            obs = new_obs
            prev_action = action
            prev_reward = rew