ray/rllib/examples/saving_experiences.py

"""Simple example of writing experiences to a file using JsonWriter."""
# __sphinx_doc_begin__
import gym
import numpy as np
from ray.rllib.models.preprocessors import get_preprocessor
from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
from ray.rllib.offline.json_writer import JsonWriter
if __name__ == "__main__":
batch_builder = SampleBatchBuilder() # or MultiAgentSampleBatchBuilder
writer = JsonWriter("/tmp/demo-out")
# You normally wouldn't want to manually create sample batches if a
# simulator is available, but let's do it anyways for example purposes:
env = gym.make("CartPole-v0")
# RLlib uses preprocessors to implement transforms such as one-hot encoding
# and flattening of tuple and dict observations. For CartPole a no-op
# preprocessor is used, but this may be relevant for more complex envs.
prep = get_preprocessor(env.observation_space)(env.observation_space)
print("The preprocessor is", prep)
for eps_id in range(100):
obs = env.reset()
prev_action = np.zeros_like(env.action_space.sample())
prev_reward = 0
done = False
t = 0
while not done:
action = env.action_space.sample()
new_obs, rew, done, info = env.step(action)
batch_builder.add_values(
t=t,
eps_id=eps_id,
agent_index=0,
obs=prep.transform(obs),
actions=action,
action_prob=1.0, # put the true action probability here
rewards=rew,
prev_actions=prev_action,
prev_rewards=prev_reward,
dones=done,
infos=info,
new_obs=prep.transform(new_obs))
obs = new_obs
prev_action = action
prev_reward = rew
t += 1
writer.write(batch_builder.build_and_reset())
# __sphinx_doc_end__
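
# ---------------------------------------------------------------------------
# Not part of the original file: a minimal sketch of reading the saved
# experiences back in, assuming the "/tmp/demo-out" directory written above
# and RLlib's JsonReader (ray.rllib.offline.json_reader) from the same RLlib
# version. The same directory can also be pointed to by an offline training
# run through the "input": "/tmp/demo-out" config key.

from ray.rllib.offline.json_reader import JsonReader

reader = JsonReader("/tmp/demo-out")

# Pull a few batches back out and inspect them.
for _ in range(5):
    batch = reader.next()  # returns a SampleBatch of recorded transitions
    print(batch.count, batch["rewards"].sum())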