"""Simple example of writing experiences to a file using JsonWriter."""
# __sphinx_doc_begin__
import gym
import numpy as np
import os
import ray.utils
from ray.rllib.models.preprocessors import get_preprocessor
from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
from ray.rllib.offline.json_writer import JsonWriter
if __name__ == "__main__":
    batch_builder = SampleBatchBuilder()  # or MultiAgentSampleBatchBuilder
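    # The builder collects the per-timestep values added below and emits them
    # as a single SampleBatch each time build_and_reset() is called.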
    writer = JsonWriter(
        os.path.join(ray.utils.get_user_temp_dir(), "demo-out"))

    # You normally wouldn't want to manually create sample batches if a
    # simulator is available, but let's do it anyway for example purposes:
    env = gym.make("CartPole-v0")
    # RLlib uses preprocessors to implement transforms such as one-hot
    # encoding and flattening of tuple and dict observations. For CartPole a
    # no-op preprocessor is used, but this may be relevant for more complex
    # envs.
    prep = get_preprocessor(env.observation_space)(env.observation_space)
    print("The preprocessor is", prep)
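    # Illustration (added here, not in the original example): for a
    # Discrete(3) observation space, get_preprocessor would return a one-hot
    # preprocessor, so prep.transform(2) would yield [0., 0., 1.].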
    for eps_id in range(100):
        obs = env.reset()
        prev_action = np.zeros_like(env.action_space.sample())
        prev_reward = 0
        done = False
        t = 0
        while not done:
            action = env.action_space.sample()
            new_obs, rew, done, info = env.step(action)
            batch_builder.add_values(
                t=t,
                eps_id=eps_id,
                agent_index=0,
                obs=prep.transform(obs),
                actions=action,
                # Put the true action probability here; for this uniform-
                # random policy over CartPole's two actions it would be 0.5.
                action_prob=1.0,
                rewards=rew,
                prev_actions=prev_action,
                prev_rewards=prev_reward,
                dones=done,
                infos=info,
                new_obs=prep.transform(new_obs))
            obs = new_obs
            prev_action = action
            prev_reward = rew
            t += 1
        writer.write(batch_builder.build_and_reset())
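    # A minimal read-back sketch added for illustration (not part of the
    # original example); it assumes RLlib's JsonReader offline API, which
    # reads the output directory and yields SampleBatches via next():
    from ray.rllib.offline.json_reader import JsonReader

    reader = JsonReader(
        os.path.join(ray.utils.get_user_temp_dir(), "demo-out"))
    print(reader.next())  # one SampleBatch restored from the JSON files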
# __sphinx_doc_end__