"""Simple example of writing experiences to a file using JsonWriter."""

# __sphinx_doc_begin__
import gym
import numpy as np
import os

import ray._private.utils

from ray.rllib.models.preprocessors import get_preprocessor
from ray.rllib.evaluation.sample_batch_builder import SampleBatchBuilder
from ray.rllib.offline.json_writer import JsonWriter

if __name__ == "__main__":
    batch_builder = SampleBatchBuilder()  # or MultiAgentSampleBatchBuilder
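    # add_values() accumulates per-timestep data; build_and_reset() (called at
    # the end of each episode below) turns it into a single SampleBatch.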
    writer = JsonWriter(
        os.path.join(ray._private.utils.get_user_temp_dir(), "demo-out")
    )
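    # JsonWriter writes each batch passed to write() as JSON into the output
    # directory ("demo-out" under the user's temp dir), in the format RLlib's
    # offline I/O (e.g. JsonReader / the "input" config) can read back.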

    # You normally wouldn't want to manually create sample batches if a
    # simulator is available, but let's do it anyways for example purposes:
    env = gym.make("CartPole-v0")
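    # Actions are sampled uniformly at random below, so the resulting dataset
    # reflects purely exploratory behavior rather than a trained policy.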

    # RLlib uses preprocessors to implement transforms such as one-hot encoding
    # and flattening of tuple and dict observations. For CartPole a no-op
    # preprocessor is used, but this may be relevant for more complex envs.
    prep = get_preprocessor(env.observation_space)(env.observation_space)
    print("The preprocessor is", prep)

    for eps_id in range(100):
        obs = env.reset()
        prev_action = np.zeros_like(env.action_space.sample())
        prev_reward = 0
        done = False
        t = 0
        while not done:
            action = env.action_space.sample()
            new_obs, rew, done, info = env.step(action)
            batch_builder.add_values(
                t=t,
                eps_id=eps_id,
                agent_index=0,
                obs=prep.transform(obs),
                actions=action,
                action_prob=1.0,  # put the true action probability here
                action_logp=0.0,
                rewards=rew,
                prev_actions=prev_action,
                prev_rewards=prev_reward,
                dones=done,
                infos=info,
                new_obs=prep.transform(new_obs),
            )
            obs = new_obs
            prev_action = action
            prev_reward = rew
            t += 1
        writer.write(batch_builder.build_and_reset())
# __sphinx_doc_end__
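
# Note: the JSON files written above can later be used as offline input for
# RLlib, e.g. by setting an algorithm's "input" config to the same directory.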