ray/rllib/tests/test_io.py
Sven 60d4d5e1aa Remove future imports (#6724)
* Remove all __future__ imports from RLlib.

* Remove (object) again from tf_run_builder.py::TFRunBuilder.

* Fix 2xLINT warnings.

* Fix broken appo_policy import (must be appo_tf_policy)

* Remove future imports from all other ray files (not just RLlib).

* Remove future imports from all other ray files (not just RLlib).

* Remove future import blocks that contain `unicode_literals` as well.
Revert appo_tf_policy.py to appo_policy.py (belongs to another PR).

* Add two empty lines before Schedule class.

* Put back __future__ imports into determine_tests_to_run.py. Fails otherwise on a py2/print related error.
2020-01-09 00:15:48 -08:00

321 lines
10 KiB
Python

import glob
import gym
import json
import numpy as np
import os
import random
import shutil
import tempfile
import time
import unittest
import ray
from ray.rllib.agents.pg import PGTrainer
from ray.rllib.agents.pg.pg_tf_policy import PGTFPolicy
from ray.rllib.evaluation import SampleBatch
from ray.rllib.offline import IOContext, JsonWriter, JsonReader
from ray.rllib.offline.json_writer import _to_json
from ray.rllib.tests.test_multi_agent_env import MultiCartpole
from ray.tune.registry import register_env
# Shared fixture: a four-step batch spanning three episodes
# (eps_id values 1, 1, 2, 3), reused by the writer and
# split_by_episode tests below.
SAMPLES = SampleBatch({
    "actions": np.array([1, 2, 3, 4]),
    "obs": np.array([4, 5, 6, 7]),
    "eps_id": [1, 1, 2, 3],
})
def make_sample_batch(i):
    """Build a tiny three-step batch whose actions and obs all equal ``i``."""
    columns = {
        "actions": np.array([i, i, i]),
        "obs": np.array([i, i, i]),
    }
    return SampleBatch(columns)
class AgentIOTest(unittest.TestCase):
    """End-to-end tests of agent experience output to, and input from,
    JSON files (single- and multi-agent)."""

    def setUp(self):
        # Fresh scratch directory per test; removed in tearDown.
        self.test_dir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.test_dir)

    def writeOutputs(self, output):
        """Train a PG agent for one iteration, writing its experiences
        to `output` (a directory path or the special string "logdir").

        Returns the trained agent so callers can inspect e.g. its logdir.
        """
        agent = PGTrainer(
            env="CartPole-v0",
            config={
                "output": output,
                "sample_batch_size": 250,
            })
        agent.train()
        return agent

    def testAgentOutputOk(self):
        # One training iteration should produce exactly one output file,
        # and that file should be readable back as JSON sample batches.
        self.writeOutputs(self.test_dir)
        self.assertEqual(len(os.listdir(self.test_dir)), 1)
        reader = JsonReader(self.test_dir + "/*.json")
        reader.next()

    def testAgentOutputLogdir(self):
        # "logdir" routes output files into the agent's own log directory.
        agent = self.writeOutputs("logdir")
        self.assertEqual(len(glob.glob(agent.logdir + "/output-*.json")), 1)

    def testAgentInputDir(self):
        # An agent configured with "input" reads experiences back from disk.
        self.writeOutputs(self.test_dir)
        agent = PGTrainer(
            env="CartPole-v0",
            config={
                "input": self.test_dir,
                "input_evaluation": [],
            })
        result = agent.train()
        self.assertEqual(result["timesteps_total"], 250)  # read from input
        # No live env is sampled, so there is no episode reward to report.
        self.assertTrue(np.isnan(result["episode_reward_mean"]))

    def testSplitByEpisode(self):
        # SAMPLES has eps_id [1, 1, 2, 3] -> three episodes of sizes 2, 1, 1.
        splits = SAMPLES.split_by_episode()
        self.assertEqual(len(splits), 3)
        self.assertEqual(splits[0].count, 2)
        self.assertEqual(splits[1].count, 1)
        self.assertEqual(splits[2].count, 1)

    def testAgentInputPostprocessingEnabled(self):
        self.writeOutputs(self.test_dir)
        # Rewrite the files to drop advantages and value_targets for testing
        for path in glob.glob(self.test_dir + "/*.json"):
            out = []
            # Fix: the original `open(path).readlines()` leaked the file
            # handle; use a context manager and iterate the file directly.
            with open(path) as f:
                for line in f:
                    data = json.loads(line)
                    del data["advantages"]
                    del data["value_targets"]
                    out.append(data)
            with open(path, "w") as f:
                for data in out:
                    f.write(json.dumps(data))
        agent = PGTrainer(
            env="CartPole-v0",
            config={
                "input": self.test_dir,
                "input_evaluation": [],
                "postprocess_inputs": True,  # adds back 'advantages'
            })
        result = agent.train()
        self.assertEqual(result["timesteps_total"], 250)  # read from input
        self.assertTrue(np.isnan(result["episode_reward_mean"]))

    def testAgentInputEvalSim(self):
        # "simulation" input evaluation runs a live env purely for metrics,
        # so episode_reward_mean should eventually become non-NaN.
        self.writeOutputs(self.test_dir)
        agent = PGTrainer(
            env="CartPole-v0",
            config={
                "input": self.test_dir,
                "input_evaluation": ["simulation"],
            })
        for _ in range(50):
            result = agent.train()
            if not np.isnan(result["episode_reward_mean"]):
                return  # simulation ok
            time.sleep(0.1)
        assert False, "did not see any simulation results"

    def testAgentInputList(self):
        # "input" also accepts an explicit list of file paths.
        self.writeOutputs(self.test_dir)
        agent = PGTrainer(
            env="CartPole-v0",
            config={
                "input": glob.glob(self.test_dir + "/*.json"),
                "input_evaluation": [],
                "sample_batch_size": 99,
            })
        result = agent.train()
        self.assertEqual(result["timesteps_total"], 250)  # read from input
        self.assertTrue(np.isnan(result["episode_reward_mean"]))

    def testAgentInputDict(self):
        # "input" as a dict mixes offline files with live sampling by
        # probability; sampler experiences make the reward metric real.
        self.writeOutputs(self.test_dir)
        agent = PGTrainer(
            env="CartPole-v0",
            config={
                "input": {
                    self.test_dir: 0.1,
                    "sampler": 0.9,
                },
                "train_batch_size": 2000,
                "input_evaluation": [],
            })
        result = agent.train()
        self.assertTrue(not np.isnan(result["episode_reward_mean"]))

    def testMultiAgent(self):
        # Round-trip multi-agent experiences: write with one trainer,
        # then read back with another using simulation evaluation.
        register_env("multi_cartpole", lambda _: MultiCartpole(10))
        single_env = gym.make("CartPole-v0")

        def gen_policy():
            # Each policy shares CartPole's observation/action spaces.
            obs_space = single_env.observation_space
            act_space = single_env.action_space
            return (PGTFPolicy, obs_space, act_space, {})

        pg = PGTrainer(
            env="multi_cartpole",
            config={
                "num_workers": 0,
                "output": self.test_dir,
                "multiagent": {
                    "policies": {
                        "policy_1": gen_policy(),
                        "policy_2": gen_policy(),
                    },
                    "policy_mapping_fn": (
                        lambda agent_id: random.choice(
                            ["policy_1", "policy_2"])),
                },
            })
        pg.train()
        self.assertEqual(len(os.listdir(self.test_dir)), 1)
        pg.stop()
        pg = PGTrainer(
            env="multi_cartpole",
            config={
                "num_workers": 0,
                "input": self.test_dir,
                "input_evaluation": ["simulation"],
                "train_batch_size": 2000,
                "multiagent": {
                    "policies": {
                        "policy_1": gen_policy(),
                        "policy_2": gen_policy(),
                    },
                    "policy_mapping_fn": (
                        lambda agent_id: random.choice(
                            ["policy_1", "policy_2"])),
                },
            })
        for _ in range(50):
            result = pg.train()
            if not np.isnan(result["episode_reward_mean"]):
                return  # simulation ok
            time.sleep(0.1)
        assert False, "did not see any simulation results"
class JsonIOTest(unittest.TestCase):
    """Unit tests for JsonWriter/JsonReader: pagination, round-trips,
    and tolerance of empty or corrupted input lines."""

    def setUp(self):
        # Scratch directory, cleaned up after each test.
        self.test_dir = tempfile.mkdtemp()

    def tearDown(self):
        shutil.rmtree(self.test_dir)

    def testWriteSimple(self):
        # Two small writes stay well under max_file_size -> one file total.
        ctx = IOContext(self.test_dir, {}, 0, None)
        writer = JsonWriter(
            self.test_dir, ctx, max_file_size=1000, compress_columns=["obs"])
        self.assertEqual(len(os.listdir(self.test_dir)), 0)
        writer.write(SAMPLES)
        writer.write(SAMPLES)
        self.assertEqual(len(os.listdir(self.test_dir)), 1)

    def testWriteFileURI(self):
        # A "file:" URI prefix is accepted and resolves to the same dir.
        ctx = IOContext(self.test_dir, {}, 0, None)
        writer = JsonWriter(
            "file:" + self.test_dir,
            ctx,
            max_file_size=1000,
            compress_columns=["obs"])
        self.assertEqual(len(os.listdir(self.test_dir)), 0)
        writer.write(SAMPLES)
        writer.write(SAMPLES)
        self.assertEqual(len(os.listdir(self.test_dir)), 1)

    def testWritePaginate(self):
        # Many writes against a small max_file_size must roll over to
        # multiple files (exact count depends on compressed record size).
        ctx = IOContext(self.test_dir, {}, 0, None)
        writer = JsonWriter(
            self.test_dir, ctx, max_file_size=5000, compress_columns=["obs"])
        self.assertEqual(len(os.listdir(self.test_dir)), 0)
        for _ in range(100):
            writer.write(SAMPLES)
        num_files = len(os.listdir(self.test_dir))
        assert num_files in [12, 13], num_files

    def testReadWrite(self):
        # Write 100 distinct batches, then read repeatedly: the reader
        # shuffles across files, so most (not necessarily all) distinct
        # values should be observed.
        ctx = IOContext(self.test_dir, {}, 0, None)
        writer = JsonWriter(
            self.test_dir, ctx, max_file_size=5000, compress_columns=["obs"])
        for i in range(100):
            writer.write(make_sample_batch(i))
        reader = JsonReader(self.test_dir + "/*.json")
        actions_seen = set()
        obs_seen = set()
        for _ in range(1000):
            batch = reader.next()
            actions_seen.add(batch["actions"][0])
            obs_seen.add(batch["obs"][0])
        self.assertGreater(len(actions_seen), 90)
        self.assertLess(len(actions_seen), 101)
        self.assertGreater(len(obs_seen), 90)
        self.assertLess(len(obs_seen), 101)

    def testSkipsOverEmptyLinesAndFiles(self):
        # Zero-byte files and blank lines must be silently skipped; the
        # two real records (batches 0 and 1) must both come through.
        open(self.test_dir + "/empty", "w").close()
        with open(self.test_dir + "/f1", "w") as f:
            f.write("\n")
            f.write("\n")
            f.write(_to_json(make_sample_batch(0), []))
        with open(self.test_dir + "/f2", "w") as f:
            f.write(_to_json(make_sample_batch(1), []))
            f.write("\n")
        reader = JsonReader([
            self.test_dir + "/empty",
            self.test_dir + "/f1",
            "file:" + self.test_dir + "/f2",
        ])
        actions_seen = set()
        for _ in range(100):
            batch = reader.next()
            actions_seen.add(batch["actions"][0])
        self.assertEqual(len(actions_seen), 2)

    def testSkipsOverCorruptedLines(self):
        # A trailing unparseable line must not prevent reading the four
        # valid records that precede it.
        with open(self.test_dir + "/f1", "w") as f:
            f.write(_to_json(make_sample_batch(0), []))
            f.write("\n")
            f.write(_to_json(make_sample_batch(1), []))
            f.write("\n")
            f.write(_to_json(make_sample_batch(2), []))
            f.write("\n")
            f.write(_to_json(make_sample_batch(3), []))
            f.write("\n")
            f.write("{..corrupted_json_record")
        reader = JsonReader([
            self.test_dir + "/f1",
        ])
        actions_seen = set()
        for _ in range(10):
            batch = reader.next()
            actions_seen.add(batch["actions"][0])
        self.assertEqual(len(actions_seen), 4)

    def testAbortOnAllEmptyInputs(self):
        # If every input yields nothing, next() must raise rather than
        # spin forever — both for zero-byte and all-blank-line files.
        open(self.test_dir + "/empty", "w").close()
        reader = JsonReader([
            self.test_dir + "/empty",
        ])
        self.assertRaises(ValueError, lambda: reader.next())
        with open(self.test_dir + "/empty1", "w") as f:
            for _ in range(100):
                f.write("\n")
        with open(self.test_dir + "/empty2", "w") as f:
            for _ in range(100):
                f.write("\n")
        reader = JsonReader([
            self.test_dir + "/empty1",
            self.test_dir + "/empty2",
        ])
        self.assertRaises(ValueError, lambda: reader.next())
if __name__ == "__main__":
    # A single CPU is enough for these tests and keeps Ray lightweight.
    ray.init(num_cpus=1)
    unittest.main(verbosity=2)