2017-03-07 23:42:44 -08:00
|
|
|
from __future__ import absolute_import
|
|
|
|
from __future__ import division
|
|
|
|
from __future__ import print_function
|
|
|
|
|
|
|
|
import unittest
|
|
|
|
import numpy as np
|
|
|
|
from numpy.testing import assert_allclose
|
|
|
|
|
2019-07-27 02:08:16 -07:00
|
|
|
from ray.rllib.models.tf.tf_action_dist import Categorical
|
2018-07-01 00:05:08 -07:00
|
|
|
from ray.rllib.agents.ppo.utils import flatten, concatenate
|
2019-05-16 22:12:07 -07:00
|
|
|
from ray.rllib.utils import try_import_tf
|
|
|
|
|
|
|
|
tf = try_import_tf()
|
2017-03-07 23:42:44 -08:00
|
|
|
|
2017-05-16 14:12:18 -07:00
|
|
|
|
2017-07-17 01:58:54 -07:00
|
|
|
# TODO(ekl): move to rllib/models dir
|
|
|
|
class DistributionsTest(unittest.TestCase):
    """Statistical checks for RLlib action distributions."""

    def testCategorical(self):
        """Check that Categorical samples follow softmax(logits).

        Feeds ``num_samples`` identical rows of random logits, draws one
        sample per row, and asserts that the L1 distance between the
        empirical action frequencies and the softmax probabilities of the
        logits is at most 0.01.
        """
        num_samples = 100000
        num_actions = 10

        logits = tf.placeholder(tf.float32, shape=(None, num_actions))
        # Random logits in [-4, 4); every row fed to the graph is the same.
        z = 8 * (np.random.rand(num_actions) - 0.5)
        data = np.tile(z, (num_samples, 1))

        c = Categorical(logits, {})  # dummy config dict
        sample_op = c.sample()

        # The context manager releases the session's resources even when
        # running the graph raises; the bare tf.Session() was never closed.
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            samples = sess.run(sample_op, feed_dict={logits: data})

        # Histogram of sampled action indices, one bin per action.
        counts = np.bincount(
            np.asarray(samples).ravel(), minlength=num_actions)
        probs = np.exp(z) / np.sum(np.exp(z))

        # assertLessEqual reports the measured error on failure.
        l1_error = np.sum(np.abs(probs - counts / float(num_samples)))
        self.assertLessEqual(l1_error, 0.01)
|
2017-03-07 23:42:44 -08:00
|
|
|
|
2017-05-16 14:12:18 -07:00
|
|
|
|
2017-03-07 23:42:44 -08:00
|
|
|
class UtilsTest(unittest.TestCase):
    """Tests for the PPO dict helpers ``flatten`` and ``concatenate``."""

    def testFlatten(self):
        """Check the element order produced by ``flatten(..., 0, 2)``.

        For each key, element ``[i][j]`` of the original 2x2x? array is
        expected at index ``2 * i + j`` of the flattened array.
        """
        original = {
            "s": np.array([[[1, -1], [2, -2]], [[3, -3], [4, -4]]]),
            "a": np.array([[[5], [-5]], [[6], [-6]]])
        }
        # A shallow copy is handed over, presumably so that ``original``
        # keeps referencing the unflattened arrays -- TODO confirm.
        flat = flatten(original.copy(), start=0, stop=2)

        for key in ("s", "a"):
            # np.ndindex walks (0, 0), (0, 1), (1, 0), (1, 1) in that order.
            for flat_idx, (i, j) in enumerate(np.ndindex(2, 2)):
                assert_allclose(original[key][i][j], flat[key][flat_idx])

    def testConcatenate(self):
        """Check per-key concatenation of a list of dicts of arrays."""
        first = {"s": np.array([0, 1]), "a": np.array([2, 3])}
        second = {"s": np.array([4, 5]), "a": np.array([6, 7])}
        expected = (
            ("s", np.array([0, 1, 4, 5])),
            ("a", np.array([2, 3, 6, 7])),
        )

        merged = concatenate([first, second])
        for key, want in expected:
            assert_allclose(merged[key], want)

        # Concatenating a single-element list must give the same values.
        remerged = concatenate([merged])
        for key, want in expected:
            assert_allclose(remerged[key], want)
|
2017-03-07 23:42:44 -08:00
|
|
|
|
2017-05-16 14:12:18 -07:00
|
|
|
|
2017-03-07 23:42:44 -08:00
|
|
|
if __name__ == "__main__":
    # Run every TestCase in this module; verbosity=2 prints one line per test.
    unittest.main(verbosity=2)
|