ray/rllib/tests/test_eager_support.py
Sven Mika 5ac5ac9560
[RLlib] Fix broken example: tf-eager with custom-RNN (#6732). (#7021)
* WIP.

* Fix float32 conversion in OneHot preprocessor (would cause float64 in eager, then NN-matmul-failure).
Add proper seq-len + state-in construction in eager_tf_policy.py::_compute_gradients().

* LINT.

* eager_tf_policy.py: Only set samples["seq_lens"] if RNN. Otherwise, eager-tracing will throw flattened-dict key-mismatch error.

* Move issue code to examples folder.

Co-authored-by: Eric Liang <ekhliang@gmail.com>
2020-02-06 09:44:08 -08:00

70 lines
1.8 KiB
Python

import unittest
import ray
from ray import tune
from ray.rllib.agents.registry import get_agent_class
def check_support(alg, config, test_trace=True):
    """Run algorithm `alg` through `tune.run` with eager mode enabled.

    The trainer is launched once with "eager_tracing" disabled and, when
    `test_trace` is true, a second time with "eager_tracing" enabled.

    Args:
        alg (str): Algorithm name handed to `get_agent_class`.
        config (dict): Base trainer config. A shallow copy is taken, so the
            caller's dict is not modified.
        test_trace (bool): Whether to also run with "eager_tracing" on.
    """
    # Copy first: the keys added below must not leak into the caller's dict.
    config = dict(config)
    config["eager"] = True
    # The listed algorithms are run on Pendulum-v0; all others on CartPole-v0.
    if alg in ("APEX_DDPG", "TD3", "DDPG", "SAC"):
        config["env"] = "Pendulum-v0"
    else:
        config["env"] = "CartPole-v0"
    a = get_agent_class(alg)
    config["log_level"] = "ERROR"

    # Always test without tracing; add the traced run only on request.
    tracing_modes = (False, True) if test_trace else (False,)
    for tracing in tracing_modes:
        config["eager_tracing"] = tracing
        tune.run(a, config=config, stop={"training_iteration": 0})
class TestEagerSupport(unittest.TestCase):
    """Runs `check_support` once per algorithm, each in a fresh ray session."""

    def setUp(self):
        # Start ray before every test case.
        ray.init(num_cpus=4)

    def tearDown(self):
        # Shut ray down again after every test case.
        ray.shutdown()

    def testSimpleQ(self):
        simple_q_config = {"num_workers": 0, "learning_starts": 0}
        check_support("SimpleQ", simple_q_config)

    def testDQN(self):
        dqn_config = {"num_workers": 0, "learning_starts": 0}
        check_support("DQN", dqn_config)

    def testA2C(self):
        a2c_config = {"num_workers": 0}
        check_support("A2C", a2c_config)

    def testA3C(self):
        a3c_config = {"num_workers": 1}
        check_support("A3C", a3c_config)

    def testPG(self):
        pg_config = {"num_workers": 0}
        check_support("PG", pg_config)

    def testPPO(self):
        ppo_config = {"num_workers": 0}
        check_support("PPO", ppo_config)

    def testAPPO(self):
        appo_config = {"num_workers": 1, "num_gpus": 0}
        check_support("APPO", appo_config)

    def testIMPALA(self):
        impala_config = {"num_workers": 1, "num_gpus": 0}
        check_support("IMPALA", impala_config)

    def testAPEX_DQN(self):
        # Registered under the name "APEX"; uses two workers and no GPU.
        apex_config = {
            "num_workers": 2,
            "learning_starts": 0,
            "num_gpus": 0,
            "min_iter_time_s": 1,
            "timesteps_per_iteration": 100
        }
        check_support("APEX", apex_config)
if __name__ == "__main__":
    import sys

    import pytest

    # Run this file's tests verbosely and exit with pytest's return code.
    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)