"""Smoke tests that run RLlib agents with the ``eager`` config flag enabled."""

import unittest

import ray
from ray import tune
from ray.rllib.agents.registry import get_agent_class

# Algorithms that are run on "Pendulum-v0"; every other algorithm is run on
# "CartPole-v0".
# NOTE(review): presumably these need a continuous action space - confirm.
_PENDULUM_ALGS = frozenset(["APEX_DDPG", "TD3", "DDPG", "SAC"])


def check_support(alg, config, test_trace=True):
    """Run ``alg`` through ``tune.run`` with eager mode switched on.

    The agent is always run once with ``eager_tracing`` set to False and,
    when ``test_trace`` is true, a second time with ``eager_tracing`` set
    to True. Both runs use the stop criterion ``{"training_iteration": 0}``.

    Args:
        alg: Algorithm name, resolved with ``get_agent_class``.
        config: Algorithm-specific config overrides. The dict is copied,
            so the caller's object is left untouched.
        test_trace: Whether to also run with ``eager_tracing`` enabled.
    """
    # Work on a copy: the original implementation wrote the eager/env/log
    # settings straight into the dict owned by the caller.
    run_config = dict(config)
    run_config["eager"] = True
    if alg in _PENDULUM_ALGS:
        run_config["env"] = "Pendulum-v0"
    else:
        run_config["env"] = "CartPole-v0"
    run_config["log_level"] = "ERROR"

    agent_cls = get_agent_class(alg)

    tracing_modes = [False, True] if test_trace else [False]
    for tracing in tracing_modes:
        # Each run receives its own dict, so one run's config object is
        # never shared with the next.
        tune.run(
            agent_cls,
            config=dict(run_config, eager_tracing=tracing),
            stop={"training_iteration": 0})


class TestEagerSupport(unittest.TestCase):
    """Runs ``check_support`` once per algorithm, each on a fresh Ray."""

    def setUp(self):
        """Start Ray with four CPUs before every test."""
        ray.init(num_cpus=4)

    def tearDown(self):
        """Shut Ray down after every test."""
        ray.shutdown()

    def testSimpleQ(self):
        check_support("SimpleQ", {"num_workers": 0, "learning_starts": 0})

    def testDQN(self):
        check_support("DQN", {"num_workers": 0, "learning_starts": 0})

    def testA2C(self):
        check_support("A2C", {"num_workers": 0})

    def testA3C(self):
        # TODO(ekl) trace on is flaky
        check_support("A3C", {"num_workers": 1}, test_trace=False)

    def testPG(self):
        check_support("PG", {"num_workers": 0})

    def testPPO(self):
        check_support("PPO", {"num_workers": 0})

    def testAPPO(self):
        check_support("APPO", {"num_workers": 1, "num_gpus": 0})

    def testIMPALA(self):
        check_support("IMPALA", {"num_workers": 1, "num_gpus": 0})

    def testAPEX_DQN(self):
        check_support(
            "APEX", {
                "num_workers": 2,
                "learning_starts": 0,
                "num_gpus": 0,
                "min_iter_time_s": 1,
                "timesteps_per_iteration": 100
            })

    def testSAC(self):
        check_support("SAC", {
            "num_workers": 0,
            "learning_starts": 0,
            "timesteps_per_iteration": 100
        })


if __name__ == "__main__":
    unittest.main(verbosity=2)