# ray/rllib/tests/test_eager_support.py

import unittest

import ray
from ray import tune
from ray.rllib.agents.registry import get_agent_class


def check_support(alg, config, test_trace=True):
    """Run ``alg`` for one training iteration with eager mode enabled.

    The algorithm is run once with ``eager_tracing`` set to False and, when
    ``test_trace`` is true, a second time with ``eager_tracing`` set to True.

    Args:
        alg: Algorithm name handed to ``get_agent_class``.
        config: Base trainer config. A shallow copy is taken, so the keys
            set here ("eager", "env", "log_level", "eager_tracing") are not
            written back into the caller's dict.
        test_trace: Whether to also run with ``eager_tracing`` enabled.
    """
    # Work on a copy so the overrides below do not leak into the caller's
    # dict (the original mutated ``config`` in place).
    run_config = dict(config)
    run_config["eager"] = True
    # NOTE(review): presumably these algorithms need a continuous action
    # space, hence Pendulum instead of CartPole -- confirm.
    if alg in ["APEX_DDPG", "TD3", "DDPG", "SAC"]:
        run_config["env"] = "Pendulum-v0"
    else:
        run_config["env"] = "CartPole-v0"
    run_config["log_level"] = "ERROR"
    trainer_cls = get_agent_class(alg)

    # Always exercise plain eager mode first; traced eager mode is optional.
    tracing_modes = [False, True] if test_trace else [False]
    for tracing in tracing_modes:
        tune.run(
            trainer_cls,
            config=dict(run_config, eager_tracing=tracing),
            stop={"training_iteration": 1})


class TestEagerSupport(unittest.TestCase):
    """Calls ``check_support`` once per algorithm under test.

    Ray is initialized before every test method and shut down afterwards.
    """

    def setUp(self):
        """Start Ray with four CPUs for the upcoming test."""
        ray.init(num_cpus=4)

    def tearDown(self):
        """Shut Ray down again once the test has finished."""
        ray.shutdown()

    def testSimpleQ(self):
        simple_q_config = {"num_workers": 0, "learning_starts": 0}
        check_support("SimpleQ", simple_q_config)

    def testDQN(self):
        dqn_config = {"num_workers": 0, "learning_starts": 0}
        check_support("DQN", dqn_config)

    def testA2C(self):
        a2c_config = {"num_workers": 0}
        check_support("A2C", a2c_config)

    def testA3C(self):
        a3c_config = {"num_workers": 1}
        check_support("A3C", a3c_config)

    def testPG(self):
        pg_config = {"num_workers": 0}
        check_support("PG", pg_config)

    def testPPO(self):
        ppo_config = {"num_workers": 0}
        check_support("PPO", ppo_config)

    def testAPPO(self):
        appo_config = {"num_workers": 1, "num_gpus": 0}
        check_support("APPO", appo_config)

    def testIMPALA(self):
        impala_config = {"num_workers": 1, "num_gpus": 0}
        check_support("IMPALA", impala_config)

    def testAPEX_DQN(self):
        replay_settings = {"num_replay_buffer_shards": 1}
        apex_config = {
            "num_workers": 2,
            "learning_starts": 0,
            "num_gpus": 0,
            "min_iter_time_s": 1,
            "timesteps_per_iteration": 100,
            "optimizer": replay_settings,
        }
        check_support("APEX", apex_config)

    def testSAC(self):
        sac_config = {"num_workers": 0}
        check_support("SAC", sac_config)


if __name__ == "__main__":
    # Run this file's tests verbosely through pytest and use pytest's
    # return value as the process exit status.
    import sys

    import pytest

    exit_status = pytest.main(["-v", __file__])
    sys.exit(exit_status)