# ray/rllib/tests/test_supported_multi_agent.py

import unittest

import ray
from ray.rllib.agents.registry import get_trainer_class
from ray.rllib.examples.env.multi_agent import MultiAgentCartPole, MultiAgentMountainCar
from ray.rllib.policy.policy import PolicySpec
from ray.rllib.utils.test_utils import check_train_results, framework_iterator
from ray.tune import register_env


def check_support_multiagent(alg, config):
    """Run one training iteration of `alg` in a two-agent setup, per framework.

    Registers two-agent CartPole and MountainCar envs, installs a two-policy
    multi-agent setup in `config` and then, for every framework yielded by
    `framework_iterator(config)`, builds the trainer, calls `train()` once and
    validates the returned results with `check_train_results`.

    Args:
        alg: Algorithm name, resolved to a trainer class via
            `get_trainer_class`.
        config: Trainer config dict. NOTE: modified in place -- its
            "multiagent" key is overwritten by this function.
    """
    register_env(
        "multi_agent_mountaincar", lambda _: MultiAgentMountainCar({"num_agents": 2})
    )
    register_env(
        "multi_agent_cartpole", lambda _: MultiAgentCartPole({"num_agents": 2})
    )

    # Simulate a simple multi-agent setup.
    policies = {
        "policy_0": PolicySpec(config={"gamma": 0.99}),
        "policy_1": PolicySpec(config={"gamma": 0.95}),
    }
    policy_ids = list(policies.keys())

    def policy_mapping_fn(agent_id, episode, worker, **kwargs):
        # The agent id is used directly as an index into `policy_ids`.
        # NOTE(review): assumes the envs use integer agent ids 0 and 1 -- confirm.
        return policy_ids[agent_id]

    config["multiagent"] = {
        "policies": policies,
        "policy_mapping_fn": policy_mapping_fn,
    }

    for fw in framework_iterator(config):
        # These algorithms are not exercised under the eager TF frameworks.
        if fw in ["tf2", "tfe"] and alg in ["A3C", "APEX", "APEX_DDPG", "IMPALA"]:
            continue

        # DDPG-style algorithms and SAC run on the MountainCar env, everything
        # else on CartPole (presumably because of the action space they need).
        if alg in ["DDPG", "APEX_DDPG", "SAC"]:
            env = "multi_agent_mountaincar"
        else:
            env = "multi_agent_cartpole"
        trainer = get_trainer_class(alg)(config=config, env=env)

        # Always stop the trainer, even if training or the results check
        # fails; otherwise it would be left running inside the Ray instance
        # that the remaining tests of the class share.
        try:
            results = trainer.train()
            check_train_results(results)
            print(results)
        finally:
            trainer.stop()


class TestSupportedMultiAgentPG(unittest.TestCase):
    """Multi-agent support checks for the A3C, IMPALA, PG and PPO trainers."""

    @classmethod
    def setUpClass(cls) -> None:
        # A single Ray instance (4 CPUs) is shared by all tests of this class.
        ray.init(num_cpus=4)

    @classmethod
    def tearDownClass(cls) -> None:
        ray.shutdown()

    def test_a3c_multiagent(self):
        """Check A3C in the two-policy multi-agent setup."""
        a3c_config = {"num_workers": 1, "optimizer": {"grads_per_step": 1}}
        check_support_multiagent("A3C", a3c_config)

    def test_impala_multiagent(self):
        """Check IMPALA in the two-policy multi-agent setup (no GPUs)."""
        impala_config = {"num_gpus": 0}
        check_support_multiagent("IMPALA", impala_config)

    def test_pg_multiagent(self):
        """Check PG in the two-policy multi-agent setup."""
        pg_config = {"num_workers": 1, "optimizer": {}}
        check_support_multiagent("PG", pg_config)

    def test_ppo_multiagent(self):
        """Check PPO in the two-policy multi-agent setup with tiny batches."""
        ppo_config = {
            "num_workers": 1,
            "num_sgd_iter": 1,
            "train_batch_size": 10,
            "rollout_fragment_length": 10,
            "sgd_minibatch_size": 1,
        }
        check_support_multiagent("PPO", ppo_config)


class TestSupportedMultiAgentOffPolicy(unittest.TestCase):
    """Multi-agent support checks for APEX, APEX_DDPG, DDPG, DQN and SAC."""

    @classmethod
    def setUpClass(cls) -> None:
        # A single Ray instance (6 CPUs) is shared by all tests of this class.
        ray.init(num_cpus=6)

    @classmethod
    def tearDownClass(cls) -> None:
        ray.shutdown()

    def test_apex_multiagent(self):
        """Check APEX in the two-policy multi-agent setup."""
        replay_buffer_config = {
            "capacity": 1000,
            "learning_starts": 10,
        }
        apex_config = {
            "num_workers": 2,
            "min_sample_timesteps_per_iteration": 100,
            "num_gpus": 0,
            "replay_buffer_config": replay_buffer_config,
            "min_time_s_per_iteration": 1,
            "target_network_update_freq": 100,
            "optimizer": {
                "num_replay_buffer_shards": 1,
            },
        }
        check_support_multiagent("APEX", apex_config)

    def test_apex_ddpg_multiagent(self):
        """Check APEX_DDPG in the two-policy multi-agent setup."""
        replay_buffer_config = {
            "capacity": 1000,
            "learning_starts": 10,
        }
        apex_ddpg_config = {
            "num_workers": 2,
            "min_sample_timesteps_per_iteration": 100,
            "replay_buffer_config": replay_buffer_config,
            "num_gpus": 0,
            "min_time_s_per_iteration": 1,
            "target_network_update_freq": 100,
            "use_state_preprocessor": True,
        }
        check_support_multiagent("APEX_DDPG", apex_ddpg_config)

    def test_ddpg_multiagent(self):
        """Check DDPG in the two-policy multi-agent setup."""
        replay_buffer_config = {
            "capacity": 1000,
            "learning_starts": 500,
        }
        ddpg_config = {
            "min_sample_timesteps_per_iteration": 1,
            "replay_buffer_config": replay_buffer_config,
            "use_state_preprocessor": True,
        }
        check_support_multiagent("DDPG", ddpg_config)

    def test_dqn_multiagent(self):
        """Check DQN in the two-policy multi-agent setup."""
        dqn_config = {
            "min_sample_timesteps_per_iteration": 1,
            "replay_buffer_config": {
                "capacity": 1000,
            },
        }
        check_support_multiagent("DQN", dqn_config)

    def test_sac_multiagent(self):
        """Check SAC in the two-policy multi-agent setup."""
        sac_config = {
            "num_workers": 0,
            "replay_buffer_config": {
                "capacity": 1000,
            },
            "normalize_actions": False,
        }
        check_support_multiagent("SAC", sac_config)


if __name__ == "__main__":
    import sys

    import pytest

    # An optional first command-line argument names the single TestCase class
    # to run; without it, all unittest.TestCase classes in this file are run.
    target = __file__
    if len(sys.argv) > 1:
        target += "::" + sys.argv[1]

    exit_code = pytest.main(["-v", target])
    sys.exit(exit_code)
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`import unittest`

			`import ray`
[RLlib] Allow `rllib rollout` to run distributed via evaluation workers. (#13718) 2021-02-08 12:05:16 +01:00			`from ray.rllib.agents.registry import get_trainer_class`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`from ray.rllib.examples.env.multi_agent import MultiAgentCartPole, MultiAgentMountainCar`
[RLlib] Issue 18280: A3C/IMPALA multi-agent not working. (#19100) 2021-10-07 23:57:53 +02:00			`from ray.rllib.policy.policy import PolicySpec`
[RLlib] Unify all RLlib Trainer.train() -> results[info][learner][policy ID][learner_stats] and add structure tests. (#18879) 2021-09-30 16:39:05 +02:00			`from ray.rllib.utils.test_utils import check_train_results, framework_iterator`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`from ray.tune import register_env`


			`def check_support_multiagent(alg, config):`
			`register_env(`
			`"multi_agent_mountaincar", lambda _: MultiAgentMountainCar({"num_agents": 2})`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`)`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`register_env(`
			`"multi_agent_cartpole", lambda _: MultiAgentCartPole({"num_agents": 2})`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`)`
[RLlib] Issue 18280: A3C/IMPALA multi-agent not working. (#19100) 2021-10-07 23:57:53 +02:00
			`# Simulate a simple multi-agent setup.`
			`policies = {`
			`"policy_0": PolicySpec(config={"gamma": 0.99}),`
			`"policy_1": PolicySpec(config={"gamma": 0.95}),`
			`}`
			`policy_ids = list(policies.keys())`

			`def policy_mapping_fn(agent_id, episode, worker, **kwargs):`
			`pol_id = policy_ids[agent_id]`
			`return pol_id`

			`config["multiagent"] = {`
			`"policies": policies,`
			`"policy_mapping_fn": policy_mapping_fn,`
			`}`

[RLlib] DDPG and SAC eager support (preparation for tf2.x) (#9204) 2020-07-08 16:12:20 +02:00			`for fw in framework_iterator(config):`
[RLlib] Tf2.x native. (#8752) 2020-07-11 22:06:35 +02:00			`if fw in ["tf2", "tfe"] and alg in ["A3C", "APEX", "APEX_DDPG", "IMPALA"]:`
[RLlib] DDPG and SAC eager support (preparation for tf2.x) (#9204) 2020-07-08 16:12:20 +02:00			`continue`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`if alg in ["DDPG", "APEX_DDPG", "SAC"]:`
			`a = get_trainer_class(alg)(config=config, env="multi_agent_mountaincar")`
			`else:`
[RLlib] Allow `rllib rollout` to run distributed via evaluation workers. (#13718) 2021-02-08 12:05:16 +01:00			`a = get_trainer_class(alg)(config=config, env="multi_agent_cartpole")`
[RLlib] Prototype: Model Trajectory View API, part 0 (#9171) 2020-06-30 05:33:19 +02:00
[RLlib] Unify all RLlib Trainer.train() -> results[info][learner][policy ID][learner_stats] and add structure tests. (#18879) 2021-09-30 16:39:05 +02:00			`results = a.train()`
			`check_train_results(results)`
			`print(results)`
[RLlib] Prototype: Model Trajectory View API, part 0 (#9171) 2020-06-30 05:33:19 +02:00			`a.stop()`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00

[RLlib] Retire `try_import_tree` (should be installed along with other requirements). (#9211) - Retire try_import_tree. - Stabilize test_supported_multi_agent.py. 2020-07-02 13:06:34 +02:00			`class TestSupportedMultiAgentPG(unittest.TestCase):`
[RLlib] Issue 8769 broken OOM tests_dir cases (R & S). (#8770) 2020-06-05 08:34:21 +02:00			`@classmethod`
			`def setUpClass(cls) -> None:`
			`ray.init(num_cpus=4)`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00
[RLlib] Issue 8769 broken OOM tests_dir cases (R & S). (#8770) 2020-06-05 08:34:21 +02:00			`@classmethod`
			`def tearDownClass(cls) -> None:`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`ray.shutdown()`

			`def test_a3c_multiagent(self):`
			`check_support_multiagent(`
			`"A3C", {"num_workers": 1, "optimizer": {"grads_per_step": 1}}`
			`)`

[RLlib] Retire `try_import_tree` (should be installed along with other requirements). (#9211) - Retire try_import_tree. - Stabilize test_supported_multi_agent.py. 2020-07-02 13:06:34 +02:00			`def test_impala_multiagent(self):`
			`check_support_multiagent("IMPALA", {"num_gpus": 0})`

			`def test_pg_multiagent(self):`
			`check_support_multiagent("PG", {"num_workers": 1, "optimizer": {}})`

			`def test_ppo_multiagent(self):`
			`check_support_multiagent(`
			`"PPO",`
			`{`
			`"num_workers": 1,`
			`"num_sgd_iter": 1,`
			`"train_batch_size": 10,`
			`"rollout_fragment_length": 10,`
			`"sgd_minibatch_size": 1,`
			`},`
			`)`


			`class TestSupportedMultiAgentOffPolicy(unittest.TestCase):`
			`@classmethod`
			`def setUpClass(cls) -> None:`
[RLlib] Extend on_learn_on_batch callback to allow for custom metrics to be added. (#13584) 2021-02-08 15:02:19 +01:00			`ray.init(num_cpus=6)`
[RLlib] Retire `try_import_tree` (should be installed along with other requirements). (#9211) - Retire try_import_tree. - Stabilize test_supported_multi_agent.py. 2020-07-02 13:06:34 +02:00
			`@classmethod`
			`def tearDownClass(cls) -> None:`
			`ray.shutdown()`

[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`def test_apex_multiagent(self):`
			`check_support_multiagent(`
			`"APEX",`
			`{`
			`"num_workers": 2,`
[RLlib] Trainer.training_iteration -> Trainer.training_step; Iterations vs reportings: Clarification of terms. (#25076) 2022-06-10 17:09:18 +02:00			`"min_sample_timesteps_per_iteration": 100,`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`"num_gpus": 0,`
[RLlib] Replay Buffer API and Ape-X. (#24506) 2022-05-17 13:43:49 +02:00			`"replay_buffer_config": {`
			`"capacity": 1000,`
			`"learning_starts": 10,`
			`},`
[RLlib] Trainer.training_iteration -> Trainer.training_step; Iterations vs reportings: Clarification of terms. (#25076) 2022-06-10 17:09:18 +02:00			`"min_time_s_per_iteration": 1,`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`"target_network_update_freq": 100,`
[RLlib] Extend on_learn_on_batch callback to allow for custom metrics to be added. (#13584) 2021-02-08 15:02:19 +01:00			`"optimizer": {`
			`"num_replay_buffer_shards": 1,`
			`},`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`},`
			`)`

			`def test_apex_ddpg_multiagent(self):`
			`check_support_multiagent(`
			`"APEX_DDPG",`
			`{`
			`"num_workers": 2,`
[RLlib] Trainer.training_iteration -> Trainer.training_step; Iterations vs reportings: Clarification of terms. (#25076) 2022-06-10 17:09:18 +02:00			`"min_sample_timesteps_per_iteration": 100,`
[RLlib] Replay Buffer API and Ape-X. (#24506) 2022-05-17 13:43:49 +02:00			`"replay_buffer_config": {`
			`"capacity": 1000,`
			`"learning_starts": 10,`
			`},`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`"num_gpus": 0,`
[RLlib] Trainer.training_iteration -> Trainer.training_step; Iterations vs reportings: Clarification of terms. (#25076) 2022-06-10 17:09:18 +02:00			`"min_time_s_per_iteration": 1,`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`"target_network_update_freq": 100,`
			`"use_state_preprocessor": True,`
			`},`
			`)`

			`def test_ddpg_multiagent(self):`
			`check_support_multiagent(`
			`"DDPG",`
			`{`
[RLlib] Trainer.training_iteration -> Trainer.training_step; Iterations vs reportings: Clarification of terms. (#25076) 2022-06-10 17:09:18 +02:00			`"min_sample_timesteps_per_iteration": 1,`
[RLlib] Replay Buffer API and Ape-X. (#24506) 2022-05-17 13:43:49 +02:00			`"replay_buffer_config": {`
			`"capacity": 1000,`
			`"learning_starts": 500,`
			`},`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`"use_state_preprocessor": True,`
			`},`
			`)`

			`def test_dqn_multiagent(self):`
[RLlib] Issue 8769 broken OOM tests_dir cases (R & S). (#8770) 2020-06-05 08:34:21 +02:00			`check_support_multiagent(`
			`"DQN",`
			`{`
[RLlib] Trainer.training_iteration -> Trainer.training_step; Iterations vs reportings: Clarification of terms. (#25076) 2022-06-10 17:09:18 +02:00			`"min_sample_timesteps_per_iteration": 1,`
[RLlib] Replay Buffer API and Ape-X. (#24506) 2022-05-17 13:43:49 +02:00			`"replay_buffer_config": {`
			`"capacity": 1000,`
			`},`
[RLlib] Issue 8769 broken OOM tests_dir cases (R & S). (#8770) 2020-06-05 08:34:21 +02:00			`},`
			`)`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00
			`def test_sac_multiagent(self):`
			`check_support_multiagent(`
			`"SAC",`
			`{`
			`"num_workers": 0,`
[RLlib] Replay Buffer API and Ape-X. (#24506) 2022-05-17 13:43:49 +02:00			`"replay_buffer_config": {`
			`"capacity": 1000,`
			`},`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`"normalize_actions": False,`
			`},`
			`)`


			`if __name__ == "__main__":`
			`import pytest`
			`import sys`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00
[RLlib] Retire `try_import_tree` (should be installed along with other requirements). (#9211) - Retire try_import_tree. - Stabilize test_supported_multi_agent.py. 2020-07-02 13:06:34 +02:00			`# One can specify the specific TestCase class to run.`
			`# None for all unittest.TestCase classes in this file.`
[RLlib] Tf2.x native. (#8752) 2020-07-11 22:06:35 +02:00			`class_ = sys.argv[1] if len(sys.argv) > 1 else None`
ci: Redo `format.sh --all` script & backfill lint fixes (#9956) 2020-08-07 16:49:49 -07:00			`sys.exit(pytest.main(["-v", __file__ + ("" if class_ is None else "::" + class_)]))`