# ray/rllib/agents/qmix/tests/test_qmix.py

from gym.spaces import Box, Dict, Discrete, MultiDiscrete, Tuple
import numpy as np
import unittest

import ray
from ray.tune import register_env
from ray.rllib.agents.qmix import QMixConfig
from ray.rllib.env.multi_agent_env import MultiAgentEnv


class AvailActionsTestEnv(MultiAgentEnv):
    """Two-agent test env that asserts chosen actions respect an action mask.

    Every observation carries a nested "obs" sample plus an "action_mask"
    vector of length `num_actions` in which only the indices listed in
    `env_config["avail_actions"]` are set to 1. From the second step of an
    episode onwards, `step()` asserts that both agents picked one of the
    available actions.
    """

    num_actions = 10
    action_space = Discrete(num_actions)
    observation_space = Dict(
        {
            "obs": Dict(
                {
                    "test": Dict({"a": Discrete(2), "b": MultiDiscrete([2, 3, 4])}),
                    "state": MultiDiscrete([2, 2, 2]),
                }
            ),
            "action_mask": Box(0, 1, (num_actions,)),
        }
    )

    def __init__(self, env_config):
        """Build the fixed action mask from the env config.

        Args:
            env_config: Mapping that may contain "avail_actions", an iterable
                of action indices that are allowed. Defaults to `[3]`.
        """
        super().__init__()
        # Number of `step()` calls in the current episode (None before reset).
        self.state = None
        self.avail = env_config.get("avail_actions", [3])
        # Size the mask from `num_actions` so that it always matches the
        # declared "action_mask" space instead of a duplicated literal.
        self.action_mask = np.array([0] * self.num_actions)
        for a in self.avail:
            self.action_mask[a] = 1

    def _make_obs(self):
        """Return a fresh observation dict for both agents.

        Each agent gets its own random sample of the "obs" sub-space; the
        same `self.action_mask` array object is handed to both.
        """
        return {
            agent_id: {
                "obs": self.observation_space["obs"].sample(),
                "action_mask": self.action_mask,
            }
            for agent_id in ("agent_1", "agent_2")
        }

    def reset(self):
        """Reset the step counter and return the initial observations."""
        self.state = 0
        return self._make_obs()

    def step(self, action_dict):
        """Advance one step.

        Args:
            action_dict: Mapping with the actions of "agent_1" and "agent_2".

        Returns:
            Tuple of (obs, rewards, dones, infos). Rewards are fixed at 1 for
            "agent_1" and 0.5 for "agent_2"; the episode is done for all
            agents once 20 steps have been taken; infos is empty.

        Raises:
            AssertionError: If, after the first step of the episode, either
                agent's action is not in the available-actions list.
        """
        # The very first step of an episode is not checked.
        if self.state > 0:
            assert (
                action_dict["agent_1"] in self.avail
                and action_dict["agent_2"] in self.avail
            ), "Failed to obey available actions mask!"
        self.state += 1
        rewards = {"agent_1": 1, "agent_2": 0.5}
        obs = self._make_obs()
        dones = {"__all__": self.state >= 20}
        return obs, rewards, dones, {}


class TestQMix(unittest.TestCase):
    """QMIX tests that run against a local Ray instance."""

    @classmethod
    def setUpClass(cls) -> None:
        # One Ray instance is shared by all tests in this class.
        ray.init()

    @classmethod
    def tearDownClass(cls) -> None:
        ray.shutdown()

    def test_avail_actions_qmix(self):
        """QMIX must only pick actions allowed by the env's action mask.

        Both agents are grouped into a single group (QMIX operates on agent
        groups), the grouped env is registered, and training is run for five
        iterations. The env itself asserts that the mask is obeyed; the final
        iteration must additionally report a mean episode reward of 30.0.
        """
        grouping = {
            "group_1": ["agent_1", "agent_2"],
        }
        obs_space = Tuple(
            [
                AvailActionsTestEnv.observation_space,
                AvailActionsTestEnv.observation_space,
            ]
        )
        act_space = Tuple(
            [AvailActionsTestEnv.action_space, AvailActionsTestEnv.action_space]
        )
        register_env(
            "action_mask_test",
            lambda config: AvailActionsTestEnv(config).with_agent_groups(
                grouping, obs_space=obs_space, act_space=act_space
            ),
        )

        config = (
            QMixConfig()
            .framework(framework="torch")
            .environment(
                env="action_mask_test",
                env_config={"avail_actions": [3, 4, 8]},
            )
            # Test with vectorization on.
            .rollouts(num_envs_per_worker=5)
        )

        trainer = config.build()
        # Always stop the trainer, even if training or the final check fails,
        # so its workers are not leaked. Ray itself is shut down once in
        # `tearDownClass`, not here, so further tests in this class still
        # find a running Ray instance.
        try:
            for _ in range(4):
                trainer.train()  # OK if it doesn't trip the action assertion error

            results = trainer.train()
            self.assertEqual(results["episode_reward_mean"], 30.0)
        finally:
            trainer.stop()


if __name__ == "__main__":
    # Running this file directly hands it to pytest in verbose mode and
    # uses pytest's return value as the process exit status.
    import sys
    import pytest

    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)
[RLlib] Add QMIX support for complex obs spaces (Issue 8523). (#8533) 2020-05-22 10:17:51 +02:00			`from gym.spaces import Box, Dict, Discrete, MultiDiscrete, Tuple`
[RLlib] Cleanup/unify all test cases. (#7533) 2020-03-12 04:39:47 +01:00			`import numpy as np`
			`import unittest`
[rllib] Q-Mix implementation (Q-Mix, VDN, IQN, and Ape-X variants) (#3548) 2018-12-18 10:40:01 -08:00
			`import ray`
			`from ray.tune import register_env`
[RLlib] QMix TrainerConfig objects. (#24775) 2022-05-13 18:50:28 +02:00			`from ray.rllib.agents.qmix import QMixConfig`
[RLlib] Add 2 Transformer learning test cases on StatelessCartPole (PPO and IMPALA). (#8624) 2020-05-27 10:19:47 +02:00			`from ray.rllib.env.multi_agent_env import MultiAgentEnv`
[rllib] Q-Mix implementation (Q-Mix, VDN, IQN, and Ape-X variants) (#3548) 2018-12-18 10:40:01 -08:00

			`class AvailActionsTestEnv(MultiAgentEnv):`
[RLlib] Add QMIX support for complex obs spaces (Issue 8523). (#8533) 2020-05-22 10:17:51 +02:00			`num_actions = 10`
			`action_space = Discrete(num_actions)`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`observation_space = Dict(`
			`{`
			`"obs": Dict(`
			`{`
			`"test": Dict({"a": Discrete(2), "b": MultiDiscrete([2, 3, 4])}),`
			`"state": MultiDiscrete([2, 2, 2]),`
			`}`
			`),`
			`"action_mask": Box(0, 1, (num_actions,)),`
			`}`
			`)`
[rllib] Q-Mix implementation (Q-Mix, VDN, IQN, and Ape-X variants) (#3548) 2018-12-18 10:40:01 -08:00
			`def __init__(self, env_config):`
[RLlib] Base env pre-checker. (#21569) 2022-01-18 07:34:06 -08:00			`super().__init__()`
[rllib] Q-Mix implementation (Q-Mix, VDN, IQN, and Ape-X variants) (#3548) 2018-12-18 10:40:01 -08:00			`self.state = None`
[RLlib] QMIX better defaults + added to CI learning tests (#21332) 2022-01-04 08:54:41 +01:00			`self.avail = env_config.get("avail_actions", [3])`
[rllib] validate observation in NoPreprocessor (#4546) 2019-04-07 16:11:50 -07:00			`self.action_mask = np.array([0] * 10)`
[RLlib] QMIX better defaults + added to CI learning tests (#21332) 2022-01-04 08:54:41 +01:00			`for a in self.avail:`
			`self.action_mask[a] = 1`
[rllib] Q-Mix implementation (Q-Mix, VDN, IQN, and Ape-X variants) (#3548) 2018-12-18 10:40:01 -08:00
			`def reset(self):`
			`self.state = 0`
			`return {`
			`"agent_1": {`
[RLlib] Add QMIX support for complex obs spaces (Issue 8523). (#8533) 2020-05-22 10:17:51 +02:00			`"obs": self.observation_space["obs"].sample(),`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`"action_mask": self.action_mask,`
[RLlib] QMIX better defaults + added to CI learning tests (#21332) 2022-01-04 08:54:41 +01:00			`},`
			`"agent_2": {`
			`"obs": self.observation_space["obs"].sample(),`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`"action_mask": self.action_mask,`
[RLlib] QMIX better defaults + added to CI learning tests (#21332) 2022-01-04 08:54:41 +01:00			`},`
[rllib] Q-Mix implementation (Q-Mix, VDN, IQN, and Ape-X variants) (#3548) 2018-12-18 10:40:01 -08:00			`}`

			`def step(self, action_dict):`
			`if self.state > 0:`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`assert (`
			`action_dict["agent_1"] in self.avail`
			`and action_dict["agent_2"] in self.avail`
			`), "Failed to obey available actions mask!"`
[rllib] Q-Mix implementation (Q-Mix, VDN, IQN, and Ape-X variants) (#3548) 2018-12-18 10:40:01 -08:00			`self.state += 1`
[RLlib] QMIX better defaults + added to CI learning tests (#21332) 2022-01-04 08:54:41 +01:00			`rewards = {"agent_1": 1, "agent_2": 0.5}`
[RLlib] Add QMIX support for complex obs spaces (Issue 8523). (#8533) 2020-05-22 10:17:51 +02:00			`obs = {`
			`"agent_1": {`
			`"obs": self.observation_space["obs"].sample(),`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`"action_mask": self.action_mask,`
[RLlib] QMIX better defaults + added to CI learning tests (#21332) 2022-01-04 08:54:41 +01:00			`},`
			`"agent_2": {`
			`"obs": self.observation_space["obs"].sample(),`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`"action_mask": self.action_mask,`
			`},`
[RLlib] Add QMIX support for complex obs spaces (Issue 8523). (#8533) 2020-05-22 10:17:51 +02:00			`}`
[RLlib] QMIX better defaults + added to CI learning tests (#21332) 2022-01-04 08:54:41 +01:00			`dones = {"__all__": self.state >= 20}`
[rllib] Q-Mix implementation (Q-Mix, VDN, IQN, and Ape-X variants) (#3548) 2018-12-18 10:40:01 -08:00			`return obs, rewards, dones, {}`


[RLlib] Issue 8384: QMIX doesn't learn anything. (#9527) 2020-07-17 12:14:34 +02:00			`class TestQMix(unittest.TestCase):`
[RLlib] Issue 15973: Trainer.with_updates(validate_config=...) behaves confusingly. (#16429) 2021-06-19 22:42:00 +02:00			`@classmethod`
			`def setUpClass(cls) -> None:`
			`ray.init()`

			`@classmethod`
			`def tearDownClass(cls) -> None:`
			`ray.shutdown()`

[RLlib] Cleanup/unify all test cases. (#7533) 2020-03-12 04:39:47 +01:00			`def test_avail_actions_qmix(self):`
			`grouping = {`
[RLlib] QMIX better defaults + added to CI learning tests (#21332) 2022-01-04 08:54:41 +01:00			`"group_1": ["agent_1", "agent_2"],`
[RLlib] Cleanup/unify all test cases. (#7533) 2020-03-12 04:39:47 +01:00			`}`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`obs_space = Tuple(`
			`[`
			`AvailActionsTestEnv.observation_space,`
			`AvailActionsTestEnv.observation_space,`
			`]`
			`)`
			`act_space = Tuple(`
			`[AvailActionsTestEnv.action_space, AvailActionsTestEnv.action_space]`
			`)`
[RLlib] Cleanup/unify all test cases. (#7533) 2020-03-12 04:39:47 +01:00			`register_env(`
			`"action_mask_test",`
			`lambda config: AvailActionsTestEnv(config).with_agent_groups(`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`grouping, obs_space=obs_space, act_space=act_space`
			`),`
			`)`
[rllib] Q-Mix implementation (Q-Mix, VDN, IQN, and Ape-X variants) (#3548) 2018-12-18 10:40:01 -08:00
[RLlib] QMix TrainerConfig objects. (#24775) 2022-05-13 18:50:28 +02:00			`config = QMixConfig()\`
			`.framework(framework="torch")\`
			`.environment(`
			`env="action_mask_test",`
			`env_config={"avail_actions": [3, 4, 8]},`
			`)\`
			`.rollouts(num_envs_per_worker=5) # Test with vectorization on.`

			`trainer = config.build()`

[RLlib] Issue 15973: Trainer.with_updates(validate_config=...) behaves confusingly. (#16429) 2021-06-19 22:42:00 +02:00			`for _ in range(4):`
[RLlib] QMIX better defaults + added to CI learning tests (#21332) 2022-01-04 08:54:41 +01:00			`trainer.train() # OK if it doesn't trip the action assertion error`
[RLlib] QMix TrainerConfig objects. (#24775) 2022-05-13 18:50:28 +02:00
[RLlib] QMIX better defaults + added to CI learning tests (#21332) 2022-01-04 08:54:41 +01:00			`assert trainer.train()["episode_reward_mean"] == 30.0`
			`trainer.stop()`
[RLlib] Issue 8714: QMIX init error w/ tuple obs space. (#8936) 2020-06-16 08:50:53 +02:00			`ray.shutdown()`
[RLlib] Cleanup/unify all test cases. (#7533) 2020-03-12 04:39:47 +01:00

			`if __name__ == "__main__":`
			`import pytest`
			`import sys`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00
[RLlib] Cleanup/unify all test cases. (#7533) 2020-03-12 04:39:47 +01:00			`sys.exit(pytest.main(["-v", __file__]))`