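"""Unit tests for RLlib's contextual bandit trainers.

Covers compilation/training of BanditLinTSTrainer and BanditLinUCBTrainer on
the SimpleContextualBandit example environment.
"""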
import unittest

import ray
from ray.rllib.agents.bandit import bandit
from ray.rllib.examples.env.bandit_envs_discrete import SimpleContextualBandit
from ray.rllib.utils.test_utils import check_train_results, framework_iterator


class TestBandits(unittest.TestCase):
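    # Start a local Ray instance once for the whole test class and shut it
    # down again after all tests have run.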
    @classmethod
    def setUpClass(cls) -> None:
        ray.init()

    @classmethod
    def tearDownClass(cls) -> None:
        ray.shutdown()

    def test_bandit_lin_ts_compilation(self):
        """Test whether a BanditLinTSTrainer can be built on all frameworks."""
        config = (
            bandit.BanditLinTSConfig()
            .environment(env=SimpleContextualBandit)
            .rollouts(num_rollout_workers=2, num_envs_per_worker=2)
        )
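        # The config above requests 2 rollout workers with 2 (vectorized) envs
        # each; the loop below builds and trains the trainer with batch sizes
        # of 1 and 10.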
        num_iterations = 5

        for _ in framework_iterator(config, frameworks="torch"):
            for train_batch_size in [1, 10]:
                config.training(train_batch_size=train_batch_size)
                trainer = config.build()
                results = None
                for i in range(num_iterations):
                    results = trainer.train()
                    check_train_results(results)
                    print(results)
                # Force good learning behavior (this is a very simple env).
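                # The exact 10.0 check presumes SimpleContextualBandit's optimal
                # arm returns a reward of 10 on every step.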
                self.assertTrue(results["episode_reward_mean"] == 10.0)
                trainer.stop()

    def test_bandit_lin_ucb_compilation(self):
        """Test whether a BanditLinUCBTrainer can be built on all frameworks."""
        config = (
            bandit.BanditLinUCBConfig()
            .environment(env=SimpleContextualBandit)
            .rollouts(num_envs_per_worker=2)
        )
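        # Same flow as the LinTS test above, but with the default number of
        # rollout workers and 2 (vectorized) envs per worker.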

        num_iterations = 5

        for _ in framework_iterator(config, frameworks="torch"):
            for train_batch_size in [1, 10]:
                config.training(train_batch_size=train_batch_size)
                trainer = config.build()
                results = None
                for i in range(num_iterations):
                    results = trainer.train()
                    check_train_results(results)
                    print(results)
                # Force good learning behavior (this is a very simple env).
                self.assertTrue(results["episode_reward_mean"] == 10.0)
                trainer.stop()


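# Running this file directly is equivalent to invoking pytest in verbose mode
# on just this module.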
if __name__ == "__main__":
    import pytest
    import sys

    sys.exit(pytest.main(["-v", __file__]))