# ray/rllib/agents/bandit/tests/test_bandits.py

import unittest

import ray
import ray.rllib.agents.bandit.bandit as bandit
from ray.rllib.examples.env.bandit_envs_discrete import SimpleContextualBandit
from ray.rllib.utils.test_utils import check_train_results, framework_iterator


class TestBandits(unittest.TestCase):
    """Build-and-learn tests for the BanditLinTS and BanditLinUCB trainers."""

    @classmethod
    def setUpClass(cls) -> None:
        # Start Ray once for all tests in this class.
        ray.init()

    @classmethod
    def tearDownClass(cls) -> None:
        ray.shutdown()

    def _check_trainer_learns(self, trainer_cls, config, num_iterations=5):
        """Build `trainer_cls` from `config`, train it, and check the reward.

        For the torch framework and for train batch sizes 1 and 10, a fresh
        trainer is created, trained for `num_iterations` iterations, and its
        last reported "episode_reward_mean" must equal 10.0.

        Args:
            trainer_cls: Trainer class, instantiated as
                `trainer_cls(config=config)`.
            config: Trainer config dict. Its "train_batch_size" key is
                overwritten in place for every tested batch size.
            num_iterations: Number of `train()` calls per trainer.
        """
        for _ in framework_iterator(config, frameworks="torch"):
            for train_batch_size in [1, 10]:
                config["train_batch_size"] = train_batch_size
                trainer = trainer_cls(config=config)
                # Always stop the trainer, even when training or one of the
                # checks below fails, so a failing case does not leave it
                # running for the remaining cases.
                try:
                    results = None
                    for _ in range(num_iterations):
                        results = trainer.train()
                        check_train_results(results)
                        print(results)
                    # Force good learning behavior (this is a very simple env).
                    # assertEqual reports both values on failure.
                    self.assertEqual(results["episode_reward_mean"], 10.0)
                finally:
                    trainer.stop()

    def test_bandit_lin_ts_compilation(self):
        """Test whether a BanditLinTSTrainer can be built and learns (torch)."""
        config = {
            # Use a simple bandit-friendly env.
            "env": SimpleContextualBandit,
            "num_envs_per_worker": 2,  # Test batched inference.
            "num_workers": 2,  # Test distributed bandits.
        }
        self._check_trainer_learns(bandit.BanditLinTSTrainer, config)

    def test_bandit_lin_ucb_compilation(self):
        """Test whether a BanditLinUCBTrainer can be built and learns (torch)."""
        config = {
            # Use a simple bandit-friendly env.
            "env": SimpleContextualBandit,
            "num_envs_per_worker": 2,  # Test batched inference.
        }
        self._check_trainer_learns(bandit.BanditLinUCBTrainer, config)


if __name__ == "__main__":
    import sys

    import pytest

    # Run only this file's tests, verbosely, and hand pytest's return value
    # to the shell as the process exit status.
    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)
[RLlib] Move bandits into main agents folder; Make RecSim adapter more accessible; (#21773) 2022-01-27 13:58:12 +01:00			`import unittest`

			`import ray`
			`import ray.rllib.agents.bandit.bandit as bandit`
			`from ray.rllib.examples.env.bandit_envs_discrete import SimpleContextualBandit`
			`from ray.rllib.utils.test_utils import check_train_results, framework_iterator`


			`class TestBandits(unittest.TestCase):`
			`@classmethod`
			`def setUpClass(cls) -> None:`
			`ray.init()`

			`@classmethod`
			`def tearDownClass(cls) -> None:`
			`ray.shutdown()`

			`def test_bandit_lin_ts_compilation(self):`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`"""Test whether a BanditLinTSTrainer can be built on all frameworks."""`
[RLlib] Move bandits into main agents folder; Make RecSim adapter more accessible; (#21773) 2022-01-27 13:58:12 +01:00			`config = {`
[RLlib] Enable Bandits to work in batches mode(s) (vector envs + multiple workers + train_batch_sizes > 1). (#22465) 2022-02-17 22:32:26 +01:00			`# Use a simple bandit-friendly env.`
[RLlib] Move bandits into main agents folder; Make RecSim adapter more accessible; (#21773) 2022-01-27 13:58:12 +01:00			`"env": SimpleContextualBandit,`
[RLlib] Enable Bandits to work in batches mode(s) (vector envs + multiple workers + train_batch_sizes > 1). (#22465) 2022-02-17 22:32:26 +01:00			`"num_envs_per_worker": 2, # Test batched inference.`
			`"num_workers": 2, # Test distributed bandits.`
[RLlib] Move bandits into main agents folder; Make RecSim adapter more accessible; (#21773) 2022-01-27 13:58:12 +01:00			`}`

			`num_iterations = 5`

			`for _ in framework_iterator(config, frameworks="torch"):`
[RLlib] Enable Bandits to work in batches mode(s) (vector envs + multiple workers + train_batch_sizes > 1). (#22465) 2022-02-17 22:32:26 +01:00			`for train_batch_size in [1, 10]:`
			`config["train_batch_size"] = train_batch_size`
			`trainer = bandit.BanditLinTSTrainer(config=config)`
			`results = None`
			`for i in range(num_iterations):`
			`results = trainer.train()`
			`check_train_results(results)`
			`print(results)`
			`# Force good learning behavior (this is a very simple env).`
			`self.assertTrue(results["episode_reward_mean"] == 10.0)`
			`trainer.stop()`
[RLlib] Move bandits into main agents folder; Make RecSim adapter more accessible; (#21773) 2022-01-27 13:58:12 +01:00
[RLlib] Some more `bandit` cleanup/tests. (#21932) 2022-01-28 12:03:26 +01:00			`def test_bandit_lin_ucb_compilation(self):`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00			`"""Test whether a BanditLinUCBTrainer can be built on all frameworks."""`
[RLlib] Some more `bandit` cleanup/tests. (#21932) 2022-01-28 12:03:26 +01:00			`config = {`
[RLlib] Enable Bandits to work in batches mode(s) (vector envs + multiple workers + train_batch_sizes > 1). (#22465) 2022-02-17 22:32:26 +01:00			`# Use a simple bandit-friendly env.`
[RLlib] Some more `bandit` cleanup/tests. (#21932) 2022-01-28 12:03:26 +01:00			`"env": SimpleContextualBandit,`
[RLlib] Enable Bandits to work in batches mode(s) (vector envs + multiple workers + train_batch_sizes > 1). (#22465) 2022-02-17 22:32:26 +01:00			`"num_envs_per_worker": 2, # Test batched inference.`
[RLlib] Some more `bandit` cleanup/tests. (#21932) 2022-01-28 12:03:26 +01:00			`}`

			`num_iterations = 5`

			`for _ in framework_iterator(config, frameworks="torch"):`
[RLlib] Enable Bandits to work in batches mode(s) (vector envs + multiple workers + train_batch_sizes > 1). (#22465) 2022-02-17 22:32:26 +01:00			`for train_batch_size in [1, 10]:`
			`config["train_batch_size"] = train_batch_size`
			`trainer = bandit.BanditLinUCBTrainer(config=config)`
			`results = None`
			`for i in range(num_iterations):`
			`results = trainer.train()`
			`check_train_results(results)`
			`print(results)`
			`# Force good learning behavior (this is a very simple env).`
			`self.assertTrue(results["episode_reward_mean"] == 10.0)`
			`trainer.stop()`
[RLlib] Some more `bandit` cleanup/tests. (#21932) 2022-01-28 12:03:26 +01:00
[RLlib] Move bandits into main agents folder; Make RecSim adapter more accessible; (#21773) 2022-01-27 13:58:12 +01:00
			`if __name__ == "__main__":`
			`import pytest`
			`import sys`
[CI] Format Python code with Black (#21975) See #21316 and #21311 for the motivation behind these changes. 2022-01-29 18:41:57 -08:00
[RLlib] Move bandits into main agents folder; Make RecSim adapter more accessible; (#21773) 2022-01-27 13:58:12 +01:00			`sys.exit(pytest.main(["-v", __file__]))`