# ray/rllib/agents/impala/tests/test_impala.py

import copy
import unittest

import ray
import ray.rllib.agents.impala as impala
from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import check, \
    check_compute_single_action, framework_iterator

# Resolve TensorFlow through RLlib's import helper, which returns three
# objects bound here to tf1, tf and tfv.
# NOTE(review): presumably the tf.compat.v1 module, the tf module and the
# installed TF major version -- confirm against `try_import_tf`.
# None of the three names is referenced by the tests visible in this file.
tf1, tf, tfv = try_import_tf()


class TestIMPALA(unittest.TestCase):
    """Compilation, lr-schedule and fake multi-GPU tests for ImpalaTrainer.

    Every test works on a deep copy of `impala.DEFAULT_CONFIG` so that
    changes to nested dicts (e.g. `config["model"]`) never leak into the
    module-level defaults or into the other tests of this class.
    """

    @classmethod
    def setUpClass(cls) -> None:
        """Start ray once for all tests in this class."""
        ray.init()

    @classmethod
    def tearDownClass(cls) -> None:
        """Shut ray down after the last test of this class has run."""
        ray.shutdown()

    def test_impala_compilation(self):
        """Test whether an ImpalaTrainer can be built and trained.

        Runs once per framework yielded by `framework_iterator`, each time
        without and with an LSTM model.
        """
        # Deep copy: the nested "model" dict is modified below and must not
        # be shared with `impala.DEFAULT_CONFIG`.
        config = copy.deepcopy(impala.DEFAULT_CONFIG)
        config["num_gpus"] = 0
        config["model"]["lstm_use_prev_action"] = True
        config["model"]["lstm_use_prev_reward"] = True
        num_iterations = 1
        env = "CartPole-v0"

        for _ in framework_iterator(config):
            # Copy inside the loop so that whatever `framework_iterator`
            # wrote into `config` for this iteration is carried over.
            local_cfg = copy.deepcopy(config)
            for lstm in [False, True]:
                # Test with and w/o aggregation workers (this has nothing
                # to do with LSTMs, though).
                local_cfg["num_aggregation_workers"] = 0 if not lstm else 1
                local_cfg["model"]["use_lstm"] = lstm
                print("lstm={} aggregation-worker={}".format(
                    lstm, local_cfg["num_aggregation_workers"]))
                trainer = impala.ImpalaTrainer(config=local_cfg, env=env)
                try:
                    for _ in range(num_iterations):
                        print(trainer.train())
                    check_compute_single_action(
                        trainer,
                        include_state=lstm,
                        include_prev_action_reward=lstm,
                    )
                finally:
                    # Always release the trainer, even if a step failed.
                    trainer.stop()

    def test_impala_lr_schedule(self):
        """Test that `lr_schedule` overrides `lr` and that the lr decays.

        The policy's initial learning rate must equal the first schedule
        value (0.0005), not `config["lr"]` (0.1), and the `cur_lr` reported
        by the second `train()` call must be lower than the first one's.
        """
        config = copy.deepcopy(impala.DEFAULT_CONFIG)
        config["num_gpus"] = 0  # Do not use any (fake) GPUs.
        # Test whether we correctly ignore the "lr" setting.
        # The first lr should be 0.0005.
        config["lr"] = 0.1
        config["lr_schedule"] = [
            [0, 0.0005],
            [10000, 0.000001],
        ]
        config["env"] = "CartPole-v0"

        def get_lr(result):
            """Return the default policy's `cur_lr` from a train result."""
            return result["info"]["learner"][DEFAULT_POLICY_ID]["cur_lr"]

        for fw in framework_iterator(config, frameworks=("tf", "torch")):
            trainer = impala.ImpalaTrainer(config=config)
            policy = trainer.get_policy()

            try:
                if fw == "tf":
                    # Under "tf", `cur_lr` has to be evaluated in the
                    # policy's session to obtain its value.
                    check(policy.get_session().run(policy.cur_lr), 0.0005)
                else:
                    check(policy.cur_lr, 0.0005)
                r1 = trainer.train()
                r2 = trainer.train()
                assert get_lr(r2) < get_lr(r1), (r1, r2)
            finally:
                trainer.stop()

    def test_impala_fake_multi_gpu_learning(self):
        """Test whether IMPALATrainer can learn CartPole w/ faked multi-GPU."""
        config = copy.deepcopy(impala.DEFAULT_CONFIG)
        # Fake GPU setup.
        config["_fake_gpus"] = True
        config["num_gpus"] = 2

        config["train_batch_size"] *= 2

        # Test w/ LSTMs.
        config["model"]["use_lstm"] = True

        num_iterations = 200

        for _ in framework_iterator(config, frameworks=("tf", "torch")):
            trainer = impala.ImpalaTrainer(config=config, env="CartPole-v0")
            try:
                learnt = False
                for _ in range(num_iterations):
                    results = trainer.train()
                    print(results)
                    # Stop early once the mean episode reward passes 55.
                    if results["episode_reward_mean"] > 55.0:
                        learnt = True
                        break
                assert learnt, \
                    "IMPALA multi-GPU (with fake-GPUs) did not learn CartPole!"
            finally:
                # Stop the trainer even when the learning assertion fails.
                trainer.stop()


if __name__ == "__main__":
    import sys

    import pytest

    # Run only this file, verbosely, and hand pytest's return value to the
    # shell as the process exit status.
    exit_code = pytest.main(["-v", __file__])
    sys.exit(exit_code)
[RLlib] Refactor: All tf static graph code should reside inside Policy class. (#17169) 2021-07-20 14:58:13 -04:00			`import copy`
[RLlib] IMPALA PyTorch (#8287) This PR adds an IMPALA PyTorch implementation. - adds compilation tests for LSTM and w/o LSTM. - adds learning test for CartPole. 2020-05-03 13:44:25 +02:00			`import unittest`

			`import ray`
			`import ray.rllib.agents.impala as impala`
[RLlib] BC/MARWIL/recurrent nets minor cleanups and bug fixes. (#13064) 2020-12-27 09:46:03 -05:00			`from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID`
[RLlib] IMPALA PyTorch (#8287) This PR adds an IMPALA PyTorch implementation. - adds compilation tests for LSTM and w/o LSTM. - adds learning test for CartPole. 2020-05-03 13:44:25 +02:00			`from ray.rllib.utils.framework import try_import_tf`
[RLlib] Discussion 1928: Initial lr wrong if schedule used that includes ts=0 (both tf and torch). (#15538) 2021-04-27 17:19:52 +02:00			`from ray.rllib.utils.test_utils import check, \`
			`check_compute_single_action, framework_iterator`
[RLlib] IMPALA PyTorch (#8287) This PR adds an IMPALA PyTorch implementation. - adds compilation tests for LSTM and w/o LSTM. - adds learning test for CartPole. 2020-05-03 13:44:25 +02:00
[RLlib] Tf2x preparation; part 2 (upgrading `try_import_tf()`). (#9136) * WIP. * Fixes. * LINT. * WIP. * WIP. * Fixes. * Fixes. * Fixes. * Fixes. * WIP. * Fixes. * Test * Fix. * Fixes and LINT. * Fixes and LINT. * LINT. 2020-06-30 10:13:20 +02:00			`tf1, tf, tfv = try_import_tf()`
[RLlib] IMPALA PyTorch (#8287) This PR adds an IMPALA PyTorch implementation. - adds compilation tests for LSTM and w/o LSTM. - adds learning test for CartPole. 2020-05-03 13:44:25 +02:00

			`class TestIMPALA(unittest.TestCase):`
			`@classmethod`
This PR fixes the currently broken lstm_use_prev_action_reward flag for default lstm models (model.use_lstm=True). (#8970) 2020-06-27 20:50:01 +02:00			`def setUpClass(cls) -> None:`
			`ray.init()`
[RLlib] IMPALA PyTorch (#8287) This PR adds an IMPALA PyTorch implementation. - adds compilation tests for LSTM and w/o LSTM. - adds learning test for CartPole. 2020-05-03 13:44:25 +02:00
			`@classmethod`
This PR fixes the currently broken lstm_use_prev_action_reward flag for default lstm models (model.use_lstm=True). (#8970) 2020-06-27 20:50:01 +02:00			`def tearDownClass(cls) -> None:`
[RLlib] IMPALA PyTorch (#8287) This PR adds an IMPALA PyTorch implementation. - adds compilation tests for LSTM and w/o LSTM. - adds learning test for CartPole. 2020-05-03 13:44:25 +02:00			`ray.shutdown()`

			`def test_impala_compilation(self):`
			`"""Test whether an ImpalaTrainer can be built with both frameworks."""`
			`config = impala.DEFAULT_CONFIG.copy()`
[RLlib] Issues: 17397, 17425, 16715, 17174. When on driver, Torch\|TFPolicy should not use `ray.get_gpu_ids()` (b/c no GPUs assigned by ray). (#17444) 2021-08-02 17:29:59 -04:00			`config["num_gpus"] = 0`
[RLlib] Issue #13802: Enhance metrics for `multiagent->count_steps_by=agent_steps` setting. (#14033) 2021-03-18 20:27:41 +01:00			`config["model"]["lstm_use_prev_action"] = True`
			`config["model"]["lstm_use_prev_reward"] = True`
[RLlib] IMPALA PyTorch (#8287) This PR adds an IMPALA PyTorch implementation. - adds compilation tests for LSTM and w/o LSTM. - adds learning test for CartPole. 2020-05-03 13:44:25 +02:00			`num_iterations = 1`
[RLlib] Issue #13802: Enhance metrics for `multiagent->count_steps_by=agent_steps` setting. (#14033) 2021-03-18 20:27:41 +01:00			`env = "CartPole-v0"`
[RLlib] IMPALA PyTorch (#8287) This PR adds an IMPALA PyTorch implementation. - adds compilation tests for LSTM and w/o LSTM. - adds learning test for CartPole. 2020-05-03 13:44:25 +02:00
[RLlib] Tf2.x native. (#8752) 2020-07-11 22:06:35 +02:00			`for _ in framework_iterator(config):`
[RLlib] IMPALA PyTorch (#8287) This PR adds an IMPALA PyTorch implementation. - adds compilation tests for LSTM and w/o LSTM. - adds learning test for CartPole. 2020-05-03 13:44:25 +02:00			`local_cfg = config.copy()`
[RLlib] Issue #13802: Enhance metrics for `multiagent->count_steps_by=agent_steps` setting. (#14033) 2021-03-18 20:27:41 +01:00			`for lstm in [False, True]:`
			`local_cfg["num_aggregation_workers"] = 0 if not lstm else 1`
			`local_cfg["model"]["use_lstm"] = lstm`
			`print("lstm={} aggregation-worker={}".format(`
			`lstm, local_cfg["num_aggregation_workers"]))`
			`# Test with and w/o aggregation workers (this has nothing`
			`# to do with LSTMs, though).`
[RLlib] IMPALA PyTorch (#8287) This PR adds an IMPALA PyTorch implementation. - adds compilation tests for LSTM and w/o LSTM. - adds learning test for CartPole. 2020-05-03 13:44:25 +02:00			`trainer = impala.ImpalaTrainer(config=local_cfg, env=env)`
			`for i in range(num_iterations):`
			`print(trainer.train())`
This PR fixes the currently broken lstm_use_prev_action_reward flag for default lstm models (model.use_lstm=True). (#8970) 2020-06-27 20:50:01 +02:00			`check_compute_single_action(`
			`trainer,`
[RLlib] Issue #13802: Enhance metrics for `multiagent->count_steps_by=agent_steps` setting. (#14033) 2021-03-18 20:27:41 +01:00			`include_state=lstm,`
			`include_prev_action_reward=lstm,`
			`)`
[rllib] Distributed exec workflow for impala (#8321) 2020-05-11 20:24:43 -07:00			`trainer.stop()`
[RLlib] IMPALA PyTorch (#8287) This PR adds an IMPALA PyTorch implementation. - adds compilation tests for LSTM and w/o LSTM. - adds learning test for CartPole. 2020-05-03 13:44:25 +02:00
[RLlib] Minor fixes (torch GPU bugs + some cleanup). (#11609) 2020-10-27 10:00:24 +01:00			`def test_impala_lr_schedule(self):`
			`config = impala.DEFAULT_CONFIG.copy()`
[RLlib] Issues: 17397, 17425, 16715, 17174. When on driver, Torch\|TFPolicy should not use `ray.get_gpu_ids()` (b/c no GPUs assigned by ray). (#17444) 2021-08-02 17:29:59 -04:00			`config["num_gpus"] = 0`
[RLlib] Discussion 1928: Initial lr wrong if schedule used that includes ts=0 (both tf and torch). (#15538) 2021-04-27 17:19:52 +02:00			`# Test whether we correctly ignore the "lr" setting.`
			`# The first lr should be 0.0005.`
			`config["lr"] = 0.1`
[RLlib] Minor fixes (torch GPU bugs + some cleanup). (#11609) 2020-10-27 10:00:24 +01:00			`config["lr_schedule"] = [`
			`[0, 0.0005],`
			`[10000, 0.000001],`
			`]`
[RLlib] Refactor: All tf static graph code should reside inside Policy class. (#17169) 2021-07-20 14:58:13 -04:00			`config["num_gpus"] = 0 # Do not use any (fake) GPUs.`
[RLlib] Discussion 1928: Initial lr wrong if schedule used that includes ts=0 (both tf and torch). (#15538) 2021-04-27 17:19:52 +02:00			`config["env"] = "CartPole-v0"`
[RLlib] Minor fixes (torch GPU bugs + some cleanup). (#11609) 2020-10-27 10:00:24 +01:00
			`def get_lr(result):`
[RLlib] BC/MARWIL/recurrent nets minor cleanups and bug fixes. (#13064) 2020-12-27 09:46:03 -05:00			`return result["info"]["learner"][DEFAULT_POLICY_ID]["cur_lr"]`
[RLlib] Minor fixes (torch GPU bugs + some cleanup). (#11609) 2020-10-27 10:00:24 +01:00
[RLlib] Discussion 1928: Initial lr wrong if schedule used that includes ts=0 (both tf and torch). (#15538) 2021-04-27 17:19:52 +02:00			`for fw in framework_iterator(config, frameworks=("tf", "torch")):`
			`trainer = impala.ImpalaTrainer(config=config)`
			`policy = trainer.get_policy()`

			`try:`
			`if fw == "tf":`
[RLlib] Implement policy_maps (multi-agent case) in RolloutWorkers as LRU caches. (#17031) 2021-07-19 13:16:03 -04:00			`check(policy.get_session().run(policy.cur_lr), 0.0005)`
[RLlib] Discussion 1928: Initial lr wrong if schedule used that includes ts=0 (both tf and torch). (#15538) 2021-04-27 17:19:52 +02:00			`else:`
			`check(policy.cur_lr, 0.0005)`
			`r1 = trainer.train()`
			`r2 = trainer.train()`
			`assert get_lr(r2) < get_lr(r1), (r1, r2)`
			`finally:`
			`trainer.stop()`
[RLlib] Minor fixes (torch GPU bugs + some cleanup). (#11609) 2020-10-27 10:00:24 +01:00
[RLlib] Refactor: All tf static graph code should reside inside Policy class. (#17169) 2021-07-20 14:58:13 -04:00			`def test_impala_fake_multi_gpu_learning(self):`
			`"""Test whether IMPALATrainer can learn CartPole w/ faked multi-GPU."""`
			`config = copy.deepcopy(impala.DEFAULT_CONFIG)`
			`# Fake GPU setup.`
			`config["_fake_gpus"] = True`
			`config["num_gpus"] = 2`

			`config["train_batch_size"] *= 2`

			`# Test w/ LSTMs.`
			`config["model"]["use_lstm"] = True`

			`for _ in framework_iterator(config, frameworks=("tf", "torch")):`
			`trainer = impala.ImpalaTrainer(config=config, env="CartPole-v0")`
			`num_iterations = 200`
			`learnt = False`
			`for i in range(num_iterations):`
			`results = trainer.train()`
			`print(results)`
			`if results["episode_reward_mean"] > 55.0:`
			`learnt = True`
			`break`
			`assert learnt, \`
			`"IMPALA multi-GPU (with fake-GPUs) did not learn CartPole!"`
			`trainer.stop()`

[RLlib] IMPALA PyTorch (#8287) This PR adds an IMPALA PyTorch implementation. - adds compilation tests for LSTM and w/o LSTM. - adds learning test for CartPole. 2020-05-03 13:44:25 +02:00
			`if __name__ == "__main__":`
			`import pytest`
			`import sys`
			`sys.exit(pytest.main(["-v", __file__]))`