import unittest

import ray
import ray.rllib.agents.a3c as a3c
from ray.rllib.policy.sample_batch import DEFAULT_POLICY_ID
from ray.rllib.utils.metrics.learner_info import LEARNER_INFO, \
    LEARNER_STATS_KEY
from ray.rllib.utils.test_utils import check_compute_single_action, \
    check_train_results, framework_iterator
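# check_train_results() validates the structure of a Trainer.train() result
# dict, check_compute_single_action() exercises the compute_single_action()
# API, and framework_iterator() runs the enclosed block once per configured
# framework (e.g. tf and torch), updating the config in place.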


class TestA3C(unittest.TestCase):
    """Sanity tests for A3C exec impl."""

    def setUp(self):
        ray.init(num_cpus=4)

    def tearDown(self):
        ray.shutdown()

    def test_a3c_compilation(self):
        """Test whether an A3CTrainer can be built with all frameworks."""
        config = a3c.DEFAULT_CONFIG.copy()
        config["num_workers"] = 2
        config["num_envs_per_worker"] = 2

        num_iterations = 1

        # Test against all frameworks.
        for _ in framework_iterator(config):
            for env in ["CartPole-v1", "Pendulum-v0", "PongDeterministic-v0"]:
                print("env={}".format(env))
                config["model"]["use_lstm"] = env == "CartPole-v1"
                trainer = a3c.A3CTrainer(config=config, env=env)
                for i in range(num_iterations):
                    results = trainer.train()
                    check_train_results(results)
                    print(results)
                check_compute_single_action(
                    trainer, include_state=config["model"]["use_lstm"])
                trainer.stop()

    def test_a3c_entropy_coeff_schedule(self):
        """Test A3CTrainer entropy coeff schedule support."""
        config = a3c.DEFAULT_CONFIG.copy()
        config["num_workers"] = 1
        config["num_envs_per_worker"] = 1
        config["train_batch_size"] = 20
        config["batch_mode"] = "truncate_episodes"
        config["rollout_fragment_length"] = 10
        config["timesteps_per_iteration"] = 20
        # Zero metrics reporting delay: this makes sure the global timestep
        # (which the entropy coeff schedule depends on) is updated after each
        # worker rollout.
        config["min_iter_time_s"] = 0
        # Initial entropy coeff; it doesn't really matter because of the
        # schedule below.
        config["entropy_coeff"] = 0.01
        schedule = [
            [0, 0.01],
            [60, 0.001],
            [120, 0.0001],
        ]
        config["entropy_coeff_schedule"] = schedule

        def _step_n_times(trainer, n: int):
            """Step trainer n times.

            Returns:
                The entropy coefficient at the end of the execution.
            """
            for _ in range(n):
                results = trainer.train()
            return results["info"][LEARNER_INFO][DEFAULT_POLICY_ID][
                LEARNER_STATS_KEY]["entropy_coeff"]

        # Test against all frameworks.
        for _ in framework_iterator(config):
            trainer = a3c.A3CTrainer(config=config, env="CartPole-v1")

            coeff = _step_n_times(trainer, 3)  # 60 timesteps
            # PiecewiseSchedule does interpolation. So roughly 0.001 here.
            self.assertLessEqual(coeff, 0.005)
            self.assertGreaterEqual(coeff, 0.0005)
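            # The bounds are deliberately loose: the sampled timestep count
            # typically overshoots the breakpoint a bit and the schedule keeps
            # interpolating toward the next value, so only the order of
            # magnitude is checked.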

            coeff = _step_n_times(trainer, 3)  # 120 timesteps
            # PiecewiseSchedule does interpolation. So roughly 0.0001 here.
            self.assertLessEqual(coeff, 0.0005)
            self.assertGreaterEqual(coeff, 0.00005)

            trainer.stop()


if __name__ == "__main__":
    import pytest
    import sys

    sys.exit(pytest.main(["-v", __file__]))