ray/rllib/algorithms/bc/tests/test_bc.py

import os
from pathlib import Path
import unittest

import ray
import ray.rllib.algorithms.bc as bc
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.test_utils import (
    check_compute_single_action,
    check_train_results,
    framework_iterator,
)

tf1, tf, tfv = try_import_tf()
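
# NOTE: `try_import_tf()` imports TensorFlow defensively, returning the tf1/tf
# modules plus the detected version (or Nones if TF is unavailable), while
# `framework_iterator()` below re-runs the same config under both tf and torch.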


class TestBC(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        ray.init()

    @classmethod
    def tearDownClass(cls):
        ray.shutdown()

    def test_bc_compilation_and_learning_from_offline_file(self):
        """Tests whether BC can be built with all frameworks.

        Also checks that BC learns from a historic-data (offline) file while
        being evaluated on an actual env (using evaluation_num_workers > 0).
        """
        rllib_dir = Path(__file__).parent.parent.parent.parent
        print("rllib dir={}".format(rllib_dir))
        data_file = os.path.join(rllib_dir, "tests/data/cartpole/large.json")
        print("data_file={} exists={}".format(data_file, os.path.isfile(data_file)))
        config = (
            bc.BCConfig()
            .evaluation(
                evaluation_interval=3,
                evaluation_num_workers=1,
                evaluation_duration=5,
                evaluation_parallel_to_training=True,
                evaluation_config={"input": "sampler"},
            )
            .offline_data(input_=[data_file])
        )
        num_iterations = 350
        min_reward = 75.0

        # Test for all frameworks.
        for _ in framework_iterator(config, frameworks=("tf", "torch")):
            trainer = config.build(env="CartPole-v0")
            learnt = False
            for i in range(num_iterations):
                results = trainer.train()
                check_train_results(results)
                print(results)
                eval_results = results.get("evaluation")
                if eval_results:
                    print(
                        "iter={} R={}".format(
                            i, eval_results["episode_reward_mean"]
                        )
                    )
                    # Learn until good reward is reached in the actual env.
                    if eval_results["episode_reward_mean"] > min_reward:
                        print("learnt!")
                        learnt = True
                        break

            if not learnt:
                raise ValueError(
                    "`BC` did not reach {} reward from expert offline "
                    "data!".format(min_reward)
                )

            check_compute_single_action(trainer, include_prev_action_reward=True)

            trainer.stop()
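

# The helper below is a minimal sketch (not used by the tests above) of how the
# same BC setup could be driven outside the unittest harness. The data-file
# path passed in and the iteration count are illustrative assumptions; it only
# relies on APIs already exercised above (BCConfig.offline_data/.evaluation,
# build, train, stop).
def _run_bc_standalone_sketch(data_file, num_iterations=10):
    """Builds BC from an offline JSON file and trains it for a few iterations."""
    ray.init(ignore_reinit_error=True)
    config = (
        bc.BCConfig()
        .offline_data(input_=[data_file])
        # Evaluate on the real env so reported rewards are meaningful.
        .evaluation(
            evaluation_interval=1,
            evaluation_num_workers=1,
            evaluation_config={"input": "sampler"},
        )
    )
    trainer = config.build(env="CartPole-v0")
    try:
        for _ in range(num_iterations):
            results = trainer.train()
            eval_results = results.get("evaluation")
            if eval_results:
                print("R={}".format(eval_results["episode_reward_mean"]))
    finally:
        trainer.stop()
        ray.shutdown()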


if __name__ == "__main__":
    import pytest
    import sys

    sys.exit(pytest.main(["-v", __file__]))