ray/rllib/tests/test_checkpoint_restore.py

#!/usr/bin/env python

import numpy as np
import unittest

import ray
from ray.rllib.agents.registry import get_trainer_class
from ray.rllib.utils.test_utils import check, framework_iterator


def get_mean_action(alg, obs):
    """Return the mean of 2000 `alg.compute_action(obs)` results.

    Each computed action is converted to `float` before averaging, so the
    action returned for `obs` must be convertible to a single float.
    """
    samples = [float(alg.compute_action(obs)) for _ in range(2000)]
    return np.mean(samples)


# Trainer config overrides used by `ckpt_restore_test`, keyed by the algorithm
# name that is also passed to `get_trainer_class`. Every entry sets
# "explore" to False; the remaining keys are per-algorithm settings.
CONFIGS = {
    "A3C": {"explore": False, "num_workers": 1},
    "APEX_DDPG": {
        "explore": False,
        "observation_filter": "MeanStdFilter",
        "num_workers": 2,
        "min_iter_time_s": 1,
        "optimizer": {"num_replay_buffer_shards": 1},
    },
    # ARS and ES use the same noise size, worker count and observation
    # filter, but are kept as separate dict objects.
    "ARS": {
        "explore": False,
        "num_rollouts": 10,
        "num_workers": 2,
        "noise_size": 2500000,
        "observation_filter": "MeanStdFilter",
    },
    "DDPG": {"explore": False, "timesteps_per_iteration": 100},
    "DQN": {"explore": False},
    "ES": {
        "explore": False,
        "episodes_per_batch": 10,
        "train_batch_size": 100,
        "num_workers": 2,
        "noise_size": 2500000,
        "observation_filter": "MeanStdFilter",
    },
    "PPO": {
        "explore": False,
        "num_sgd_iter": 5,
        "train_batch_size": 1000,
        "num_workers": 2,
    },
    "SAC": {"explore": False},
}


def ckpt_restore_test(alg_name, tfe=False, object_store=False):
    """Check that a restored Trainer matches the one it was saved from.

    For every framework yielded by `framework_iterator`, two Trainers of
    the same class are built from `CONFIGS[alg_name]`. The first is trained
    for one iteration and saved; the second is restored from that save.
    The optimizer state (when the policy state has a non-empty
    "_optimizer_variables" entry) and the mean action computed for one
    random observation are then compared between the two.

    Args:
        alg_name: Key into `CONFIGS`; also passed to `get_trainer_class`.
            Names containing "DDPG" or "SAC" run on "Pendulum-v0", all
            others on "CartPole-v0".
        tfe: If True, "tfe" is added to the frameworks iterated over
            (in addition to "torch" and "tf").
        object_store: If True, the save/restore round trip is run a second
            time through `save_to_object` / `restore_from_object`.

    Raises:
        AssertionError: If the two mean actions differ by more than 0.1, or
            if the restored torch optimizer state has a different number of
            entries than the original.
    """
    config = CONFIGS[alg_name]
    frameworks = (["tfe"] if tfe else []) + ["torch", "tf"]
    if "DDPG" in alg_name or "SAC" in alg_name:
        env = "Pendulum-v0"
    else:
        env = "CartPole-v0"

    for fw in framework_iterator(config, frameworks=frameworks):
        for use_object_store in ([False, True] if object_store else [False]):
            print("use_object_store={}".format(use_object_store))
            cls = get_trainer_class(alg_name)

            # Every Trainer that was successfully constructed is recorded
            # here so that the `finally` clause can stop it even when a
            # later step (training, restoring, comparing) raises.
            trainers = []
            try:
                alg1 = cls(config=config, env=env)
                trainers.append(alg1)
                alg2 = cls(config=config, env=env)
                trainers.append(alg2)

                policy1 = alg1.get_policy()

                res = alg1.train()
                print("current status: " + str(res))

                # Check optimizer state as well.
                optim_state = policy1.get_state().get("_optimizer_variables")

                # Sync the models.
                if use_object_store:
                    alg2.restore_from_object(alg1.save_to_object())
                else:
                    alg2.restore(alg1.save())

                # Compare optimizer state with re-loaded one.
                if optim_state:
                    s2 = alg2.get_policy().get_state().get(
                        "_optimizer_variables")
                    # Tf -> Compare states 1:1.
                    if fw in ["tf2", "tf", "tfe"]:
                        check(s2, optim_state)
                    # For torch, optimizers have state_dicts with
                    # keys=params, which are different for the two models
                    # (ignore these different keys, but compare all values
                    # nevertheless).
                    else:
                        # Without this, a restored list that is shorter
                        # than the original would pass unnoticed.
                        if len(s2) != len(optim_state):
                            raise AssertionError(
                                "algo={} [num optimizer states: "
                                "restored={} original={}]".format(
                                    alg_name, len(s2), len(optim_state)))
                        for s1_, s2_ in zip(optim_state, s2):
                            check(
                                list(s2_["state"].values()),
                                list(s1_["state"].values()))

                # Draw one random observation shaped like the space's
                # bounds and clipped into them.
                obs_space = policy1.observation_space
                obs = np.clip(
                    np.random.uniform(size=obs_space.low.shape),
                    obs_space.low, obs_space.high)
                a1 = get_mean_action(alg1, obs)
                a2 = get_mean_action(alg2, obs)
                print("Checking computed actions", alg1, obs, a1, a2)
                if abs(a1 - a2) > .1:
                    raise AssertionError("algo={} [a1={} a2={}]".format(
                        alg_name, a1, a2))
            finally:
                # Stop all created Trainers, pass or fail.
                for trainer in trainers:
                    trainer.stop()


class TestCheckpointRestorePG(unittest.TestCase):
    """Checkpoint/restore round-trip tests for the A3C and PPO trainers."""

    @classmethod
    def setUpClass(cls):
        """Start Ray with 5 CPUs once for every test in this class."""
        ray.init(num_cpus=5)

    @classmethod
    def tearDownClass(cls):
        """Shut Ray down after the last test of this class."""
        ray.shutdown()

    def test_a3c_checkpoint_restore(self):
        """Round-trip an A3C trainer through save/restore."""
        ckpt_restore_test(alg_name="A3C")

    def test_ppo_checkpoint_restore(self):
        """Round-trip a PPO trainer, including the object-store variant."""
        ckpt_restore_test(alg_name="PPO", object_store=True)


class TestCheckpointRestoreOffPolicy(unittest.TestCase):
    """Checkpoint/restore tests for APEX_DDPG, DDPG, DQN and SAC trainers."""

    @classmethod
    def setUpClass(cls):
        """Start Ray with 5 CPUs once for every test in this class."""
        ray.init(num_cpus=5)

    @classmethod
    def tearDownClass(cls):
        """Shut Ray down after the last test of this class."""
        ray.shutdown()

    def test_apex_ddpg_checkpoint_restore(self):
        """Round-trip an APEX_DDPG trainer through save/restore."""
        ckpt_restore_test(alg_name="APEX_DDPG")

    def test_ddpg_checkpoint_restore(self):
        """Round-trip a DDPG trainer through save/restore."""
        ckpt_restore_test(alg_name="DDPG")

    def test_dqn_checkpoint_restore(self):
        """Round-trip a DQN trainer, including the object-store variant."""
        ckpt_restore_test(alg_name="DQN", object_store=True)

    def test_sac_checkpoint_restore(self):
        """Round-trip a SAC trainer through save/restore."""
        ckpt_restore_test(alg_name="SAC")


class TestCheckpointRestoreEvolutionAlgos(unittest.TestCase):
    """Checkpoint/restore round-trip tests for the ARS and ES trainers."""

    @classmethod
    def setUpClass(cls):
        """Start Ray with 5 CPUs once for every test in this class."""
        ray.init(num_cpus=5)

    @classmethod
    def tearDownClass(cls):
        """Shut Ray down after the last test of this class."""
        ray.shutdown()

    def test_ars_checkpoint_restore(self):
        """Round-trip an ARS trainer through save/restore."""
        ckpt_restore_test(alg_name="ARS")

    def test_es_checkpoint_restore(self):
        """Round-trip an ES trainer through save/restore."""
        ckpt_restore_test(alg_name="ES")


if __name__ == "__main__":
    import sys

    import pytest

    # An optional first command-line argument names the single TestCase
    # class to run; without it, every unittest.TestCase class in this file
    # is run.
    target = __file__
    if len(sys.argv) > 1:
        target += "::" + sys.argv[1]
    sys.exit(pytest.main(["-v", target]))
[rllib] Full checkpoint/restore for all algorithms (#875) * wip * working for all but dqn * update * add train * rename * update * Update test 2017-08-27 18:56:52 -07:00			`#!/usr/bin/env python`

[rllib] Fix issues with PPO model restoration (#1018) * fix filter * add test * lint * fix * commit * Update a3c.py 2017-09-28 13:12:06 -07:00			`import numpy as np`
[RLlib] Cleanup/unify all test cases. (#7533) 2020-03-12 04:39:47 +01:00			`import unittest`
[rllib] Full checkpoint/restore for all algorithms (#875) * wip * working for all but dqn * update * add train * rename * update * Update test 2017-08-27 18:56:52 -07:00
[RLlib] Cleanup/unify all test cases. (#7533) 2020-03-12 04:39:47 +01:00			`import ray`
[RLlib] Allow `rllib rollout` to run distributed via evaluation workers. (#13718) 2021-02-08 12:05:16 +01:00			`from ray.rllib.agents.registry import get_trainer_class`
[RLlib] Issue 8412 (Adam vars not stored in ModelV2). (#8480) 2020-06-05 21:07:02 +02:00			`from ray.rllib.utils.test_utils import check, framework_iterator`
[rllib] Fix issues with PPO model restoration (#1018) * fix filter * add test * lint * fix * commit * Update a3c.py 2017-09-28 13:12:06 -07:00

			`def get_mean_action(alg, obs):`
			`out = []`
			`for _ in range(2000):`
			`out.append(float(alg.compute_action(obs)))`
			`return np.mean(out)`


[rllib] Initial work on integrating hyperparameter search tool (#1107) * clean up train * update * update train script * add tuned examples * add agent catalog * add tune lib * update * fix * testS * remove * train docs * comments * todo * fix resource parsing * fix cr test * add test * try to fix travis test 2017-10-13 16:18:16 -07:00			`CONFIGS = {`
[RLlib] Issue 7046 cannot restore keras model from h5 file. (#7482) 2020-03-23 20:19:30 +01:00			`"A3C": {`
[RLlib] DDPG refactor and Exploration API action noise classes. (#7314) * WIP. * WIP. * WIP. * WIP. * WIP. * Fix * WIP. * Add TD3 quick Pendulum regresison. * Cleanup. * Fix. * LINT. * Fix. * Sort quick_learning test cases, add TD3. * Sort quick_learning test cases, add TD3. * Revert test_checkpoint_restore.py (debugging) changes. * Fix old soft_q settings in documentation and test configs. * More doc fixes. * Fix test case. * Fix test case. * Lower test load. * WIP. 2020-03-01 20:53:35 +01:00			`"explore": False,`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`"num_workers": 1,`
[RLlib] DDPG refactor and Exploration API action noise classes. (#7314) * WIP. * WIP. * WIP. * WIP. * WIP. * Fix * WIP. * Add TD3 quick Pendulum regresison. * Cleanup. * Fix. * LINT. * Fix. * Sort quick_learning test cases, add TD3. * Sort quick_learning test cases, add TD3. * Revert test_checkpoint_restore.py (debugging) changes. * Fix old soft_q settings in documentation and test configs. * More doc fixes. * Fix test case. * Fix test case. * Lower test load. * WIP. 2020-03-01 20:53:35 +01:00			`},`
[rllib] Auto-synchronize filters for all agents (#2791) This makes sure we always update the local filter, and adds an option to synchronize the remote filters as well. In APEX_DDPG we previously didn't do either. The first is needed for checkpoint correctness, the second might help performance. 2018-09-03 20:01:53 -07:00			`"APEX_DDPG": {`
[RLlib] DDPG refactor and Exploration API action noise classes. (#7314) * WIP. * WIP. * WIP. * WIP. * WIP. * Fix * WIP. * Add TD3 quick Pendulum regresison. * Cleanup. * Fix. * LINT. * Fix. * Sort quick_learning test cases, add TD3. * Sort quick_learning test cases, add TD3. * Revert test_checkpoint_restore.py (debugging) changes. * Fix old soft_q settings in documentation and test configs. * More doc fixes. * Fix test case. * Fix test case. * Lower test load. * WIP. 2020-03-01 20:53:35 +01:00			`"explore": False,`
[rllib] Auto-synchronize filters for all agents (#2791) This makes sure we always update the local filter, and adds an option to synchronize the remote filters as well. In APEX_DDPG we previously didn't do either. The first is needed for checkpoint correctness, the second might help performance. 2018-09-03 20:01:53 -07:00			`"observation_filter": "MeanStdFilter",`
			`"num_workers": 2,`
			`"min_iter_time_s": 1,`
			`"optimizer": {`
			`"num_replay_buffer_shards": 1,`
			`},`
			`},`
[RLlib] Issue 7046 cannot restore keras model from h5 file. (#7482) 2020-03-23 20:19:30 +01:00			`"ARS": {`
			`"explore": False,`
			`"num_rollouts": 10,`
			`"num_workers": 2,`
			`"noise_size": 2500000,`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`"observation_filter": "MeanStdFilter",`
[RLlib] Issue 7046 cannot restore keras model from h5 file. (#7482) 2020-03-23 20:19:30 +01:00			`},`
[rllib] format with yapf (#2427) * initial yapf * manual fix yapf bugs 2018-07-19 15:30:36 -07:00			`"DDPG": {`
[RLlib] DDPG refactor and Exploration API action noise classes. (#7314) * WIP. * WIP. * WIP. * WIP. * WIP. * Fix * WIP. * Add TD3 quick Pendulum regresison. * Cleanup. * Fix. * LINT. * Fix. * Sort quick_learning test cases, add TD3. * Sort quick_learning test cases, add TD3. * Revert test_checkpoint_restore.py (debugging) changes. * Fix old soft_q settings in documentation and test configs. * More doc fixes. * Fix test case. * Fix test case. * Lower test load. * WIP. 2020-03-01 20:53:35 +01:00			`"explore": False,`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`"timesteps_per_iteration": 100,`
[rllib] format with yapf (#2427) * initial yapf * manual fix yapf bugs 2018-07-19 15:30:36 -07:00			`},`
[RLlib] Issue 7046 cannot restore keras model from h5 file. (#7482) 2020-03-23 20:19:30 +01:00			`"DQN": {`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`"explore": False,`
[RLlib] Issue 7046 cannot restore keras model from h5 file. (#7482) 2020-03-23 20:19:30 +01:00			`},`
			`"ES": {`
			`"explore": False,`
			`"episodes_per_batch": 10,`
			`"train_batch_size": 100,`
			`"num_workers": 2,`
			`"noise_size": 2500000,`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`"observation_filter": "MeanStdFilter",`
[RLlib] Issue 7046 cannot restore keras model from h5 file. (#7482) 2020-03-23 20:19:30 +01:00			`},`
[rllib] format with yapf (#2427) * initial yapf * manual fix yapf bugs 2018-07-19 15:30:36 -07:00			`"PPO": {`
[RLlib] DDPG refactor and Exploration API action noise classes. (#7314) * WIP. * WIP. * WIP. * WIP. * WIP. * Fix * WIP. * Add TD3 quick Pendulum regresison. * Cleanup. * Fix. * LINT. * Fix. * Sort quick_learning test cases, add TD3. * Sort quick_learning test cases, add TD3. * Revert test_checkpoint_restore.py (debugging) changes. * Fix old soft_q settings in documentation and test configs. * More doc fixes. * Fix test case. * Fix test case. * Lower test load. * WIP. 2020-03-01 20:53:35 +01:00			`"explore": False,`
[rllib] format with yapf (#2427) * initial yapf * manual fix yapf bugs 2018-07-19 15:30:36 -07:00			`"num_sgd_iter": 5,`
[rllib] clarify train batch size for PPO (#2793) It's possible to configure PPO in a way that ends up discarding most of the samples (they are treated as "stragglers"). Add a warning when this happens, and raise an exception if the waste is particularly egregious. 2018-09-05 12:06:13 -07:00			`"train_batch_size": 1000,`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`"num_workers": 2,`
[rllib] format with yapf (#2427) * initial yapf * manual fix yapf bugs 2018-07-19 15:30:36 -07:00			`},`
[RLlib] Issue 7046 cannot restore keras model from h5 file. (#7482) 2020-03-23 20:19:30 +01:00			`"SAC": {`
[RLlib] DDPG refactor and Exploration API action noise classes. (#7314) * WIP. * WIP. * WIP. * WIP. * WIP. * Fix * WIP. * Add TD3 quick Pendulum regresison. * Cleanup. * Fix. * LINT. * Fix. * Sort quick_learning test cases, add TD3. * Sort quick_learning test cases, add TD3. * Revert test_checkpoint_restore.py (debugging) changes. * Fix old soft_q settings in documentation and test configs. * More doc fixes. * Fix test case. * Fix test case. * Lower test load. * WIP. 2020-03-01 20:53:35 +01:00			`"explore": False,`
[rllib] format with yapf (#2427) * initial yapf * manual fix yapf bugs 2018-07-19 15:30:36 -07:00			`},`
[rllib] Initial work on integrating hyperparameter search tool (#1107) * clean up train * update * update train script * add tuned examples * add agent catalog * add tune lib * update * fix * testS * remove * train docs * comments * todo * fix resource parsing * fix cr test * add test * try to fix travis test 2017-10-13 16:18:16 -07:00			`}`

[tune] [rllib] Allow checkpointing to object store instead of local disk (#1212) * wip * use normal pickle * fix checkpoint test * comment * Comment * fix test * fix lint * fix py 3.5 * Update agent.py * fix lint 2017-11-19 00:36:43 -08:00
[RLlib] Split test_checkpoint_restore tests into 3 and make each "large" (from "enormous"). (#15499) 2021-04-30 12:33:12 +02:00			`def ckpt_restore_test(alg_name, tfe=False, object_store=False):`
[RLlib] Auto-framework, retire `use_pytorch` in favor of `framework=...` (#8520) 2020-05-27 16:19:13 +02:00			`config = CONFIGS[alg_name]`
[RLlib] Issue 8412 (Adam vars not stored in ModelV2). (#8480) 2020-06-05 21:07:02 +02:00			`frameworks = (["tfe"] if tfe else []) + ["torch", "tf"]`
			`for fw in framework_iterator(config, frameworks=frameworks):`
[RLlib] Split test_checkpoint_restore tests into 3 and make each "large" (from "enormous"). (#15499) 2021-04-30 12:33:12 +02:00			`for use_object_store in ([False, True] if object_store else [False]):`
[RLlib] Issue 8412 (Adam vars not stored in ModelV2). (#8480) 2020-06-05 21:07:02 +02:00			`print("use_object_store={}".format(use_object_store))`
[RLlib] Allow `rllib rollout` to run distributed via evaluation workers. (#13718) 2021-02-08 12:05:16 +01:00			`cls = get_trainer_class(alg_name)`
[RLlib] Issue 8412 (Adam vars not stored in ModelV2). (#8480) 2020-06-05 21:07:02 +02:00			`if "DDPG" in alg_name or "SAC" in alg_name:`
			`alg1 = cls(config=config, env="Pendulum-v0")`
			`alg2 = cls(config=config, env="Pendulum-v0")`
			`else:`
			`alg1 = cls(config=config, env="CartPole-v0")`
			`alg2 = cls(config=config, env="CartPole-v0")`

			`policy1 = alg1.get_policy()`

			`for _ in range(1):`
			`res = alg1.train()`
			`print("current status: " + str(res))`

			`# Check optimizer state as well.`
			`optim_state = policy1.get_state().get("_optimizer_variables")`

			`# Sync the models`
			`if use_object_store:`
			`alg2.restore_from_object(alg1.save_to_object())`
			`else:`
			`alg2.restore(alg1.save())`

			`# Compare optimizer state with re-loaded one.`
			`if optim_state:`
			`s2 = alg2.get_policy().get_state().get("_optimizer_variables")`
			`# Tf -> Compare states 1:1.`
[RLlib] Fix all example scripts to run on GPUs. (#11105) 2020-10-02 23:07:44 +02:00			`if fw in ["tf2", "tf", "tfe"]:`
[RLlib] Issue 8412 (Adam vars not stored in ModelV2). (#8480) 2020-06-05 21:07:02 +02:00			`check(s2, optim_state)`
			`# For torch, optimizers have state_dicts with keys=params,`
			`# which are different for the two models (ignore these`
			`# different keys, but compare all values nevertheless).`
			`else:`
			`for i, s2_ in enumerate(s2):`
			`check(`
			`list(s2_["state"].values()),`
			`list(optim_state[i]["state"].values()))`

			`for _ in range(1):`
			`if "DDPG" in alg_name or "SAC" in alg_name:`
			`obs = np.clip(`
			`np.random.uniform(size=3),`
			`policy1.observation_space.low,`
			`policy1.observation_space.high)`
			`else:`
			`obs = np.clip(`
			`np.random.uniform(size=4),`
			`policy1.observation_space.low,`
			`policy1.observation_space.high)`
			`a1 = get_mean_action(alg1, obs)`
			`a2 = get_mean_action(alg2, obs)`
			`print("Checking computed actions", alg1, obs, a1, a2)`
			`if abs(a1 - a2) > .1:`
			`raise AssertionError("algo={} [a1={} a2={}]".format(`
			`alg_name, a1, a2))`
Issue 8919 checkpoint at end ignored (#8933) 2020-06-16 08:51:20 +02:00			`# Stop both Trainers.`
			`alg1.stop()`
			`alg2.stop()`
[tune] [rllib] Allow checkpointing to object store instead of local disk (#1212) * wip * use normal pickle * fix checkpoint test * comment * Comment * fix test * fix lint * fix py 3.5 * Update agent.py * fix lint 2017-11-19 00:36:43 -08:00

[RLlib] Split test_checkpoint_restore tests into 3 and make each "large" (from "enormous"). (#15499) 2021-04-30 12:33:12 +02:00			`class TestCheckpointRestorePG(unittest.TestCase):`
[RLlib] Cleanup/unify all test cases. (#7533) 2020-03-12 04:39:47 +01:00			`@classmethod`
			`def setUpClass(cls):`
[RLlib] Split test_checkpoint_restore tests into 3 and make each "large" (from "enormous"). (#15499) 2021-04-30 12:33:12 +02:00			`ray.init(num_cpus=5)`
[RLlib] Cleanup/unify all test cases. (#7533) 2020-03-12 04:39:47 +01:00
			`@classmethod`
			`def tearDownClass(cls):`
			`ray.shutdown()`

[RLlib] Issue 8412 (Adam vars not stored in ModelV2). (#8480) 2020-06-05 21:07:02 +02:00			`def test_a3c_checkpoint_restore(self):`
			`ckpt_restore_test("A3C")`

[RLlib] Split test_checkpoint_restore tests into 3 and make each "large" (from "enormous"). (#15499) 2021-04-30 12:33:12 +02:00			`def test_ppo_checkpoint_restore(self):`
			`ckpt_restore_test("PPO", object_store=True)`


			`class TestCheckpointRestoreOffPolicy(unittest.TestCase):`
			`@classmethod`
			`def setUpClass(cls):`
			`ray.init(num_cpus=5)`

			`@classmethod`
			`def tearDownClass(cls):`
			`ray.shutdown()`

[RLlib] Issue 8412 (Adam vars not stored in ModelV2). (#8480) 2020-06-05 21:07:02 +02:00			`def test_apex_ddpg_checkpoint_restore(self):`
			`ckpt_restore_test("APEX_DDPG")`

			`def test_ddpg_checkpoint_restore(self):`
			`ckpt_restore_test("DDPG")`

			`def test_dqn_checkpoint_restore(self):`
[RLlib] Split test_checkpoint_restore tests into 3 and make each "large" (from "enormous"). (#15499) 2021-04-30 12:33:12 +02:00			`ckpt_restore_test("DQN", object_store=True)`
[RLlib] Issue 8412 (Adam vars not stored in ModelV2). (#8480) 2020-06-05 21:07:02 +02:00
			`def test_sac_checkpoint_restore(self):`
			`ckpt_restore_test("SAC")`
[RLlib] Cleanup/unify all test cases. (#7533) 2020-03-12 04:39:47 +01:00

[RLlib] Split test_checkpoint_restore tests into 3 and make each "large" (from "enormous"). (#15499) 2021-04-30 12:33:12 +02:00			`class TestCheckpointRestoreEvolutionAlgos(unittest.TestCase):`
			`@classmethod`
			`def setUpClass(cls):`
			`ray.init(num_cpus=5)`

			`@classmethod`
			`def tearDownClass(cls):`
			`ray.shutdown()`

			`def test_ars_checkpoint_restore(self):`
			`ckpt_restore_test("ARS")`

			`def test_es_checkpoint_restore(self):`
			`ckpt_restore_test("ES")`


[tune] [rllib] Allow checkpointing to object store instead of local disk (#1212) * wip * use normal pickle * fix checkpoint test * comment * Comment * fix test * fix lint * fix py 3.5 * Update agent.py * fix lint 2017-11-19 00:36:43 -08:00			`if __name__ == "__main__":`
[RLlib] Cleanup/unify all test cases. (#7533) 2020-03-12 04:39:47 +01:00			`import pytest`
			`import sys`
[RLlib] Split test_checkpoint_restore tests into 3 and make each "large" (from "enormous"). (#15499) 2021-04-30 12:33:12 +02:00
			`# One can specify the specific TestCase class to run.`
			`# None for all unittest.TestCase classes in this file.`
			`class_ = sys.argv[1] if len(sys.argv) > 1 else None`
			`sys.exit(`
			`pytest.main(`
			`["-v", __file__ + ("" if class_ is None else "::" + class_)]))`