import gym
import numpy as np
import unittest

import ray
import ray.rllib.algorithms.pg as pg
from ray.rllib.algorithms.registry import get_algorithm_class
from ray.rllib.examples.env.random_env import RandomEnv
from ray.rllib.utils.test_utils import framework_iterator
from ray.tune.registry import register_env


class FaultInjectEnv(gym.Env):
    """Env that fails upon calling `step()`, but only for some remote worker indices.

    The worker indices that should produce the failure (a ValueError) can be
    provided by a list (of ints) under the "bad_indices" key in the env's
    config.

    Examples:
        >>> from ray.rllib.env.env_context import EnvContext
        >>> # This env will fail for workers 1 and 2 (not for the local worker
        >>> # or any others with an index != [1|2]).
        >>> bad_env = FaultInjectEnv(
        ...     EnvContext({"bad_indices": [1, 2]},
        ...                worker_index=1, num_workers=3))

        >>> from ray.rllib.env.env_context import EnvContext
        >>> # This env will fail only on the first evaluation worker, not on
        >>> # the first regular rollout worker.
        >>> bad_env = FaultInjectEnv(
        ...     EnvContext({"bad_indices": [1], "eval_only": True},
        ...                worker_index=2, num_workers=5))
    """

    def __init__(self, config):
        self.env = gym.make("CartPole-v0")
        self._skip_env_checking = True
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space
        self.config = config

    def reset(self):
        return self.env.reset()

    def step(self, action):
        # Only fail on the original workers with the specified indices.
        # Once on a recreated worker, don't fail anymore.
        if (
            self.config.worker_index in self.config.get("bad_indices", [])
            and not self.config.recreated_worker
        ):
            raise ValueError(
                "This is a simulated error from "
                f"worker-idx={self.config.worker_index}!"
            )
        return self.env.step(action)


class TestWorkerFailure(unittest.TestCase):
    @classmethod
    def setUpClass(cls) -> None:
        ray.init(num_cpus=6)

    @classmethod
    def tearDownClass(cls) -> None:
        ray.shutdown()

    def do_test(self, alg: str, config: dict, fn=None, eval_only=False):
        fn = fn or self._do_test_fault_ignore
        fn(alg, config, eval_only)

    def _do_test_fault_ignore(self, algo: str, config: dict, eval_only: bool = False):
        register_env("fault_env", lambda c: FaultInjectEnv(c))
        algo_cls = get_algorithm_class(algo)

        # Test fault handling.
        if not eval_only:
            config["num_workers"] = 2
            config["ignore_worker_failures"] = True
            # Make worker idx=1 fail. Other workers will be ok.
            config["env_config"] = {"bad_indices": [1]}
        else:
            config["num_workers"] = 1
            config["evaluation_num_workers"] = 2
            config["evaluation_interval"] = 1
            config["evaluation_config"] = {
                "ignore_worker_failures": True,
                "env_config": {
                    # Make worker idx=1 fail. Other workers will be ok.
                    "bad_indices": [1],
                },
            }

        for _ in framework_iterator(config, frameworks=("tf2", "torch")):
            algo = algo_cls(config=config, env="fault_env")
            result = algo.train()
            if not eval_only:
                self.assertTrue(result["num_healthy_workers"] == 1)
            else:
                self.assertTrue(result["num_healthy_workers"] == 1)
                self.assertTrue(result["evaluation"]["num_healthy_workers"] == 1)
            algo.stop()

    def _do_test_fault_fatal(self, alg, config, eval_only=False):
        register_env("fault_env", lambda c: FaultInjectEnv(c))
        agent_cls = get_algorithm_class(alg)

        # Test that a real error is raised once we run out of healthy workers.
        if not eval_only:
            config["num_workers"] = 2
            config["ignore_worker_failures"] = True
            # Make both worker idx=1 and 2 fail.
config["env_config"] = {"bad_indices": [1, 2]} else: config["num_workers"] = 1 config["evaluation_num_workers"] = 1 config["evaluation_interval"] = 1 config["evaluation_config"] = { "ignore_worker_failures": True, # Make eval worker (index 1) fail. "env_config": { "bad_indices": [1], }, } for _ in framework_iterator(config, frameworks=("torch", "tf")): a = agent_cls(config=config, env="fault_env") self.assertRaises(Exception, lambda: a.train()) a.stop() def _do_test_fault_fatal_but_recreate(self, alg, config, eval_only=False): register_env("fault_env", lambda c: FaultInjectEnv(c)) agent_cls = get_algorithm_class(alg) # Test raises real error when out of workers. if not eval_only: config["num_workers"] = 2 config["recreate_failed_workers"] = True # Make both worker idx=1 and 2 fail. config["env_config"] = {"bad_indices": [1, 2]} else: config["num_workers"] = 1 config["evaluation_num_workers"] = 1 config["evaluation_interval"] = 1 config["evaluation_config"] = { "recreate_failed_workers": True, # Make eval worker (index 1) fail. "env_config": { "bad_indices": [1], }, } for _ in framework_iterator(config, frameworks=("tf2", "torch")): a = agent_cls(config=config, env="fault_env") # Expect this to go well and all faulty workers are recovered. self.assertTrue( not any( ray.get( worker.apply.remote( lambda w: w.recreated_worker or w.env_context.recreated_worker ) ) for worker in a.workers.remote_workers() ) ) result = a.train() if not eval_only: self.assertTrue(result["num_healthy_workers"] == 2) self.assertTrue( all( ray.get( worker.apply.remote( lambda w: w.recreated_worker and w.env_context.recreated_worker ) ) for worker in a.workers.remote_workers() ) ) else: self.assertTrue(result["num_healthy_workers"] == 1) self.assertTrue(result["evaluation"]["num_healthy_workers"] == 1) # This should also work several times. result = a.train() if not eval_only: self.assertTrue(result["num_healthy_workers"] == 2) else: self.assertTrue(result["num_healthy_workers"] == 1) self.assertTrue(result["evaluation"]["num_healthy_workers"] == 1) a.stop() def test_fatal(self): # Test the case where all workers fail (w/o recovery). self.do_test("PG", {"optimizer": {}}, fn=self._do_test_fault_fatal) def test_fatal_but_recover(self): # Test the case where all workers fail, but we chose to recover. self.do_test("PG", {"optimizer": {}}, fn=self._do_test_fault_fatal_but_recreate) def test_async_grads(self): self.do_test("A3C", {"optimizer": {"grads_per_step": 1}}) def test_async_replay(self): self.do_test( "APEX", { "num_gpus": 0, "min_sample_timesteps_per_iteration": 1000, "min_time_s_per_iteration": 1, "explore": False, "learning_starts": 1000, "target_network_update_freq": 100, "optimizer": { "num_replay_buffer_shards": 1, }, }, ) def test_async_samples(self): self.do_test("IMPALA", {"num_gpus": 0}) def test_sync_replay(self): self.do_test("DQN", {"min_sample_timesteps_per_iteration": 1}) def test_multi_g_p_u(self): self.do_test( "PPO", { "num_sgd_iter": 1, "train_batch_size": 10, "rollout_fragment_length": 10, "sgd_minibatch_size": 1, }, ) def test_sync_samples(self): self.do_test("PG", {"optimizer": {}}) def test_async_sampling_option(self): self.do_test("PG", {"optimizer": {}, "sample_async": True}) def test_eval_workers_failing_ignore(self): # Test the case where one eval worker fails, but we chose to ignore. self.do_test( "PG", config={"model": {"fcnet_hiddens": [4]}}, eval_only=True, ) def test_eval_workers_failing_recreate(self): # Test the case where all eval workers fail, but we chose to recover. 
        self.do_test(
            "PG",
            config={"model": {"fcnet_hiddens": [4]}},
            fn=self._do_test_fault_fatal_but_recreate,
            eval_only=True,
        )

    def test_eval_workers_failing_fatal(self):
        # Test the case where all eval workers fail (w/o recovery).
        self.do_test(
            "PG",
            config={"model": {"fcnet_hiddens": [4]}},
            fn=self._do_test_fault_fatal,
            eval_only=True,
        )

    def test_eval_workers_on_infinite_episodes(self):
        """Tests whether eval workers warn appropriately after some episode timeout."""
        # Create infinitely running episodes, but with a horizon setting (RLlib
        # will auto-terminate the episode). However, in the eval workers, don't
        # set a horizon -> Expect a warning and no proper evaluation results.
        config = (
            pg.PGConfig()
            .rollouts(num_rollout_workers=2, horizon=100)
            .reporting(metrics_episode_collection_timeout_s=5.0)
            .environment(env=RandomEnv, env_config={"p_done": 0.0})
            .evaluation(
                evaluation_num_workers=2,
                evaluation_interval=1,
                evaluation_sample_timeout_s=5.0,
                evaluation_config={
                    "horizon": None,
                },
            )
        )
        algo = config.build()
        results = algo.train()
        self.assertTrue(np.isnan(results["evaluation"]["episode_reward_mean"]))


if __name__ == "__main__":
    import sys
    import pytest

    sys.exit(pytest.main(["-v", __file__]))