"""Regression tests for nested (Dict/Tuple) observation spaces in RLlib."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import pickle
import unittest

import gym
from gym import spaces
from gym.envs.registration import EnvSpec
import torch.nn as nn

import ray
from ray.rllib.agents.a3c import A2CTrainer
from ray.rllib.agents.pg import PGTrainer
from ray.rllib.agents.pg.pg_policy import PGTFPolicy
from ray.rllib.env import MultiAgentEnv
from ray.rllib.env.base_env import BaseEnv
from ray.rllib.env.vector_env import VectorEnv
from ray.rllib.models import ModelCatalog
from ray.rllib.models.model import Model
from ray.rllib.models.torch.fcnet import FullyConnectedNetwork
from ray.rllib.models.torch.torch_modelv2 import TorchModelV2
from ray.rllib.rollout import rollout
from ray.rllib.tests.test_external_env import SimpleServing
from ray.rllib.utils import try_import_tf
from ray.tune.registry import register_env

tf = try_import_tf()

DICT_SPACE = spaces.Dict({
    "sensors": spaces.Dict({
        "position": spaces.Box(low=-100, high=100, shape=(3, )),
        "velocity": spaces.Box(low=-1, high=1, shape=(3, )),
        "front_cam": spaces.Tuple(
            (spaces.Box(low=0, high=1, shape=(10, 10, 3)),
             spaces.Box(low=0, high=1, shape=(10, 10, 3)))),
        "rear_cam": spaces.Box(low=0, high=1, shape=(10, 10, 3)),
    }),
    "inner_state": spaces.Dict({
        "charge": spaces.Discrete(100),
        "job_status": spaces.Dict({
            "task": spaces.Discrete(5),
            "progress": spaces.Box(low=0, high=100, shape=()),
        }),
    }),
})

DICT_SAMPLES = [DICT_SPACE.sample() for _ in range(10)]

TUPLE_SPACE = spaces.Tuple([
    spaces.Box(low=-100, high=100, shape=(3, )),
    spaces.Tuple((spaces.Box(low=0, high=1, shape=(10, 10, 3)),
                  spaces.Box(low=0, high=1, shape=(10, 10, 3)))),
    spaces.Discrete(5),
])

TUPLE_SAMPLES = [TUPLE_SPACE.sample() for _ in range(10)]


def one_hot(i, n):
    out = [0.0] * n
    out[i] = 1.0
    return out


class NestedDictEnv(gym.Env):
    def __init__(self):
        self.action_space = spaces.Discrete(2)
        self.observation_space = DICT_SPACE
        self._spec = EnvSpec("NestedDictEnv-v0")
        self.steps = 0

    def reset(self):
        self.steps = 0
        return DICT_SAMPLES[0]

    def step(self, action):
        self.steps += 1
        return DICT_SAMPLES[self.steps], 1, self.steps >= 5, {}


class NestedTupleEnv(gym.Env):
    def __init__(self):
        self.action_space = spaces.Discrete(2)
        self.observation_space = TUPLE_SPACE
        self._spec = EnvSpec("NestedTupleEnv-v0")
        self.steps = 0

    def reset(self):
        self.steps = 0
        return TUPLE_SAMPLES[0]

    def step(self, action):
        self.steps += 1
        return TUPLE_SAMPLES[self.steps], 1, self.steps >= 5, {}


class NestedMultiAgentEnv(MultiAgentEnv):
    def __init__(self):
        self.steps = 0

    def reset(self):
        return {
            "dict_agent": DICT_SAMPLES[0],
            "tuple_agent": TUPLE_SAMPLES[0],
        }

    def step(self, actions):
        self.steps += 1
        obs = {
            "dict_agent": DICT_SAMPLES[self.steps],
            "tuple_agent": TUPLE_SAMPLES[self.steps],
        }
        rew = {
            "dict_agent": 0,
            "tuple_agent": 0,
        }
        dones = {"__all__": self.steps >= 5}
        infos = {
            "dict_agent": {},
            "tuple_agent": {},
        }
        return obs, rew, dones, infos


class InvalidModel(Model):
    # Deliberately returns non-tensor outputs to exercise model validation.
    def _build_layers_v2(self, input_dict, num_outputs, options):
        return "not", "valid"


class InvalidModel2(Model):
    # Deliberately returns wrongly shaped tensors to exercise output checks.
    def _build_layers_v2(self, input_dict, num_outputs, options):
        return tf.constant(0), tf.constant(0)


class TorchSpyModel(TorchModelV2, nn.Module):
    # Records the nested observations it receives so the tests can verify
    # that RLlib reconstructed them correctly (see forward()).
    capture_index = 0

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        TorchModelV2.__init__(self, obs_space, action_space, num_outputs,
                              model_config, name)
        nn.Module.__init__(self)
        self.fc = FullyConnectedNetwork(
            obs_space.original_space.spaces["sensors"].spaces["position"],
            action_space, num_outputs, model_config, name)

    def forward(self, input_dict, state, seq_lens):
        pos = input_dict["obs"]["sensors"]["position"].numpy()
        front_cam = input_dict["obs"]["sensors"]["front_cam"][0].numpy()
        task = input_dict["obs"]["inner_state"]["job_status"]["task"].numpy()
        ray.experimental.internal_kv._internal_kv_put(
            "torch_spy_in_{}".format(TorchSpyModel.capture_index),
            pickle.dumps((pos, front_cam, task)),
            overwrite=True)
        TorchSpyModel.capture_index += 1
        return self.fc({
            "obs": input_dict["obs"]["sensors"]["position"]
        }, state, seq_lens)

    def value_function(self):
        return self.fc.value_function()


class DictSpyModel(Model):
    capture_index = 0

    def _build_layers_v2(self, input_dict, num_outputs, options):
        def spy(pos, front_cam, task):
            # TF runs this function in an isolated context, so we have to use
            # redis to communicate back to our suite
            ray.experimental.internal_kv._internal_kv_put(
                "d_spy_in_{}".format(DictSpyModel.capture_index),
                pickle.dumps((pos, front_cam, task)),
                overwrite=True)
            DictSpyModel.capture_index += 1
            return 0

        spy_fn = tf.py_func(
            spy, [
                input_dict["obs"]["sensors"]["position"],
                input_dict["obs"]["sensors"]["front_cam"][0],
                input_dict["obs"]["inner_state"]["job_status"]["task"],
            ],
            tf.int64,
            stateful=True)

        with tf.control_dependencies([spy_fn]):
            output = tf.layers.dense(
                input_dict["obs"]["sensors"]["position"], num_outputs)
        return output, output


class TupleSpyModel(Model):
    capture_index = 0

    def _build_layers_v2(self, input_dict, num_outputs, options):
        def spy(pos, cam, task):
            # TF runs this function in an isolated context, so we have to use
            # redis to communicate back to our suite
            ray.experimental.internal_kv._internal_kv_put(
                "t_spy_in_{}".format(TupleSpyModel.capture_index),
                pickle.dumps((pos, cam, task)),
                overwrite=True)
            TupleSpyModel.capture_index += 1
            return 0

        spy_fn = tf.py_func(
            spy, [
                input_dict["obs"][0],
                input_dict["obs"][1][0],
                input_dict["obs"][2],
            ],
            tf.int64,
            stateful=True)

        with tf.control_dependencies([spy_fn]):
            output = tf.layers.dense(input_dict["obs"][0], num_outputs)
        return output, output


class NestedSpacesTest(unittest.TestCase):
    def testInvalidModel(self):
        ModelCatalog.register_custom_model("invalid", InvalidModel)
        self.assertRaises(
            ValueError,
            lambda: PGTrainer(
                env="CartPole-v0",
                config={
                    "model": {
                        "custom_model": "invalid",
                    },
                }))

    def testInvalidModel2(self):
        ModelCatalog.register_custom_model("invalid2", InvalidModel2)
        self.assertRaisesRegexp(
            ValueError, "Expected output.*",
            lambda: PGTrainer(
                env="CartPole-v0",
                config={
                    "model": {
                        "custom_model": "invalid2",
                    },
                }))

    def doTestNestedDict(self, make_env, test_lstm=False):
        ModelCatalog.register_custom_model("composite", DictSpyModel)
        register_env("nested", make_env)
        pg = PGTrainer(
            env="nested",
            config={
                "num_workers": 0,
                "sample_batch_size": 5,
                "train_batch_size": 5,
                "model": {
                    "custom_model": "composite",
                    "use_lstm": test_lstm,
                },
            })
        pg.train()

        # Check that the model sees the correct reconstructed observations.
        for i in range(4):
            seen = pickle.loads(
                ray.experimental.internal_kv._internal_kv_get(
                    "d_spy_in_{}".format(i)))
            pos_i = DICT_SAMPLES[i]["sensors"]["position"].tolist()
            cam_i = DICT_SAMPLES[i]["sensors"]["front_cam"][0].tolist()
            task_i = one_hot(
                DICT_SAMPLES[i]["inner_state"]["job_status"]["task"], 5)
            self.assertEqual(seen[0][0].tolist(), pos_i)
            self.assertEqual(seen[1][0].tolist(), cam_i)
            self.assertEqual(seen[2][0].tolist(), task_i)

    def doTestNestedTuple(self, make_env):
        ModelCatalog.register_custom_model("composite2", TupleSpyModel)
        register_env("nested2", make_env)
        pg = PGTrainer(
            env="nested2",
            config={
                "num_workers": 0,
                "sample_batch_size": 5,
                "train_batch_size": 5,
                "model": {
                    "custom_model": "composite2",
                },
            })
        pg.train()

        # Check that the model sees the correct reconstructed observations.
        for i in range(4):
            seen = pickle.loads(
                ray.experimental.internal_kv._internal_kv_get(
                    "t_spy_in_{}".format(i)))
            pos_i = TUPLE_SAMPLES[i][0].tolist()
            cam_i = TUPLE_SAMPLES[i][1][0].tolist()
            task_i = one_hot(TUPLE_SAMPLES[i][2], 5)
            self.assertEqual(seen[0][0].tolist(), pos_i)
            self.assertEqual(seen[1][0].tolist(), cam_i)
            self.assertEqual(seen[2][0].tolist(), task_i)

    def testNestedDictGym(self):
        self.doTestNestedDict(lambda _: NestedDictEnv())

    def testNestedDictGymLSTM(self):
        self.doTestNestedDict(lambda _: NestedDictEnv(), test_lstm=True)

    def testNestedDictVector(self):
        self.doTestNestedDict(
            lambda _: VectorEnv.wrap(lambda i: NestedDictEnv()))

    def testNestedDictServing(self):
        self.doTestNestedDict(lambda _: SimpleServing(NestedDictEnv()))

    def testNestedDictAsync(self):
        self.doTestNestedDict(lambda _: BaseEnv.to_base_env(NestedDictEnv()))

    def testNestedTupleGym(self):
        self.doTestNestedTuple(lambda _: NestedTupleEnv())

    def testNestedTupleVector(self):
        self.doTestNestedTuple(
            lambda _: VectorEnv.wrap(lambda i: NestedTupleEnv()))

    def testNestedTupleServing(self):
        self.doTestNestedTuple(lambda _: SimpleServing(NestedTupleEnv()))

    def testNestedTupleAsync(self):
        self.doTestNestedTuple(
            lambda _: BaseEnv.to_base_env(NestedTupleEnv()))

    def testMultiAgentComplexSpaces(self):
        ModelCatalog.register_custom_model("dict_spy", DictSpyModel)
        ModelCatalog.register_custom_model("tuple_spy", TupleSpyModel)
        register_env("nested_ma", lambda _: NestedMultiAgentEnv())
        act_space = spaces.Discrete(2)
        pg = PGTrainer(
            env="nested_ma",
            config={
                "num_workers": 0,
                "sample_batch_size": 5,
                "train_batch_size": 5,
                "multiagent": {
                    "policies": {
                        "tuple_policy": (
                            PGTFPolicy, TUPLE_SPACE, act_space,
                            {"model": {"custom_model": "tuple_spy"}}),
                        "dict_policy": (
                            PGTFPolicy, DICT_SPACE, act_space,
                            {"model": {"custom_model": "dict_spy"}}),
                    },
                    "policy_mapping_fn": lambda a: {
                        "tuple_agent": "tuple_policy",
                        "dict_agent": "dict_policy",
                    }[a],
                },
            })
        pg.train()

        for i in range(4):
            seen = pickle.loads(
                ray.experimental.internal_kv._internal_kv_get(
                    "d_spy_in_{}".format(i)))
            pos_i = DICT_SAMPLES[i]["sensors"]["position"].tolist()
            cam_i = DICT_SAMPLES[i]["sensors"]["front_cam"][0].tolist()
            task_i = one_hot(
                DICT_SAMPLES[i]["inner_state"]["job_status"]["task"], 5)
            self.assertEqual(seen[0][0].tolist(), pos_i)
            self.assertEqual(seen[1][0].tolist(), cam_i)
            self.assertEqual(seen[2][0].tolist(), task_i)

        for i in range(4):
            seen = pickle.loads(
                ray.experimental.internal_kv._internal_kv_get(
                    "t_spy_in_{}".format(i)))
            pos_i = TUPLE_SAMPLES[i][0].tolist()
            cam_i = TUPLE_SAMPLES[i][1][0].tolist()
            task_i = one_hot(TUPLE_SAMPLES[i][2], 5)
            self.assertEqual(seen[0][0].tolist(), pos_i)
            self.assertEqual(seen[1][0].tolist(), cam_i)
            self.assertEqual(seen[2][0].tolist(), task_i)

    def testRolloutDictSpace(self):
        register_env("nested", lambda _: NestedDictEnv())
        agent = PGTrainer(env="nested")
        agent.train()
        path = agent.save()
        agent.stop()

        # Test train works on restore
        agent2 = PGTrainer(env="nested")
        agent2.restore(path)
        agent2.train()

        # Test rollout works on restore
        rollout(agent2, "nested", 100)

    def testPyTorchModel(self):
        ModelCatalog.register_custom_model("composite", TorchSpyModel)
        register_env("nested", lambda _: NestedDictEnv())
        a2c = A2CTrainer(
            env="nested",
            config={
                "num_workers": 0,
                "use_pytorch": True,
                "sample_batch_size": 5,
"train_batch_size": 5, "model": { "custom_model": "composite", }, }) a2c.train() # Check that the model sees the correct reconstructed observations for i in range(4): seen = pickle.loads( ray.experimental.internal_kv._internal_kv_get( "torch_spy_in_{}".format(i))) pos_i = DICT_SAMPLES[i]["sensors"]["position"].tolist() cam_i = DICT_SAMPLES[i]["sensors"]["front_cam"][0].tolist() task_i = one_hot( DICT_SAMPLES[i]["inner_state"]["job_status"]["task"], 5) self.assertEqual(seen[0][0].tolist(), pos_i) self.assertEqual(seen[1][0].tolist(), cam_i) self.assertEqual(seen[2][0].tolist(), task_i) if __name__ == "__main__": ray.init(num_cpus=5) unittest.main(verbosity=2)