"""
Example of a custom gym environment trained with PPO. Run this for a demo.

This example shows:
  - using a custom environment (RandomEnv) with configurable spaces
  - using the "use_lstm" model option with PPO
  - using a Tuple observation space

You can visualize experiment results in ~/ray_results using TensorBoard.
"""

import gym
from gym.spaces import Tuple, Discrete
import numpy as np

from ray.rllib.agents.ppo import PPOTrainer
from ray.rllib.utils import try_import_tf

tf = try_import_tf()


class RandomEnv(gym.Env):
    """Gym environment whose observations, rewards and dones are random.

    Everything is driven by the `config` mapping given to the constructor:

    - "action_space" (required): stored as `self.action_space`.
    - "observation_space" (required): space that observations are sampled
      from in `reset` and `step`.
    - "reward_space" (optional): space that rewards are sampled from.
      Defaults to a float32 Box with shape () over [-1.0, 1.0].
    - "p_done" (optional): probability that any given step ends the
      episode. Defaults to 0.1.

    The action passed to `step` is not used.
    """

    def __init__(self, config):
        """Read spaces and the termination probability from `config`."""
        self.action_space = config["action_space"]
        self.observation_space = config["observation_space"]

        # Fallback reward space, used when the config does not provide one.
        default_reward_space = gym.spaces.Box(
            low=-1.0, high=1.0, shape=(), dtype=np.float32)
        self.reward_space = config.get("reward_space", default_reward_space)

        self.p_done = config.get("p_done", 0.1)

    def reset(self):
        """Return a fresh sample from the observation space."""
        initial_obs = self.observation_space.sample()
        return initial_obs

    def step(self, action):
        """Return a random `(observation, reward, done, info)` tuple.

        `reward` is converted to a Python float, `done` to a Python bool,
        and `info` is always an empty dict. `action` is ignored.
        """
        # Sampling happens in the order observation -> reward -> done.
        next_obs = self.observation_space.sample()
        reward = float(self.reward_space.sample())

        # True is drawn with probability p_done, False with the remainder.
        outcome_probs = [self.p_done, 1.0 - self.p_done]
        done = bool(np.random.choice([True, False], p=outcome_probs))

        info = {}
        return next_obs, reward, done, info


if __name__ == "__main__":
    # Settings forwarded to RandomEnv through the "env_config" key.
    env_config = {
        "action_space": Discrete(2),
        # Test a simple Tuple observation space.
        "observation_space": Tuple([Discrete(3), Discrete(2)]),
    }

    # Trainer configuration: LSTM model option on, no worker parallelism.
    ppo_config = {
        "model": {
            "use_lstm": True,
        },
        "vf_share_layers": False,
        "num_workers": 0,  # no parallelism
        "env_config": env_config,
    }

    trainer = PPOTrainer(env=RandomEnv, config=ppo_config)

    # Call train() once and print whatever it returns.
    results = trainer.train()
    print(results)