# __rllib-custom-gym-env-begin__
import gym

import ray
from ray.rllib.agents import ppo


class SimpleCorridor(gym.Env):
    """Corridor env: the agent must walk right to reach the end position."""

    def __init__(self, config):
        self.end_pos = config["corridor_length"]
        self.cur_pos = 0
        self.action_space = gym.spaces.Discrete(2)  # 0: left, 1: right
        self.observation_space = gym.spaces.Discrete(self.end_pos)

    def reset(self):
        self.cur_pos = 0
        return self.cur_pos

    def step(self, action):
        if action == 0 and self.cur_pos > 0:  # move left (towards start)
            self.cur_pos -= 1
        elif action == 1:  # move right (towards goal)
            self.cur_pos += 1
        if self.cur_pos >= self.end_pos:
            # Goal reached: reward of 1.0 and episode termination.
            return 0, 1.0, True, {}
        else:
            # Small per-step penalty to encourage reaching the goal quickly.
            return self.cur_pos, -0.1, False, {}


ray.init()
config = {
    "env": SimpleCorridor,
    "env_config": {
        "corridor_length": 5,
    },
}
trainer = ppo.PPOTrainer(config=config)
for _ in range(3):
    print(trainer.train())
# __rllib-custom-gym-env-end__
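
# The block below is a minimal inference sketch, not part of the snippet above.
# It assumes the old `ray.rllib.agents` Trainer API used here, where
# `Trainer.compute_action()` returns an action for a single observation.
# Roll out one episode with the trained policy in a fresh env instance.
env = SimpleCorridor({"corridor_length": 5})
obs = env.reset()
done = False
total_reward = 0.0
while not done:
    action = trainer.compute_action(obs)
    obs, reward, done, info = env.step(action)
    total_reward += reward
print("Episode reward after training:", total_reward)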