ray/rllib/examples/env/simple_corridor.py

import gym
from gym.spaces import Box, Discrete
import numpy as np


class SimpleCorridor(gym.Env):
    """Minimal custom environment: an agent walks along a 1-D corridor.

    The agent starts at position 0 and the episode is done once its
    position reaches the corridor length, which is read from the env
    config under the key "corridor_length".

    Actions:
        0: step back by one (has no effect while at position 0).
        1: step forward by one.

    The reward is 1 on the step that finishes the episode, 0 otherwise.
    """

    def __init__(self, config):
        """Create the spaces and place the agent at the corridor start.

        Args:
            config: Mapping that provides the key "corridor_length".
        """
        corridor_length = config["corridor_length"]
        self.end_pos = corridor_length
        self.action_space = Discrete(2)
        self.observation_space = Box(
            0.0, corridor_length, shape=(1, ), dtype=np.float32)
        self.cur_pos = 0

    def set_corridor_length(self, length):
        """Change the corridor length and rebuild the observation space.

        The current position is left untouched.

        Args:
            length: New end position of the corridor.
        """
        self.end_pos = length
        resized_space = Box(0.0, length, shape=(1, ), dtype=np.float32)
        self.observation_space = resized_space
        print("Updated corridor length to {}".format(length))

    def reset(self):
        """Move the agent back to position 0.

        Returns:
            The initial observation: a one-element list with the position.
        """
        self.cur_pos = 0
        initial_obs = [self.cur_pos]
        return initial_obs

    def step(self, action):
        """Apply one action and report the outcome.

        Args:
            action: 0 to step back, 1 to step forward.

        Returns:
            A tuple ``(obs, reward, done, info)`` where ``obs`` is a
            one-element list holding the current position, ``reward`` is 1
            if the episode just finished and 0 otherwise, ``done`` tells
            whether the end of the corridor has been reached, and ``info``
            is an empty dict.
        """
        assert action in [0, 1], action
        if action == 0:
            # Stepping back is silently ignored at the corridor start.
            if self.cur_pos > 0:
                self.cur_pos -= 1
        elif action == 1:
            self.cur_pos += 1

        done = self.cur_pos >= self.end_pos
        if done:
            reward = 1
        else:
            reward = 0
        return [self.cur_pos], reward, done, {}
[RLlib] rllib/examples folder restructuring (#8250) Cleans up the rllib/examples folder by moving all example Envs into rllib/examples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00			`import gym`
			`from gym.spaces import Box, Discrete`
			`import numpy as np`


			`class SimpleCorridor(gym.Env):`
			`"""Example of a custom env in which you have to walk down a corridor.`

			`You can configure the length of the corridor via the env config."""`

			`def __init__(self, config):`
			`self.end_pos = config["corridor_length"]`
			`self.cur_pos = 0`
			`self.action_space = Discrete(2)`
			`self.observation_space = Box(`
			`0.0, self.end_pos, shape=(1, ), dtype=np.float32)`

			`def set_corridor_length(self, length):`
			`self.end_pos = length`
			`self.observation_space = Box(`
			`0.0, self.end_pos, shape=(1, ), dtype=np.float32)`
			`print("Updated corridor length to {}".format(length))`

			`def reset(self):`
			`self.cur_pos = 0`
			`return [self.cur_pos]`

			`def step(self, action):`
			`assert action in [0, 1], action`
			`if action == 0 and self.cur_pos > 0:`
			`self.cur_pos -= 1`
			`elif action == 1:`
			`self.cur_pos += 1`
			`done = self.cur_pos >= self.end_pos`
			`return [self.cur_pos], 1 if done else 0, done, {}`