# ray/rllib/examples/env/cliff_walking_wall_env.py

import gym
from gym import spaces

# Discrete action codes accepted by CliffWalkingWallEnv.step(), numbered
# clockwise starting from "up": UP=0, RIGHT=1, DOWN=2, LEFT=3.
ACTION_UP, ACTION_RIGHT, ACTION_DOWN, ACTION_LEFT = range(4)


class CliffWalkingWallEnv(gym.Env):
    """Modified version of the CliffWalking environment from OpenAI Gym
    with walls instead of a cliff.

    ### Description
    The board is a 4x12 matrix, with (using NumPy matrix indexing):
    - [3, 0] or obs==36 as the start at bottom-left
    - [3, 11] or obs==47 as the goal at bottom-right
    - [3, 1..10] or obs==37...46 as the walls at bottom-center

    An episode terminates when the agent reaches the goal.

    ### Actions
    There are 4 discrete deterministic actions:
    - 0: move up
    - 1: move right
    - 2: move down
    - 3: move left
    You can also use the constants ACTION_UP, ACTION_RIGHT, ... defined above.

    ### Observations
    The observation is the agent's position encoded as `row * 12 + col`.
    There are 3x12 + 2 possible states, not including the walls. If an action
    would move an agent into one of the walls or off the board, it simply
    stays in the same position.

    ### Reward
    Each time step incurs -1 reward, except reaching the goal which gives +10 reward.
    """

    # Board layout. Positions are flattened row-major: pos = row * _NUM_COLS + col.
    _NUM_ROWS = 4
    _NUM_COLS = 12
    _START = 36  # [3, 0], bottom-left
    _GOAL = 47  # [3, 11], bottom-right
    _WALLS = frozenset(range(37, 47))  # [3, 1..10], bottom-center

    def __init__(self, seed=42) -> None:
        """Create the observation/action spaces and seed them.

        Args:
            seed: Value passed to `seed()` of both the observation space
                and the action space.
        """
        super().__init__()
        self.observation_space = spaces.Discrete(self._NUM_ROWS * self._NUM_COLS)
        self.action_space = spaces.Discrete(4)
        self.observation_space.seed(seed)
        self.action_space.seed(seed)
        # Start from a valid state so that calling step() before reset()
        # does not fail with an AttributeError.
        self.position = self._START

    def reset(self):
        """Move the agent back to the start cell and return its position (36)."""
        self.position = self._START
        return self.position

    def step(self, action):
        """Apply one move and return `(position, reward, done, info)`.

        Args:
            action: One of ACTION_UP, ACTION_RIGHT, ACTION_DOWN, ACTION_LEFT.

        Returns:
            A 4-tuple of the new position, the reward (10 when the goal is
            reached, otherwise -1), whether the goal was reached, and an
            empty info dict.

        Raises:
            ValueError: If `action` is not one of the four known actions.
                The position is left unchanged in that case.
        """
        row, col = divmod(self.position, self._NUM_COLS)

        # Compute the target cell, clamping at the board edges.
        if action == ACTION_UP:
            row = max(row - 1, 0)
        elif action == ACTION_RIGHT:
            col = min(col + 1, self._NUM_COLS - 1)
        elif action == ACTION_DOWN:
            row = min(row + 1, self._NUM_ROWS - 1)
        elif action == ACTION_LEFT:
            col = max(col - 1, 0)
        else:
            raise ValueError(f"action {action} not in {self.action_space}")

        # Walls are impassable: a move into one leaves the agent in place.
        target = row * self._NUM_COLS + col
        if target not in self._WALLS:
            self.position = target

        done = self.position == self._GOAL
        reward = 10 if done else -1
        return self.position, reward, done, {}
[RLlib] Add OPE Learning Tests (#27154) 2022-08-02 17:51:38 -07:00			`import gym`
			`from gym import spaces`

			`ACTION_UP = 0`
			`ACTION_RIGHT = 1`
			`ACTION_DOWN = 2`
			`ACTION_LEFT = 3`


			`class CliffWalkingWallEnv(gym.Env):`
			`"""Modified version of the CliffWalking environment from OpenAI Gym`
			`with walls instead of a cliff.`

			`### Description`
			`The board is a 4x12 matrix, with (using NumPy matrix indexing):`
			`- [3, 0] or obs==36 as the start at bottom-left`
			`- [3, 11] or obs==47 as the goal at bottom-right`
			`- [3, 1..10] or obs==37...46 as the cliff at bottom-center`

			`An episode terminates when the agent reaches the goal.`

			`### Actions`
			`There are 4 discrete deterministic actions:`
			`- 0: move up`
			`- 1: move right`
			`- 2: move down`
			`- 3: move left`
			`You can also use the constants ACTION_UP, ACTION_RIGHT, ... defined above.`

			`### Observations`
			`There are 3x12 + 2 possible states, not including the walls. If an action`
			`would move an agent into one of the walls, it simply stays in the same position.`

			`### Reward`
			`Each time step incurs -1 reward, except reaching the goal which gives +10 reward.`
			`"""`

[RLlib] Fix test_ope flakiness (#27676) 2022-08-09 16:12:30 -07:00			`def __init__(self, seed=42) -> None:`
[RLlib] Add OPE Learning Tests (#27154) 2022-08-02 17:51:38 -07:00			`self.observation_space = spaces.Discrete(48)`
			`self.action_space = spaces.Discrete(4)`
[RLlib] Fix test_ope flakiness (#27676) 2022-08-09 16:12:30 -07:00			`self.observation_space.seed(seed)`
			`self.action_space.seed(seed)`
[RLlib] Add OPE Learning Tests (#27154) 2022-08-02 17:51:38 -07:00
			`def reset(self):`
			`self.position = 36`
			`return self.position`

			`def step(self, action):`
			`x = self.position // 12`
			`y = self.position % 12`
			`# UP`
			`if action == ACTION_UP:`
			`x = max(x - 1, 0)`
			`# RIGHT`
			`elif action == ACTION_RIGHT:`
			`if self.position != 36:`
			`y = min(y + 1, 11)`
			`# DOWN`
			`elif action == ACTION_DOWN:`
			`if self.position < 25 or self.position > 34:`
			`x = min(x + 1, 3)`
			`# LEFT`
			`elif action == ACTION_LEFT:`
			`if self.position != 47:`
			`y = max(y - 1, 0)`
			`else:`
			`raise ValueError(f"action {action} not in {self.action_space}")`
			`self.position = x * 12 + y`
			`done = self.position == 47`
			`reward = -1 if not done else 10`
			`return self.position, reward, done, {}`