ray/rllib/examples/documentation/custom_gym_env.py

# __rllib-custom-gym-env-begin__
import gym

import ray
from ray.rllib.agents import ppo


class SimpleCorridor(gym.Env):
    def __init__(self, config):
        self.end_pos = config["corridor_length"]
        self.cur_pos = 0
        self.action_space = gym.spaces.Discrete(2)  # right/left
        self.observation_space = gym.spaces.Discrete(self.end_pos)

    def reset(self):
        self.cur_pos = 0
        return self.cur_pos

    def step(self, action):
        if action == 0 and self.cur_pos > 0:  # move right (towards goal)
            self.cur_pos -= 1
        elif action == 1:  # move left (towards start)
            self.cur_pos += 1
        if self.cur_pos >= self.end_pos:
            return 0, 1.0, True, {}
        else:
            return self.cur_pos, -0.1, False, {}


ray.init()
config = {
    "env": SimpleCorridor,
    "env_config": {
        "corridor_length": 5,
    },
}

trainer = ppo.PPOTrainer(config=config)
for _ in range(3):
    print(trainer.train())
# __rllib-custom-gym-env-end__
[RLlib; Documentation] Some docstring cleanups; Rename RemoteVectorEnv into RemoteBaseEnv for clarity. (#20250) 2021-11-17 21:40:16 +01:00			`# __rllib-custom-gym-env-begin__`
			`import gym`

			`import ray`
			`from ray.rllib.agents import ppo`


			`class SimpleCorridor(gym.Env):`
			`def __init__(self, config):`
			`self.end_pos = config["corridor_length"]`
			`self.cur_pos = 0`
			`self.action_space = gym.spaces.Discrete(2) # right/left`
			`self.observation_space = gym.spaces.Discrete(self.end_pos)`

			`def reset(self):`
			`self.cur_pos = 0`
			`return self.cur_pos`

			`def step(self, action):`
			`if action == 0 and self.cur_pos > 0: # move right (towards goal)`
			`self.cur_pos -= 1`
			`elif action == 1: # move left (towards start)`
			`self.cur_pos += 1`
			`if self.cur_pos >= self.end_pos:`
			`return 0, 1.0, True, {}`
			`else:`
			`return self.cur_pos, -0.1, False, {}`


			`ray.init()`
			`config = {`
			`"env": SimpleCorridor,`
			`"env_config": {`
			`"corridor_length": 5,`
			`},`
			`}`

			`trainer = ppo.PPOTrainer(config=config)`
			`for _ in range(3):`
			`print(trainer.train())`
			`# __rllib-custom-gym-env-end__`