ray/rllib/examples/env/rock_paper_scissors.py

83 lines
3 KiB
Python
Raw Normal View History

from gym.spaces import Discrete
from ray.rllib.env.multi_agent_env import MultiAgentEnv
class RockPaperScissors(MultiAgentEnv):
"""Two-player environment for the famous rock paper scissors game.
The observation is simply the last opponent action."""
ROCK = 0
PAPER = 1
SCISSORS = 2
LIZARD = 3
SPOCK = 4
def __init__(self, config):
self.action_space = Discrete(3)
self.observation_space = Discrete(3)
self.sheldon_cooper = config.get("sheldon_cooper", False)
self.player1 = "player1"
self.player2 = "player2"
self.last_move = None
self.num_moves = 0
def reset(self):
self.last_move = (0, 0)
self.num_moves = 0
return {
self.player1: self.last_move[1],
self.player2: self.last_move[0],
}
def step(self, action_dict):
move1 = action_dict[self.player1]
move2 = action_dict[self.player2]
if self.sheldon_cooper is False:
assert move1 not in [self.LIZARD, self.SPOCK]
assert move2 not in [self.LIZARD, self.SPOCK]
self.last_move = (move1, move2)
obs = {
self.player1: self.last_move[1],
self.player2: self.last_move[0],
}
r1, r2 = {
(self.ROCK, self.ROCK): (0, 0),
(self.ROCK, self.PAPER): (-1, 1),
(self.ROCK, self.SCISSORS): (1, -1),
(self.PAPER, self.ROCK): (1, -1),
(self.PAPER, self.PAPER): (0, 0),
(self.PAPER, self.SCISSORS): (-1, 1),
(self.SCISSORS, self.ROCK): (-1, 1),
(self.SCISSORS, self.PAPER): (1, -1),
(self.SCISSORS, self.SCISSORS): (0, 0),
# Sheldon Cooper extension:
(self.LIZARD, self.LIZARD): (0, 0),
(self.LIZARD, self.SPOCK): (1, -1), # Lizard poisons Spock
(self.LIZARD, self.ROCK): (-1, 1), # Rock crushes lizard
(self.LIZARD, self.PAPER): (1, -1), # Lizard eats paper
(self.LIZARD, self.SCISSORS): (-1, 1), # Scissors decapitate Lizrd
(self.ROCK, self.LIZARD): (1, -1), # Rock crushes lizard
(self.PAPER, self.LIZARD): (-1, 1), # Lizard eats paper
(self.SCISSORS, self.LIZARD): (1, -1), # Scissors decapitate Lizrd
(self.SPOCK, self.SPOCK): (0, 0),
(self.SPOCK, self.LIZARD): (-1, 1), # Lizard poisons Spock
(self.SPOCK, self.ROCK): (1, -1), # Spock vaporizes rock
(self.SPOCK, self.PAPER): (-1, 1), # Paper disproves Spock
(self.SPOCK, self.SCISSORS): (1, -1), # Spock smashes scissors
(self.ROCK, self.SPOCK): (-1, 1), # Spock vaporizes rock
(self.PAPER, self.SPOCK): (1, -1), # Paper disproves Spock
(self.SCISSORS, self.SPOCK): (-1, 1), # Spock smashes scissors
}[move1, move2]
rew = {
self.player1: r1,
self.player2: r2,
}
self.num_moves += 1
done = {
"__all__": self.num_moves >= 10,
}
return obs, rew, done, {}