# ray/rllib/examples/policy/rock_paper_scissors_dummies.py

import gym
import numpy as np
import random

from ray.rllib.examples.env.rock_paper_scissors import RockPaperScissors
from ray.rllib.policy.policy import Policy
from ray.rllib.policy.view_requirement import ViewRequirement


class AlwaysSameHeuristic(Policy):
    """Heuristic policy that commits to one randomly chosen move.

    The move is drawn in `get_initial_state` and carried as the policy's
    single state value; `compute_actions` returns that state as the action
    and passes the state through unchanged.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to `Policy` and register the state view."""
        super().__init__(*args, **kwargs)
        self.exploration = self._create_exploration()

        # Scalar int32 space describing the single state value.
        state_space = gym.spaces.Box(0, 100, shape=(), dtype=np.int32)
        # "state_in_0" is sourced from "state_out_0" with shift=-1
        # (presumably the previous timestep's state output -- confirm
        # against the ViewRequirement documentation).
        state_view = ViewRequirement(
            "state_out_0", shift=-1, space=state_space)
        self.view_requirements.update({"state_in_0": state_view})

    def get_initial_state(self):
        """Return a one-element state list holding a random move."""
        candidate_moves = [
            RockPaperScissors.ROCK,
            RockPaperScissors.PAPER,
            RockPaperScissors.SCISSORS,
        ]
        chosen_move = random.choice(candidate_moves)
        return [chosen_move]

    def compute_actions(self,
                        obs_batch,
                        state_batches=None,
                        prev_action_batch=None,
                        prev_reward_batch=None,
                        info_batch=None,
                        episodes=None,
                        **kwargs):
        """Return the stored state as the actions.

        Only `state_batches` is used; every other argument is ignored.

        Returns:
            A 3-tuple of (`state_batches[0]`, `state_batches`, `{}`).
        """
        # The first (and only) state entry doubles as the action batch.
        actions = state_batches[0]
        return actions, state_batches, {}


class BeatLastHeuristic(Policy):
    """Heuristic policy answering each observed move with its counter.

    Rock is answered with paper, paper with scissors, and scissors with
    rock. The learning and weight-handling methods are no-ops.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to `Policy` and set up exploration."""
        super().__init__(*args, **kwargs)
        self.exploration = self._create_exploration()

    def compute_actions(self,
                        obs_batch,
                        state_batches=None,
                        prev_action_batch=None,
                        prev_reward_batch=None,
                        info_batch=None,
                        episodes=None,
                        **kwargs):
        """Compute the counter move for every observation in `obs_batch`.

        Each observation is indexed by a move constant and compared to 1
        (presumably a one-hot encoding of the opponent's last move --
        confirm against the environment). Only `obs_batch` is used.

        Returns:
            A 3-tuple of (list of actions, `[]`, `{}`). An action is None
            when none of the three indexed entries equals 1.
        """
        # (observed move, reply) pairs, checked in this order.
        counter_moves = (
            (RockPaperScissors.ROCK, RockPaperScissors.PAPER),
            (RockPaperScissors.PAPER, RockPaperScissors.SCISSORS),
            (RockPaperScissors.SCISSORS, RockPaperScissors.ROCK),
        )

        def successor(obs):
            for observed, reply in counter_moves:
                if obs[observed] == 1:
                    return reply
            return None

        actions = [successor(obs) for obs in obs_batch]
        return actions, [], {}

    def learn_on_batch(self, samples):
        """No-op; `samples` is ignored and None is returned."""
        return None

    def get_weights(self):
        """No-op; returns None."""
        return None

    def set_weights(self, weights):
        """No-op; `weights` is ignored and None is returned."""
        return None
[RLlib] Trajectory view API: Simple List Collector (on by default for PPO); LSTM-agnostic (#11056) 2020-10-01 16:57:10 +02:00			`import gym`
			`import numpy as np`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00			`import random`
Revert "RockPaperScissors Pettingzoo" (#16886) This reverts commit bf3e3225b62e813c04982ba72b9ff8d41c62f6e8. 2021-07-06 09:43:47 -07:00
			`from ray.rllib.examples.env.rock_paper_scissors import RockPaperScissors`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00			`from ray.rllib.policy.policy import Policy`
[RLlib] Trajectory view API: Simple List Collector (on by default for PPO); LSTM-agnostic (#11056) 2020-10-01 16:57:10 +02:00			`from ray.rllib.policy.view_requirement import ViewRequirement`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00

			`class AlwaysSameHeuristic(Policy):`
			`"""Pick a random move and stick with it for the entire episode."""`

			`def __init__(self, args, *kwargs):`
			`super().__init__(args, *kwargs)`
			`self.exploration = self._create_exploration()`
[RLlib] Trajectory view API: Simple List Collector (on by default for PPO); LSTM-agnostic (#11056) 2020-10-01 16:57:10 +02:00			`self.view_requirements.update({`
			`"state_in_0": ViewRequirement(`
			`"state_out_0",`
[RLlib] Attention Net prep PR #3. (#12450) 2020-12-07 13:08:17 +01:00			`shift=-1,`
[RLlib] Trajectory view API: Simple List Collector (on by default for PPO); LSTM-agnostic (#11056) 2020-10-01 16:57:10 +02:00			`space=gym.spaces.Box(0, 100, shape=(), dtype=np.int32))`
			`})`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
			`def get_initial_state(self):`
Revert "RockPaperScissors Pettingzoo" (#16886) This reverts commit bf3e3225b62e813c04982ba72b9ff8d41c62f6e8. 2021-07-06 09:43:47 -07:00			`return [`
			`random.choice([`
			`RockPaperScissors.ROCK, RockPaperScissors.PAPER,`
			`RockPaperScissors.SCISSORS`
			`])`
			`]`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
			`def compute_actions(self,`
			`obs_batch,`
			`state_batches=None,`
			`prev_action_batch=None,`
			`prev_reward_batch=None,`
			`info_batch=None,`
			`episodes=None,`
			`**kwargs):`
[RLlib] Trajectory view API: Enable by default for PPO, IMPALA, PG, A3C (tf and torch). (#11747) 2020-11-12 16:27:34 +01:00			`return state_batches[0], state_batches, {}`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00

			`class BeatLastHeuristic(Policy):`
			`"""Play the move that would beat the last move of the opponent."""`

			`def __init__(self, args, *kwargs):`
			`super().__init__(args, *kwargs)`
			`self.exploration = self._create_exploration()`

			`def compute_actions(self,`
			`obs_batch,`
			`state_batches=None,`
			`prev_action_batch=None,`
			`prev_reward_batch=None,`
			`info_batch=None,`
			`episodes=None,`
			`**kwargs):`
			`def successor(x):`
Revert "RockPaperScissors Pettingzoo" (#16886) This reverts commit bf3e3225b62e813c04982ba72b9ff8d41c62f6e8. 2021-07-06 09:43:47 -07:00			`if x[RockPaperScissors.ROCK] == 1:`
			`return RockPaperScissors.PAPER`
			`elif x[RockPaperScissors.PAPER] == 1:`
			`return RockPaperScissors.SCISSORS`
			`elif x[RockPaperScissors.SCISSORS] == 1:`
			`return RockPaperScissors.ROCK`
[RLlib] Examples folder restructuring (models) part 1 (#8353) 2020-05-08 08:20:18 +02:00
			`return [successor(x) for x in obs_batch], [], {}`

			`def learn_on_batch(self, samples):`
			`pass`

			`def get_weights(self):`
			`pass`

			`def set_weights(self, weights):`
			`pass`