ray/rllib/examples/policy/random_policy.py


import random

import numpy as np
from gym.spaces import Box

from ray.rllib.policy.policy import Policy
from ray.rllib.utils.annotations import override
from ray.rllib.utils.typing import ModelWeights


class RandomPolicy(Policy):
    """Hand-coded policy that returns random actions."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Whether for compute_actions, the bounds given in action_space
        # should be ignored (default: False). This is to test action-clipping
        # and any Env's reaction to bounds breaches.
        if self.config.get("ignore_action_bounds", False) and \
                isinstance(self.action_space, Box):
            self.action_space_for_sampling = Box(
                -float("inf"),
                float("inf"),
                shape=self.action_space.shape,
                dtype=self.action_space.dtype)
        else:
            self.action_space_for_sampling = self.action_space
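
    # Example (illustrative assumption, not part of the original file): with
    # config {"ignore_action_bounds": True} and action_space=Box(-1.0, 1.0,
    # shape=(1,)), sampling uses Box(-inf, inf, shape=(1,)) instead, so
    # returned actions can lie outside [-1.0, 1.0] and thereby exercise any
    # downstream action-clipping logic in the env or the rollout worker.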

    @override(Policy)
    def compute_actions(self,
                        obs_batch,
                        state_batches=None,
                        prev_action_batch=None,
                        prev_reward_batch=None,
                        **kwargs):
        # Return one random sample per observation, an empty RNN-state-outs
        # list, and an empty extra-fetches dict.
        # Alternatively, a numpy array would work here as well.
        # e.g.: np.array([random.choice([0, 1])] * len(obs_batch))
        return [self.action_space_for_sampling.sample() for _ in obs_batch], \
            [], {}

    @override(Policy)
    def learn_on_batch(self, samples):
        """No learning."""
        return {}

    @override(Policy)
    def compute_log_likelihoods(self,
                                actions,
                                obs_batch,
                                state_batches=None,
                                prev_action_batch=None,
                                prev_reward_batch=None):
        # Dummy values: one uniform random number in [0, 1) per observation.
        return np.array([random.random()] * len(obs_batch))

    @override(Policy)
    def get_weights(self) -> ModelWeights:
        """No weights to save."""
        return {}

    @override(Policy)
    def set_weights(self, weights: ModelWeights) -> None:
        """No weights to set."""
        pass
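

# ----------------------------------------------------------------------
# Minimal usage sketch (not part of the original file; it assumes the base
# Policy constructor only needs an observation space, an action space, and
# a config dict). The policy is instantiated directly and asked for a
# batch of random actions.
if __name__ == "__main__":
    from gym.spaces import Discrete

    policy = RandomPolicy(
        Box(-1.0, 1.0, shape=(4,)),  # observation space
        Discrete(2),  # action space
        {},  # config; "ignore_action_bounds" left at its False default
    )
    # Three dummy observations -> three random Discrete(2) samples.
    actions, state_outs, info = policy.compute_actions(
        obs_batch=np.zeros((3, 4)))
    print(actions)  # e.g. [1, 0, 1]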