# ray/rllib/examples/env/env_using_remote_actor.py

"""
Example of an environment that uses a named remote actor as a parameter
server.

"""
from gym.envs.classic_control.cartpole import CartPoleEnv
from gym.utils import seeding

import ray


@ray.remote
class ParameterStorage:
    """Remote actor that hands out freshly sampled env parameters.

    The actor keeps no RNG of its own; every sample is drawn from the RNG
    object supplied by the caller.
    """

    def get_params(self, rng):
        """Sample one set of env parameters from the given RNG.

        Args:
            rng: Object exposing `uniform(low=..., high=...)`.

        Returns:
            Dict with the single key "MASSCART", drawn via
            `rng.uniform(low=0.5, high=2.0)`.
        """
        cart_mass = rng.uniform(low=0.5, high=2.0)
        return {"MASSCART": cart_mass}


class CartPoleWithRemoteParamServer(CartPoleEnv):
    """CartPole env whose cart mass is re-sampled on every `reset()`.

    The new cart mass is requested from a named remote actor (the
    "parameter server") that is looked up via `ray.get_actor`. This env's
    own RNG is passed along with each request, so the env does not rely on
    any RNG state held by the (possibly shared) actor.
    """

    def __init__(self, env_config):
        """Look up the param server actor and create an unseeded RNG.

        Args:
            env_config: Dict-like env config. The optional key
                "param_server" holds the name of the remote actor to use
                (default: "param-server"). Its `worker_index` attribute is
                read for the log output of `seed()` and `reset()`.
        """
        self.env_config = env_config
        super().__init__()
        # Get our param server (remote actor) by name.
        self._handler = ray.get_actor(
            env_config.get("param_server", "param-server"))
        # No explicit seed yet; `seed()` may set one later.
        self.rng_seed = None
        self.np_random, _ = seeding.np_random(self.rng_seed)

    def seed(self, rng_seed: int = None):
        """Seed this env's RNG; calling with `None` is a no-op.

        Args:
            rng_seed: Seed to store and to create the RNG from. Any integer
                (including 0) is accepted.
        """
        # Compare against None explicitly: 0 is a valid seed and must not
        # be mistaken for "no seed given".
        if rng_seed is None:
            return

        print(f"Seeding env (worker={self.env_config.worker_index}) "
              f"with {rng_seed}")

        self.rng_seed = rng_seed
        self.np_random, _ = seeding.np_random(rng_seed)

    def reset(self):
        """Fetch a new cart mass from the param server, then reset.

        Returns:
            Whatever the parent class' `reset()` returns.
        """
        # Pass in our RNG to guarantee no race conditions.
        # If `self._handler` had its own RNG, this may clash with other
        # envs trying to use the same param-server.
        params = ray.get(self._handler.get_params.remote(self.np_random))

        # IMPORTANT: `self.np_random` was serialized for the remote call
        # above, so the local copy was not advanced by it. Replace it with
        # a fresh RNG: re-created from the stored seed if one was set via
        # `seed()` (0 included), otherwise seeded with a number drawn from
        # the current RNG.
        if self.rng_seed is None:
            new_seed = self.np_random.randint(0, 1000000)
        else:
            new_seed = self.rng_seed
        self.np_random, _ = seeding.np_random(new_seed)

        print(f"Env worker-idx={self.env_config.worker_index} "
              f"mass={params['MASSCART']}")

        # Apply the sampled cart mass and recompute the attributes derived
        # alongside it before delegating to the parent's reset.
        self.masscart = params["MASSCART"]
        self.total_mass = (self.masspole + self.masscart)
        self.polemass_length = (self.masspole * self.length)

        return super().reset()