ray/rllib/examples/env/multi_agent.py

import gym
import numpy as np
import random

from ray.rllib.env.multi_agent_env import MultiAgentEnv, make_multi_agent
from ray.rllib.examples.env.mock_env import MockEnv, MockEnv2
from ray.rllib.examples.env.stateless_cartpole import StatelessCartPole
from ray.rllib.utils.deprecation import Deprecated


@Deprecated(
    old="ray.rllib.examples.env.multi_agent.make_multiagent",
    new="ray.rllib.env.multi_agent_env.make_multi_agent",
    error=False,
)
def make_multiagent(env_name_or_creator):
    """Deprecated alias kept for callers of the old `make_multiagent` name.

    Forwards its single argument unchanged to `make_multi_agent` and returns
    that call's result.

    Args:
        env_name_or_creator: Passed through as-is to `make_multi_agent`.
            Elsewhere in this module it is called with either an env name
            string or a callable taking a config.

    Returns:
        Whatever `make_multi_agent(env_name_or_creator)` returns.
    """
    # NOTE(review): with `error=False` the decorator presumably only warns
    # instead of raising -- confirm against `ray.rllib.utils.deprecation`.
    return make_multi_agent(env_name_or_creator)


class BasicMultiAgent(MultiAgentEnv):
    """Multi-agent env made of N independent agents.

    Every agent is backed by its own `MockEnv(25)` (each one exits after 25
    steps) and is addressed by its integer index into `self.agents`.
    """

    metadata = {"render.modes": ["rgb_array"]}

    def __init__(self, num):
        """Create `num` sub-envs plus the Discrete(2) obs/action spaces.

        Args:
            num: Number of agents (and hence sub-envs) to create.
        """
        super().__init__()
        self.agents = [MockEnv(25) for _unused in range(num)]
        self._agent_ids = set(range(num))
        # Indices of the agents that reported done in the current episode.
        self.dones = set()
        self.observation_space = gym.spaces.Discrete(2)
        self.action_space = gym.spaces.Discrete(2)
        # Set to True by `reset()`.
        self.resetted = False

    def reset(self):
        """Reset all sub-envs and return their first observations.

        Returns:
            Dict mapping each agent index to that sub-env's `reset()` result.
        """
        self.resetted = True
        self.dones = set()
        initial_obs = {}
        for agent_id, sub_env in enumerate(self.agents):
            initial_obs[agent_id] = sub_env.reset()
        return initial_obs

    def step(self, action_dict):
        """Step every agent that has an entry in `action_dict`.

        Args:
            action_dict: Dict mapping agent index to the action for that agent.

        Returns:
            Tuple `(obs, rew, done, info)` of dicts keyed by agent index.
            `done` additionally carries the "__all__" key, which is True once
            as many agents are recorded in `self.dones` as there are sub-envs.
        """
        obs, rew, done, info = {}, {}, {}, {}
        for agent_id, action in action_dict.items():
            step_out = self.agents[agent_id].step(action)
            obs[agent_id], rew[agent_id], done[agent_id], info[agent_id] = step_out
            if done[agent_id]:
                self.dones.add(agent_id)
        all_finished = len(self.dones) == len(self.agents)
        done["__all__"] = all_finished
        return obs, rew, done, info

    def render(self, mode="rgb_array"):
        """Return a random uint8 image of shape (200, 300, 3).

        The `mode` argument is accepted but not inspected.
        """
        # Just generate a random image here for demonstration purposes.
        # Also see `gym/envs/classic_control/cartpole.py` for
        # an example on how to use a Viewer object.
        frame_shape = (200, 300, 3)
        return np.random.randint(0, 256, size=frame_shape, dtype=np.uint8)


class EarlyDoneMultiAgent(MultiAgentEnv):
    """Env for testing early termination of the whole env.

    Holds two sub-envs (`MockEnv(3)` and `MockEnv(5)`). Only one agent's data
    is returned per call, cycling through the agents via `self.i`. The
    "__all__" done flag is raised as soon as all but one agent are done,
    i.e. the env terminates after agent 0 does.
    """

    def __init__(self):
        """Build the two sub-envs, the spaces and empty per-agent caches."""
        super().__init__()
        self.agents = [MockEnv(3), MockEnv(5)]
        self._agent_ids = set(range(len(self.agents)))
        # Indices of agents that were reported as done in this episode.
        self.dones = set()
        # Most recent step results per agent index.
        self.last_obs, self.last_rew = {}, {}
        self.last_done, self.last_info = {}, {}
        # Index of the agent whose data is returned next.
        self.i = 0
        self.observation_space = gym.spaces.Discrete(10)
        self.action_space = gym.spaces.Discrete(2)

    def reset(self):
        """Reset all sub-envs; return only agent 0's initial observation.

        Returns:
            Single-entry dict `{0: <agent 0's reset() observation>}`.
        """
        self.dones = set()
        self.last_obs, self.last_rew = {}, {}
        self.last_done, self.last_info = {}, {}
        self.i = 0
        for agent_id, sub_env in enumerate(self.agents):
            self.last_obs[agent_id] = sub_env.reset()
            self.last_rew[agent_id] = 0
            self.last_done[agent_id] = False
            self.last_info[agent_id] = {}
        current = self.i
        first_obs = {current: self.last_obs[current]}
        self.i = (current + 1) % len(self.agents)
        return first_obs

    def step(self, action_dict):
        """Apply the given actions, then report on the agent whose turn it is.

        Args:
            action_dict: Dict mapping agent index to that agent's action.

        Returns:
            Tuple `(obs, rew, done, info)`; each dict holds one entry for the
            current agent, and `done` also holds "__all__".
        """
        assert len(self.dones) != len(self.agents)
        for agent_id, action in action_dict.items():
            agent_obs, agent_rew, agent_done, agent_info = self.agents[
                agent_id
            ].step(action)
            self.last_obs[agent_id] = agent_obs
            self.last_rew[agent_id] = agent_rew
            self.last_done[agent_id] = agent_done
            self.last_info[agent_id] = agent_info

        current = self.i
        obs = {current: self.last_obs[current]}
        rew = {current: self.last_rew[current]}
        done = {current: self.last_done[current]}
        info = {current: self.last_info[current]}
        if done[current]:
            # A finished agent is reported with zero reward; the cached
            # `last_rew` entry itself is left as is.
            rew[current] = 0
            self.dones.add(current)
        self.i = (current + 1) % len(self.agents)
        # Terminate the env when all but one agent are done.
        done["__all__"] = len(self.dones) == len(self.agents) - 1
        return obs, rew, done, info


class FlexAgentsMultiAgent(MultiAgentEnv):
    """Env whose set of agents changes while an episode is running.

    Each agent is backed by its own `MockEnv(25)`. During `step()`, new
    agents are randomly spawned and existing agents are randomly removed.
    Agent IDs come from the counter `self.agentID`, which `reset()` does not
    rewind, so IDs keep growing across episodes.
    """

    def __init__(self):
        """Start with no agents; the first one is spawned in `reset()`."""
        super().__init__()
        # Maps agent ID -> sub-env of that agent.
        self.agents = {}
        self._agent_ids = set()
        # ID that the next spawned agent will get.
        self.agentID = 0
        # IDs of agents whose own sub-env reported done.
        self.dones = set()
        self.observation_space = gym.spaces.Discrete(2)
        self.action_space = gym.spaces.Discrete(2)
        # Set to True by `reset()`.
        self.resetted = False

    def spawn(self):
        """Add a new agent to the current episode.

        Returns:
            The ID assigned to the newly created agent.
        """
        new_id = self.agentID
        self.agents[new_id] = MockEnv(25)
        self._agent_ids.add(new_id)
        self.agentID = new_id + 1
        return new_id

    def reset(self):
        """Drop all agents, spawn a single fresh one and reset it.

        Returns:
            Dict mapping the ID of each current agent to its `reset()` result.
        """
        self.agents = {}
        self._agent_ids = set()
        self.spawn()
        self.resetted = True
        self.dones = set()
        return {agent_id: sub_env.reset() for agent_id, sub_env in self.agents.items()}

    def step(self, action_dict):
        """Step the acting agents, then randomly spawn and/or kill an agent.

        Args:
            action_dict: Dict mapping agent ID to that agent's action.

        Returns:
            Tuple `(obs, rew, done, info)` of dicts keyed by agent ID; `done`
            also holds "__all__".
        """
        obs, rew, done, info = {}, {}, {}, {}

        # Apply the actions, remembering the most recent one: a newly spawned
        # agent (below) is stepped with that same action.
        last_action = None
        for agent_id, action in action_dict.items():
            last_action = action
            step_out = self.agents[agent_id].step(action)
            obs[agent_id], rew[agent_id], done[agent_id], info[agent_id] = step_out
            if done[agent_id]:
                self.dones.add(agent_id)

        # Sometimes, add a new agent to the episode (only if somebody acted).
        spawn_roll = random.random()
        if spawn_roll > 0.75 and len(action_dict) > 0:
            new_id = self.spawn()
            step_out = self.agents[new_id].step(last_action)
            obs[new_id], rew[new_id], done[new_id], info[new_id] = step_out
            if done[new_id]:
                self.dones.add(new_id)

        # Sometimes, kill an existing agent (never the only remaining one).
        # NOTE(review): a killed agent is removed from `self.agents` but not
        # added to `self.dones`, and it may have no obs/rew/info entry in this
        # step's return value -- confirm this is intended.
        if len(self.agents) > 1 and random.random() > 0.25:
            victim = random.choice(list(self.agents))
            done[victim] = True
            self.agents.pop(victim)

        done["__all__"] = len(self.dones) == len(self.agents)
        return obs, rew, done, info


class RoundRobinMultiAgent(MultiAgentEnv):
    """Env of N independent agents, each of which exits after 5 steps.

    Agents take turns: every `step()` (and `reset()`) returns data for one
    agent only, and `self.i` then moves on to the next agent index.
    """

    def __init__(self, num, increment_obs=False):
        """Create `num` sub-envs and empty per-agent caches.

        Args:
            num: Number of agents.
            increment_obs: If True, use `MockEnv2(5)` sub-envs (observations
                are 0, 1, 2, 3... as time advances); otherwise use
                `MockEnv(5)` sub-envs (observations are all zeros).
        """
        super().__init__()
        sub_env_cls = MockEnv2 if increment_obs else MockEnv
        self.agents = [sub_env_cls(5) for _unused in range(num)]
        self._agent_ids = set(range(num))
        # Indices of agents that were reported as done in this episode.
        self.dones = set()
        # Most recent step results per agent index.
        self.last_obs, self.last_rew = {}, {}
        self.last_done, self.last_info = {}, {}
        # Index of the agent whose data is returned next.
        self.i = 0
        self.num = num
        self.observation_space = gym.spaces.Discrete(10)
        self.action_space = gym.spaces.Discrete(2)

    def reset(self):
        """Reset all sub-envs; return only agent 0's initial observation.

        Returns:
            Single-entry dict `{0: <agent 0's reset() observation>}`.
        """
        self.dones = set()
        self.last_obs, self.last_rew = {}, {}
        self.last_done, self.last_info = {}, {}
        self.i = 0
        for agent_id, sub_env in enumerate(self.agents):
            self.last_obs[agent_id] = sub_env.reset()
            self.last_rew[agent_id] = 0
            self.last_done[agent_id] = False
            self.last_info[agent_id] = {}
        current = self.i
        first_obs = {current: self.last_obs[current]}
        self.i = (current + 1) % self.num
        return first_obs

    def step(self, action_dict):
        """Apply the given actions, then report on the agent whose turn it is.

        Args:
            action_dict: Dict mapping agent index to that agent's action.

        Returns:
            Tuple `(obs, rew, done, info)`; each dict holds one entry for the
            current agent, and `done` also holds "__all__", which is True
            once every agent has been recorded in `self.dones`.
        """
        assert len(self.dones) != len(self.agents)
        for agent_id, action in action_dict.items():
            agent_obs, agent_rew, agent_done, agent_info = self.agents[
                agent_id
            ].step(action)
            self.last_obs[agent_id] = agent_obs
            self.last_rew[agent_id] = agent_rew
            self.last_done[agent_id] = agent_done
            self.last_info[agent_id] = agent_info

        current = self.i
        obs = {current: self.last_obs[current]}
        rew = {current: self.last_rew[current]}
        done = {current: self.last_done[current]}
        info = {current: self.last_info[current]}
        if done[current]:
            # A finished agent is reported with zero reward; the cached
            # `last_rew` entry itself is left as is.
            rew[current] = 0
            self.dones.add(current)
        self.i = (current + 1) % self.num
        done["__all__"] = len(self.dones) == len(self.agents)
        return obs, rew, done, info


# Module-level multi-agent env aliases, each built by handing an env name
# string to `make_multi_agent`.
# NOTE(review): presumably each result is a multi-agent env class wrapping
# copies of the named env -- confirm against `make_multi_agent`.
MultiAgentCartPole = make_multi_agent("CartPole-v0")
MultiAgentMountainCar = make_multi_agent("MountainCarContinuous-v0")
MultiAgentPendulum = make_multi_agent("Pendulum-v1")
# Here a creator callable is passed instead of a name: it takes a config and
# builds a `StatelessCartPole` from it.
MultiAgentStatelessCartPole = make_multi_agent(lambda config: StatelessCartPole(config))
[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00			`import gym`
[RLlib] Issue 21629: Video recorder env wrapper not working. Added test case. (#21670) 2022-01-24 19:38:21 +01:00			`import numpy as np`
[RLlib] Issue #14022: Trajectory View API fails in MA-env where a new agent terminates right away (done=True right after initial obs). (#14031) 2021-02-18 14:07:49 +01:00			`import random`
[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00
[RLlib] Env directory cleanup and tests. (#13082) 2021-01-19 10:09:39 +01:00			`from ray.rllib.env.multi_agent_env import MultiAgentEnv, make_multi_agent`
[RLlib] Batch-size for truncate_episode batch_mode should be confgurable in agent-steps (rather than env-steps), if needed. (#12420) 2020-12-09 01:41:45 +01:00			`from ray.rllib.examples.env.mock_env import MockEnv, MockEnv2`
[RLlib] Trajectory View API (preparatory cleanup and enhancements). (#9678) 2020-07-29 21:15:09 +02:00			`from ray.rllib.examples.env.stateless_cartpole import StatelessCartPole`
[RLlib; Docs overhaul] Docstring cleanup: rllib/utils (#19829) 2021-11-01 21:46:02 +01:00			`from ray.rllib.utils.deprecation import Deprecated`
[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00

[RLlib] Add @Deprecated decorator to simplify/unify deprecation of classes, methods, functions. (#17530) 2021-08-03 18:30:02 -04:00			`@Deprecated(`
			`old="ray.rllib.examples.env.multi_agent.make_multiagent",`
			`new="ray.rllib.env.multi_agent_env.make_multi_agent",`
			`error=False,`
			`)`
[RLlib] Unity3D integration (n Unity3D clients vs learning server). (#8590) 2020-05-30 22:48:34 +02:00			`def make_multiagent(env_name_or_creator):`
[RLlib] Env directory cleanup and tests. (#13082) 2021-01-19 10:09:39 +01:00			`return make_multi_agent(env_name_or_creator)`
[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00

			`class BasicMultiAgent(MultiAgentEnv):`
			`"""Env of N independent agents, each of which exits after 25 steps."""`

[RLlib] Issue 21629: Video recorder env wrapper not working. Added test case. (#21670) 2022-01-24 19:38:21 +01:00			`metadata = {`
			`"render.modes": ["rgb_array"],`
			`}`

[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00			`def __init__(self, num):`
[RLlib] Base env pre-checker. (#21569) 2022-01-18 07:34:06 -08:00			`super().__init__()`
[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00			`self.agents = [MockEnv(25) for _ in range(num)]`
[RLlib] Put env-checker on critical path. (#22191) 2022-02-17 05:06:14 -08:00			`self._agent_ids = set(range(num))`
[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00			`self.dones = set()`
			`self.observation_space = gym.spaces.Discrete(2)`
			`self.action_space = gym.spaces.Discrete(2)`
			`self.resetted = False`

			`def reset(self):`
			`self.resetted = True`
			`self.dones = set()`
			`return {i: a.reset() for i, a in enumerate(self.agents)}`

			`def step(self, action_dict):`
			`obs, rew, done, info = {}, {}, {}, {}`
			`for i, action in action_dict.items():`
			`obs[i], rew[i], done[i], info[i] = self.agents[i].step(action)`
			`if done[i]:`
			`self.dones.add(i)`
			`done["__all__"] = len(self.dones) == len(self.agents)`
			`return obs, rew, done, info`

[RLlib] Issue 21629: Video recorder env wrapper not working. Added test case. (#21670) 2022-01-24 19:38:21 +01:00			`def render(self, mode="rgb_array"):`
			`# Just generate a random image here for demonstration purposes.`
			# Also see `gym/envs/classic_control/cartpole.py` for
			`# an example on how to use a Viewer object.`
			`return np.random.randint(0, 256, size=(200, 300, 3), dtype=np.uint8)`

[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00
			`class EarlyDoneMultiAgent(MultiAgentEnv):`
			`"""Env for testing when the env terminates (after agent 0 does)."""`

			`def __init__(self):`
[RLlib] Base env pre-checker. (#21569) 2022-01-18 07:34:06 -08:00			`super().__init__()`
[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00			`self.agents = [MockEnv(3), MockEnv(5)]`
[RLlib] Put env-checker on critical path. (#22191) 2022-02-17 05:06:14 -08:00			`self._agent_ids = set(range(len(self.agents)))`
[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00			`self.dones = set()`
			`self.last_obs = {}`
			`self.last_rew = {}`
			`self.last_done = {}`
			`self.last_info = {}`
			`self.i = 0`
			`self.observation_space = gym.spaces.Discrete(10)`
			`self.action_space = gym.spaces.Discrete(2)`

			`def reset(self):`
			`self.dones = set()`
			`self.last_obs = {}`
			`self.last_rew = {}`
			`self.last_done = {}`
			`self.last_info = {}`
			`self.i = 0`
			`for i, a in enumerate(self.agents):`
			`self.last_obs[i] = a.reset()`
[RLlib] Put env-checker on critical path. (#22191) 2022-02-17 05:06:14 -08:00			`self.last_rew[i] = 0`
[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00			`self.last_done[i] = False`
			`self.last_info[i] = {}`
			`obs_dict = {self.i: self.last_obs[self.i]}`
			`self.i = (self.i + 1) % len(self.agents)`
			`return obs_dict`

			`def step(self, action_dict):`
			`assert len(self.dones) != len(self.agents)`
			`for i, action in action_dict.items():`
			`(`
			`self.last_obs[i],`
			`self.last_rew[i],`
			`self.last_done[i],`
			`self.last_info[i],`
			`) = self.agents[i].step(action)`
			`obs = {self.i: self.last_obs[self.i]}`
			`rew = {self.i: self.last_rew[self.i]}`
			`done = {self.i: self.last_done[self.i]}`
			`info = {self.i: self.last_info[self.i]}`
			`if done[self.i]:`
			`rew[self.i] = 0`
			`self.dones.add(self.i)`
			`self.i = (self.i + 1) % len(self.agents)`
			`done["__all__"] = len(self.dones) == len(self.agents) - 1`
			`return obs, rew, done, info`


[RLlib] Issue #14022: Trajectory View API fails in MA-env where a new agent terminates right away (done=True right after initial obs). (#14031) 2021-02-18 14:07:49 +01:00			`class FlexAgentsMultiAgent(MultiAgentEnv):`
			`"""Env of independent agents, each of which exits after n steps."""`

			`def __init__(self):`
[RLlib] Base env pre-checker. (#21569) 2022-01-18 07:34:06 -08:00			`super().__init__()`
[RLlib] Issue #14022: Trajectory View API fails in MA-env where a new agent terminates right away (done=True right after initial obs). (#14031) 2021-02-18 14:07:49 +01:00			`self.agents = {}`
[RLlib] Put env-checker on critical path. (#22191) 2022-02-17 05:06:14 -08:00			`self._agent_ids = set()`
[RLlib] Issue #14022: Trajectory View API fails in MA-env where a new agent terminates right away (done=True right after initial obs). (#14031) 2021-02-18 14:07:49 +01:00			`self.agentID = 0`
			`self.dones = set()`
			`self.observation_space = gym.spaces.Discrete(2)`
			`self.action_space = gym.spaces.Discrete(2)`
			`self.resetted = False`

			`def spawn(self):`
			`# Spawn a new agent into the current episode.`
			`agentID = self.agentID`
			`self.agents[agentID] = MockEnv(25)`
[RLlib] Put env-checker on critical path. (#22191) 2022-02-17 05:06:14 -08:00			`self._agent_ids.add(agentID)`
[RLlib] Issue #14022: Trajectory View API fails in MA-env where a new agent terminates right away (done=True right after initial obs). (#14031) 2021-02-18 14:07:49 +01:00			`self.agentID += 1`
			`return agentID`

			`def reset(self):`
			`self.agents = {}`
[RLlib] Put env-checker on critical path. (#22191) 2022-02-17 05:06:14 -08:00			`self._agent_ids = set()`
[RLlib] Issue #14022: Trajectory View API fails in MA-env where a new agent terminates right away (done=True right after initial obs). (#14031) 2021-02-18 14:07:49 +01:00			`self.spawn()`
			`self.resetted = True`
			`self.dones = set()`
			`obs = {}`
			`for i, a in self.agents.items():`
			`obs[i] = a.reset()`

			`return obs`

			`def step(self, action_dict):`
			`obs, rew, done, info = {}, {}, {}, {}`
			`# Apply the actions.`
			`for i, action in action_dict.items():`
			`obs[i], rew[i], done[i], info[i] = self.agents[i].step(action)`
			`if done[i]:`
			`self.dones.add(i)`

			`# Sometimes, add a new agent to the episode.`
[RLlib] Memory leak finding toolset using tracemalloc + CI memory leak tests. (#15412) 2022-04-12 07:50:09 +02:00			`if random.random() > 0.75 and len(action_dict) > 0:`
[RLlib] Issue #14022: Trajectory View API fails in MA-env where a new agent terminates right away (done=True right after initial obs). (#14031) 2021-02-18 14:07:49 +01:00			`i = self.spawn()`
			`obs[i], rew[i], done[i], info[i] = self.agents[i].step(action)`
			`if done[i]:`
			`self.dones.add(i)`

			`# Sometimes, kill an existing agent.`
			`if len(self.agents) > 1 and random.random() > 0.25:`
			`keys = list(self.agents.keys())`
			`key = random.choice(keys)`
			`done[key] = True`
			`del self.agents[key]`

			`done["__all__"] = len(self.dones) == len(self.agents)`
			`return obs, rew, done, info`


[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00			`class RoundRobinMultiAgent(MultiAgentEnv):`
			`"""Env of N independent agents, each of which exits after 5 steps.`

			`On each step() of the env, only one agent takes an action."""`

			`def __init__(self, num, increment_obs=False):`
[RLlib] Base env pre-checker. (#21569) 2022-01-18 07:34:06 -08:00			`super().__init__()`
[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00			`if increment_obs:`
			`# Observations are 0, 1, 2, 3... etc. as time advances`
			`self.agents = [MockEnv2(5) for _ in range(num)]`
			`else:`
			`# Observations are all zeros`
			`self.agents = [MockEnv(5) for _ in range(num)]`
[RLlib] Put env-checker on critical path. (#22191) 2022-02-17 05:06:14 -08:00			`self._agent_ids = set(range(num))`
[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00			`self.dones = set()`
			`self.last_obs = {}`
			`self.last_rew = {}`
			`self.last_done = {}`
			`self.last_info = {}`
			`self.i = 0`
			`self.num = num`
			`self.observation_space = gym.spaces.Discrete(10)`
			`self.action_space = gym.spaces.Discrete(2)`

			`def reset(self):`
			`self.dones = set()`
			`self.last_obs = {}`
			`self.last_rew = {}`
			`self.last_done = {}`
			`self.last_info = {}`
			`self.i = 0`
			`for i, a in enumerate(self.agents):`
			`self.last_obs[i] = a.reset()`
[RLlib] Put env-checker on critical path. (#22191) 2022-02-17 05:06:14 -08:00			`self.last_rew[i] = 0`
[RLlib] rllib/examples folder restructuring (#8250) Cleans up of the rllib/examples folder by moving all example Envs into rllibexamples/env (so they can be used by other scripts and tests as well). 2020-05-01 22:59:34 +02:00			`self.last_done[i] = False`
			`self.last_info[i] = {}`
			`obs_dict = {self.i: self.last_obs[self.i]}`
			`self.i = (self.i + 1) % self.num`
			`return obs_dict`

			`def step(self, action_dict):`
			`assert len(self.dones) != len(self.agents)`
			`for i, action in action_dict.items():`
			`(`
			`self.last_obs[i],`
			`self.last_rew[i],`
			`self.last_done[i],`
			`self.last_info[i],`
			`) = self.agents[i].step(action)`
			`obs = {self.i: self.last_obs[self.i]}`
			`rew = {self.i: self.last_rew[self.i]}`
			`done = {self.i: self.last_done[self.i]}`
			`info = {self.i: self.last_info[self.i]}`
			`if done[self.i]:`
			`rew[self.i] = 0`
			`self.dones.add(self.i)`
			`self.i = (self.i + 1) % self.num`
			`done["__all__"] = len(self.dones) == len(self.agents)`
			`return obs, rew, done, info`


[RLlib] Env directory cleanup and tests. (#13082) 2021-01-19 10:09:39 +01:00			`MultiAgentCartPole = make_multi_agent("CartPole-v0")`
			`MultiAgentMountainCar = make_multi_agent("MountainCarContinuous-v0")`
[RLlib] Upgrade gym version to 0.21 and deprecate pendulum-v0. (#19535) * Fix QMix, SAC, and MADDPA too. * Unpin gym and deprecate pendulum v0 Many tests in rllib depended on pendulum v0, however in gym 0.21, pendulum v0 was deprecated in favor of pendulum v1. This may change reward thresholds, so will have to potentially rerun all of the pendulum v1 benchmarks, or use another environment in favor. The same applies to frozen lake v0 and frozen lake v1 Lastly, all of the RLlib tests and have been moved to python 3.7 * Add gym installation based on python version. Pin python<= 3.6 to gym 0.19 due to install issues with atari roms in gym 0.20 * Reformatting * Fixing tests * Move atari-py install conditional to req.txt * migrate to new ale install method * Fix QMix, SAC, and MADDPA too. * Unpin gym and deprecate pendulum v0 Many tests in rllib depended on pendulum v0, however in gym 0.21, pendulum v0 was deprecated in favor of pendulum v1. This may change reward thresholds, so will have to potentially rerun all of the pendulum v1 benchmarks, or use another environment in favor. The same applies to frozen lake v0 and frozen lake v1 Lastly, all of the RLlib tests and have been moved to python 3.7 * Add gym installation based on python version. Pin python<= 3.6 to gym 0.19 due to install issues with atari roms in gym 0.20 Move atari-py install conditional to req.txt migrate to new ale install method Make parametric_actions_cartpole return float32 actions/obs Adding type conversions if obs/actions don't match space Add utils to make elements match gym space dtypes Co-authored-by: Jun Gong <jungong@anyscale.com> Co-authored-by: sven1977 <svenmika1977@gmail.com> 2021-11-03 08:24:00 -07:00			`MultiAgentPendulum = make_multi_agent("Pendulum-v1")`
[RLlib] Env directory cleanup and tests. (#13082) 2021-01-19 10:09:39 +01:00			`MultiAgentStatelessCartPole = make_multi_agent(lambda config: StatelessCartPole(config))`