[RLlib] Revert "Revert "updated pettingzoo wrappers, env versions, urls"" (#21339)
parent 94242e3e6e
commit c01245763e
8 changed files with 46 additions and 50 deletions

@@ -22,7 +22,9 @@
   conditions: ["RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 ./ci/travis/install-dependencies.sh
+    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/travis/install-dependencies.sh
+    # Because Python version changed, we need to re-install Ray here
+    - rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/travis/ci.sh build
     - pip install -Ur ./python/requirements_ml_docker.txt
     - ./ci/travis/env_info.sh
     # --jobs 2 is necessary as we only need to have at least 2 gpus on the machine
@@ -213,17 +213,17 @@ To scale to hundreds of agents, MultiAgentEnv batches policy evaluations across
 PettingZoo Multi-Agent Environments
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-`PettingZoo <https://github.com/PettingZoo-Team/PettingZoo>`__ is a repository of over 50 diverse multi-agent environments. However, the API is not directly compatible with rllib, but it can be converted into an rllib MultiAgentEnv like in this example
+`PettingZoo <https://github.com/Farama-Foundation/PettingZoo>`__ is a repository of over 50 diverse multi-agent environments. However, the API is not directly compatible with rllib, but it can be converted into an rllib MultiAgentEnv like in this example
 
 .. code-block:: python
 
     from ray.tune.registry import register_env
     # import the pettingzoo environment
-    from pettingzoo.butterfly import prison_v2
+    from pettingzoo.butterfly import prison_v3
     # import rllib pettingzoo interface
     from ray.rllib.env import PettingZooEnv
     # define how to make the environment. This way takes an optional environment config, num_floors
-    env_creator = lambda config: prison_v2.env(num_floors=config.get("num_floors", 4))
+    env_creator = lambda config: prison_v3.env(num_floors=config.get("num_floors", 4))
     # register that way to make the environment under an rllib name
     register_env('prison', lambda config: PettingZooEnv(env_creator(config)))
     # now you can use `prison` as an environment
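
For context: once `prison` is registered as above, the name can be handed to Tune like any built-in environment. A minimal sketch, assuming the Ray 1.x `tune.run` API; the stop criterion and worker count are illustrative, not from the diff:

    from ray import tune

    # Train PPO on the registered "prison" env; env_config is forwarded to
    # env_creator, so num_floors reaches prison_v3.env().
    tune.run(
        "PPO",
        stop={"training_iteration": 5},  # illustrative stop criterion
        config={
            "env": "prison",
            "env_config": {"num_floors": 4},
            "num_workers": 1,
        },
    )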

@@ -11,9 +11,9 @@ kaggle_environments==1.7.11
 # Unity3D testing
 mlagents_envs==0.27.0
 # For tests on PettingZoo's multi-agent envs.
-pettingzoo==1.11.1
+pettingzoo==1.14.0; python_version >= '3.7'
 pymunk==6.0.0
-supersuit==2.6.6
+supersuit==3.3.2; python_version >= '3.7'
 # For testing in MuJoCo-like envs (in PyBullet).
 pybullet==3.2.0
 # For tests on RecSim and Kaggle envs.
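
The `python_version >= '3.7'` markers keep the new pins off Python 3.6 installs. A hedged sketch for verifying that an environment picked up the pinned versions; it uses the standard-library `importlib.metadata` (Python 3.8+; the `importlib_metadata` backport has the same API on 3.7):

    from importlib.metadata import version

    # Expected values taken from the requirements diff above.
    assert version("pettingzoo") == "1.14.0"
    assert version("supersuit") == "3.3.2"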

rllib/env/tests/test_remote_worker_envs.py (vendored, 4 changes)
@@ -1,6 +1,6 @@
 import gym
 import numpy as np
-from pettingzoo.butterfly import pistonball_v4
+from pettingzoo.butterfly import pistonball_v5
 from supersuit import normalize_obs_v0, dtype_v0, color_reduction_v0
 import unittest
 
@@ -15,7 +15,7 @@ from ray import tune
 
 # Function that outputs the environment you wish to register.
 def env_creator(config):
-    env = pistonball_v4.env(local_ratio=config.get("local_ratio", 0.2))
+    env = pistonball_v5.env()
     env = dtype_v0(env, dtype=np.float32)
     env = color_reduction_v0(env, mode="R")
     env = normalize_obs_v0(env)
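
The creator above builds a raw PettingZoo AEC env with SuperSuit preprocessing; before RLlib can consume it, it still needs the PettingZooEnv wrapper. A hedged sketch of that step (the registered name `pistonball` is illustrative, not from the diff):

    from ray.tune.registry import register_env
    from ray.rllib.env import PettingZooEnv

    # Wrap the SuperSuit-preprocessed AEC env for RLlib and register it.
    register_env("pistonball", lambda config: PettingZooEnv(env_creator(config)))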

rllib/env/wrappers/pettingzoo_env.py (vendored, 64 changes)
@@ -4,7 +4,7 @@ from ray.rllib.env.multi_agent_env import MultiAgentEnv
 class PettingZooEnv(MultiAgentEnv):
     """An interface to the PettingZoo MARL environment library.
 
-    See: https://github.com/PettingZoo-Team/PettingZoo
+    See: https://github.com/Farama-Foundation/PettingZoo
 
     Inherits from MultiAgentEnv and exposes a given AEC
     (actor-environment-cycle) game from the PettingZoo project via the
@@ -15,7 +15,7 @@ class PettingZooEnv(MultiAgentEnv):
     1. All agents have the same action_spaces and observation_spaces.
        Note: If, within your aec game, agents do not have homogeneous action /
        observation spaces, apply SuperSuit wrappers
-       to apply padding functionality: https://github.com/PettingZoo-Team/
+       to apply padding functionality: https://github.com/Farama-Foundation/
        SuperSuit#built-in-multi-agent-only-functions
     2. Environments are positive sum games (-> Agents are expected to cooperate
        to maximize reward). This isn't a hard restriction, it just that
@@ -23,8 +23,8 @@ class PettingZooEnv(MultiAgentEnv):
     games.
 
     Examples:
-        >>> from pettingzoo.butterfly import prison_v2
-        >>> env = PettingZooEnv(prison_v2.env())
+        >>> from pettingzoo.butterfly import prison_v3
+        >>> env = PettingZooEnv(prison_v3.env())
         >>> obs = env.reset()
         >>> print(obs)
         # only returns the observation for the agent which should be stepping
@@ -68,34 +68,26 @@ class PettingZooEnv(MultiAgentEnv):
 
     def __init__(self, env):
         self.env = env
         # agent idx list
         self.agents = self.env.possible_agents
 
-        # Get dictionaries of obs_spaces and act_spaces
-        self.observation_spaces = self.env.observation_spaces
-        self.action_spaces = self.env.action_spaces
+        env.reset()
 
         # Get first observation space, assuming all agents have equal space
-        self.observation_space = self.observation_spaces[self.agents[0]]
+        self.observation_space = self.env.observation_space(self.env.agents[0])
 
         # Get first action space, assuming all agents have equal space
-        self.action_space = self.action_spaces[self.agents[0]]
+        self.action_space = self.env.action_space(self.env.agents[0])
 
-        assert all(obs_space == self.observation_space
-                   for obs_space
-                   in self.env.observation_spaces.values()), \
+        assert all(self.env.observation_space(agent) == self.observation_space
+                   for agent in self.env.agents), \
             "Observation spaces for all agents must be identical. Perhaps " \
             "SuperSuit's pad_observations wrapper can help (useage: " \
             "`supersuit.aec_wrappers.pad_observations(env)`"
 
-        assert all(act_space == self.action_space
-                   for act_space in self.env.action_spaces.values()), \
+        assert all(self.env.action_space(agent) == self.action_space
+                   for agent in self.env.agents), \
             "Action spaces for all agents must be identical. Perhaps " \
-            "SuperSuit's pad_action_space wrapper can help (useage: " \
+            "SuperSuit's pad_action_space wrapper can help (usage: " \
             "`supersuit.aec_wrappers.pad_action_space(env)`"
 
         self.reset()
 
     def reset(self):
         self.env.reset()
         return {
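
The core change in this hunk is PettingZoo's move from dict attributes (`env.observation_spaces`) to per-agent accessor methods (`env.observation_space(agent)`). A hedged sketch of code tolerating both APIs (the helper name is illustrative):

    def get_obs_space(env, agent):
        # Newer PettingZoo (>= 1.14): per-agent accessor method.
        if callable(getattr(env, "observation_space", None)):
            return env.observation_space(agent)
        # Older PettingZoo: plain dict attribute keyed by agent id.
        return env.observation_spaces[agent]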
@@ -135,38 +127,36 @@ class PettingZooEnv(MultiAgentEnv):
     def render(self, mode="human"):
         return self.env.render(mode)
 
     @property
     def get_sub_environments(self):
         return self.env.unwrapped
 
 
 class ParallelPettingZooEnv(MultiAgentEnv):
     def __init__(self, env):
         self.par_env = env
         # agent idx list
         self.agents = self.par_env.possible_agents
 
-        # Get dictionaries of obs_spaces and act_spaces
-        self.observation_spaces = self.par_env.observation_spaces
-        self.action_spaces = self.par_env.action_spaces
+        self.par_env.reset()
 
         # Get first observation space, assuming all agents have equal space
-        self.observation_space = self.observation_spaces[self.agents[0]]
+        self.observation_space = self.par_env.observation_space(
+            self.par_env.agents[0])
 
         # Get first action space, assuming all agents have equal space
-        self.action_space = self.action_spaces[self.agents[0]]
+        self.action_space = self.par_env.action_space(self.par_env.agents[0])
 
-        assert all(obs_space == self.observation_space
-                   for obs_space
-                   in self.par_env.observation_spaces.values()), \
+        assert all(
+            self.par_env.observation_space(agent) == self.observation_space
+            for agent in self.par_env.agents), \
             "Observation spaces for all agents must be identical. Perhaps " \
             "SuperSuit's pad_observations wrapper can help (useage: " \
             "`supersuit.aec_wrappers.pad_observations(env)`"
 
-        assert all(act_space == self.action_space
-                   for act_space in self.par_env.action_spaces.values()), \
+        assert all(self.par_env.action_space(agent) == self.action_space
+                   for agent in self.par_env.agents), \
             "Action spaces for all agents must be identical. Perhaps " \
             "SuperSuit's pad_action_space wrapper can help (useage: " \
             "`supersuit.aec_wrappers.pad_action_space(env)`"
 
         self.reset()
 
     def reset(self):
         return self.par_env.reset()
 
@@ -183,3 +173,7 @@ class ParallelPettingZooEnv(MultiAgentEnv):
 
     def render(self, mode="human"):
         return self.par_env.render(mode)
+
+    @property
+    def unwrapped(self):
+        return self.par_env.unwrapped
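
ParallelPettingZooEnv mirrors the AEC wrapper for PettingZoo's parallel API, where all agents step simultaneously. A hedged usage sketch under the versions pinned above:

    from pettingzoo.butterfly import pistonball_v5
    from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv

    # parallel_env() returns the simultaneous-step variant of the game.
    env = ParallelPettingZooEnv(pistonball_v5.parallel_env())
    obs = env.reset()  # dict mapping agent ids to observations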

@@ -1,7 +1,7 @@
 from ray import tune
 from ray.tune.registry import register_env
 from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
-from pettingzoo.sisl import waterworld_v2
+from pettingzoo.sisl import waterworld_v3
 
 # Based on code from github.com/parametersharingmadrl/parametersharingmadrl
 
@@ -9,7 +9,7 @@ if __name__ == "__main__":
     # RDQN - Rainbow DQN
     # ADQN - Apex DQN
     def env_creator(args):
-        return PettingZooEnv(waterworld_v2.env())
+        return PettingZooEnv(waterworld_v3.env())
 
     env = env_creator({})
     register_env("waterworld", env_creator)
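
Since this example derives from the parameter-sharing repo, the usual next step is mapping every waterworld agent onto one shared policy. A hedged sketch of such a config for the Ray 1.x multi-agent API (the policy id `shared_policy` is illustrative):

    tune.run(
        "APEX_DDPG",
        config={
            "env": "waterworld",
            "multiagent": {
                # One policy trained on experience from all agents.
                "policies": {"shared_policy"},
                "policy_mapping_fn": lambda agent_id, **kwargs: "shared_policy",
            },
        },
    )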

@@ -1,7 +1,7 @@
 from ray import tune
 from ray.tune.registry import register_env
 from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
-from pettingzoo.sisl import waterworld_v0
+from pettingzoo.sisl import waterworld_v3
 
 # Based on code from github.com/parametersharingmadrl/parametersharingmadrl
 
@@ -9,7 +9,7 @@ if __name__ == "__main__":
     # RDQN - Rainbow DQN
     # ADQN - Apex DQN
 
-    register_env("waterworld", lambda _: PettingZooEnv(waterworld_v0.env()))
+    register_env("waterworld", lambda _: PettingZooEnv(waterworld_v3.env()))
 
     tune.run(
         "APEX_DDPG",

@@ -7,7 +7,7 @@ from ray.tune.registry import register_env
 from ray.rllib.env import PettingZooEnv
 from ray.rllib.agents.registry import get_trainer_class
 
-from pettingzoo.butterfly import pistonball_v4
+from pettingzoo.butterfly import pistonball_v5
 from pettingzoo.mpe import simple_spread_v2
 from supersuit import normalize_obs_v0, dtype_v0, color_reduction_v0
 
@@ -19,9 +19,9 @@ class TestPettingZooEnv(unittest.TestCase):
     def tearDown(self) -> None:
         ray.shutdown()
 
-    def test_pettingzoo_pistonball_v4_policies_are_dict_env(self):
+    def test_pettingzoo_pistonball_v5_policies_are_dict_env(self):
         def env_creator(config):
-            env = pistonball_v4.env(local_ratio=config.get("local_ratio", 0.2))
+            env = pistonball_v5.env()
             env = dtype_v0(env, dtype=float32)
             env = color_reduction_v0(env, mode="R")
             env = normalize_obs_v0(env)
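
The imported `get_trainer_class` suggests how such tests typically finish: build a trainer on the registered env and step it once. A hedged sketch of that continuation (env name and config values are illustrative, not from the diff):

    # Hypothetical continuation: run one training iteration on the env.
    trainer_cls = get_trainer_class("PPO")
    trainer = trainer_cls(config={"env": "pistonball", "num_workers": 0})
    results = trainer.train()
    trainer.stop()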