[RLlib] Revert "Revert "updated pettingzoo wrappers, env versions, urls"" (#21339)

Sven Mika 2022-01-04 18:30:26 +01:00 committed by GitHub
parent 94242e3e6e
commit c01245763e
8 changed files with 46 additions and 50 deletions


@@ -22,7 +22,9 @@
  conditions: ["RAY_CI_RLLIB_AFFECTED"]
  commands:
    - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 ./ci/travis/install-dependencies.sh
+    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/travis/install-dependencies.sh
+    # Because the Python version changed, we need to re-install Ray here.
+    - rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/travis/ci.sh build
    - pip install -Ur ./python/requirements_ml_docker.txt
    - ./ci/travis/env_info.sh
# --jobs 2 is necessary as the machine is only guaranteed to have 2 GPUs


@@ -213,17 +213,17 @@ To scale to hundreds of agents, MultiAgentEnv batches policy evaluations across
PettingZoo Multi-Agent Environments
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-`PettingZoo <https://github.com/PettingZoo-Team/PettingZoo>`__ is a repository of over 50 diverse multi-agent environments. However, its API is not directly compatible with RLlib; it can be converted into an RLlib MultiAgentEnv as in this example:
+`PettingZoo <https://github.com/Farama-Foundation/PettingZoo>`__ is a repository of over 50 diverse multi-agent environments. However, its API is not directly compatible with RLlib; it can be converted into an RLlib MultiAgentEnv as in this example:

.. code-block:: python

    from ray.tune.registry import register_env
    # Import the PettingZoo environment.
-    from pettingzoo.butterfly import prison_v2
+    from pettingzoo.butterfly import prison_v3
    # Import RLlib's PettingZoo interface.
    from ray.rllib.env import PettingZooEnv
    # Define how to create the environment; this one takes an optional
    # environment config with num_floors.
-    env_creator = lambda config: prison_v2.env(num_floors=config.get("num_floors", 4))
+    env_creator = lambda config: prison_v3.env(num_floors=config.get("num_floors", 4))
    # Register the environment under an RLlib name.
    register_env('prison', lambda config: PettingZooEnv(env_creator(config)))
    # Now you can use `prison` as an environment name.
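Once registered, the environment trains like any other RLlib environment. The following is a minimal sketch, not part of this commit; the algorithm choice, stopping criterion, and config values are illustrative only:

.. code-block:: python

    from ray import tune

    # Assumes the `prison` registration above has already run.
    tune.run(
        "PPO",
        stop={"training_iteration": 5},  # illustrative stopping criterion
        config={
            "env": "prison",
            # Forwarded to env_creator defined above.
            "env_config": {"num_floors": 4},
        },
    )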


@@ -11,9 +11,9 @@ kaggle_environments==1.7.11
# Unity3D testing
mlagents_envs==0.27.0
# For tests on PettingZoo's multi-agent envs.
-pettingzoo==1.11.1
+pettingzoo==1.14.0; python_version >= '3.7'
pymunk==6.0.0
-supersuit==2.6.6
+supersuit==3.3.2; python_version >= '3.7'
# For testing in MuJoCo-like envs (in PyBullet).
pybullet==3.2.0
# For tests on RecSim and Kaggle envs.


@@ -1,6 +1,6 @@
import gym
import numpy as np
-from pettingzoo.butterfly import pistonball_v4
+from pettingzoo.butterfly import pistonball_v5
from supersuit import normalize_obs_v0, dtype_v0, color_reduction_v0
import unittest
@@ -15,7 +15,7 @@ from ray import tune
# Function that outputs the environment you wish to register.
def env_creator(config):
-    env = pistonball_v4.env(local_ratio=config.get("local_ratio", 0.2))
+    env = pistonball_v5.env()
    env = dtype_v0(env, dtype=np.float32)
    env = color_reduction_v0(env, mode="R")
    env = normalize_obs_v0(env)


@@ -4,7 +4,7 @@ from ray.rllib.env.multi_agent_env import MultiAgentEnv
class PettingZooEnv(MultiAgentEnv):
    """An interface to the PettingZoo MARL environment library.

-    See: https://github.com/PettingZoo-Team/PettingZoo
+    See: https://github.com/Farama-Foundation/PettingZoo

    Inherits from MultiAgentEnv and exposes a given AEC
    (agent-environment-cycle) game from the PettingZoo project via the
@@ -15,7 +15,7 @@ class PettingZooEnv(MultiAgentEnv):
    1. All agents have the same action_spaces and observation_spaces.
       Note: If, within your AEC game, agents do not have homogeneous action /
       observation spaces, apply SuperSuit wrappers
-       to apply padding functionality: https://github.com/PettingZoo-Team/
+       to apply padding functionality: https://github.com/Farama-Foundation/
       SuperSuit#built-in-multi-agent-only-functions
    2. Environments are positive-sum games (-> agents are expected to cooperate
       to maximize reward). This isn't a hard restriction; it's just that
@@ -23,8 +23,8 @@ class PettingZooEnv(MultiAgentEnv):
       games.

    Examples:
-        >>> from pettingzoo.butterfly import prison_v2
-        >>> env = PettingZooEnv(prison_v2.env())
+        >>> from pettingzoo.butterfly import prison_v3
+        >>> env = PettingZooEnv(prison_v3.env())
        >>> obs = env.reset()
        >>> print(obs)
        # only returns the observation for the agent which should be stepping
@@ -68,34 +68,26 @@ class PettingZooEnv(MultiAgentEnv):
    def __init__(self, env):
        self.env = env
-        # agent idx list
-        self.agents = self.env.possible_agents
-        # Get dictionaries of obs_spaces and act_spaces
-        self.observation_spaces = self.env.observation_spaces
-        self.action_spaces = self.env.action_spaces
+        env.reset()

        # Get first observation space, assuming all agents have equal space
-        self.observation_space = self.observation_spaces[self.agents[0]]
+        self.observation_space = self.env.observation_space(self.env.agents[0])

        # Get first action space, assuming all agents have equal space
-        self.action_space = self.action_spaces[self.agents[0]]
+        self.action_space = self.env.action_space(self.env.agents[0])

-        assert all(obs_space == self.observation_space
-                   for obs_space
-                   in self.env.observation_spaces.values()), \
+        assert all(self.env.observation_space(agent) == self.observation_space
+                   for agent in self.env.agents), \
            "Observation spaces for all agents must be identical. Perhaps " \
            "SuperSuit's pad_observations wrapper can help (usage: " \
            "`supersuit.aec_wrappers.pad_observations(env)`)"

-        assert all(act_space == self.action_space
-                   for act_space in self.env.action_spaces.values()), \
+        assert all(self.env.action_space(agent) == self.action_space
+                   for agent in self.env.agents), \
            "Action spaces for all agents must be identical. Perhaps " \
-            "SuperSuit's pad_action_space wrapper can help (useage: " \
+            "SuperSuit's pad_action_space wrapper can help (usage: " \
            "`supersuit.aec_wrappers.pad_action_space(env)`)"

-        self.reset()

    def reset(self):
        self.env.reset()
        return {
@@ -135,38 +127,36 @@ class PettingZooEnv(MultiAgentEnv):
    def render(self, mode="human"):
        return self.env.render(mode)

    @property
    def get_sub_environments(self):
        return self.env.unwrapped


class ParallelPettingZooEnv(MultiAgentEnv):
    def __init__(self, env):
        self.par_env = env
-        # agent idx list
-        self.agents = self.par_env.possible_agents
-        # Get dictionaries of obs_spaces and act_spaces
-        self.observation_spaces = self.par_env.observation_spaces
-        self.action_spaces = self.par_env.action_spaces
+        self.par_env.reset()

        # Get first observation space, assuming all agents have equal space
-        self.observation_space = self.observation_spaces[self.agents[0]]
+        self.observation_space = self.par_env.observation_space(
+            self.par_env.agents[0])

        # Get first action space, assuming all agents have equal space
-        self.action_space = self.action_spaces[self.agents[0]]
+        self.action_space = self.par_env.action_space(self.par_env.agents[0])

-        assert all(obs_space == self.observation_space
-                   for obs_space
-                   in self.par_env.observation_spaces.values()), \
+        assert all(
+            self.par_env.observation_space(agent) == self.observation_space
+            for agent in self.par_env.agents), \
            "Observation spaces for all agents must be identical. Perhaps " \
            "SuperSuit's pad_observations wrapper can help (usage: " \
            "`supersuit.aec_wrappers.pad_observations(env)`)"

-        assert all(act_space == self.action_space
-                   for act_space in self.par_env.action_spaces.values()), \
+        assert all(self.par_env.action_space(agent) == self.action_space
+                   for agent in self.par_env.agents), \
            "Action spaces for all agents must be identical. Perhaps " \
            "SuperSuit's pad_action_space wrapper can help (usage: " \
            "`supersuit.aec_wrappers.pad_action_space(env)`)"

-        self.reset()

    def reset(self):
        return self.par_env.reset()
@@ -183,3 +173,7 @@ class ParallelPettingZooEnv(MultiAgentEnv):
    def render(self, mode="human"):
        return self.par_env.render(mode)

+    @property
+    def unwrapped(self):
+        return self.par_env.unwrapped
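The core API change in this file: PettingZoo 1.14 queries per-agent spaces through callables, replacing the old `observation_spaces` / `action_spaces` dict attributes. A minimal sketch, not part of the commit, with waterworld_v3 chosen purely for illustration:

from pettingzoo.sisl import waterworld_v3

env = waterworld_v3.env()
env.reset()
agent = env.agents[0]

# New style (PettingZoo >= 1.14): spaces are exposed as callables.
obs_space = env.observation_space(agent)
act_space = env.action_space(agent)

# Old style, removed by this commit:
#   obs_space = env.observation_spaces[agent]
print(obs_space, act_space)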


@@ -1,7 +1,7 @@
from ray import tune
from ray.tune.registry import register_env
from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
-from pettingzoo.sisl import waterworld_v2
+from pettingzoo.sisl import waterworld_v3

# Based on code from github.com/parametersharingmadrl/parametersharingmadrl
@@ -9,7 +9,7 @@ if __name__ == "__main__":
    # RDQN - Rainbow DQN
    # ADQN - Apex DQN
    def env_creator(args):
-        return PettingZooEnv(waterworld_v2.env())
+        return PettingZooEnv(waterworld_v3.env())

    env = env_creator({})
    register_env("waterworld", env_creator)


@@ -1,7 +1,7 @@
from ray import tune
from ray.tune.registry import register_env
from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
-from pettingzoo.sisl import waterworld_v0
+from pettingzoo.sisl import waterworld_v3

# Based on code from github.com/parametersharingmadrl/parametersharingmadrl
@@ -9,7 +9,7 @@ if __name__ == "__main__":
    # RDQN - Rainbow DQN
    # ADQN - Apex DQN
-    register_env("waterworld", lambda _: PettingZooEnv(waterworld_v0.env()))
+    register_env("waterworld", lambda _: PettingZooEnv(waterworld_v3.env()))

    tune.run(
        "APEX_DDPG",


@@ -7,7 +7,7 @@ from ray.tune.registry import register_env
from ray.rllib.env import PettingZooEnv
from ray.rllib.agents.registry import get_trainer_class
-from pettingzoo.butterfly import pistonball_v4
+from pettingzoo.butterfly import pistonball_v5
from pettingzoo.mpe import simple_spread_v2
from supersuit import normalize_obs_v0, dtype_v0, color_reduction_v0
@@ -19,9 +19,9 @@ class TestPettingZooEnv(unittest.TestCase):
    def tearDown(self) -> None:
        ray.shutdown()

-    def test_pettingzoo_pistonball_v4_policies_are_dict_env(self):
+    def test_pettingzoo_pistonball_v5_policies_are_dict_env(self):
        def env_creator(config):
-            env = pistonball_v4.env(local_ratio=config.get("local_ratio", 0.2))
+            env = pistonball_v5.env()
            env = dtype_v0(env, dtype=float32)
            env = color_reduction_v0(env, mode="R")
            env = normalize_obs_v0(env)
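For context on the test name: "policies are dict" refers to passing the multiagent `policies` config as a dict keyed by agent id. A hedged sketch, not from the commit, of building such a dict from the wrapped env (wrapper chain mirrors the test; names are illustrative):

from numpy import float32
from pettingzoo.butterfly import pistonball_v5
from supersuit import color_reduction_v0, dtype_v0, normalize_obs_v0
from ray.rllib.env import PettingZooEnv

env = PettingZooEnv(
    normalize_obs_v0(
        color_reduction_v0(
            dtype_v0(pistonball_v5.env(), dtype=float32), mode="R")))

# One entry per agent: (policy_cls, obs_space, act_space, config).
# None for policy_cls lets RLlib pick the trainer's default policy class.
policies = {
    agent: (None, env.observation_space, env.action_space, {})
    for agent in env.env.agents
}
print(list(policies))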