[RLlib] Revert "Revert "updated pettingzoo wrappers, env versions, urls"" (#21339)
parent 94242e3e6e
commit c01245763e
8 changed files with 46 additions and 50 deletions

@@ -22,7 +22,9 @@
   conditions: ["RAY_CI_RLLIB_AFFECTED"]
   commands:
     - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/travis/upload_build_info.sh; fi }; trap cleanup EXIT
-    - RLLIB_TESTING=1 ./ci/travis/install-dependencies.sh
+    - RLLIB_TESTING=1 PYTHON=3.7 ./ci/travis/install-dependencies.sh
+    # Because Python version changed, we need to re-install Ray here
+    - rm -rf ./python/ray/thirdparty_files; rm -rf ./python/ray/pickle5_files; ./ci/travis/ci.sh build
     - pip install -Ur ./python/requirements_ml_docker.txt
     - ./ci/travis/env_info.sh
     # --jobs 2 is necessary as we only need to have at least 2 gpus on the machine
@@ -213,17 +213,17 @@ To scale to hundreds of agents, MultiAgentEnv batches policy evaluations across
 PettingZoo Multi-Agent Environments
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-`PettingZoo <https://github.com/PettingZoo-Team/PettingZoo>`__ is a repository of over 50 diverse multi-agent environments. However, the API is not directly compatible with rllib, but it can be converted into an rllib MultiAgentEnv like in this example
+`PettingZoo <https://github.com/Farama-Foundation/PettingZoo>`__ is a repository of over 50 diverse multi-agent environments. However, the API is not directly compatible with rllib, but it can be converted into an rllib MultiAgentEnv like in this example
 
 .. code-block:: python
 
     from ray.tune.registry import register_env
     # import the pettingzoo environment
-    from pettingzoo.butterfly import prison_v2
+    from pettingzoo.butterfly import prison_v3
     # import rllib pettingzoo interface
     from ray.rllib.env import PettingZooEnv
     # define how to make the environment. This way takes an optional environment config, num_floors
-    env_creator = lambda config: prison_v2.env(num_floors=config.get("num_floors", 4))
+    env_creator = lambda config: prison_v3.env(num_floors=config.get("num_floors", 4))
     # register that way to make the environment under an rllib name
     register_env('prison', lambda config: PettingZooEnv(env_creator(config)))
     # now you can use `prison` as an environment
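
For context: once `prison` is registered as above, the name can be handed to Tune like any built-in environment. A minimal sketch, assuming the Ray 1.x `tune.run` API; the stop criterion and worker count are illustrative, not from the diff:

    from ray import tune

    # Train PPO on the registered "prison" env; env_config is forwarded to
    # env_creator, so num_floors reaches prison_v3.env().
    tune.run(
        "PPO",
        stop={"training_iteration": 5},  # illustrative stop criterion
        config={
            "env": "prison",
            "env_config": {"num_floors": 4},
            "num_workers": 1,
        },
    )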

@@ -11,9 +11,9 @@ kaggle_environments==1.7.11
 # Unity3D testing
 mlagents_envs==0.27.0
 # For tests on PettingZoo's multi-agent envs.
-pettingzoo==1.11.1
+pettingzoo==1.14.0; python_version >= '3.7'
 pymunk==6.0.0
-supersuit==2.6.6
+supersuit==3.3.2; python_version >= '3.7'
 # For testing in MuJoCo-like envs (in PyBullet).
 pybullet==3.2.0
 # For tests on RecSim and Kaggle envs.
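
The `python_version >= '3.7'` markers keep the new pins off Python 3.6 installs. A hedged sketch for verifying that an environment picked up the pinned versions; it uses the standard-library `importlib.metadata` (Python 3.8+; the `importlib_metadata` backport has the same API on 3.7):

    from importlib.metadata import version

    # Expected values taken from the requirements diff above.
    assert version("pettingzoo") == "1.14.0"
    assert version("supersuit") == "3.3.2"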

rllib/env/tests/test_remote_worker_envs.py (vendored, 4 changes)
@@ -1,6 +1,6 @@
 import gym
 import numpy as np
-from pettingzoo.butterfly import pistonball_v4
+from pettingzoo.butterfly import pistonball_v5
 from supersuit import normalize_obs_v0, dtype_v0, color_reduction_v0
 import unittest
 
@@ -15,7 +15,7 @@ from ray import tune
 
 # Function that outputs the environment you wish to register.
 def env_creator(config):
-    env = pistonball_v4.env(local_ratio=config.get("local_ratio", 0.2))
+    env = pistonball_v5.env()
     env = dtype_v0(env, dtype=np.float32)
     env = color_reduction_v0(env, mode="R")
     env = normalize_obs_v0(env)
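
The creator above builds a raw PettingZoo AEC env with SuperSuit preprocessing; before RLlib can consume it, it still needs the PettingZooEnv wrapper. A hedged sketch of that step (the registered name `pistonball` is illustrative, not from the diff):

    from ray.tune.registry import register_env
    from ray.rllib.env import PettingZooEnv

    # Wrap the SuperSuit-preprocessed AEC env for RLlib and register it.
    register_env("pistonball", lambda config: PettingZooEnv(env_creator(config)))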

rllib/env/wrappers/pettingzoo_env.py (vendored, 64 changes)
@@ -4,7 +4,7 @@ from ray.rllib.env.multi_agent_env import MultiAgentEnv
 class PettingZooEnv(MultiAgentEnv):
     """An interface to the PettingZoo MARL environment library.
 
-    See: https://github.com/PettingZoo-Team/PettingZoo
+    See: https://github.com/Farama-Foundation/PettingZoo
 
     Inherits from MultiAgentEnv and exposes a given AEC
     (actor-environment-cycle) game from the PettingZoo project via the
@@ -15,7 +15,7 @@ class PettingZooEnv(MultiAgentEnv):
     1. All agents have the same action_spaces and observation_spaces.
        Note: If, within your aec game, agents do not have homogeneous action /
        observation spaces, apply SuperSuit wrappers
-       to apply padding functionality: https://github.com/PettingZoo-Team/
+       to apply padding functionality: https://github.com/Farama-Foundation/
        SuperSuit#built-in-multi-agent-only-functions
     2. Environments are positive sum games (-> Agents are expected to cooperate
        to maximize reward). This isn't a hard restriction, it just that
@@ -23,8 +23,8 @@ class PettingZooEnv(MultiAgentEnv):
     games.
 
     Examples:
-        >>> from pettingzoo.butterfly import prison_v2
-        >>> env = PettingZooEnv(prison_v2.env())
+        >>> from pettingzoo.butterfly import prison_v3
+        >>> env = PettingZooEnv(prison_v3.env())
         >>> obs = env.reset()
         >>> print(obs)
         # only returns the observation for the agent which should be stepping
@@ -68,34 +68,26 @@ class PettingZooEnv(MultiAgentEnv):
 
     def __init__(self, env):
         self.env = env
         # agent idx list
         self.agents = self.env.possible_agents
 
-        # Get dictionaries of obs_spaces and act_spaces
-        self.observation_spaces = self.env.observation_spaces
-        self.action_spaces = self.env.action_spaces
+        env.reset()
 
         # Get first observation space, assuming all agents have equal space
-        self.observation_space = self.observation_spaces[self.agents[0]]
+        self.observation_space = self.env.observation_space(self.env.agents[0])
 
         # Get first action space, assuming all agents have equal space
-        self.action_space = self.action_spaces[self.agents[0]]
+        self.action_space = self.env.action_space(self.env.agents[0])
 
-        assert all(obs_space == self.observation_space
-                   for obs_space
-                   in self.env.observation_spaces.values()), \
+        assert all(self.env.observation_space(agent) == self.observation_space
+                   for agent in self.env.agents), \
             "Observation spaces for all agents must be identical. Perhaps " \
             "SuperSuit's pad_observations wrapper can help (useage: " \
             "`supersuit.aec_wrappers.pad_observations(env)`"
 
-        assert all(act_space == self.action_space
-                   for act_space in self.env.action_spaces.values()), \
+        assert all(self.env.action_space(agent) == self.action_space
+                   for agent in self.env.agents), \
             "Action spaces for all agents must be identical. Perhaps " \
-            "SuperSuit's pad_action_space wrapper can help (useage: " \
+            "SuperSuit's pad_action_space wrapper can help (usage: " \
             "`supersuit.aec_wrappers.pad_action_space(env)`"
 
         self.reset()
 
     def reset(self):
         self.env.reset()
         return {
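
The core change in this hunk is PettingZoo's move from dict attributes (`env.observation_spaces`) to per-agent accessor methods (`env.observation_space(agent)`). A hedged sketch of code tolerating both APIs (the helper name is illustrative):

    def get_obs_space(env, agent):
        # Newer PettingZoo (>= 1.14): per-agent accessor method.
        if callable(getattr(env, "observation_space", None)):
            return env.observation_space(agent)
        # Older PettingZoo: plain dict attribute keyed by agent id.
        return env.observation_spaces[agent]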
@@ -135,38 +127,36 @@ class PettingZooEnv(MultiAgentEnv):
     def render(self, mode="human"):
         return self.env.render(mode)
 
     @property
     def get_sub_environments(self):
         return self.env.unwrapped
 
 
 class ParallelPettingZooEnv(MultiAgentEnv):
     def __init__(self, env):
         self.par_env = env
         # agent idx list
         self.agents = self.par_env.possible_agents
 
-        # Get dictionaries of obs_spaces and act_spaces
-        self.observation_spaces = self.par_env.observation_spaces
-        self.action_spaces = self.par_env.action_spaces
+        self.par_env.reset()
 
         # Get first observation space, assuming all agents have equal space
-        self.observation_space = self.observation_spaces[self.agents[0]]
+        self.observation_space = self.par_env.observation_space(
+            self.par_env.agents[0])
 
         # Get first action space, assuming all agents have equal space
-        self.action_space = self.action_spaces[self.agents[0]]
+        self.action_space = self.par_env.action_space(self.par_env.agents[0])
 
-        assert all(obs_space == self.observation_space
-                   for obs_space
-                   in self.par_env.observation_spaces.values()), \
+        assert all(
+            self.par_env.observation_space(agent) == self.observation_space
+            for agent in self.par_env.agents), \
             "Observation spaces for all agents must be identical. Perhaps " \
             "SuperSuit's pad_observations wrapper can help (useage: " \
             "`supersuit.aec_wrappers.pad_observations(env)`"
 
-        assert all(act_space == self.action_space
-                   for act_space in self.par_env.action_spaces.values()), \
+        assert all(self.par_env.action_space(agent) == self.action_space
+                   for agent in self.par_env.agents), \
             "Action spaces for all agents must be identical. Perhaps " \
             "SuperSuit's pad_action_space wrapper can help (useage: " \
             "`supersuit.aec_wrappers.pad_action_space(env)`"
 
         self.reset()
 
     def reset(self):
         return self.par_env.reset()
 
@@ -183,3 +173,7 @@ class ParallelPettingZooEnv(MultiAgentEnv):
 
     def render(self, mode="human"):
         return self.par_env.render(mode)
+
+    @property
+    def unwrapped(self):
+        return self.par_env.unwrapped
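
ParallelPettingZooEnv mirrors the AEC wrapper for PettingZoo's parallel API, where all agents step simultaneously. A hedged usage sketch under the versions pinned above:

    from pettingzoo.butterfly import pistonball_v5
    from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv

    # parallel_env() returns the simultaneous-step variant of the game.
    env = ParallelPettingZooEnv(pistonball_v5.parallel_env())
    obs = env.reset()  # dict mapping agent ids to observations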

@@ -1,7 +1,7 @@
 from ray import tune
 from ray.tune.registry import register_env
 from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
-from pettingzoo.sisl import waterworld_v2
+from pettingzoo.sisl import waterworld_v3
 
 # Based on code from github.com/parametersharingmadrl/parametersharingmadrl
 
@@ -9,7 +9,7 @@ if __name__ == "__main__":
     # RDQN - Rainbow DQN
     # ADQN - Apex DQN
     def env_creator(args):
-        return PettingZooEnv(waterworld_v2.env())
+        return PettingZooEnv(waterworld_v3.env())
 
     env = env_creator({})
     register_env("waterworld", env_creator)
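
Since this example derives from the parameter-sharing repo, the usual next step is mapping every waterworld agent onto one shared policy. A hedged sketch of such a config for the Ray 1.x multi-agent API (the policy id `shared_policy` is illustrative):

    tune.run(
        "APEX_DDPG",
        config={
            "env": "waterworld",
            "multiagent": {
                # One policy trained on experience from all agents.
                "policies": {"shared_policy"},
                "policy_mapping_fn": lambda agent_id, **kwargs: "shared_policy",
            },
        },
    )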

@@ -1,7 +1,7 @@
 from ray import tune
 from ray.tune.registry import register_env
 from ray.rllib.env.wrappers.pettingzoo_env import PettingZooEnv
-from pettingzoo.sisl import waterworld_v0
+from pettingzoo.sisl import waterworld_v3
 
 # Based on code from github.com/parametersharingmadrl/parametersharingmadrl
 
@@ -9,7 +9,7 @@ if __name__ == "__main__":
     # RDQN - Rainbow DQN
     # ADQN - Apex DQN
 
-    register_env("waterworld", lambda _: PettingZooEnv(waterworld_v0.env()))
+    register_env("waterworld", lambda _: PettingZooEnv(waterworld_v3.env()))
 
     tune.run(
         "APEX_DDPG",

@@ -7,7 +7,7 @@ from ray.tune.registry import register_env
 from ray.rllib.env import PettingZooEnv
 from ray.rllib.agents.registry import get_trainer_class
 
-from pettingzoo.butterfly import pistonball_v4
+from pettingzoo.butterfly import pistonball_v5
 from pettingzoo.mpe import simple_spread_v2
 from supersuit import normalize_obs_v0, dtype_v0, color_reduction_v0
 
@@ -19,9 +19,9 @@ class TestPettingZooEnv(unittest.TestCase):
     def tearDown(self) -> None:
         ray.shutdown()
 
-    def test_pettingzoo_pistonball_v4_policies_are_dict_env(self):
+    def test_pettingzoo_pistonball_v5_policies_are_dict_env(self):
         def env_creator(config):
-            env = pistonball_v4.env(local_ratio=config.get("local_ratio", 0.2))
+            env = pistonball_v5.env()
             env = dtype_v0(env, dtype=float32)
             env = color_reduction_v0(env, mode="R")
             env = normalize_obs_v0(env)
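
The imported `get_trainer_class` suggests how such tests typically finish: build a trainer on the registered env and step it once. A hedged sketch of that continuation (env name and config values are illustrative, not from the diff):

    # Hypothetical continuation: run one training iteration on the env.
    trainer_cls = get_trainer_class("PPO")
    trainer = trainer_cls(config={"env": "pistonball", "num_workers": 0})
    results = trainer.train()
    trainer.stop()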