updated multiagent docs (#1523)

* updated multiagent docs

* Update rllib.rst

* Update multiagent_mountaincar_env.py

* Update multiagent_pendulum_env.py
eugenevinitsky 2018-02-11 16:35:03 -08:00 committed by Eric Liang
parent 81a4be8f65
commit 639df85fda
3 changed files with 36 additions and 4 deletions


@@ -240,6 +240,38 @@ these custom classes can be found in the
For a full example of a custom model in code, see the `Carla RLlib model <https://github.com/ray-project/ray/blob/master/examples/carla/models.py>`__ and associated `training scripts <https://github.com/ray-project/ray/tree/master/examples/carla>`__. The ``CarlaModel`` class defined there operates over a composite (Tuple) observation space including both images and scalar measurements.

Multi-Agent Models
~~~~~~~~~~~~~~~~~~

RLlib supports multi-agent training with PPO. It currently supports both
shared multi-agent models, in which all agents use the same model, and
non-shared models, in which each agent has its own. However, it only supports
a shared reward and does not yet support individual rewards for each agent.
While Generalized Advantage Estimation is supported in multi-agent scenarios,
it is assumed that the estimator can access the observations of all of the
agents; the sketch after this paragraph makes that assumption concrete.
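
As a minimal illustration (this is not RLlib's actual estimator; the function
name and array shapes are assumptions made for the sketch), standard GAE
computed from a centralized value function that scores the concatenation of
all agents' observations looks like this:

.. code-block:: python

    import numpy as np

    def centralized_gae(rewards, values, gamma=0.99, lam=0.95):
        # `rewards` holds the shared reward at each timestep; `values`
        # holds centralized value estimates computed from the concatenated
        # observations of all agents, plus one bootstrap value for the
        # state after the final step (so len(values) == len(rewards) + 1).
        advantages = np.zeros(len(rewards))
        running = 0.0
        for t in reversed(range(len(rewards))):
            delta = rewards[t] + gamma * values[t + 1] - values[t]
            running = delta + gamma * lam * running
            advantages[t] = running
        return advantages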

The important config parameters are described below:

.. code-block:: python

    config["model"].update({"fcnet_hiddens": [256, 256]})  # hidden layer sizes of the value function

    options = {"multiagent_obs_shapes": [3, 3],    # length of each agent's observation space
               "multiagent_act_shapes": [1, 1],    # length of each agent's action space
               "multiagent_shared_model": True,    # whether the model should be shared
               # hidden layer sizes of each agent's feedforward net
               "multiagent_fcnet_hiddens": [[32, 32]] * 2}
    config["model"].update({"custom_options": options})

For a full example of a multi-agent model in code, see the
`MultiAgent Pendulum <https://github.com/ray-project/ray/blob/master/python/ray/rllib/examples/multiagent_pendulum.py>`__.
The ``MultiAgentPendulumEnv`` defined there operates over a composite (Tuple)
observation space enclosing a list of Boxes, each Box representing the
observation of one agent. The action space is a list of Box actions, each
element corresponding to half of the total torque. At each step the
environment takes a list of actions that can be iterated over and applied
to each agent.
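
For reference, a stripped-down skeleton of that structure (illustrative
only, not the actual example code) might look like:

.. code-block:: python

    import numpy as np
    from gym.spaces import Box, Tuple

    class TwoAgentPendulumSketch(object):
        def __init__(self):
            high = np.array([1., 1., 8.])
            max_torque = 2.0
            # One Box observation per agent, wrapped in a Tuple.
            self.observation_space = Tuple([
                Box(low=-high, high=high) for _ in range(2)])
            # One Box action per agent, each worth half the total torque.
            self.action_space = [Box(low=-max_torque / 2,
                                     high=max_torque / 2, shape=(1,))
                                 for _ in range(2)]

        def step(self, actions):
            # `actions` is a list with one entry per agent; iterate over
            # it and sum each agent's share of the torque.
            torque = sum(float(a[0]) for a in actions)
            obs = [space.sample() for space in self.observation_space.spaces]
            reward = -torque ** 2  # placeholder shared reward
            return obs, reward, False, {}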

External Data API
~~~~~~~~~~~~~~~~~

*coming soon!*


@@ -22,8 +22,8 @@ class MultiAgentMountainCarEnv(MountainCarEnv):
         self.viewer = None
         self.action_space = [Discrete(3) for _ in range(2)]
-        self.observation_space = Tuple(tuple(Box(self.low, self.high)
-                                             for _ in range(2)))
+        self.observation_space = Tuple([
+            Box(self.low, self.high) for _ in range(2)])
         self._seed()
         self.reset()


@@ -24,8 +24,8 @@ class MultiAgentPendulumEnv(PendulumEnv):
         self.action_space = [Box(low=-self.max_torque / 2,
                              high=self.max_torque / 2, shape=(1,))
                              for _ in range(2)]
-        self.observation_space = Tuple(tuple(Box(low=-high, high=high)
-                                             for _ in range(2)))
+        self.observation_space = Tuple([
+            Box(low=-high, high=high) for _ in range(2)])
         self._seed()