Mirror of https://github.com/vale981/ray, synced 2025-04-23 06:25:52 -04:00
updated multiagent docs (#1523)

* updated multiagent docs
* Update rllib.rst
* Update multiagent_mountaincar_env.py
* Update multiagent_pendulum_env.py

parent 81a4be8f65
commit 639df85fda

3 changed files with 36 additions and 4 deletions

rllib.rst

@@ -240,6 +240,38 @@ these custom classes can be found in the

For a full example of a custom model in code, see the `Carla RLlib model <https://github.com/ray-project/ray/blob/master/examples/carla/models.py>`__ and associated `training scripts <https://github.com/ray-project/ray/tree/master/examples/carla>`__. The ``CarlaModel`` class defined there operates over a composite (Tuple) observation space including both images and scalar measurements.

Multi-Agent Models
~~~~~~~~~~~~~~~~~~

RLlib supports multi-agent training with PPO. Currently, both shared
multi-agent models (all agents use the same model) and non-shared
models are supported. However, only shared rewards are supported;
individual rewards for each agent are not yet available.

While Generalized Advantage Estimation is supported in multiagent
scenarios, it is assumed that the estimator can access the
observations of all of the agents.
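
As a rough illustration (not RLlib's internal implementation), a
minimal sketch of how GAE computes advantages from a trajectory of
shared rewards and value estimates might look as follows; the value
estimates are assumed to come from a value function that sees the
concatenated observations of all agents:

.. code-block:: python

    import numpy as np

    def gae_advantages(rewards, values, gamma=0.99, lam=0.95):
        """Generalized Advantage Estimation over one trajectory.

        ``values`` contains one extra entry: the bootstrap value of
        the state following the last reward.
        """
        rewards = np.asarray(rewards, dtype=np.float64)
        values = np.asarray(values, dtype=np.float64)
        # TD residuals: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
        deltas = rewards + gamma * values[1:] - values[:-1]
        advantages = np.zeros_like(deltas)
        running = 0.0
        # Discounted right-to-left sum of the TD residuals.
        for t in reversed(range(len(deltas))):
            running = deltas[t] + gamma * lam * running
            advantages[t] = running
        return advantages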

Important config parameters are described below:

.. code-block:: python

config["model"].update({"fcnet_hiddens": [256, 256]}) # dimension of value function
|
||||
options = {"multiagent_obs_shapes": [3, 3], # length of each observation space
|
||||
"multiagent_act_shapes": [1, 1], # length of each action space
|
||||
"multiagent_shared_model": True, # whether the model should be shared
|
||||
# list of dimensions of multiagent feedforward nets
|
||||
"multiagent_fcnet_hiddens": [[32, 32]] * 2}
|
||||
config["model"].update({"custom_options": options})

For a full example of a multiagent model in code, see the
`MultiAgent Pendulum <https://github.com/ray-project/ray/blob/master/python/ray/rllib/examples/multiagent_pendulum.py>`__.
The ``MultiAgentPendulumEnv`` defined there operates over a composite
(Tuple) enclosing a list of Boxes; each Box represents the observation
of an agent. The action space is a list of Box actions, each element
corresponding to half of the total torque. The environment expects a
list of actions, one per agent, which it iterates over and applies to
each agent.
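
To make this layout concrete, here is a hypothetical skeleton of such
an env (the class and its dynamics are illustrative only, not part of
RLlib; it just mirrors the space structure described above):

.. code-block:: python

    import numpy as np
    from gym.spaces import Box, Tuple

    class TwoAgentPendulumLikeEnv(object):
        """Illustrative two-agent env skeleton."""

        def __init__(self):
            high = np.array([1.0, 1.0, 8.0])
            # One Box observation per agent, wrapped in a Tuple.
            self.observation_space = Tuple([
                Box(low=-high, high=high) for _ in range(2)])
            # One Box action per agent; each supplies half the torque.
            self.action_space = [
                Box(low=-1.0, high=1.0, shape=(1,)) for _ in range(2)]

        def step(self, actions):
            # `actions` holds one entry per agent.
            total_torque = sum(float(a[0]) for a in actions)
            obs = [np.zeros(3) for _ in range(2)]  # placeholder dynamics
            return obs, -abs(total_torque), False, {}  # shared reward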

External Data API
~~~~~~~~~~~~~~~~~

*coming soon!*

multiagent_mountaincar_env.py

@@ -22,8 +22,8 @@ class MultiAgentMountainCarEnv(MountainCarEnv):
         self.viewer = None

         self.action_space = [Discrete(3) for _ in range(2)]
-        self.observation_space = Tuple(tuple(Box(self.low, self.high)
-                                             for _ in range(2)))
+        self.observation_space = Tuple([
+            Box(self.low, self.high) for _ in range(2)])

         self._seed()
         self.reset()

multiagent_pendulum_env.py

@@ -24,8 +24,8 @@ class MultiAgentPendulumEnv(PendulumEnv):
         self.action_space = [Box(low=-self.max_torque / 2,
                                  high=self.max_torque / 2, shape=(1,))
                              for _ in range(2)]
-        self.observation_space = Tuple(tuple(Box(low=-high, high=high)
-                                             for _ in range(2)))
+        self.observation_space = Tuple([
+            Box(low=-high, high=high) for _ in range(2)])

         self._seed()