Mirror of https://github.com/vale981/ray, synced 2025-03-06 18:41:40 -05:00
[rllib] Remove legacy multiagent support (#2975)
* remove legacy
* remove reshaper
This commit is contained in:
parent fcef4edd46
commit 2019b4122b

8 changed files with 0 additions and 344 deletions
@@ -1,59 +0,0 @@
""" Multiagent mountain car. Each agent outputs an action which
is summed to form the total action. This is a discrete
multiagent example
"""

import gym
from gym.envs.registration import register

import ray
import ray.rllib.agents.ppo as ppo
from ray.tune.registry import register_env

env_name = "MultiAgentMountainCarEnv"

env_version_num = 0
env_name = env_name + '-v' + str(env_version_num)


def pass_params_to_gym(env_name):
    global env_version_num

    register(
        id=env_name,
        entry_point=(
            "ray.rllib.examples.legacy_multiagent.multiagent_mountaincar_env:"
            "MultiAgentMountainCarEnv"),
        max_episode_steps=200,
        kwargs={})


def create_env(env_config):
    pass_params_to_gym(env_name)
    env = gym.envs.make(env_name)
    return env


if __name__ == '__main__':
    register_env(env_name, lambda env_config: create_env(env_config))
    config = ppo.DEFAULT_CONFIG.copy()
    horizon = 10
    num_cpus = 4
    ray.init(num_cpus=num_cpus, redirect_output=True)
    config["num_workers"] = num_cpus
    config["train_batch_size"] = 1000
    config["num_sgd_iter"] = 10
    config["gamma"] = 0.999
    config["horizon"] = horizon
    config["use_gae"] = False
    config["model"].update({"fcnet_hiddens": [256, 256]})
    options = {
        "multiagent_obs_shapes": [2, 2],
        "multiagent_act_shapes": [1, 1],
        "multiagent_shared_model": False,
        "multiagent_fcnet_hiddens": [[32, 32]] * 2
    }
    config["model"].update({"custom_options": options})
    alg = ppo.PPOAgent(env=env_name, config=config)
    for i in range(1):
        alg.train()
@@ -1,51 +0,0 @@
from math import cos
from gym.spaces import Box, Tuple, Discrete
import numpy as np
from gym.envs.classic_control.mountain_car import MountainCarEnv
"""
Multiagent mountain car that sums and then
averages its actions to produce the velocity
"""


class MultiAgentMountainCarEnv(MountainCarEnv):
    def __init__(self):
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.5

        self.low = np.array([self.min_position, -self.max_speed])
        self.high = np.array([self.max_position, self.max_speed])

        self.viewer = None

        self.action_space = [Discrete(3) for _ in range(2)]
        self.observation_space = Tuple(
            [Box(self.low, self.high, dtype=np.float32) for _ in range(2)])

        self.seed()
        self.reset()

    def step(self, action):
        summed_act = 0.5 * np.sum(action)

        position, velocity = self.state
        velocity += (summed_act - 1) * 0.001
        velocity += cos(3 * position) * (-0.0025)
        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
        position += velocity
        position = np.clip(position, self.min_position, self.max_position)
        if (position == self.min_position and velocity < 0):
            velocity = 0

        done = bool(position >= self.goal_position)

        reward = position

        self.state = (position, velocity)
        return [np.array(self.state) for _ in range(2)], reward, done, {}

    def reset(self):
        self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
        return [np.array(self.state) for _ in range(2)]
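A minimal usage sketch (not part of the diff, assuming the MultiAgentMountainCarEnv class above is in scope): each of the two agents picks an action from Discrete(3), the env halves the summed action before applying the usual mountain-car dynamics, and both agents receive identical observations and one shared reward.

env = MultiAgentMountainCarEnv()
obs_per_agent = env.reset()       # two identical [position, velocity] arrays
actions = [2, 0]                  # one Discrete(3) action per agent
obs_per_agent, reward, done, info = env.step(actions)
# 0.5 * (2 + 0) == 1, so this pair behaves like the single-agent "no push"
# action; the shared reward is the cart position.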
@@ -1,60 +0,0 @@
""" Run script for multiagent pendulum env. Each agent outputs a
torque which is summed to form the total torque. This is a
continuous multiagent example
"""

import gym
from gym.envs.registration import register

import ray
import ray.rllib.agents.ppo as ppo
from ray.tune.registry import register_env

env_name = "MultiAgentPendulumEnv"

env_version_num = 0
env_name = env_name + '-v' + str(env_version_num)


def pass_params_to_gym(env_name):
    global env_version_num

    register(
        id=env_name,
        entry_point=(
            "ray.rllib.examples.legacy_multiagent.multiagent_pendulum_env:"
            "MultiAgentPendulumEnv"),
        max_episode_steps=100,
        kwargs={})


def create_env(env_config):
    pass_params_to_gym(env_name)
    env = gym.envs.make(env_name)
    return env


if __name__ == '__main__':
    register_env(env_name, lambda env_config: create_env(env_config))
    config = ppo.DEFAULT_CONFIG.copy()
    horizon = 10
    num_cpus = 4
    ray.init(num_cpus=num_cpus, redirect_output=True)
    config["num_workers"] = num_cpus
    config["train_batch_size"] = 1000
    config["sgd_minibatch_size"] = 10
    config["num_sgd_iter"] = 10
    config["gamma"] = 0.999
    config["horizon"] = horizon
    config["use_gae"] = True
    config["model"].update({"fcnet_hiddens": [256, 256]})
    options = {
        "multiagent_obs_shapes": [3, 3],
        "multiagent_act_shapes": [1, 1],
        "multiagent_shared_model": True,
        "multiagent_fcnet_hiddens": [[32, 32]] * 2
    }
    config["model"].update({"custom_options": options})
    alg = ppo.PPOAgent(env=env_name, config=config)
    for i in range(1):
        alg.train()
@@ -1,74 +0,0 @@
from gym.spaces import Box, Tuple
from gym.utils import seeding
from gym.envs.classic_control.pendulum import PendulumEnv
import numpy as np
"""
Multiagent pendulum that sums its torques to generate an action
"""


class MultiAgentPendulumEnv(PendulumEnv):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self):
        self.max_speed = 8
        self.max_torque = 2.
        self.dt = .05
        self.viewer = None

        high = np.array([1., 1., self.max_speed])
        self.action_space = [
            Box(low=-self.max_torque / 2,
                high=self.max_torque / 2,
                shape=(1, ),
                dtype=np.float32) for _ in range(2)
        ]
        self.observation_space = Tuple(
            [Box(low=-high, high=high, dtype=np.float32) for _ in range(2)])

        self.seed()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, u):
        th, thdot = self.state  # th := theta

        summed_u = np.sum(u)
        g = 10.
        m = 1.
        length = 1.
        dt = self.dt

        summed_u = np.clip(summed_u, -self.max_torque, self.max_torque)
        self.last_u = summed_u  # for rendering
        costs = self.angle_normalize(th) ** 2 + .1 * thdot ** 2 + \
            .001 * (summed_u ** 2)

        newthdot = thdot + (-3 * g / (2 * length) * np.sin(th + np.pi) + 3. /
                            (m * length**2) * summed_u) * dt
        newth = th + newthdot * dt
        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)

        self.state = np.array([newth, newthdot])
        return self._get_obs(), -costs, False, {}

    def reset(self):
        high = np.array([np.pi, 1])
        self.state = self.np_random.uniform(low=-high, high=high)
        self.last_u = None
        return self._get_obs()

    def _get_obs(self):
        theta, thetadot = self.state
        return [
            np.array([np.cos(theta), np.sin(theta), thetadot])
            for _ in range(2)
        ]

    def angle_normalize(self, x):
        return (((x + np.pi) % (2 * np.pi)) - np.pi)
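A minimal usage sketch (not part of the diff, assuming MultiAgentPendulumEnv above is in scope and numpy is imported as np): each agent supplies a torque in [-1, 1] (half the single-agent limit), the torques are summed and clipped before the standard pendulum update, and both agents see the same [cos(theta), sin(theta), theta_dot] observation.

env = MultiAgentPendulumEnv()
obs_per_agent = env.reset()
obs_per_agent, reward, done, info = env.step(
    [np.array([0.7]), np.array([-0.2])])
# The torque actually applied is 0.7 + (-0.2) = 0.5; the reward is the usual
# negative pendulum cost, shared by both agents.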
@@ -17,7 +17,6 @@ from ray.rllib.models.preprocessors import get_preprocessor
 from ray.rllib.models.fcnet import FullyConnectedNetwork
 from ray.rllib.models.visionnet import VisionNetwork
 from ray.rllib.models.lstm import LSTM
-from ray.rllib.models.multiagentfcnet import MultiAgentFullyConnectedNetwork

 MODEL_CONFIGS = [
     # === Built-in options ===
@@ -178,13 +177,6 @@ class ModelCatalog(object):

         obs_rank = len(inputs.shape) - 1

-        # num_outputs > 1 used to avoid hitting this with the value function
-        if isinstance(
-                options.get("custom_options", {}).get(
-                    "multiagent_fcnet_hiddens", 1), list) and num_outputs > 1:
-            return MultiAgentFullyConnectedNetwork(inputs, num_outputs,
-                                                   options)
-
         if obs_rank > 1:
             return VisionNetwork(inputs, num_outputs, options)

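For reference, a standalone sketch of the routing test the deleted branch performed (the helper name is hypothetical, not an RLlib API): ModelCatalog handed model construction to MultiAgentFullyConnectedNetwork only when per-agent hidden-layer specs were supplied as a list and more than one output was requested, which kept the scalar value-function head on the ordinary FullyConnectedNetwork.

def _uses_legacy_multiagent_model(options, num_outputs):
    # Hypothetical helper mirroring the removed condition above.
    hiddens = options.get("custom_options", {}).get(
        "multiagent_fcnet_hiddens", 1)
    return isinstance(hiddens, list) and num_outputs > 1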
@@ -1,43 +0,0 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from ray.rllib.models.model import Model
from ray.rllib.models.fcnet import FullyConnectedNetwork
from ray.rllib.utils.reshaper import Reshaper


class MultiAgentFullyConnectedNetwork(Model):
    """Multiagent fully connected network."""

    def _build_layers(self, inputs, num_outputs, options):
        # Split the input and output tensors
        input_shapes = options["custom_options"]["multiagent_obs_shapes"]
        output_shapes = options["custom_options"]["multiagent_act_shapes"]
        input_reshaper = Reshaper(input_shapes)
        output_reshaper = Reshaper(output_shapes)
        split_inputs = input_reshaper.split_tensor(inputs)
        num_actions = output_reshaper.split_number(num_outputs)

        custom_options = options["custom_options"]
        hiddens = custom_options.get("multiagent_fcnet_hiddens",
                                     [[256, 256]] * 1)

        # check for a shared model
        shared_model = custom_options.get("multiagent_shared_model", 0)
        reuse = tf.AUTO_REUSE if shared_model else False
        outputs = []
        for i in range(len(hiddens)):
            scope = "multi" if shared_model else "multi{}".format(i)
            with tf.variable_scope(scope, reuse=reuse):
                sub_options = options.copy()
                sub_options.update({"fcnet_hiddens": hiddens[i]})
                # TODO(ev) make this support arbitrary networks
                fcnet = FullyConnectedNetwork(split_inputs[i],
                                              int(num_actions[i]), sub_options)
                output = fcnet.outputs
                outputs.append(output)
        overall_output = tf.concat(outputs, axis=1)
        return overall_output, outputs
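A small sketch of the scope naming used in _build_layers above (the helper is hypothetical, not part of the original file): with multiagent_shared_model disabled each agent's sub-network gets its own variable scope and therefore its own weights, while enabling it makes every agent reuse the single "multi" scope via tf.AUTO_REUSE.

def _agent_scopes(num_agents, shared_model):
    # Hypothetical helper reproducing the scope names chosen in the loop above.
    if shared_model:
        return ["multi"] * num_agents
    return ["multi{}".format(i) for i in range(num_agents)]

# _agent_scopes(2, shared_model=False) -> ['multi0', 'multi1']
# _agent_scopes(2, shared_model=True)  -> ['multi', 'multi']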
@@ -1,49 +0,0 @@
import numpy as np
import tensorflow as tf


class Reshaper(object):
    """
    This class keeps track of where in the flattened observation space
    we should be slicing and what the new shapes should be
    """

    def __init__(self, env_space):
        self.shapes = []
        self.slice_positions = []
        self.env_space = env_space
        if isinstance(env_space, list):
            for space in env_space:
                # Handle both gym arrays and just lists of inputs length
                if hasattr(space, "n"):
                    arr_shape = np.asarray([1])  # discrete space
                elif hasattr(space, "shape"):
                    arr_shape = np.asarray(space.shape)
                else:
                    arr_shape = space
                self.shapes.append(arr_shape)
                if len(self.slice_positions) == 0:
                    self.slice_positions.append(np.product(arr_shape))
                else:
                    self.slice_positions.append(
                        np.product(arr_shape) + self.slice_positions[-1])
        else:
            self.shapes.append(np.asarray(env_space.shape))
            self.slice_positions.append(np.product(env_space.shape))

    def get_slice_lengths(self):
        diffed_list = np.diff(self.slice_positions).tolist()
        diffed_list.insert(0, self.slice_positions[0])
        return np.asarray(diffed_list).astype(int)

    def split_tensor(self, tensor, axis=-1):
        # FIXME (ev) This won't work for mixed action distributions like
        # one agent Gaussian one agent discrete
        slice_rescale = int(tensor.shape.as_list()[axis] / int(
            np.sum(self.get_slice_lengths())))
        return tf.split(
            tensor, slice_rescale * self.get_slice_lengths(), axis=axis)

    def split_number(self, number):
        slice_rescale = int(number / int(np.sum(self.get_slice_lengths())))
        return slice_rescale * self.get_slice_lengths()
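A minimal usage sketch (not part of the diff, assuming the Reshaper class above is in scope), matching the mountain-car example earlier in this commit: two Box observations of shape (2,) give slice lengths [2, 2], and for act shapes [1, 1] the 6 PPO logits (two Discrete(3) heads) split back into 3 per agent.

import numpy as np
from gym.spaces import Box

obs_spaces = [
    Box(low=-1.0, high=1.0, shape=(2, ), dtype=np.float32) for _ in range(2)
]
obs_reshaper = Reshaper(obs_spaces)
print(obs_reshaper.get_slice_lengths())   # [2 2]

act_reshaper = Reshaper([1, 1])           # "multiagent_act_shapes": [1, 1]
print(act_reshaper.split_number(6))       # [3 3] -> 3 logits per agent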