[rllib] Remove legacy multiagent support (#2975)

* remove legacy

* remove reshaper
Eric Liang 2018-10-01 13:07:11 -07:00 committed by GitHub
parent fcef4edd46
commit 2019b4122b
8 changed files with 0 additions and 344 deletions
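
The legacy path removed here predates RLlib's dict-keyed multiagent API, which is the interface that remains after this commit. For orientation only, a minimal hypothetical sketch of that style of env; the ray.rllib.env.multi_agent_env.MultiAgentEnv class path and its reset/step contract are assumptions about the API of this era, not part of this diff:

# Hypothetical sketch; the import path and method contract are assumptions
# about the non-legacy multiagent API of this era, not part of this commit.
import numpy as np
from ray.rllib.env.multi_agent_env import MultiAgentEnv


class TwoAgentToyEnv(MultiAgentEnv):
    """Observations, rewards, and dones are dicts keyed by agent id."""

    def reset(self):
        self.t = 0
        return {"agent_0": np.zeros(2), "agent_1": np.zeros(2)}

    def step(self, action_dict):
        self.t += 1
        obs = {aid: np.random.uniform(size=2) for aid in action_dict}
        rewards = {aid: float(a) for aid, a in action_dict.items()}
        dones = {"__all__": self.t >= 10}  # "__all__" ends the episode for all agents
        return obs, rewards, dones, {}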


@@ -1,59 +0,0 @@
""" Multiagent mountain car. Each agent outputs an action which
is summed to form the total action. This is a discrete
multiagent example
"""
import gym
from gym.envs.registration import register

import ray
import ray.rllib.agents.ppo as ppo
from ray.tune.registry import register_env

env_name = "MultiAgentMountainCarEnv"

env_version_num = 0
env_name = env_name + '-v' + str(env_version_num)


def pass_params_to_gym(env_name):
    global env_version_num

    register(
        id=env_name,
        entry_point=(
            "ray.rllib.examples.legacy_multiagent.multiagent_mountaincar_env:"
            "MultiAgentMountainCarEnv"),
        max_episode_steps=200,
        kwargs={})


def create_env(env_config):
    pass_params_to_gym(env_name)
    env = gym.envs.make(env_name)
    return env


if __name__ == '__main__':
    register_env(env_name, lambda env_config: create_env(env_config))
    config = ppo.DEFAULT_CONFIG.copy()
    horizon = 10
    num_cpus = 4
    ray.init(num_cpus=num_cpus, redirect_output=True)
    config["num_workers"] = num_cpus
    config["train_batch_size"] = 1000
    config["num_sgd_iter"] = 10
    config["gamma"] = 0.999
    config["horizon"] = horizon
    config["use_gae"] = False
    config["model"].update({"fcnet_hiddens": [256, 256]})
    options = {
        "multiagent_obs_shapes": [2, 2],
        "multiagent_act_shapes": [1, 1],
        "multiagent_shared_model": False,
        "multiagent_fcnet_hiddens": [[32, 32]] * 2
    }
    config["model"].update({"custom_options": options})
    alg = ppo.PPOAgent(env=env_name, config=config)
    for i in range(1):
        alg.train()


@@ -1,51 +0,0 @@
from math import cos

from gym.spaces import Box, Tuple, Discrete
import numpy as np
from gym.envs.classic_control.mountain_car import MountainCarEnv

"""
Multiagent mountain car that sums and then
averages its actions to produce the velocity
"""


class MultiAgentMountainCarEnv(MountainCarEnv):
    def __init__(self):
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.5

        self.low = np.array([self.min_position, -self.max_speed])
        self.high = np.array([self.max_position, self.max_speed])

        self.viewer = None

        self.action_space = [Discrete(3) for _ in range(2)]
        self.observation_space = Tuple(
            [Box(self.low, self.high, dtype=np.float32) for _ in range(2)])

        self.seed()
        self.reset()

    def step(self, action):
        summed_act = 0.5 * np.sum(action)

        position, velocity = self.state
        velocity += (summed_act - 1) * 0.001
        velocity += cos(3 * position) * (-0.0025)
        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
        position += velocity
        position = np.clip(position, self.min_position, self.max_position)
        if (position == self.min_position and velocity < 0):
            velocity = 0

        done = bool(position >= self.goal_position)
        reward = position

        self.state = (position, velocity)
        return [np.array(self.state) for _ in range(2)], reward, done, {}

    def reset(self):
        self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
        return [np.array(self.state) for _ in range(2)]
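
For reference, a minimal hypothetical usage sketch of the removed env (not part of this commit): each of the two agents supplies a Discrete(3) action, and step() averages the summed actions before applying the usual mountain-car physics.

# Hypothetical usage of the class defined above; not part of the original diff.
env = MultiAgentMountainCarEnv()
obs = env.reset()                            # list of two identical 2-dim observations
obs, reward, done, info = env.step([2, 0])   # 0.5 * sum(actions) drives the car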


@@ -1,60 +0,0 @@
""" Run script for multiagent pendulum env. Each agent outputs a
torque which is summed to form the total torque. This is a
continuous multiagent example
"""
import gym
from gym.envs.registration import register

import ray
import ray.rllib.agents.ppo as ppo
from ray.tune.registry import register_env

env_name = "MultiAgentPendulumEnv"

env_version_num = 0
env_name = env_name + '-v' + str(env_version_num)


def pass_params_to_gym(env_name):
    global env_version_num

    register(
        id=env_name,
        entry_point=(
            "ray.rllib.examples.legacy_multiagent.multiagent_pendulum_env:"
            "MultiAgentPendulumEnv"),
        max_episode_steps=100,
        kwargs={})


def create_env(env_config):
    pass_params_to_gym(env_name)
    env = gym.envs.make(env_name)
    return env


if __name__ == '__main__':
    register_env(env_name, lambda env_config: create_env(env_config))
    config = ppo.DEFAULT_CONFIG.copy()
    horizon = 10
    num_cpus = 4
    ray.init(num_cpus=num_cpus, redirect_output=True)
    config["num_workers"] = num_cpus
    config["train_batch_size"] = 1000
    config["sgd_minibatch_size"] = 10
    config["num_sgd_iter"] = 10
    config["gamma"] = 0.999
    config["horizon"] = horizon
    config["use_gae"] = True
    config["model"].update({"fcnet_hiddens": [256, 256]})
    options = {
        "multiagent_obs_shapes": [3, 3],
        "multiagent_act_shapes": [1, 1],
        "multiagent_shared_model": True,
        "multiagent_fcnet_hiddens": [[32, 32]] * 2
    }
    config["model"].update({"custom_options": options})
    alg = ppo.PPOAgent(env=env_name, config=config)
    for i in range(1):
        alg.train()


@@ -1,74 +0,0 @@
from gym.spaces import Box, Tuple
from gym.utils import seeding
from gym.envs.classic_control.pendulum import PendulumEnv
import numpy as np

"""
Multiagent pendulum that sums its torques to generate an action
"""


class MultiAgentPendulumEnv(PendulumEnv):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self):
        self.max_speed = 8
        self.max_torque = 2.
        self.dt = .05
        self.viewer = None

        high = np.array([1., 1., self.max_speed])
        self.action_space = [
            Box(low=-self.max_torque / 2,
                high=self.max_torque / 2,
                shape=(1, ),
                dtype=np.float32) for _ in range(2)
        ]
        self.observation_space = Tuple(
            [Box(low=-high, high=high, dtype=np.float32) for _ in range(2)])

        self.seed()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, u):
        th, thdot = self.state  # th := theta

        summed_u = np.sum(u)
        g = 10.
        m = 1.
        length = 1.
        dt = self.dt

        summed_u = np.clip(summed_u, -self.max_torque, self.max_torque)
        self.last_u = summed_u  # for rendering
        costs = self.angle_normalize(th) ** 2 + .1 * thdot ** 2 + \
            .001 * (summed_u ** 2)

        newthdot = thdot + (-3 * g / (2 * length) * np.sin(th + np.pi) + 3. /
                            (m * length**2) * summed_u) * dt
        newth = th + newthdot * dt
        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)

        self.state = np.array([newth, newthdot])
        return self._get_obs(), -costs, False, {}

    def reset(self):
        high = np.array([np.pi, 1])
        self.state = self.np_random.uniform(low=-high, high=high)
        self.last_u = None
        return self._get_obs()

    def _get_obs(self):
        theta, thetadot = self.state
        return [
            np.array([np.cos(theta), np.sin(theta), thetadot])
            for _ in range(2)
        ]

    def angle_normalize(self, x):
        return (((x + np.pi) % (2 * np.pi)) - np.pi)
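
As with the mountain car variant, a minimal hypothetical sketch (not part of this commit) of stepping the removed env: each agent contributes a torque, and the summed torque is clipped to the single-pendulum limit before the standard dynamics run.

# Hypothetical usage of the class defined above; not part of the original diff.
import numpy as np

env = MultiAgentPendulumEnv()
obs = env.reset()                                   # two identical 3-dim observations
actions = [np.array([0.5]), np.array([-0.25])]      # one torque per agent
obs, reward, done, info = env.step(actions)         # torques are summed, then clipped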


@@ -17,7 +17,6 @@ from ray.rllib.models.preprocessors import get_preprocessor
from ray.rllib.models.fcnet import FullyConnectedNetwork
from ray.rllib.models.visionnet import VisionNetwork
from ray.rllib.models.lstm import LSTM
from ray.rllib.models.multiagentfcnet import MultiAgentFullyConnectedNetwork

MODEL_CONFIGS = [
    # === Built-in options ===
@@ -178,13 +177,6 @@ class ModelCatalog(object):
        obs_rank = len(inputs.shape) - 1

        # num_outputs > 1 used to avoid hitting this with the value function
        if isinstance(
                options.get("custom_options", {}).get(
                    "multiagent_fcnet_hiddens", 1), list) and num_outputs > 1:
            return MultiAgentFullyConnectedNetwork(inputs, num_outputs,
                                                   options)

        if obs_rank > 1:
            return VisionNetwork(inputs, num_outputs, options)
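
For context, this branch was the only hook into the legacy multiagent path: get_model returned the multiagent network whenever the model config carried a list-valued "multiagent_fcnet_hiddens", as in the example scripts above. A hypothetical sketch of the call that used to hit it; the positional get_model(inputs, num_outputs, options) signature is an assumption about the API of this era:

# Hypothetical sketch, not part of this commit; get_model signature assumed.
import tensorflow as tf
from ray.rllib.models import ModelCatalog

options = {
    "fcnet_hiddens": [256, 256],
    "custom_options": {
        "multiagent_obs_shapes": [2, 2],             # two agents, 2-dim obs each
        "multiagent_act_shapes": [1, 1],             # one action slot per agent
        "multiagent_shared_model": False,
        "multiagent_fcnet_hiddens": [[32, 32]] * 2,  # list value => legacy branch
    },
}
obs_ph = tf.placeholder(tf.float32, [None, 4])       # flattened two-agent observation
# With a list-valued "multiagent_fcnet_hiddens" and num_outputs > 1, this used to
# return a MultiAgentFullyConnectedNetwork; after this commit it falls through to
# the ordinary fully connected / vision model selection.
model = ModelCatalog.get_model(obs_ph, 6, options)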


@@ -1,43 +0,0 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from ray.rllib.models.model import Model
from ray.rllib.models.fcnet import FullyConnectedNetwork
from ray.rllib.utils.reshaper import Reshaper


class MultiAgentFullyConnectedNetwork(Model):
    """Multiagent fully connected network."""

    def _build_layers(self, inputs, num_outputs, options):
        # Split the input and output tensors
        input_shapes = options["custom_options"]["multiagent_obs_shapes"]
        output_shapes = options["custom_options"]["multiagent_act_shapes"]
        input_reshaper = Reshaper(input_shapes)
        output_reshaper = Reshaper(output_shapes)
        split_inputs = input_reshaper.split_tensor(inputs)
        num_actions = output_reshaper.split_number(num_outputs)

        custom_options = options["custom_options"]
        hiddens = custom_options.get("multiagent_fcnet_hiddens",
                                     [[256, 256]] * 1)

        # check for a shared model
        shared_model = custom_options.get("multiagent_shared_model", 0)
        reuse = tf.AUTO_REUSE if shared_model else False
        outputs = []
        for i in range(len(hiddens)):
            scope = "multi" if shared_model else "multi{}".format(i)
            with tf.variable_scope(scope, reuse=reuse):
                sub_options = options.copy()
                sub_options.update({"fcnet_hiddens": hiddens[i]})
                # TODO(ev) make this support arbitrary networks
                fcnet = FullyConnectedNetwork(split_inputs[i],
                                              int(num_actions[i]), sub_options)
                output = fcnet.outputs
                outputs.append(output)
        overall_output = tf.concat(outputs, axis=1)
        return overall_output, outputs


@@ -1,49 +0,0 @@
import numpy as np
import tensorflow as tf


class Reshaper(object):
    """
    This class keeps track of where in the flattened observation space
    we should be slicing and what the new shapes should be
    """

    def __init__(self, env_space):
        self.shapes = []
        self.slice_positions = []
        self.env_space = env_space
        if isinstance(env_space, list):
            for space in env_space:
                # Handle both gym arrays and just lists of inputs length
                if hasattr(space, "n"):
                    arr_shape = np.asarray([1])  # discrete space
                elif hasattr(space, "shape"):
                    arr_shape = np.asarray(space.shape)
                else:
                    arr_shape = space
                self.shapes.append(arr_shape)
                if len(self.slice_positions) == 0:
                    self.slice_positions.append(np.product(arr_shape))
                else:
                    self.slice_positions.append(
                        np.product(arr_shape) + self.slice_positions[-1])
        else:
            self.shapes.append(np.asarray(env_space.shape))
            self.slice_positions.append(np.product(env_space.shape))

    def get_slice_lengths(self):
        diffed_list = np.diff(self.slice_positions).tolist()
        diffed_list.insert(0, self.slice_positions[0])
        return np.asarray(diffed_list).astype(int)

    def split_tensor(self, tensor, axis=-1):
        # FIXME (ev) This won't work for mixed action distributions like
        # one agent Gaussian one agent discrete
        slice_rescale = int(tensor.shape.as_list()[axis] / int(
            np.sum(self.get_slice_lengths())))
        return tf.split(
            tensor, slice_rescale * self.get_slice_lengths(), axis=axis)

    def split_number(self, number):
        slice_rescale = int(number / int(np.sum(self.get_slice_lengths())))
        return slice_rescale * self.get_slice_lengths()
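
To make the slicing concrete, a small hypothetical sketch (not part of this commit) using the [2, 2] observation shapes from the mountain car example: Reshaper records per-agent slice lengths, splits a flattened observation tensor back into per-agent pieces, and apportions a flat logit count across agents.

# Hypothetical usage of the class defined above; not part of the original diff.
import numpy as np
import tensorflow as tf

obs_reshaper = Reshaper([2, 2])                    # two agents, 2-dim obs each
print(obs_reshaper.get_slice_lengths())            # -> [2 2]

obs_ph = tf.placeholder(tf.float32, [None, 4])     # flattened two-agent obs
per_agent = obs_reshaper.split_tensor(obs_ph)      # two tensors of shape [None, 2]

act_reshaper = Reshaper([1, 1])                    # one action slot per agent
print(act_reshaper.split_number(6))                # -> [3 3] logits per agent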