Mirror of https://github.com/vale981/ray, synced 2025-03-06 18:41:40 -05:00
[rllib] Remove legacy multiagent support (#2975)
* remove legacy
* remove reshaper
This commit is contained in:
parent fcef4edd46
commit 2019b4122b

8 changed files with 0 additions and 344 deletions
@@ -1,59 +0,0 @@
""" Multiagent mountain car. Each agent outputs an action which
is summed to form the total action. This is a discrete
multiagent example
"""

import gym
from gym.envs.registration import register

import ray
import ray.rllib.agents.ppo as ppo
from ray.tune.registry import register_env

env_name = "MultiAgentMountainCarEnv"

env_version_num = 0
env_name = env_name + '-v' + str(env_version_num)


def pass_params_to_gym(env_name):
    global env_version_num

    register(
        id=env_name,
        entry_point=(
            "ray.rllib.examples.legacy_multiagent.multiagent_mountaincar_env:"
            "MultiAgentMountainCarEnv"),
        max_episode_steps=200,
        kwargs={})


def create_env(env_config):
    pass_params_to_gym(env_name)
    env = gym.envs.make(env_name)
    return env


if __name__ == '__main__':
    register_env(env_name, lambda env_config: create_env(env_config))
    config = ppo.DEFAULT_CONFIG.copy()
    horizon = 10
    num_cpus = 4
    ray.init(num_cpus=num_cpus, redirect_output=True)
    config["num_workers"] = num_cpus
    config["train_batch_size"] = 1000
    config["num_sgd_iter"] = 10
    config["gamma"] = 0.999
    config["horizon"] = horizon
    config["use_gae"] = False
    config["model"].update({"fcnet_hiddens": [256, 256]})
    options = {
        "multiagent_obs_shapes": [2, 2],
        "multiagent_act_shapes": [1, 1],
        "multiagent_shared_model": False,
        "multiagent_fcnet_hiddens": [[32, 32]] * 2
    }
    config["model"].update({"custom_options": options})
    alg = ppo.PPOAgent(env=env_name, config=config)
    for i in range(1):
        alg.train()
@@ -1,51 +0,0 @@
from math import cos
from gym.spaces import Box, Tuple, Discrete
import numpy as np
from gym.envs.classic_control.mountain_car import MountainCarEnv
"""
Multiagent mountain car that sums and then
averages its actions to produce the velocity
"""


class MultiAgentMountainCarEnv(MountainCarEnv):
    def __init__(self):
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.5

        self.low = np.array([self.min_position, -self.max_speed])
        self.high = np.array([self.max_position, self.max_speed])

        self.viewer = None

        self.action_space = [Discrete(3) for _ in range(2)]
        self.observation_space = Tuple(
            [Box(self.low, self.high, dtype=np.float32) for _ in range(2)])

        self.seed()
        self.reset()

    def step(self, action):
        summed_act = 0.5 * np.sum(action)

        position, velocity = self.state
        velocity += (summed_act - 1) * 0.001
        velocity += cos(3 * position) * (-0.0025)
        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
        position += velocity
        position = np.clip(position, self.min_position, self.max_position)
        if (position == self.min_position and velocity < 0):
            velocity = 0

        done = bool(position >= self.goal_position)

        reward = position

        self.state = (position, velocity)
        return [np.array(self.state) for _ in range(2)], reward, done, {}

    def reset(self):
        self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
        return [np.array(self.state) for _ in range(2)]
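A minimal usage sketch (not part of the diff, assuming the MultiAgentMountainCarEnv class above is in scope): each of the two agents picks an action from Discrete(3), the env halves the summed action before applying the usual mountain-car dynamics, and both agents receive identical observations and one shared reward.

env = MultiAgentMountainCarEnv()
obs_per_agent = env.reset()       # two identical [position, velocity] arrays
actions = [2, 0]                  # one Discrete(3) action per agent
obs_per_agent, reward, done, info = env.step(actions)
# 0.5 * (2 + 0) == 1, so this pair behaves like the single-agent "no push"
# action; the shared reward is the cart position.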
@@ -1,60 +0,0 @@
""" Run script for multiagent pendulum env. Each agent outputs a
torque which is summed to form the total torque. This is a
continuous multiagent example
"""

import gym
from gym.envs.registration import register

import ray
import ray.rllib.agents.ppo as ppo
from ray.tune.registry import register_env

env_name = "MultiAgentPendulumEnv"

env_version_num = 0
env_name = env_name + '-v' + str(env_version_num)


def pass_params_to_gym(env_name):
    global env_version_num

    register(
        id=env_name,
        entry_point=(
            "ray.rllib.examples.legacy_multiagent.multiagent_pendulum_env:"
            "MultiAgentPendulumEnv"),
        max_episode_steps=100,
        kwargs={})


def create_env(env_config):
    pass_params_to_gym(env_name)
    env = gym.envs.make(env_name)
    return env


if __name__ == '__main__':
    register_env(env_name, lambda env_config: create_env(env_config))
    config = ppo.DEFAULT_CONFIG.copy()
    horizon = 10
    num_cpus = 4
    ray.init(num_cpus=num_cpus, redirect_output=True)
    config["num_workers"] = num_cpus
    config["train_batch_size"] = 1000
    config["sgd_minibatch_size"] = 10
    config["num_sgd_iter"] = 10
    config["gamma"] = 0.999
    config["horizon"] = horizon
    config["use_gae"] = True
    config["model"].update({"fcnet_hiddens": [256, 256]})
    options = {
        "multiagent_obs_shapes": [3, 3],
        "multiagent_act_shapes": [1, 1],
        "multiagent_shared_model": True,
        "multiagent_fcnet_hiddens": [[32, 32]] * 2
    }
    config["model"].update({"custom_options": options})
    alg = ppo.PPOAgent(env=env_name, config=config)
    for i in range(1):
        alg.train()
@@ -1,74 +0,0 @@
from gym.spaces import Box, Tuple
from gym.utils import seeding
from gym.envs.classic_control.pendulum import PendulumEnv
import numpy as np
"""
Multiagent pendulum that sums its torques to generate an action
"""


class MultiAgentPendulumEnv(PendulumEnv):
    metadata = {
        'render.modes': ['human', 'rgb_array'],
        'video.frames_per_second': 30
    }

    def __init__(self):
        self.max_speed = 8
        self.max_torque = 2.
        self.dt = .05
        self.viewer = None

        high = np.array([1., 1., self.max_speed])
        self.action_space = [
            Box(low=-self.max_torque / 2,
                high=self.max_torque / 2,
                shape=(1, ),
                dtype=np.float32) for _ in range(2)
        ]
        self.observation_space = Tuple(
            [Box(low=-high, high=high, dtype=np.float32) for _ in range(2)])

        self.seed()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, u):
        th, thdot = self.state  # th := theta

        summed_u = np.sum(u)
        g = 10.
        m = 1.
        length = 1.
        dt = self.dt

        summed_u = np.clip(summed_u, -self.max_torque, self.max_torque)
        self.last_u = summed_u  # for rendering
        costs = self.angle_normalize(th) ** 2 + .1 * thdot ** 2 + \
            .001 * (summed_u ** 2)

        newthdot = thdot + (-3 * g / (2 * length) * np.sin(th + np.pi) + 3. /
                            (m * length**2) * summed_u) * dt
        newth = th + newthdot * dt
        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)

        self.state = np.array([newth, newthdot])
        return self._get_obs(), -costs, False, {}

    def reset(self):
        high = np.array([np.pi, 1])
        self.state = self.np_random.uniform(low=-high, high=high)
        self.last_u = None
        return self._get_obs()

    def _get_obs(self):
        theta, thetadot = self.state
        return [
            np.array([np.cos(theta), np.sin(theta), thetadot])
            for _ in range(2)
        ]

    def angle_normalize(self, x):
        return (((x + np.pi) % (2 * np.pi)) - np.pi)
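A minimal usage sketch (not part of the diff, assuming MultiAgentPendulumEnv above is in scope and numpy is imported as np): each agent supplies a torque in [-1, 1] (half the single-agent limit), the torques are summed and clipped before the standard pendulum update, and both agents see the same [cos(theta), sin(theta), theta_dot] observation.

env = MultiAgentPendulumEnv()
obs_per_agent = env.reset()
obs_per_agent, reward, done, info = env.step(
    [np.array([0.7]), np.array([-0.2])])
# The torque actually applied is 0.7 + (-0.2) = 0.5; the reward is the usual
# negative pendulum cost, shared by both agents.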
@@ -17,7 +17,6 @@ from ray.rllib.models.preprocessors import get_preprocessor
 from ray.rllib.models.fcnet import FullyConnectedNetwork
 from ray.rllib.models.visionnet import VisionNetwork
 from ray.rllib.models.lstm import LSTM
-from ray.rllib.models.multiagentfcnet import MultiAgentFullyConnectedNetwork

 MODEL_CONFIGS = [
     # === Built-in options ===
@@ -178,13 +177,6 @@ class ModelCatalog(object):

         obs_rank = len(inputs.shape) - 1

-        # num_outputs > 1 used to avoid hitting this with the value function
-        if isinstance(
-                options.get("custom_options", {}).get(
-                    "multiagent_fcnet_hiddens", 1), list) and num_outputs > 1:
-            return MultiAgentFullyConnectedNetwork(inputs, num_outputs,
-                                                   options)
-
         if obs_rank > 1:
             return VisionNetwork(inputs, num_outputs, options)

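For reference, a standalone sketch of the routing test the deleted branch performed (the helper name is hypothetical, not an RLlib API): ModelCatalog handed model construction to MultiAgentFullyConnectedNetwork only when per-agent hidden-layer specs were supplied as a list and more than one output was requested, which kept the scalar value-function head on the ordinary FullyConnectedNetwork.

def _uses_legacy_multiagent_model(options, num_outputs):
    # Hypothetical helper mirroring the removed condition above.
    hiddens = options.get("custom_options", {}).get(
        "multiagent_fcnet_hiddens", 1)
    return isinstance(hiddens, list) and num_outputs > 1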
@@ -1,43 +0,0 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from ray.rllib.models.model import Model
from ray.rllib.models.fcnet import FullyConnectedNetwork
from ray.rllib.utils.reshaper import Reshaper


class MultiAgentFullyConnectedNetwork(Model):
    """Multiagent fully connected network."""

    def _build_layers(self, inputs, num_outputs, options):
        # Split the input and output tensors
        input_shapes = options["custom_options"]["multiagent_obs_shapes"]
        output_shapes = options["custom_options"]["multiagent_act_shapes"]
        input_reshaper = Reshaper(input_shapes)
        output_reshaper = Reshaper(output_shapes)
        split_inputs = input_reshaper.split_tensor(inputs)
        num_actions = output_reshaper.split_number(num_outputs)

        custom_options = options["custom_options"]
        hiddens = custom_options.get("multiagent_fcnet_hiddens",
                                     [[256, 256]] * 1)

        # check for a shared model
        shared_model = custom_options.get("multiagent_shared_model", 0)
        reuse = tf.AUTO_REUSE if shared_model else False
        outputs = []
        for i in range(len(hiddens)):
            scope = "multi" if shared_model else "multi{}".format(i)
            with tf.variable_scope(scope, reuse=reuse):
                sub_options = options.copy()
                sub_options.update({"fcnet_hiddens": hiddens[i]})
                # TODO(ev) make this support arbitrary networks
                fcnet = FullyConnectedNetwork(split_inputs[i],
                                              int(num_actions[i]), sub_options)
                output = fcnet.outputs
                outputs.append(output)
        overall_output = tf.concat(outputs, axis=1)
        return overall_output, outputs
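A small sketch of the scope naming used in _build_layers above (the helper is hypothetical, not part of the original file): with multiagent_shared_model disabled each agent's sub-network gets its own variable scope and therefore its own weights, while enabling it makes every agent reuse the single "multi" scope via tf.AUTO_REUSE.

def _agent_scopes(num_agents, shared_model):
    # Hypothetical helper reproducing the scope names chosen in the loop above.
    if shared_model:
        return ["multi"] * num_agents
    return ["multi{}".format(i) for i in range(num_agents)]

# _agent_scopes(2, shared_model=False) -> ['multi0', 'multi1']
# _agent_scopes(2, shared_model=True)  -> ['multi', 'multi']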
@@ -1,49 +0,0 @@
import numpy as np
import tensorflow as tf


class Reshaper(object):
    """
    This class keeps track of where in the flattened observation space
    we should be slicing and what the new shapes should be
    """

    def __init__(self, env_space):
        self.shapes = []
        self.slice_positions = []
        self.env_space = env_space
        if isinstance(env_space, list):
            for space in env_space:
                # Handle both gym arrays and just lists of inputs length
                if hasattr(space, "n"):
                    arr_shape = np.asarray([1])  # discrete space
                elif hasattr(space, "shape"):
                    arr_shape = np.asarray(space.shape)
                else:
                    arr_shape = space
                self.shapes.append(arr_shape)
                if len(self.slice_positions) == 0:
                    self.slice_positions.append(np.product(arr_shape))
                else:
                    self.slice_positions.append(
                        np.product(arr_shape) + self.slice_positions[-1])
        else:
            self.shapes.append(np.asarray(env_space.shape))
            self.slice_positions.append(np.product(env_space.shape))

    def get_slice_lengths(self):
        diffed_list = np.diff(self.slice_positions).tolist()
        diffed_list.insert(0, self.slice_positions[0])
        return np.asarray(diffed_list).astype(int)

    def split_tensor(self, tensor, axis=-1):
        # FIXME (ev) This won't work for mixed action distributions like
        # one agent Gaussian one agent discrete
        slice_rescale = int(tensor.shape.as_list()[axis] / int(
            np.sum(self.get_slice_lengths())))
        return tf.split(
            tensor, slice_rescale * self.get_slice_lengths(), axis=axis)

    def split_number(self, number):
        slice_rescale = int(number / int(np.sum(self.get_slice_lengths())))
        return slice_rescale * self.get_slice_lengths()
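A minimal usage sketch (not part of the diff, assuming the Reshaper class above is in scope), matching the mountain-car example earlier in this commit: two Box observations of shape (2,) give slice lengths [2, 2], and for act shapes [1, 1] the 6 PPO logits (two Discrete(3) heads) split back into 3 per agent.

import numpy as np
from gym.spaces import Box

obs_spaces = [
    Box(low=-1.0, high=1.0, shape=(2, ), dtype=np.float32) for _ in range(2)
]
obs_reshaper = Reshaper(obs_spaces)
print(obs_reshaper.get_slice_lengths())   # [2 2]

act_reshaper = Reshaper([1, 1])           # "multiagent_act_shapes": [1, 1]
print(act_reshaper.split_number(6))       # [3 3] -> 3 logits per agent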