diff --git a/python/ray/rllib/examples/legacy_multiagent/__init__.py b/python/ray/rllib/examples/legacy_multiagent/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/python/ray/rllib/examples/legacy_multiagent/multiagent_mountaincar.py b/python/ray/rllib/examples/legacy_multiagent/multiagent_mountaincar.py
deleted file mode 100644
index 955964829..000000000
--- a/python/ray/rllib/examples/legacy_multiagent/multiagent_mountaincar.py
+++ /dev/null
@@ -1,59 +0,0 @@
-""" Multiagent mountain car. Each agent outputs an action which
-is summed to form the total action. This is a discrete
-multiagent example
-"""
-
-import gym
-from gym.envs.registration import register
-
-import ray
-import ray.rllib.agents.ppo as ppo
-from ray.tune.registry import register_env
-
-env_name = "MultiAgentMountainCarEnv"
-
-env_version_num = 0
-env_name = env_name + '-v' + str(env_version_num)
-
-
-def pass_params_to_gym(env_name):
-    global env_version_num
-
-    register(
-        id=env_name,
-        entry_point=(
-            "ray.rllib.examples.legacy_multiagent.multiagent_mountaincar_env:"
-            "MultiAgentMountainCarEnv"),
-        max_episode_steps=200,
-        kwargs={})
-
-
-def create_env(env_config):
-    pass_params_to_gym(env_name)
-    env = gym.envs.make(env_name)
-    return env
-
-
-if __name__ == '__main__':
-    register_env(env_name, lambda env_config: create_env(env_config))
-    config = ppo.DEFAULT_CONFIG.copy()
-    horizon = 10
-    num_cpus = 4
-    ray.init(num_cpus=num_cpus, redirect_output=True)
-    config["num_workers"] = num_cpus
-    config["train_batch_size"] = 1000
-    config["num_sgd_iter"] = 10
-    config["gamma"] = 0.999
-    config["horizon"] = horizon
-    config["use_gae"] = False
-    config["model"].update({"fcnet_hiddens": [256, 256]})
-    options = {
-        "multiagent_obs_shapes": [2, 2],
-        "multiagent_act_shapes": [1, 1],
-        "multiagent_shared_model": False,
-        "multiagent_fcnet_hiddens": [[32, 32]] * 2
-    }
-    config["model"].update({"custom_options": options})
-    alg = ppo.PPOAgent(env=env_name, config=config)
-    for i in range(1):
-        alg.train()
diff --git a/python/ray/rllib/examples/legacy_multiagent/multiagent_mountaincar_env.py b/python/ray/rllib/examples/legacy_multiagent/multiagent_mountaincar_env.py
deleted file mode 100644
index c120f00c9..000000000
--- a/python/ray/rllib/examples/legacy_multiagent/multiagent_mountaincar_env.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from math import cos
-from gym.spaces import Box, Tuple, Discrete
-import numpy as np
-from gym.envs.classic_control.mountain_car import MountainCarEnv
-"""
-Multiagent mountain car that sums and then
-averages its actions to produce the velocity
-"""
-
-
-class MultiAgentMountainCarEnv(MountainCarEnv):
-    def __init__(self):
-        self.min_position = -1.2
-        self.max_position = 0.6
-        self.max_speed = 0.07
-        self.goal_position = 0.5
-
-        self.low = np.array([self.min_position, -self.max_speed])
-        self.high = np.array([self.max_position, self.max_speed])
-
-        self.viewer = None
-
-        self.action_space = [Discrete(3) for _ in range(2)]
-        self.observation_space = Tuple(
-            [Box(self.low, self.high, dtype=np.float32) for _ in range(2)])
-
-        self.seed()
-        self.reset()
-
-    def step(self, action):
-        summed_act = 0.5 * np.sum(action)
-
-        position, velocity = self.state
-        velocity += (summed_act - 1) * 0.001
-        velocity += cos(3 * position) * (-0.0025)
-        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
-        position += velocity
-        position = np.clip(position, self.min_position, self.max_position)
-        if (position == self.min_position and velocity < 0):
-            velocity = 0
-
-        done = bool(position >= self.goal_position)
-
-        reward = position
-
-        self.state = (position, velocity)
-        return [np.array(self.state) for _ in range(2)], reward, done, {}
-
-    def reset(self):
-        self.state = np.array([self.np_random.uniform(low=-0.6, high=-0.4), 0])
-        return [np.array(self.state) for _ in range(2)]
diff --git a/python/ray/rllib/examples/legacy_multiagent/multiagent_pendulum.py b/python/ray/rllib/examples/legacy_multiagent/multiagent_pendulum.py
deleted file mode 100644
index b183ff2c0..000000000
--- a/python/ray/rllib/examples/legacy_multiagent/multiagent_pendulum.py
+++ /dev/null
@@ -1,60 +0,0 @@
-""" Run script for multiagent pendulum env. Each agent outputs a
-torque which is summed to form the total torque. This is a
-continuous multiagent example
-"""
-
-import gym
-from gym.envs.registration import register
-
-import ray
-import ray.rllib.agents.ppo as ppo
-from ray.tune.registry import register_env
-
-env_name = "MultiAgentPendulumEnv"
-
-env_version_num = 0
-env_name = env_name + '-v' + str(env_version_num)
-
-
-def pass_params_to_gym(env_name):
-    global env_version_num
-
-    register(
-        id=env_name,
-        entry_point=(
-            "ray.rllib.examples.legacy_multiagent.multiagent_pendulum_env:"
-            "MultiAgentPendulumEnv"),
-        max_episode_steps=100,
-        kwargs={})
-
-
-def create_env(env_config):
-    pass_params_to_gym(env_name)
-    env = gym.envs.make(env_name)
-    return env
-
-
-if __name__ == '__main__':
-    register_env(env_name, lambda env_config: create_env(env_config))
-    config = ppo.DEFAULT_CONFIG.copy()
-    horizon = 10
-    num_cpus = 4
-    ray.init(num_cpus=num_cpus, redirect_output=True)
-    config["num_workers"] = num_cpus
-    config["train_batch_size"] = 1000
-    config["sgd_minibatch_size"] = 10
-    config["num_sgd_iter"] = 10
-    config["gamma"] = 0.999
-    config["horizon"] = horizon
-    config["use_gae"] = True
-    config["model"].update({"fcnet_hiddens": [256, 256]})
-    options = {
-        "multiagent_obs_shapes": [3, 3],
-        "multiagent_act_shapes": [1, 1],
-        "multiagent_shared_model": True,
-        "multiagent_fcnet_hiddens": [[32, 32]] * 2
-    }
-    config["model"].update({"custom_options": options})
-    alg = ppo.PPOAgent(env=env_name, config=config)
-    for i in range(1):
-        alg.train()
diff --git a/python/ray/rllib/examples/legacy_multiagent/multiagent_pendulum_env.py b/python/ray/rllib/examples/legacy_multiagent/multiagent_pendulum_env.py
deleted file mode 100644
index 026458327..000000000
--- a/python/ray/rllib/examples/legacy_multiagent/multiagent_pendulum_env.py
+++ /dev/null
@@ -1,74 +0,0 @@
-from gym.spaces import Box, Tuple
-from gym.utils import seeding
-from gym.envs.classic_control.pendulum import PendulumEnv
-import numpy as np
-"""
-    Multiagent pendulum that sums its torques to generate an action
-"""
-
-
-class MultiAgentPendulumEnv(PendulumEnv):
-    metadata = {
-        'render.modes': ['human', 'rgb_array'],
-        'video.frames_per_second': 30
-    }
-
-    def __init__(self):
-        self.max_speed = 8
-        self.max_torque = 2.
-        self.dt = .05
-        self.viewer = None
-
-        high = np.array([1., 1., self.max_speed])
-        self.action_space = [
-            Box(low=-self.max_torque / 2,
-                high=self.max_torque / 2,
-                shape=(1, ),
-                dtype=np.float32) for _ in range(2)
-        ]
-        self.observation_space = Tuple(
-            [Box(low=-high, high=high, dtype=np.float32) for _ in range(2)])
-
-        self.seed()
-
-    def seed(self, seed=None):
-        self.np_random, seed = seeding.np_random(seed)
-        return [seed]
-
-    def step(self, u):
-        th, thdot = self.state  # th := theta
-
-        summed_u = np.sum(u)
-        g = 10.
-        m = 1.
-        length = 1.
-        dt = self.dt
-
-        summed_u = np.clip(summed_u, -self.max_torque, self.max_torque)
-        self.last_u = summed_u  # for rendering
-        costs = self.angle_normalize(th) ** 2 + .1 * thdot ** 2 + \
-            .001 * (summed_u ** 2)
-
-        newthdot = thdot + (-3 * g / (2 * length) * np.sin(th + np.pi) + 3. /
-                            (m * length**2) * summed_u) * dt
-        newth = th + newthdot * dt
-        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)
-
-        self.state = np.array([newth, newthdot])
-        return self._get_obs(), -costs, False, {}
-
-    def reset(self):
-        high = np.array([np.pi, 1])
-        self.state = self.np_random.uniform(low=-high, high=high)
-        self.last_u = None
-        return self._get_obs()
-
-    def _get_obs(self):
-        theta, thetadot = self.state
-        return [
-            np.array([np.cos(theta), np.sin(theta), thetadot])
-            for _ in range(2)
-        ]
-
-    def angle_normalize(self, x):
-        return (((x + np.pi) % (2 * np.pi)) - np.pi)
diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py
index 9a889058c..370429c43 100644
--- a/python/ray/rllib/models/catalog.py
+++ b/python/ray/rllib/models/catalog.py
@@ -17,7 +17,6 @@ from ray.rllib.models.preprocessors import get_preprocessor
 from ray.rllib.models.fcnet import FullyConnectedNetwork
 from ray.rllib.models.visionnet import VisionNetwork
 from ray.rllib.models.lstm import LSTM
-from ray.rllib.models.multiagentfcnet import MultiAgentFullyConnectedNetwork
 
 MODEL_CONFIGS = [
     # === Built-in options ===
@@ -178,13 +177,6 @@ class ModelCatalog(object):
 
         obs_rank = len(inputs.shape) - 1
 
-        # num_outputs > 1 used to avoid hitting this with the value function
-        if isinstance(
-                options.get("custom_options", {}).get(
-                    "multiagent_fcnet_hiddens", 1), list) and num_outputs > 1:
-            return MultiAgentFullyConnectedNetwork(inputs, num_outputs,
-                                                   options)
-
         if obs_rank > 1:
             return VisionNetwork(inputs, num_outputs, options)
 
diff --git a/python/ray/rllib/models/multiagentfcnet.py b/python/ray/rllib/models/multiagentfcnet.py
deleted file mode 100644
index dad7f2983..000000000
--- a/python/ray/rllib/models/multiagentfcnet.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-
-from ray.rllib.models.model import Model
-from ray.rllib.models.fcnet import FullyConnectedNetwork
-from ray.rllib.utils.reshaper import Reshaper
-
-
-class MultiAgentFullyConnectedNetwork(Model):
-    """Multiagent fully connected network."""
-
-    def _build_layers(self, inputs, num_outputs, options):
-        # Split the input and output tensors
-        input_shapes = options["custom_options"]["multiagent_obs_shapes"]
-        output_shapes = options["custom_options"]["multiagent_act_shapes"]
-        input_reshaper = Reshaper(input_shapes)
-        output_reshaper = Reshaper(output_shapes)
-        split_inputs = input_reshaper.split_tensor(inputs)
-        num_actions = output_reshaper.split_number(num_outputs)
-
-        custom_options = options["custom_options"]
-        hiddens = custom_options.get("multiagent_fcnet_hiddens",
-                                     [[256, 256]] * 1)
-
-        # check for a shared model
-        shared_model = custom_options.get("multiagent_shared_model", 0)
-        reuse = tf.AUTO_REUSE if shared_model else False
-        outputs = []
-        for i in range(len(hiddens)):
-            scope = "multi" if shared_model else "multi{}".format(i)
-            with tf.variable_scope(scope, reuse=reuse):
-                sub_options = options.copy()
-                sub_options.update({"fcnet_hiddens": hiddens[i]})
-                # TODO(ev) make this support arbitrary networks
-                fcnet = FullyConnectedNetwork(split_inputs[i],
-                                              int(num_actions[i]), sub_options)
-                output = fcnet.outputs
-                outputs.append(output)
-        overall_output = tf.concat(outputs, axis=1)
-        return overall_output, outputs
diff --git a/python/ray/rllib/utils/reshaper.py b/python/ray/rllib/utils/reshaper.py
deleted file mode 100644
index e9c165212..000000000
--- a/python/ray/rllib/utils/reshaper.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import numpy as np
-import tensorflow as tf
-
-
-class Reshaper(object):
-    """
-    This class keeps track of where in the flattened observation space
-    we should be slicing and what the new shapes should be
-    """
-
-    def __init__(self, env_space):
-        self.shapes = []
-        self.slice_positions = []
-        self.env_space = env_space
-        if isinstance(env_space, list):
-            for space in env_space:
-                # Handle both gym arrays and just lists of inputs length
-                if hasattr(space, "n"):
-                    arr_shape = np.asarray([1])  # discrete space
-                elif hasattr(space, "shape"):
-                    arr_shape = np.asarray(space.shape)
-                else:
-                    arr_shape = space
-                self.shapes.append(arr_shape)
-                if len(self.slice_positions) == 0:
-                    self.slice_positions.append(np.product(arr_shape))
-                else:
-                    self.slice_positions.append(
-                        np.product(arr_shape) + self.slice_positions[-1])
-        else:
-            self.shapes.append(np.asarray(env_space.shape))
-            self.slice_positions.append(np.product(env_space.shape))
-
-    def get_slice_lengths(self):
-        diffed_list = np.diff(self.slice_positions).tolist()
-        diffed_list.insert(0, self.slice_positions[0])
-        return np.asarray(diffed_list).astype(int)
-
-    def split_tensor(self, tensor, axis=-1):
-        # FIXME (ev) This won't work for mixed action distributions like
-        # one agent Gaussian one agent discrete
-        slice_rescale = int(tensor.shape.as_list()[axis] / int(
-            np.sum(self.get_slice_lengths())))
-        return tf.split(
-            tensor, slice_rescale * self.get_slice_lengths(), axis=axis)
-
-    def split_number(self, number):
-        slice_rescale = int(number / int(np.sum(self.get_slice_lengths())))
-        return slice_rescale * self.get_slice_lengths()
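
Note: the slicing arithmetic performed by the deleted Reshaper can be reproduced with plain NumPy. The sketch below is an illustration only and is not part of this diff; it mirrors get_slice_lengths(), split_tensor() and split_number() using the settings from the deleted mountain car example ("multiagent_obs_shapes": [2, 2], "multiagent_act_shapes": [1, 1]).

import numpy as np

# Illustration only (not part of the diff): the bookkeeping of the deleted
# Reshaper, reproduced with NumPy instead of TensorFlow.

def slice_lengths(shapes):
    # Mirrors Reshaper.get_slice_lengths(): cumulative end positions, then diff.
    positions = np.cumsum([int(np.prod(s)) for s in shapes])
    return np.diff(np.insert(positions, 0, 0)).astype(int)

obs_lengths = slice_lengths([[2], [2]])  # "multiagent_obs_shapes": [2, 2] -> [2, 2]
act_lengths = slice_lengths([[1], [1]])  # "multiagent_act_shapes": [1, 1] -> [1, 1]

# Reshaper.split_tensor(): slice the flattened observation (width 4) into
# per-agent pieces of widths obs_lengths.
flat_obs = np.arange(4.0)
per_agent_obs = np.split(flat_obs, np.cumsum(obs_lengths)[:-1])
# -> [array([0., 1.]), array([2., 3.])]

# Reshaper.split_number(): rescale a flat output count across agents.
# Two Discrete(3) agents emit 6 logits in total -> 3 per agent.
num_outputs = 6
rescale = num_outputs // int(np.sum(act_lengths))
print(rescale * act_lengths)  # [3 3]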
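
Presumably this legacy path (Tuple/list spaces flattened through Reshaper and MultiAgentFullyConnectedNetwork) is superseded by RLlib's dict-based multi-agent API. Purely as a hedged sketch, and not something that appears in this diff: the class name TwoAgentMountainCar is hypothetical, and the exact MultiAgentEnv reset/step signature depends on the RLlib version. The same two-agent mountain car dynamics might be expressed along these lines:

from math import cos

import numpy as np
from gym.spaces import Box, Discrete
from ray.rllib.env.multi_agent_env import MultiAgentEnv  # assumed API, not in this diff


class TwoAgentMountainCar(MultiAgentEnv):  # hypothetical name, illustration only
    def __init__(self, env_config=None):
        self.min_position, self.max_position = -1.2, 0.6
        self.max_speed, self.goal_position = 0.07, 0.5
        # Per-agent spaces instead of the deleted Tuple/list-of-spaces encoding.
        self.action_space = Discrete(3)
        self.observation_space = Box(
            np.array([self.min_position, -self.max_speed]),
            np.array([self.max_position, self.max_speed]),
            dtype=np.float32)
        self.agents = ["agent_0", "agent_1"]

    def reset(self):
        self.state = np.array([np.random.uniform(-0.6, -0.4), 0.0])
        return {a: self.state.copy() for a in self.agents}

    def step(self, action_dict):
        # Same dynamics as the deleted env: average the two discrete actions.
        summed_act = 0.5 * sum(action_dict.values())
        position, velocity = self.state
        velocity += (summed_act - 1) * 0.001 + cos(3 * position) * (-0.0025)
        velocity = float(np.clip(velocity, -self.max_speed, self.max_speed))
        position = float(np.clip(position + velocity,
                                 self.min_position, self.max_position))
        if position == self.min_position and velocity < 0:
            velocity = 0.0
        self.state = np.array([position, velocity])
        done = bool(position >= self.goal_position)
        obs = {a: self.state.copy() for a in self.agents}
        rewards = {a: position for a in self.agents}
        dones = {a: done for a in self.agents}
        dones["__all__"] = done
        return obs, rewards, dones, {a: {} for a in self.agents}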