From b45bed4bce94725bd4fc11c224e555c5fde7e1f4 Mon Sep 17 00:00:00 2001 From: Eric Liang Date: Mon, 1 Oct 2018 12:49:39 -0700 Subject: [PATCH] [rllib] Propagate model options correctly in ARS / ES, to action dist of PPO (#2974) * fix * fix * fix it * propagate conf to action dist * move carla example too * rr * Update policies.py * wip * lint --- doc/source/example-a3c.rst | 2 +- doc/source/example-policy-gradient.rst | 2 +- doc/source/index.rst | 2 +- doc/source/rllib-env.rst | 2 +- doc/source/rllib-models.rst | 2 +- doc/source/rllib.rst | 2 +- examples/carla/scenarios.py | 119 ------------ examples/custom_env/README | 1 - python/ray/rllib/agents/ars/ars.py | 65 ++----- python/ray/rllib/agents/ars/policies.py | 43 +---- python/ray/rllib/agents/ars/utils.py | 21 --- python/ray/rllib/agents/es/es.py | 12 +- python/ray/rllib/agents/es/policies.py | 6 +- python/ray/rllib/agents/pg/pg_policy_graph.py | 4 +- .../ray/rllib/agents/ppo/ppo_policy_graph.py | 5 +- .../ray/rllib/examples}/carla/README | 0 .../rllib/examples}/carla/a3c_lane_keep.py | 1 - .../rllib/examples}/carla/dqn_lane_keep.py | 4 - .../ray/rllib/examples}/carla/env.py | 174 +++++++++--------- .../ray/rllib/examples}/carla/models.py | 29 ++- .../rllib/examples}/carla/ppo_lane_keep.py | 5 +- python/ray/rllib/examples/carla/scenarios.py | 131 +++++++++++++ .../ray/rllib/examples}/carla/train_a3c.py | 1 - .../ray/rllib/examples}/carla/train_dqn.py | 19 +- .../ray/rllib/examples}/carla/train_ppo.py | 17 +- .../ray/rllib/examples}/custom_env.py | 4 +- python/ray/rllib/models/catalog.py | 17 +- python/ray/rllib/test/test_catalog.py | 5 +- .../ray/rllib/tuned_examples/swimmer-ars.yaml | 4 +- 29 files changed, 322 insertions(+), 377 deletions(-) delete mode 100644 examples/carla/scenarios.py delete mode 100644 examples/custom_env/README rename {examples => python/ray/rllib/examples}/carla/README (100%) rename {examples => python/ray/rllib/examples}/carla/a3c_lane_keep.py (96%) rename {examples => python/ray/rllib/examples}/carla/dqn_lane_keep.py (90%) rename {examples => python/ray/rllib/examples}/carla/env.py (83%) rename {examples => python/ray/rllib/examples}/carla/models.py (83%) rename {examples => python/ray/rllib/examples}/carla/ppo_lane_keep.py (93%) create mode 100644 python/ray/rllib/examples/carla/scenarios.py rename {examples => python/ray/rllib/examples}/carla/train_a3c.py (96%) rename {examples => python/ray/rllib/examples}/carla/train_dqn.py (81%) rename {examples => python/ray/rllib/examples}/carla/train_ppo.py (80%) rename {examples/custom_env => python/ray/rllib/examples}/custom_env.py (93%) diff --git a/doc/source/example-a3c.rst b/doc/source/example-a3c.rst index 665d49a36..4a62ec61a 100644 --- a/doc/source/example-a3c.rst +++ b/doc/source/example-a3c.rst @@ -13,7 +13,7 @@ View the `code for this example`_. .. note:: - For an overview of Ray's reinforcement learning library, see `Ray RLlib `__. + For an overview of Ray's reinforcement learning library, see `RLlib `__. To run the application, first install **ray** and then some dependencies: diff --git a/doc/source/example-policy-gradient.rst b/doc/source/example-policy-gradient.rst index 806764560..cabadfd37 100644 --- a/doc/source/example-policy-gradient.rst +++ b/doc/source/example-policy-gradient.rst @@ -6,7 +6,7 @@ View the `code for this example`_. .. note:: - For an overview of Ray's reinforcement learning library, see `Ray RLlib `__. + For an overview of Ray's reinforcement learning library, see `RLlib `__. 
To run this example, you will need to install `TensorFlow with GPU support`_ (at diff --git a/doc/source/index.rst b/doc/source/index.rst index b71987108..d951066e8 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -77,7 +77,7 @@ Ray comes with libraries that accelerate deep learning and reinforcement learnin .. toctree:: :maxdepth: 1 - :caption: Ray RLlib + :caption: RLlib rllib.rst rllib-training.rst diff --git a/doc/source/rllib-env.rst b/doc/source/rllib-env.rst index 6de076785..c95def692 100644 --- a/doc/source/rllib-env.rst +++ b/doc/source/rllib-env.rst @@ -50,7 +50,7 @@ In the above example, note that the ``env_creator`` function takes in an ``env_c OpenAI Gym ---------- -RLlib uses Gym as its environment interface for single-agent training. For more information on how to implement a custom Gym environment, see the `gym.Env class definition `__. You may also find the `SimpleCorridor `__ and `Carla simulator `__ example env implementations useful as a reference. +RLlib uses Gym as its environment interface for single-agent training. For more information on how to implement a custom Gym environment, see the `gym.Env class definition `__. You may also find the `SimpleCorridor `__ and `Carla simulator `__ example env implementations useful as a reference. Performance ~~~~~~~~~~~ diff --git a/doc/source/rllib-models.rst b/doc/source/rllib-models.rst index c279855ac..5b3f88cf0 100644 --- a/doc/source/rllib-models.rst +++ b/doc/source/rllib-models.rst @@ -46,7 +46,7 @@ Custom models should subclass the common RLlib `model class `__ and associated `training scripts `__. The ``CarlaModel`` class defined there operates over a composite (Tuple) observation space including both images and scalar measurements. +For a full example of a custom model in code, see the `Carla RLlib model `__ and associated `training scripts `__. The ``CarlaModel`` class defined there operates over a composite (Tuple) observation space including both images and scalar measurements. Custom Preprocessors -------------------- diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst index ea5bbbf58..ba011d08c 100644 --- a/doc/source/rllib.rst +++ b/doc/source/rllib.rst @@ -10,7 +10,7 @@ Learn more about RLlib's design by reading the `ICML paper `__ or `TensorFlow `__. Then, install the Ray RLlib module: +RLlib has extra dependencies on top of ``ray``. First, you'll need to install either `PyTorch `__ or `TensorFlow `__. Then, install the RLlib module: .. 
code-block:: bash diff --git a/examples/carla/scenarios.py b/examples/carla/scenarios.py deleted file mode 100644 index e6494af18..000000000 --- a/examples/carla/scenarios.py +++ /dev/null @@ -1,119 +0,0 @@ -"""Collection of Carla scenarios, including those from the CoRL 2017 paper.""" - - -TEST_WEATHERS = [0, 2, 5, 7, 9, 10, 11, 12, 13] -TRAIN_WEATHERS = [1, 3, 4, 6, 8, 14] - - -def build_scenario( - city, start, end, vehicles, pedestrians, max_steps, weathers): - return { - "city": city, - "num_vehicles": vehicles, - "num_pedestrians": pedestrians, - "weather_distribution": weathers, - "start_pos_id": start, - "end_pos_id": end, - "max_steps": max_steps, - } - - -# Simple scenario for Town02 that involves driving down a road -DEFAULT_SCENARIO = build_scenario( - city="Town02", start=36, end=40, vehicles=20, pedestrians=40, - max_steps=200, weathers=[0]) - -# Simple scenario for Town02 that involves driving down a road -LANE_KEEP = build_scenario( - city="Town02", start=36, end=40, vehicles=0, pedestrians=0, - max_steps=2000, weathers=[0]) - -# Scenarios from the CoRL2017 paper -POSES_TOWN1_STRAIGHT = [ - [36, 40], [39, 35], [110, 114], [7, 3], [0, 4], - [68, 50], [61, 59], [47, 64], [147, 90], [33, 87], - [26, 19], [80, 76], [45, 49], [55, 44], [29, 107], - [95, 104], [84, 34], [53, 67], [22, 17], [91, 148], - [20, 107], [78, 70], [95, 102], [68, 44], [45, 69]] - - -POSES_TOWN1_ONE_CURVE = [ - [138, 17], [47, 16], [26, 9], [42, 49], [140, 124], - [85, 98], [65, 133], [137, 51], [76, 66], [46, 39], - [40, 60], [0, 29], [4, 129], [121, 140], [2, 129], - [78, 44], [68, 85], [41, 102], [95, 70], [68, 129], - [84, 69], [47, 79], [110, 15], [130, 17], [0, 17]] - -POSES_TOWN1_NAV = [ - [105, 29], [27, 130], [102, 87], [132, 27], [24, 44], - [96, 26], [34, 67], [28, 1], [140, 134], [105, 9], - [148, 129], [65, 18], [21, 16], [147, 97], [42, 51], - [30, 41], [18, 107], [69, 45], [102, 95], [18, 145], - [111, 64], [79, 45], [84, 69], [73, 31], [37, 81]] - - -POSES_TOWN2_STRAIGHT = [ - [38, 34], [4, 2], [12, 10], [62, 55], [43, 47], - [64, 66], [78, 76], [59, 57], [61, 18], [35, 39], - [12, 8], [0, 18], [75, 68], [54, 60], [45, 49], - [46, 42], [53, 46], [80, 29], [65, 63], [0, 81], - [54, 63], [51, 42], [16, 19], [17, 26], [77, 68]] - -POSES_TOWN2_ONE_CURVE = [ - [37, 76], [8, 24], [60, 69], [38, 10], [21, 1], - [58, 71], [74, 32], [44, 0], [71, 16], [14, 24], - [34, 11], [43, 14], [75, 16], [80, 21], [3, 23], - [75, 59], [50, 47], [11, 19], [77, 34], [79, 25], - [40, 63], [58, 76], [79, 55], [16, 61], [27, 11]] - -POSES_TOWN2_NAV = [ - [19, 66], [79, 14], [19, 57], [23, 1], - [53, 76], [42, 13], [31, 71], [33, 5], - [54, 30], [10, 61], [66, 3], [27, 12], - [79, 19], [2, 29], [16, 14], [5, 57], - [70, 73], [46, 67], [57, 50], [61, 49], [21, 12], - [51, 81], [77, 68], [56, 65], [43, 54]] - -TOWN1_STRAIGHT = [ - build_scenario("Town01", start, end, 0, 0, 300, TEST_WEATHERS) - for (start, end) in POSES_TOWN1_STRAIGHT] - -TOWN1_ONE_CURVE = [ - build_scenario("Town01", start, end, 0, 0, 600, TEST_WEATHERS) - for (start, end) in POSES_TOWN1_ONE_CURVE] - -TOWN1_NAVIGATION = [ - build_scenario("Town01", start, end, 0, 0, 900, TEST_WEATHERS) - for (start, end) in POSES_TOWN1_NAV] - -TOWN1_NAVIGATION_DYNAMIC = [ - build_scenario("Town01", start, end, 20, 50, 900, TEST_WEATHERS) - for (start, end) in POSES_TOWN1_NAV] - -TOWN2_STRAIGHT = [ - build_scenario("Town02", start, end, 0, 0, 300, TRAIN_WEATHERS) - for (start, end) in POSES_TOWN2_STRAIGHT] - -TOWN2_STRAIGHT_DYNAMIC = [ - build_scenario("Town02", 
start, end, 20, 50, 300, TRAIN_WEATHERS) - for (start, end) in POSES_TOWN2_STRAIGHT] - -TOWN2_ONE_CURVE = [ - build_scenario("Town02", start, end, 0, 0, 600, TRAIN_WEATHERS) - for (start, end) in POSES_TOWN2_ONE_CURVE] - -TOWN2_NAVIGATION = [ - build_scenario("Town02", start, end, 0, 0, 900, TRAIN_WEATHERS) - for (start, end) in POSES_TOWN2_NAV] - -TOWN2_NAVIGATION_DYNAMIC = [ - build_scenario("Town02", start, end, 20, 50, 900, TRAIN_WEATHERS) - for (start, end) in POSES_TOWN2_NAV] - -TOWN1_ALL = ( - TOWN1_STRAIGHT + TOWN1_ONE_CURVE + TOWN1_NAVIGATION + - TOWN1_NAVIGATION_DYNAMIC) - -TOWN2_ALL = ( - TOWN2_STRAIGHT + TOWN2_ONE_CURVE + TOWN2_NAVIGATION + - TOWN2_NAVIGATION_DYNAMIC) diff --git a/examples/custom_env/README b/examples/custom_env/README deleted file mode 100644 index 75ffcad88..000000000 --- a/examples/custom_env/README +++ /dev/null @@ -1 +0,0 @@ -Example of using a custom gym env with RLlib. diff --git a/python/ray/rllib/agents/ars/ars.py b/python/ray/rllib/agents/ars/ars.py index e1a945985..5984e2e01 100644 --- a/python/ray/rllib/agents/ars/ars.py +++ b/python/ray/rllib/agents/ars/ars.py @@ -25,19 +25,17 @@ Result = namedtuple("Result", [ ]) DEFAULT_CONFIG = with_common_config({ - 'noise_stdev': 0.02, # std deviation of parameter noise - 'num_rollouts': 32, # number of perturbs to try - 'rollouts_used': 32, # number of perturbs to keep in gradient estimate - 'num_workers': 2, - 'sgd_stepsize': 0.01, # sgd step-size - 'observation_filter': "MeanStdFilter", - 'noise_size': 250000000, - 'eval_prob': 0.03, # probability of evaluating the parameter rewards - 'report_length': 10, # how many of the last rewards we average over - 'env_config': {}, - 'offset': 0, - 'policy_type': "LinearPolicy", # ["LinearPolicy", "MLPPolicy"] - "fcnet_hiddens": [32, 32], # fcnet structure of MLPPolicy + "noise_stdev": 0.02, # std deviation of parameter noise + "num_rollouts": 32, # number of perturbs to try + "rollouts_used": 32, # number of perturbs to keep in gradient estimate + "num_workers": 2, + "sgd_stepsize": 0.01, # sgd step-size + "observation_filter": "MeanStdFilter", + "noise_size": 250000000, + "eval_prob": 0.03, # probability of evaluating the parameter rewards + "report_length": 10, # how many of the last rewards we average over + "env_config": {}, + "offset": 0, }) @@ -67,15 +65,9 @@ class SharedNoiseTable(object): @ray.remote class Worker(object): - def __init__(self, - config, - policy_params, - env_creator, - noise, - min_task_runtime=0.2): + def __init__(self, config, env_creator, noise, min_task_runtime=0.2): self.min_task_runtime = min_task_runtime self.config = config - self.policy_params = policy_params self.noise = SharedNoiseTable(noise) self.env = env_creator(config["env_config"]) @@ -83,15 +75,9 @@ class Worker(object): self.preprocessor = models.ModelCatalog.get_preprocessor(self.env) self.sess = utils.make_session(single_threaded=True) - if config["policy_type"] == "LinearPolicy": - self.policy = policies.LinearPolicy( - self.sess, self.env.action_space, self.preprocessor, - config["observation_filter"], **policy_params) - else: - self.policy = policies.MLPPolicy( - self.sess, self.env.action_space, self.preprocessor, - config["observation_filter"], config["fcnet_hiddens"], - **policy_params) + self.policy = policies.GenericPolicy( + self.sess, self.env.action_space, self.preprocessor, + config["observation_filter"], config["model"]) def rollout(self, timestep_limit, add_noise=False): rollout_rewards, rollout_length = policies.rollout( @@ -160,25 +146,14 @@ class 
ARSAgent(Agent): return Resources(cpu=1, gpu=0, extra_cpu=cf["num_workers"]) def _init(self): - policy_params = {"action_noise_std": 0.0} - - # register the linear network - utils.register_linear_network() - env = self.env_creator(self.config["env_config"]) from ray.rllib import models preprocessor = models.ModelCatalog.get_preprocessor(env) self.sess = utils.make_session(single_threaded=False) - if self.config["policy_type"] == "LinearPolicy": - self.policy = policies.LinearPolicy( - self.sess, env.action_space, preprocessor, - self.config["observation_filter"], **policy_params) - else: - self.policy = policies.MLPPolicy( - self.sess, env.action_space, preprocessor, - self.config["observation_filter"], - self.config["fcnet_hiddens"], **policy_params) + self.policy = policies.GenericPolicy( + self.sess, env.action_space, preprocessor, + self.config["observation_filter"], self.config["model"]) self.optimizer = optimizers.SGD(self.policy, self.config["sgd_stepsize"]) @@ -194,8 +169,8 @@ class ARSAgent(Agent): # Create the actors. print("Creating actors.") self.workers = [ - Worker.remote(self.config, policy_params, self.env_creator, - noise_id) for _ in range(self.config["num_workers"]) + Worker.remote(self.config, self.env_creator, noise_id) + for _ in range(self.config["num_workers"]) ] self.episodes_so_far = 0 diff --git a/python/ray/rllib/agents/ars/policies.py b/python/ray/rllib/agents/ars/policies.py index 3a25d68eb..6c8bd9273 100644 --- a/python/ray/rllib/agents/ars/policies.py +++ b/python/ray/rllib/agents/ars/policies.py @@ -11,7 +11,6 @@ import tensorflow as tf import ray from ray.rllib.utils.filter import get_filter -from ray.rllib.utils.error import UnsupportedSpaceException from ray.rllib.models import ModelCatalog @@ -59,14 +58,8 @@ class GenericPolicy(object): action_space, preprocessor, observation_filter, - action_noise_std, - options={}): - - if len(preprocessor.shape) > 1: - raise UnsupportedSpaceException( - "Observation space {} is not supported with ARS.".format( - preprocessor.shape)) - + model_config, + action_noise_std=0.0): self.sess = sess self.action_space = action_space self.action_noise_std = action_noise_std @@ -78,9 +71,9 @@ class GenericPolicy(object): # Policy network. 
dist_class, dist_dim = ModelCatalog.get_action_dist( - action_space, dist_type="deterministic") + action_space, model_config, dist_type="deterministic") - model = ModelCatalog.get_model(self.inputs, dist_dim, options=options) + model = ModelCatalog.get_model(self.inputs, dist_dim, model_config) dist = dist_class(model.outputs) self.sampler = dist.sample() @@ -106,31 +99,3 @@ class GenericPolicy(object): def get_weights(self): return self.variables.get_flat() - - -class LinearPolicy(GenericPolicy): - def __init__(self, sess, action_space, preprocessor, observation_filter, - action_noise_std): - options = {"custom_model": "LinearNetwork"} - GenericPolicy.__init__( - self, - sess, - action_space, - preprocessor, - observation_filter, - action_noise_std, - options=options) - - -class MLPPolicy(GenericPolicy): - def __init__(self, sess, action_space, preprocessor, observation_filter, - fcnet_hiddens, action_noise_std): - options = {"fcnet_hiddens": fcnet_hiddens} - GenericPolicy.__init__( - self, - sess, - action_space, - preprocessor, - observation_filter, - action_noise_std, - options=options) diff --git a/python/ray/rllib/agents/ars/utils.py b/python/ray/rllib/agents/ars/utils.py index a70dd97bb..1575e46c3 100644 --- a/python/ray/rllib/agents/ars/utils.py +++ b/python/ray/rllib/agents/ars/utils.py @@ -7,9 +7,6 @@ from __future__ import print_function import numpy as np import tensorflow as tf -from ray.rllib.models import ModelCatalog, Model -import tensorflow.contrib.slim as slim -from ray.rllib.models.misc import normc_initializer def compute_ranks(x): @@ -62,21 +59,3 @@ def batched_weighted_sum(weights, vecs, batch_size): np.asarray(batch_vecs, dtype=np.float32)) num_items_summed += len(batch_weights) return total, num_items_summed - - -class LinearNetwork(Model): - """Generic linear network.""" - - def _build_layers(self, inputs, num_outputs, _): - with tf.name_scope("linear"): - output = slim.fully_connected( - inputs, - num_outputs, - weights_initializer=normc_initializer(0.01), - activation_fn=None, - ) - return output, inputs - - -def register_linear_network(): - ModelCatalog.register_custom_model("LinearNetwork", LinearNetwork) diff --git a/python/ray/rllib/agents/es/es.py b/python/ray/rllib/agents/es/es.py index 1ce219b7c..392f98f1d 100644 --- a/python/ray/rllib/agents/es/es.py +++ b/python/ray/rllib/agents/es/es.py @@ -10,7 +10,7 @@ import numpy as np import time import ray -from ray.rllib.agents import Agent +from ray.rllib.agents import Agent, with_common_config from ray.tune.trial import Resources from ray.rllib.agents.es import optimizers @@ -24,7 +24,7 @@ Result = namedtuple("Result", [ "eval_returns", "eval_lengths" ]) -DEFAULT_CONFIG = { +DEFAULT_CONFIG = with_common_config({ "l2_coeff": 0.005, "noise_stdev": 0.02, "episodes_per_batch": 1000, @@ -38,7 +38,8 @@ DEFAULT_CONFIG = { "report_length": 10, "env": None, "env_config": {}, -} + "model": {}, +}) @ray.remote @@ -81,7 +82,7 @@ class Worker(object): self.sess = utils.make_session(single_threaded=True) self.policy = policies.GenericPolicy( self.sess, self.env.action_space, self.preprocessor, - config["observation_filter"], **policy_params) + config["observation_filter"], config["model"], **policy_params) def rollout(self, timestep_limit, add_noise=True): rollout_rewards, rollout_length = policies.rollout( @@ -161,7 +162,8 @@ class ESAgent(Agent): self.sess = utils.make_session(single_threaded=False) self.policy = policies.GenericPolicy( self.sess, env.action_space, preprocessor, - self.config["observation_filter"], 
**policy_params) + self.config["observation_filter"], self.config["model"], + **policy_params) self.optimizer = optimizers.Adam(self.policy, self.config["stepsize"]) self.report_length = self.config["report_length"] diff --git a/python/ray/rllib/agents/es/policies.py b/python/ray/rllib/agents/es/policies.py index d62fee43c..b40f2db56 100644 --- a/python/ray/rllib/agents/es/policies.py +++ b/python/ray/rllib/agents/es/policies.py @@ -39,7 +39,7 @@ def rollout(policy, env, timestep_limit=None, add_noise=False): class GenericPolicy(object): def __init__(self, sess, action_space, preprocessor, observation_filter, - action_noise_std): + model_options, action_noise_std): self.sess = sess self.action_space = action_space self.action_noise_std = action_noise_std @@ -51,8 +51,8 @@ class GenericPolicy(object): # Policy network. dist_class, dist_dim = ModelCatalog.get_action_dist( - self.action_space, dist_type="deterministic") - model = ModelCatalog.get_model(self.inputs, dist_dim) + self.action_space, model_options, dist_type="deterministic") + model = ModelCatalog.get_model(self.inputs, dist_dim, model_options) dist = dist_class(model.outputs) self.sampler = dist.sample() diff --git a/python/ray/rllib/agents/pg/pg_policy_graph.py b/python/ray/rllib/agents/pg/pg_policy_graph.py index bb831c47d..7cdb8532b 100644 --- a/python/ray/rllib/agents/pg/pg_policy_graph.py +++ b/python/ray/rllib/agents/pg/pg_policy_graph.py @@ -24,8 +24,8 @@ class PGPolicyGraph(TFPolicyGraph): obs = tf.placeholder(tf.float32, shape=[None] + list(obs_space.shape)) dist_class, self.logit_dim = ModelCatalog.get_action_dist( action_space, self.config["model"]) - self.model = ModelCatalog.get_model( - obs, self.logit_dim, options=self.config["model"]) + self.model = ModelCatalog.get_model(obs, self.logit_dim, + self.config["model"]) action_dist = dist_class(self.model.outputs) # logit for each action # Setup policy loss diff --git a/python/ray/rllib/agents/ppo/ppo_policy_graph.py b/python/ray/rllib/agents/ppo/ppo_policy_graph.py index e6fc90d1c..9456ebe94 100644 --- a/python/ray/rllib/agents/ppo/ppo_policy_graph.py +++ b/python/ray/rllib/agents/ppo/ppo_policy_graph.py @@ -54,7 +54,7 @@ class PPOLoss(object): vf_loss_coeff (float): Coefficient of the value function loss use_gae (bool): If true, use the Generalized Advantage Estimator. """ - dist_cls, _ = ModelCatalog.get_action_dist(action_space) + dist_cls, _ = ModelCatalog.get_action_dist(action_space, {}) prev_dist = dist_cls(logits) # Make loss functions. 
logp_ratio = tf.exp( @@ -108,7 +108,8 @@ class PPOPolicyGraph(LearningRateSchedule, TFPolicyGraph): self.config = config self.kl_coeff_val = self.config["kl_coeff"] self.kl_target = self.config["kl_target"] - dist_cls, logit_dim = ModelCatalog.get_action_dist(action_space) + dist_cls, logit_dim = ModelCatalog.get_action_dist( + action_space, self.config["model"]) if existing_inputs: obs_ph, value_targets_ph, adv_ph, act_ph, \ diff --git a/examples/carla/README b/python/ray/rllib/examples/carla/README similarity index 100% rename from examples/carla/README rename to python/ray/rllib/examples/carla/README diff --git a/examples/carla/a3c_lane_keep.py b/python/ray/rllib/examples/carla/a3c_lane_keep.py similarity index 96% rename from examples/carla/a3c_lane_keep.py rename to python/ray/rllib/examples/carla/a3c_lane_keep.py index 1338736d2..9629808ba 100644 --- a/examples/carla/a3c_lane_keep.py +++ b/python/ray/rllib/examples/carla/a3c_lane_keep.py @@ -31,7 +31,6 @@ run_experiments({ "carla-a3c": { "run": "A3C", "env": "carla_env", - "trial_resources": {"cpu": 4, "gpu": 1}, "config": { "env_config": env_config, "model": { diff --git a/examples/carla/dqn_lane_keep.py b/python/ray/rllib/examples/carla/dqn_lane_keep.py similarity index 90% rename from examples/carla/dqn_lane_keep.py rename to python/ray/rllib/examples/carla/dqn_lane_keep.py index 2746a1c4b..84fed98cd 100644 --- a/examples/carla/dqn_lane_keep.py +++ b/python/ray/rllib/examples/carla/dqn_lane_keep.py @@ -31,7 +31,6 @@ run_experiments({ "carla-dqn": { "run": "DQN", "env": "carla_env", - "trial_resources": {"cpu": 4, "gpu": 1}, "config": { "env_config": env_config, "model": { @@ -49,9 +48,6 @@ run_experiments({ "learning_starts": 1000, "schedule_max_timesteps": 100000, "gamma": 0.8, - "tf_session_args": { - "gpu_options": {"allow_growth": True}, - }, }, }, }) diff --git a/examples/carla/env.py b/python/ray/rllib/examples/carla/env.py similarity index 83% rename from examples/carla/env.py rename to python/ray/rllib/examples/carla/env.py index c88a71b28..af5b619af 100644 --- a/examples/carla/env.py +++ b/python/ray/rllib/examples/carla/env.py @@ -33,8 +33,8 @@ if CARLA_OUT_PATH and not os.path.exists(CARLA_OUT_PATH): os.makedirs(CARLA_OUT_PATH) # Set this to the path of your Carla binary -SERVER_BINARY = os.environ.get( - "CARLA_SERVER", os.path.expanduser("~/CARLA_0.7.0/CarlaUE4.sh")) +SERVER_BINARY = os.environ.get("CARLA_SERVER", + os.path.expanduser("~/CARLA_0.7.0/CarlaUE4.sh")) assert os.path.exists(SERVER_BINARY) if "CARLA_PY_PATH" in os.environ: @@ -97,7 +97,6 @@ ENV_CONFIG = { "squash_action_logits": False, } - DISCRETE_ACTIONS = { # coast 0: [0.0, 0.0], @@ -119,7 +118,6 @@ DISCRETE_ACTIONS = { 8: [-0.5, 0.5], } - live_carla_processes = set() @@ -133,7 +131,6 @@ atexit.register(cleanup) class CarlaEnv(gym.Env): - def __init__(self, config=ENV_CONFIG): self.config = config self.city = self.config["server_map"].split("/")[-1] @@ -143,21 +140,27 @@ class CarlaEnv(gym.Env): if config["discrete_actions"]: self.action_space = Discrete(len(DISCRETE_ACTIONS)) else: - self.action_space = Box(-1.0, 1.0, shape=(2,), dtype=np.float32) + self.action_space = Box(-1.0, 1.0, shape=(2, ), dtype=np.float32) if config["use_depth_camera"]: image_space = Box( - -1.0, 1.0, shape=( - config["y_res"], config["x_res"], - 1 * config["framestack"]), dtype=np.float32) + -1.0, + 1.0, + shape=(config["y_res"], config["x_res"], + 1 * config["framestack"]), + dtype=np.float32) else: image_space = Box( - 0, 255, shape=( - config["y_res"], config["x_res"], - 3 * 
config["framestack"]), dtype=np.uint8) + 0, + 255, + shape=(config["y_res"], config["x_res"], + 3 * config["framestack"]), + dtype=np.uint8) self.observation_space = Tuple( # forward_speed, dist to goal - [image_space, - Discrete(len(COMMANDS_ENUM)), # next_command - Box(-128.0, 128.0, shape=(2,), dtype=np.float32)]) + [ + image_space, + Discrete(len(COMMANDS_ENUM)), # next_command + Box(-128.0, 128.0, shape=(2, ), dtype=np.float32) + ]) # TODO(ekl) this isn't really a proper gym spec self._spec = lambda: None @@ -185,11 +188,13 @@ class CarlaEnv(gym.Env): # Create a new server process and start the client. self.server_port = random.randint(10000, 60000) self.server_process = subprocess.Popen( - [SERVER_BINARY, self.config["server_map"], - "-windowed", "-ResX=400", "-ResY=300", - "-carla-server", - "-carla-world-port={}".format(self.server_port)], - preexec_fn=os.setsid, stdout=open(os.devnull, "w")) + [ + SERVER_BINARY, self.config["server_map"], "-windowed", + "-ResX=400", "-ResY=300", "-carla-server", + "-carla-world-port={}".format(self.server_port) + ], + preexec_fn=os.setsid, + stdout=open(os.devnull, "w")) live_carla_processes.add(os.getpgid(self.server_process.pid)) for i in range(RETRIES_ON_ERROR): @@ -257,14 +262,14 @@ class CarlaEnv(gym.Env): if self.config["use_depth_camera"]: camera1 = Camera("CameraDepth", PostProcessing="Depth") - camera1.set_image_size( - self.config["render_x_res"], self.config["render_y_res"]) + camera1.set_image_size(self.config["render_x_res"], + self.config["render_y_res"]) camera1.set_position(30, 0, 130) settings.add_sensor(camera1) camera2 = Camera("CameraRGB") - camera2.set_image_size( - self.config["render_x_res"], self.config["render_y_res"]) + camera2.set_image_size(self.config["render_x_res"], + self.config["render_y_res"]) camera2.set_position(30, 0, 130) settings.add_sensor(camera2) @@ -274,13 +279,14 @@ class CarlaEnv(gym.Env): self.start_pos = positions[self.scenario["start_pos_id"]] self.end_pos = positions[self.scenario["end_pos_id"]] self.start_coord = [ - self.start_pos.location.x // 100, self.start_pos.location.y // 100] + self.start_pos.location.x // 100, self.start_pos.location.y // 100 + ] self.end_coord = [ - self.end_pos.location.x // 100, self.end_pos.location.y // 100] - print( - "Start pos {} ({}), end {} ({})".format( - self.scenario["start_pos_id"], self.start_coord, - self.scenario["end_pos_id"], self.end_coord)) + self.end_pos.location.x // 100, self.end_pos.location.y // 100 + ] + print("Start pos {} ({}), end {} ({})".format( + self.scenario["start_pos_id"], self.start_coord, + self.scenario["end_pos_id"], self.end_coord)) # Notify the server that we want to start the episode at the # player_start index. 
This function blocks until the server is ready @@ -300,11 +306,10 @@ class CarlaEnv(gym.Env): prev_image = image if self.config["framestack"] == 2: image = np.concatenate([prev_image, image], axis=2) - obs = ( - image, - COMMAND_ORDINAL[py_measurements["next_command"]], - [py_measurements["forward_speed"], - py_measurements["distance_to_goal"]]) + obs = (image, COMMAND_ORDINAL[py_measurements["next_command"]], [ + py_measurements["forward_speed"], + py_measurements["distance_to_goal"] + ]) self.last_obs = obs return obs @@ -313,9 +318,8 @@ class CarlaEnv(gym.Env): obs = self._step(action) return obs except Exception: - print( - "Error during step, terminating episode early", - traceback.format_exc()) + print("Error during step, terminating episode early", + traceback.format_exc()) self.clear_server_state() return (self.last_obs, 0.0, True, {}) @@ -336,12 +340,14 @@ class CarlaEnv(gym.Env): hand_brake = False if self.config["verbose"]: - print( - "steer", steer, "throttle", throttle, "brake", brake, - "reverse", reverse) + print("steer", steer, "throttle", throttle, "brake", brake, + "reverse", reverse) self.client.send_control( - steer=steer, throttle=throttle, brake=brake, hand_brake=hand_brake, + steer=steer, + throttle=throttle, + brake=brake, + hand_brake=hand_brake, reverse=reverse) # Process observations @@ -359,15 +365,14 @@ class CarlaEnv(gym.Env): "reverse": reverse, "hand_brake": hand_brake, } - reward = compute_reward( - self, self.prev_measurement, py_measurements) + reward = compute_reward(self, self.prev_measurement, py_measurements) self.total_reward += reward py_measurements["reward"] = reward py_measurements["total_reward"] = self.total_reward - done = (self.num_steps > self.scenario["max_steps"] or - py_measurements["next_command"] == "REACH_GOAL" or - (self.config["early_terminate_on_collision"] and - collided_done(py_measurements))) + done = (self.num_steps > self.scenario["max_steps"] + or py_measurements["next_command"] == "REACH_GOAL" + or (self.config["early_terminate_on_collision"] + and collided_done(py_measurements))) py_measurements["done"] = done self.prev_measurement = py_measurements @@ -377,8 +382,7 @@ class CarlaEnv(gym.Env): self.measurements_file = open( os.path.join( CARLA_OUT_PATH, - "measurements_{}.json".format(self.episode_id)), - "w") + "measurements_{}.json".format(self.episode_id)), "w") self.measurements_file.write(json.dumps(py_measurements)) self.measurements_file.write("\n") if done: @@ -389,9 +393,8 @@ class CarlaEnv(gym.Env): self.num_steps += 1 image = self.preprocess_image(image) - return ( - self.encode_obs(image, py_measurements), reward, done, - py_measurements) + return (self.encode_obs(image, py_measurements), reward, done, + py_measurements) def images_to_video(self): videos_dir = os.path.join(CARLA_OUT_PATH, "Videos") @@ -413,15 +416,15 @@ class CarlaEnv(gym.Env): if self.config["use_depth_camera"]: assert self.config["use_depth_camera"] data = (image.data - 0.5) * 2 - data = data.reshape( - self.config["render_y_res"], self.config["render_x_res"], 1) + data = data.reshape(self.config["render_y_res"], + self.config["render_x_res"], 1) data = cv2.resize( data, (self.config["x_res"], self.config["y_res"]), interpolation=cv2.INTER_AREA) data = np.expand_dims(data, 2) else: - data = image.data.reshape( - self.config["render_y_res"], self.config["render_x_res"], 3) + data = image.data.reshape(self.config["render_y_res"], + self.config["render_x_res"], 3) data = cv2.resize( data, (self.config["x_res"], self.config["y_res"]), 
interpolation=cv2.INTER_AREA) @@ -448,36 +451,39 @@ class CarlaEnv(gym.Env): cur = measurements.player_measurements if self.config["enable_planner"]: - next_command = COMMANDS_ENUM[ - self.planner.get_next_command( - [cur.transform.location.x, cur.transform.location.y, - GROUND_Z], - [cur.transform.orientation.x, cur.transform.orientation.y, - GROUND_Z], - [self.end_pos.location.x, self.end_pos.location.y, - GROUND_Z], - [self.end_pos.orientation.x, self.end_pos.orientation.y, - GROUND_Z]) - ] + next_command = COMMANDS_ENUM[self.planner.get_next_command( + [cur.transform.location.x, cur.transform.location.y, GROUND_Z], + [ + cur.transform.orientation.x, cur.transform.orientation.y, + GROUND_Z + ], + [self.end_pos.location.x, self.end_pos.location.y, GROUND_Z], [ + self.end_pos.orientation.x, self.end_pos.orientation.y, + GROUND_Z + ])] else: next_command = "LANE_FOLLOW" if next_command == "REACH_GOAL": distance_to_goal = 0.0 # avoids crash in planner elif self.config["enable_planner"]: - distance_to_goal = self.planner.get_shortest_path_distance( - [cur.transform.location.x, cur.transform.location.y, GROUND_Z], - [cur.transform.orientation.x, cur.transform.orientation.y, - GROUND_Z], - [self.end_pos.location.x, self.end_pos.location.y, GROUND_Z], - [self.end_pos.orientation.x, self.end_pos.orientation.y, - GROUND_Z]) / 100 + distance_to_goal = self.planner.get_shortest_path_distance([ + cur.transform.location.x, cur.transform.location.y, GROUND_Z + ], [ + cur.transform.orientation.x, cur.transform.orientation.y, + GROUND_Z + ], [self.end_pos.location.x, self.end_pos.location.y, GROUND_Z], [ + self.end_pos.orientation.x, self.end_pos.orientation.y, + GROUND_Z + ]) / 100 else: distance_to_goal = -1 - distance_to_goal_euclidean = float(np.linalg.norm( - [cur.transform.location.x - self.end_pos.location.x, - cur.transform.location.y - self.end_pos.location.y]) / 100) + distance_to_goal_euclidean = float( + np.linalg.norm([ + cur.transform.location.x - self.end_pos.location.x, + cur.transform.location.y - self.end_pos.location.y + ]) / 100) py_measurements = { "episode_id": self.episode_id, @@ -513,8 +519,8 @@ class CarlaEnv(gym.Env): if not os.path.exists(out_dir): os.makedirs(out_dir) out_file = os.path.join( - out_dir, - "{}_{:>04}.jpg".format(self.episode_id, self.num_steps)) + out_dir, "{}_{:>04}.jpg".format(self.episode_id, + self.num_steps)) scipy.misc.imsave(out_file, image.data) assert observation is not None, sensor_data @@ -621,8 +627,7 @@ REWARD_FUNCTIONS = { def compute_reward(env, prev, current): - return REWARD_FUNCTIONS[env.config["reward_function"]]( - env, prev, current) + return REWARD_FUNCTIONS[env.config["reward_function"]](env, prev, current) def print_measurements(measurements): @@ -654,9 +659,8 @@ def sigmoid(x): def collided_done(py_measurements): m = py_measurements - collided = ( - m["collision_vehicles"] > 0 or m["collision_pedestrians"] > 0 or - m["collision_other"] > 0) + collided = (m["collision_vehicles"] > 0 or m["collision_pedestrians"] > 0 + or m["collision_other"] > 0) return bool(collided or m["total_reward"] < -100) diff --git a/examples/carla/models.py b/python/ray/rllib/examples/carla/models.py similarity index 83% rename from examples/carla/models.py rename to python/ray/rllib/examples/carla/models.py index 9233c9c8e..fd20cd0c0 100644 --- a/examples/carla/models.py +++ b/python/ray/rllib/examples/carla/models.py @@ -43,8 +43,8 @@ class CarlaModel(Model): (inputs.shape.as_list()[1:], expected_shape) # Reshape the input vector back into its components - 
vision_in = tf.reshape( - inputs[:, :image_size], [tf.shape(inputs)[0]] + image_shape) + vision_in = tf.reshape(inputs[:, :image_size], + [tf.shape(inputs)[0]] + image_shape) metrics_in = inputs[:, image_size:] print("Vision in shape", vision_in) print("Metrics in shape", metrics_in) @@ -53,18 +53,26 @@ class CarlaModel(Model): with tf.name_scope("carla_vision"): for i, (out_size, kernel, stride) in enumerate(convs[:-1], 1): vision_in = slim.conv2d( - vision_in, out_size, kernel, stride, + vision_in, + out_size, + kernel, + stride, scope="conv{}".format(i)) out_size, kernel, stride = convs[-1] vision_in = slim.conv2d( - vision_in, out_size, kernel, stride, - padding="VALID", scope="conv_out") + vision_in, + out_size, + kernel, + stride, + padding="VALID", + scope="conv_out") vision_in = tf.squeeze(vision_in, [1, 2]) # Setup metrics layer with tf.name_scope("carla_metrics"): metrics_in = slim.fully_connected( - metrics_in, 64, + metrics_in, + 64, weights_initializer=xavier_initializer(), activation_fn=activation, scope="metrics_out") @@ -79,15 +87,18 @@ class CarlaModel(Model): print("Shape of concatenated out is", last_layer.shape) for size in hiddens: last_layer = slim.fully_connected( - last_layer, size, + last_layer, + size, weights_initializer=xavier_initializer(), activation_fn=activation, scope="fc{}".format(i)) i += 1 output = slim.fully_connected( - last_layer, num_outputs, + last_layer, + num_outputs, weights_initializer=normc_initializer(0.01), - activation_fn=None, scope="fc_out") + activation_fn=None, + scope="fc_out") return output, last_layer diff --git a/examples/carla/ppo_lane_keep.py b/python/ray/rllib/examples/carla/ppo_lane_keep.py similarity index 93% rename from examples/carla/ppo_lane_keep.py rename to python/ray/rllib/examples/carla/ppo_lane_keep.py index 25e5acbf3..ac0f6ff8a 100644 --- a/examples/carla/ppo_lane_keep.py +++ b/python/ray/rllib/examples/carla/ppo_lane_keep.py @@ -31,7 +31,6 @@ run_experiments({ "carla-ppo": { "run": "PPO", "env": "carla_env", - "trial_resources": {"cpu": 4, "gpu": 1}, "config": { "env_config": env_config, "model": { @@ -55,7 +54,9 @@ run_experiments({ "sgd_batchsize": 32, "devices": ["/gpu:0"], "tf_session_args": { - "gpu_options": {"allow_growth": True} + "gpu_options": { + "allow_growth": True + } } }, }, diff --git a/python/ray/rllib/examples/carla/scenarios.py b/python/ray/rllib/examples/carla/scenarios.py new file mode 100644 index 000000000..beedd2989 --- /dev/null +++ b/python/ray/rllib/examples/carla/scenarios.py @@ -0,0 +1,131 @@ +"""Collection of Carla scenarios, including those from the CoRL 2017 paper.""" + +TEST_WEATHERS = [0, 2, 5, 7, 9, 10, 11, 12, 13] +TRAIN_WEATHERS = [1, 3, 4, 6, 8, 14] + + +def build_scenario(city, start, end, vehicles, pedestrians, max_steps, + weathers): + return { + "city": city, + "num_vehicles": vehicles, + "num_pedestrians": pedestrians, + "weather_distribution": weathers, + "start_pos_id": start, + "end_pos_id": end, + "max_steps": max_steps, + } + + +# Simple scenario for Town02 that involves driving down a road +DEFAULT_SCENARIO = build_scenario( + city="Town02", + start=36, + end=40, + vehicles=20, + pedestrians=40, + max_steps=200, + weathers=[0]) + +# Simple scenario for Town02 that involves driving down a road +LANE_KEEP = build_scenario( + city="Town02", + start=36, + end=40, + vehicles=0, + pedestrians=0, + max_steps=2000, + weathers=[0]) + +# Scenarios from the CoRL2017 paper +POSES_TOWN1_STRAIGHT = [[36, 40], [39, 35], [110, 114], [7, 3], [0, 4], [ + 68, 50 +], [61, 59], [47, 64], 
[147, 90], [33, 87], [26, 19], [80, 76], [45, 49], [ + 55, 44 +], [29, 107], [95, 104], [84, 34], [53, 67], [22, 17], [91, 148], [20, 107], + [78, 70], [95, 102], [68, 44], [45, 69]] + +POSES_TOWN1_ONE_CURVE = [[138, 17], [47, 16], [26, 9], [42, 49], [140, 124], [ + 85, 98 +], [65, 133], [137, 51], [76, 66], [46, 39], [40, 60], [0, 29], [4, 129], [ + 121, 140 +], [2, 129], [78, 44], [68, 85], [41, 102], [95, 70], [68, 129], [84, 69], + [47, 79], [110, 15], [130, 17], [0, 17]] + +POSES_TOWN1_NAV = [[105, 29], [27, 130], [102, 87], [132, 27], [24, 44], [ + 96, 26 +], [34, 67], [28, 1], [140, 134], [105, 9], [148, 129], [65, 18], [21, 16], [ + 147, 97 +], [42, 51], [30, 41], [18, 107], [69, 45], [102, 95], [18, 145], [111, 64], + [79, 45], [84, 69], [73, 31], [37, 81]] + +POSES_TOWN2_STRAIGHT = [[38, 34], [4, 2], [12, 10], [62, 55], [43, 47], [ + 64, 66 +], [78, 76], [59, 57], [61, 18], [35, 39], [12, 8], [0, 18], [75, 68], [ + 54, 60 +], [45, 49], [46, 42], [53, 46], [80, 29], [65, 63], [0, 81], [54, 63], + [51, 42], [16, 19], [17, 26], [77, 68]] + +POSES_TOWN2_ONE_CURVE = [[37, 76], [8, 24], [60, 69], [38, 10], [21, 1], [ + 58, 71 +], [74, 32], [44, 0], [71, 16], [14, 24], [34, 11], [43, 14], [75, 16], [ + 80, 21 +], [3, 23], [75, 59], [50, 47], [11, 19], [77, 34], [79, 25], [40, 63], + [58, 76], [79, 55], [16, 61], [27, 11]] + +POSES_TOWN2_NAV = [[19, 66], [79, 14], [19, 57], [23, 1], [53, 76], [42, 13], [ + 31, 71 +], [33, 5], [54, 30], [10, 61], [66, 3], [27, 12], [79, 19], [2, 29], [16, 14], + [5, 57], [70, 73], [46, 67], [57, 50], [61, 49], [21, 12], + [51, 81], [77, 68], [56, 65], [43, 54]] + +TOWN1_STRAIGHT = [ + build_scenario("Town01", start, end, 0, 0, 300, TEST_WEATHERS) + for (start, end) in POSES_TOWN1_STRAIGHT +] + +TOWN1_ONE_CURVE = [ + build_scenario("Town01", start, end, 0, 0, 600, TEST_WEATHERS) + for (start, end) in POSES_TOWN1_ONE_CURVE +] + +TOWN1_NAVIGATION = [ + build_scenario("Town01", start, end, 0, 0, 900, TEST_WEATHERS) + for (start, end) in POSES_TOWN1_NAV +] + +TOWN1_NAVIGATION_DYNAMIC = [ + build_scenario("Town01", start, end, 20, 50, 900, TEST_WEATHERS) + for (start, end) in POSES_TOWN1_NAV +] + +TOWN2_STRAIGHT = [ + build_scenario("Town02", start, end, 0, 0, 300, TRAIN_WEATHERS) + for (start, end) in POSES_TOWN2_STRAIGHT +] + +TOWN2_STRAIGHT_DYNAMIC = [ + build_scenario("Town02", start, end, 20, 50, 300, TRAIN_WEATHERS) + for (start, end) in POSES_TOWN2_STRAIGHT +] + +TOWN2_ONE_CURVE = [ + build_scenario("Town02", start, end, 0, 0, 600, TRAIN_WEATHERS) + for (start, end) in POSES_TOWN2_ONE_CURVE +] + +TOWN2_NAVIGATION = [ + build_scenario("Town02", start, end, 0, 0, 900, TRAIN_WEATHERS) + for (start, end) in POSES_TOWN2_NAV +] + +TOWN2_NAVIGATION_DYNAMIC = [ + build_scenario("Town02", start, end, 20, 50, 900, TRAIN_WEATHERS) + for (start, end) in POSES_TOWN2_NAV +] + +TOWN1_ALL = (TOWN1_STRAIGHT + TOWN1_ONE_CURVE + TOWN1_NAVIGATION + + TOWN1_NAVIGATION_DYNAMIC) + +TOWN2_ALL = (TOWN2_STRAIGHT + TOWN2_ONE_CURVE + TOWN2_NAVIGATION + + TOWN2_NAVIGATION_DYNAMIC) diff --git a/examples/carla/train_a3c.py b/python/ray/rllib/examples/carla/train_a3c.py similarity index 96% rename from examples/carla/train_a3c.py rename to python/ray/rllib/examples/carla/train_a3c.py index 75856aef2..2c12cd824 100644 --- a/examples/carla/train_a3c.py +++ b/python/ray/rllib/examples/carla/train_a3c.py @@ -32,7 +32,6 @@ run_experiments({ "carla-a3c": { "run": "A3C", "env": "carla_env", - "trial_resources": {"cpu": 5, "extra_gpu": 2}, "config": { "env_config": env_config, 
"use_gpu_for_workers": True, diff --git a/examples/carla/train_dqn.py b/python/ray/rllib/examples/carla/train_dqn.py similarity index 81% rename from examples/carla/train_dqn.py rename to python/ray/rllib/examples/carla/train_dqn.py index 6180ca48f..fa2dba105 100644 --- a/examples/carla/train_dqn.py +++ b/python/ray/rllib/examples/carla/train_dqn.py @@ -25,21 +25,26 @@ register_env(env_name, lambda env_config: CarlaEnv(env_config)) register_carla_model() ray.init() + + +def shape_out(spec): + return (spec.config.env_config.framestack * + (spec.config.env_config.use_depth_camera and 1 or 3)) + + run_experiments({ "carla-dqn": { "run": "DQN", "env": "carla_env", - "trial_resources": {"cpu": 4, "gpu": 1}, "config": { "env_config": env_config, "model": { "custom_model": "carla", "custom_options": { "image_shape": [ - 80, 80, - lambda spec: spec.config.env_config.framestack * ( - spec.config.env_config.use_depth_camera and 1 or 3 - ), + 80, + 80, + shape_out, ], }, "conv_filters": [ @@ -53,7 +58,9 @@ run_experiments({ "schedule_max_timesteps": 100000, "gamma": 0.8, "tf_session_args": { - "gpu_options": {"allow_growth": True}, + "gpu_options": { + "allow_growth": True + }, }, }, }, diff --git a/examples/carla/train_ppo.py b/python/ray/rllib/examples/carla/train_ppo.py similarity index 80% rename from examples/carla/train_ppo.py rename to python/ray/rllib/examples/carla/train_ppo.py index 4f3ebf5ea..a9339ca79 100644 --- a/examples/carla/train_ppo.py +++ b/python/ray/rllib/examples/carla/train_ppo.py @@ -28,14 +28,14 @@ run_experiments({ "carla": { "run": "PPO", "env": "carla_env", - "trial_resources": {"cpu": 4, "gpu": 1}, "config": { "env_config": env_config, "model": { "custom_model": "carla", "custom_options": { "image_shape": [ - env_config["x_res"], env_config["y_res"], 6], + env_config["x_res"], env_config["y_res"], 6 + ], }, "conv_filters": [ [16, [8, 8], 4], @@ -44,17 +44,14 @@ run_experiments({ ], }, "num_workers": 1, - "timesteps_per_batch": 2000, - "min_steps_per_task": 100, + "train_batch_size": 2000, + "sample_batch_size": 100, "lambda": 0.95, "clip_param": 0.2, "num_sgd_iter": 20, - "sgd_stepsize": 0.0001, - "sgd_batchsize": 32, - "devices": ["/gpu:0"], - "tf_session_args": { - "gpu_options": {"allow_growth": True} - } + "lr": 0.0001, + "sgd_minibatch_size": 32, + "num_gpus": 1, }, }, }) diff --git a/examples/custom_env/custom_env.py b/python/ray/rllib/examples/custom_env.py similarity index 93% rename from examples/custom_env/custom_env.py rename to python/ray/rllib/examples/custom_env.py index b5a3240ea..66c028808 100644 --- a/examples/custom_env/custom_env.py +++ b/python/ray/rllib/examples/custom_env.py @@ -24,7 +24,7 @@ class SimpleCorridor(gym.Env): self.cur_pos = 0 self.action_space = Discrete(2) self.observation_space = Box( - 0.0, self.end_pos, shape=(1,), dtype=np.float32) + 0.0, self.end_pos, shape=(1, ), dtype=np.float32) self._spec = EnvSpec("SimpleCorridor-{}-v0".format(self.end_pos)) def reset(self): @@ -32,7 +32,7 @@ class SimpleCorridor(gym.Env): return [self.cur_pos] def step(self, action): - assert action in [0, 1] + assert action in [0, 1], action if action == 0 and self.cur_pos > 0: self.cur_pos -= 1 elif action == 1: diff --git a/python/ray/rllib/models/catalog.py b/python/ray/rllib/models/catalog.py index b98061fdd..9a889058c 100644 --- a/python/ray/rllib/models/catalog.py +++ b/python/ray/rllib/models/catalog.py @@ -51,14 +51,15 @@ class ModelCatalog(object): >>> prep = ModelCatalog.get_preprocessor(env) >>> observation = prep.transform(raw_observation) - >>> 
dist_cls, dist_dim = ModelCatalog.get_action_dist(env.action_space) - >>> model = ModelCatalog.get_model(inputs, dist_dim) + >>> dist_cls, dist_dim = ModelCatalog.get_action_dist( + env.action_space, {}) + >>> model = ModelCatalog.get_model(inputs, dist_dim, options) >>> dist = dist_cls(model.outputs) >>> action = dist.sample() """ @staticmethod - def get_action_dist(action_space, config=None, dist_type=None): + def get_action_dist(action_space, config, dist_type=None): """Returns action distribution class and size for the given action space. Args: @@ -90,7 +91,8 @@ class ModelCatalog(object): child_dist = [] input_lens = [] for action in action_space.spaces: - dist, action_size = ModelCatalog.get_action_dist(action) + dist, action_size = ModelCatalog.get_action_dist( + action, config) child_dist.append(dist) input_lens.append(action_size) return partial( @@ -139,11 +141,7 @@ class ModelCatalog(object): " not supported".format(action_space)) @staticmethod - def get_model(inputs, - num_outputs, - options=None, - state_in=None, - seq_lens=None): + def get_model(inputs, num_outputs, options, state_in=None, seq_lens=None): """Returns a suitable model conforming to given input and output specs. Args: @@ -157,7 +155,6 @@ class ModelCatalog(object): model (Model): Neural network model. """ - options = options or {} model = ModelCatalog._get_model(inputs, num_outputs, options, state_in, seq_lens) diff --git a/python/ray/rllib/test/test_catalog.py b/python/ray/rllib/test/test_catalog.py index e3dc1e782..62468e123 100644 --- a/python/ray/rllib/test/test_catalog.py +++ b/python/ray/rllib/test/test_catalog.py @@ -69,12 +69,13 @@ class ModelCatalogTest(unittest.TestCase): ray.init() with tf.variable_scope("test1"): - p1 = ModelCatalog.get_model(np.zeros((10, 3), dtype=np.float32), 5) + p1 = ModelCatalog.get_model( + np.zeros((10, 3), dtype=np.float32), 5, {}) self.assertEqual(type(p1), FullyConnectedNetwork) with tf.variable_scope("test2"): p2 = ModelCatalog.get_model( - np.zeros((10, 84, 84, 3), dtype=np.float32), 5) + np.zeros((10, 84, 84, 3), dtype=np.float32), 5, {}) self.assertEqual(type(p2), VisionNetwork) def testCustomModel(self): diff --git a/python/ray/rllib/tuned_examples/swimmer-ars.yaml b/python/ray/rllib/tuned_examples/swimmer-ars.yaml index 338c8a12c..532bb00b0 100644 --- a/python/ray/rllib/tuned_examples/swimmer-ars.yaml +++ b/python/ray/rllib/tuned_examples/swimmer-ars.yaml @@ -1,4 +1,3 @@ -# can expect improvement to -140 reward in ~300-500k timesteps swimmer-ars: env: Swimmer-v2 run: ARS @@ -9,8 +8,9 @@ swimmer-ars: num_workers: 1 sgd_stepsize: 0.02 noise_size: 250000000 - policy_type: LinearPolicy eval_prob: 0.2 offset: 0 observation_filter: NoFilter report_length: 3 + model: + fcnet_hiddens: [] # a linear policy
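
Note (not part of the patch): the sketch below illustrates the calling convention this change introduces, where the `model` config dict is passed explicitly to `ModelCatalog.get_action_dist` and `ModelCatalog.get_model`, so options such as `fcnet_hiddens` reach both the network and the action distribution (an empty `fcnet_hiddens` list gives the linear policy used in `swimmer-ars.yaml`). It assumes the RLlib and TensorFlow 1.x APIs of roughly this era; `CartPole-v0` and the placeholder names are illustrative only.

```python
# Minimal sketch of the post-patch ModelCatalog usage; assumes ray[rllib] and
# gym from around this release, not a definitive or current API.
import gym
import tensorflow as tf

from ray.rllib.models import ModelCatalog

env = gym.make("CartPole-v0")
model_config = {"fcnet_hiddens": []}  # empty hiddens -> linear policy

prep = ModelCatalog.get_preprocessor(env)
inputs = tf.placeholder(tf.float32, [None] + list(prep.shape))

# Both calls now take the model config explicitly; before this patch the
# config defaulted to {} here, so user model options never reached the
# action distribution in ARS/ES or PPO.
dist_cls, dist_dim = ModelCatalog.get_action_dist(env.action_space,
                                                  model_config)
model = ModelCatalog.get_model(inputs, dist_dim, model_config)
dist = dist_cls(model.outputs)
action_op = dist.sample()
```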