[rllib] Mountaincar fix (#1472)

* Fix for gym version 0.9.5.

* Fixed a bug in Reshaper that caused discrete spaces to fail (spaces are now checked for `n` before `shape`).
This commit is contained in:
eugenevinitsky 2018-01-25 13:58:35 -08:00 committed by Richard Liaw
parent f6c835e4b8
commit 0a01d3c71f
3 changed files with 13 additions and 13 deletions

View file

@ -36,18 +36,18 @@ def create_env(env_config):
if __name__ == '__main__':
register_env(env_name, lambda env_config: create_env(env_config))
config = ppo.DEFAULT_CONFIG.copy()
horizon = 200
num_cpus = 2
ray.init(num_cpus=num_cpus, redirect_output=False)
horizon = 10
num_cpus = 4
ray.init(num_cpus=num_cpus, redirect_output=True)
config["num_workers"] = num_cpus
config["timesteps_per_batch"] = 100
config["timesteps_per_batch"] = 10
config["num_sgd_iter"] = 10
config["gamma"] = 0.999
config["horizon"] = horizon
config["use_gae"] = True
config["use_gae"] = False
config["model"].update({"fcnet_hiddens": [256, 256]})
options = {"multiagent_obs_shapes": [2, 2],
"multiagent_act_shapes": [3, 3],
"multiagent_act_shapes": [1, 1],
"multiagent_shared_model": False,
"multiagent_fcnet_hiddens": [[32, 32]] * 2}
config["model"].update({"custom_options": options})

View file

@ -36,11 +36,11 @@ def create_env(env_config):
if __name__ == '__main__':
register_env(env_name, lambda env_config: create_env(env_config))
config = ppo.DEFAULT_CONFIG.copy()
horizon = 100
num_cpus = 2
ray.init(num_cpus=num_cpus, redirect_output=False)
horizon = 10
num_cpus = 4
ray.init(num_cpus=num_cpus, redirect_output=True)
config["num_workers"] = num_cpus
config["timesteps_per_batch"] = 100
config["timesteps_per_batch"] = 10
config["num_sgd_iter"] = 10
config["gamma"] = 0.999
config["horizon"] = horizon

View file

@ -14,10 +14,10 @@ class Reshaper(object):
if isinstance(env_space, list):
for space in env_space:
# Handle both gym arrays and just lists of inputs length
if hasattr(space, "shape"):
arr_shape = np.asarray(space.shape)
elif hasattr(space, "n"):
if hasattr(space, "n"):
arr_shape = np.asarray([1]) # discrete space
elif hasattr(space, "shape"):
arr_shape = np.asarray(space.shape)
else:
arr_shape = space
self.shapes.append(arr_shape)