[rllib] Set PPO observation filter to NoFilter by default (#4191)

Eric Liang 2019-03-01 13:19:33 -08:00 committed by GitHub
parent 11a28834fa
commit b5799b5286
11 changed files with 21 additions and 10 deletions

View file

@@ -332,6 +332,7 @@ class Agent(Trainable):
         merged_config = deep_update(merged_config, config,
                                     self._allow_unknown_configs,
                                     self._allow_unknown_subkeys)
+        self.raw_user_config = config
         self.config = merged_config
         Agent._validate_config(self.config)
         if self.config.get("log_level"):

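The raw_user_config attribute added above keeps a copy of the config exactly as the user passed it, before defaults are merged in. The PPO change below relies on it to tell "filter left at its default" apart from "filter explicitly set by the user". A minimal sketch of that pattern, with a hypothetical agent class and made-up defaults rather than RLlib's own:

DEFAULTS = {"observation_filter": "NoFilter", "lr": 5e-5}

class SketchAgent:
    def __init__(self, user_config):
        self.raw_user_config = dict(user_config)   # config as passed by the user
        self.config = {**DEFAULTS, **user_config}  # defaults merged in

    def maybe_warn(self):
        # Only nudge users who never made an explicit choice about the filter.
        if "observation_filter" not in self.raw_user_config:
            print("observation normalization is no longer enabled by default")

SketchAgent({"lr": 1e-4}).maybe_warn()                        # warns: filter left unset
SketchAgent({"observation_filter": "NoFilter"}).maybe_warn()  # silent: user chose NoFilter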
View file

@@ -51,7 +51,7 @@ DEFAULT_CONFIG = with_common_config({
     # Whether to rollout "complete_episodes" or "truncate_episodes"
     "batch_mode": "truncate_episodes",
     # Which observation filter to apply to the observation
-    "observation_filter": "MeanStdFilter",
+    "observation_filter": "NoFilter",
     # Uses the sync samples optimizer instead of the multi-gpu one. This does
     # not support minibatches.
     "simple_optimizer": False,
@@ -99,6 +99,14 @@ class PPOAgent(Agent):
     @override(Agent)
     def _train(self):
+        if "observation_filter" not in self.raw_user_config:
+            # TODO(ekl) remove this message after a few releases
+            logger.info(
+                "Important! Since 0.7.0, observation normalization is no "
+                "longer enabled by default. To enable running-mean "
+                "normalization, set 'observation_filter': 'MeanStdFilter'. "
+                "You can ignore this message if your environment doesn't "
+                "require observation normalization.")
         prev_steps = self.optimizer.num_steps_sampled
         fetches = self.optimizer.step()
         if "kl" in fetches:
@@ -139,7 +147,6 @@ class PPOAgent(Agent):
                 "{} iterations for your value ".format(rew_scale) +
                 "function to converge. If this is not intended, consider "
                 "increasing `vf_clip_param`.")
         return res

     def _validate_config(self):
@@ -159,13 +166,7 @@ class PPOAgent(Agent):
                 "In multi-agent mode, policies will be optimized sequentially "
                 "by the multi-GPU optimizer. Consider setting "
                 "simple_optimizer=True if this doesn't work for you.")
-        if self.config["observation_filter"] != "NoFilter":
-            logger.warning(
-                "By default, observations will be normalized with {}. ".format(
-                    self.config["observation_filter"]) +
-                "If you are using image or discrete type observations, "
-                "consider disabling this with observation_filter=NoFilter.")
         if not self.config["vf_share_layers"]:
             logger.warning(
-                "By default, the value function will NOT share layers with "
-                "the policy model (vf_share_layers=False).")
+                "FYI: By default, the value function will not share layers "
+                "with the policy model ('vf_share_layers': False).")

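For users who relied on the old default, the logger.info message above describes the fix: set the filter explicitly. A minimal sketch of opting back in to running-mean normalization, assuming the PPOAgent entry point of this RLlib version; the environment name is only an example:

import ray
from ray.rllib.agents.ppo import PPOAgent

ray.init()

# Re-enable the running-mean observation filter that this commit turns off
# by default.
agent = PPOAgent(
    env="Pendulum-v0",
    config={"observation_filter": "MeanStdFilter"},
)
result = agent.train()
print(result["episode_reward_mean"])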
View file

@@ -20,3 +20,4 @@ halfcheetah-ppo:
         num_envs_per_worker:
             grid_search: [16, 32]
         batch_mode: truncate_episodes
+        observation_filter: MeanStdFilter

View file

@@ -11,3 +11,4 @@ hopper-ppo:
         num_workers: 64
         num_gpus: 4
         batch_mode: complete_episodes
+        observation_filter: MeanStdFilter

View file

@@ -18,3 +18,4 @@ humanoid-ppo-gae:
         num_workers: 64
         num_gpus: 4
         batch_mode: complete_episodes
+        observation_filter: MeanStdFilter

View file

@@ -16,3 +16,4 @@ humanoid-ppo:
         num_workers: 64
         num_gpus: 4
         batch_mode: complete_episodes
+        observation_filter: MeanStdFilter

View file

@@ -11,3 +11,4 @@ cartpole-ppo:
             grid_search: [1, 4]
         sgd_minibatch_size:
             grid_search: [128, 256, 512]
+        observation_filter: MeanStdFilter

View file

@@ -15,3 +15,4 @@ pendulum-ppo:
         model:
             fcnet_hiddens: [64, 64]
         batch_mode: complete_episodes
+        observation_filter: MeanStdFilter

View file

@@ -7,3 +7,4 @@ cartpole-ppo:
     config:
         num_workers: 1
         batch_mode: complete_episodes
+        observation_filter: MeanStdFilter

View file

@@ -17,3 +17,4 @@ pendulum-ppo:
         model:
             fcnet_hiddens: [64, 64]
         batch_mode: complete_episodes
+        observation_filter: MeanStdFilter

View file

@@ -10,3 +10,4 @@ walker2d-v1-ppo:
         num_workers: 64
         num_gpus: 4
         batch_mode: complete_episodes
+        observation_filter: MeanStdFilter
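Each tuned example and regression test above re-adds the filter explicitly so its learning behavior is unchanged by the new default. Roughly the same thing expressed as a Python Tune experiment instead of YAML, as a sketch: the environment and worker settings mirror the cartpole regression test above, while the stopping criterion is an assumption:

import ray
from ray import tune

ray.init()

tune.run_experiments({
    "cartpole-ppo": {
        "run": "PPO",
        "stop": {"episode_reward_mean": 200},  # assumed stop criterion
        "config": {
            "env": "CartPole-v0",
            "num_workers": 1,
            "batch_mode": "complete_episodes",
            # Kept explicit now that PPO defaults to NoFilter.
            "observation_filter": "MeanStdFilter",
        },
    },
})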