[rllib] Set PPO observation filter to NoFilter by default (#4191)
Parent: 11a28834fa
Commit: b5799b5286
11 changed files with 21 additions and 10 deletions
@@ -332,6 +332,7 @@ class Agent(Trainable):
         merged_config = deep_update(merged_config, config,
                                     self._allow_unknown_configs,
                                     self._allow_unknown_subkeys)
+        self.raw_user_config = config
         self.config = merged_config
         Agent._validate_config(self.config)
         if self.config.get("log_level"):
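The new `raw_user_config` attribute records the config exactly as the caller passed it, before the algorithm defaults are merged in via `deep_update`. A minimal standalone sketch of the distinction, using plain dicts rather than RLlib's actual helper:

```python
# Sketch only: RLlib's deep_update does a recursive merge; a flat merge is
# enough to show the idea.
DEFAULT_CONFIG = {
    "observation_filter": "NoFilter",
    "batch_mode": "truncate_episodes",
}

user_config = {"batch_mode": "complete_episodes"}

merged_config = {**DEFAULT_CONFIG, **user_config}  # defaults filled in
raw_user_config = dict(user_config)                # only the explicit keys

print(merged_config)
# {'observation_filter': 'NoFilter', 'batch_mode': 'complete_episodes'}
```

Keeping the raw dict around is what lets PPO tell, later in this commit, whether the user explicitly chose an observation filter or is silently relying on the new default.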
@@ -51,7 +51,7 @@ DEFAULT_CONFIG = with_common_config({
     # Whether to rollout "complete_episodes" or "truncate_episodes"
     "batch_mode": "truncate_episodes",
     # Which observation filter to apply to the observation
-    "observation_filter": "MeanStdFilter",
+    "observation_filter": "NoFilter",
     # Uses the sync samples optimizer instead of the multi-gpu one. This does
     # not support minibatches.
     "simple_optimizer": False,
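With NoFilter now the default, running-mean observation normalization becomes opt-in. A minimal sketch of restoring the previous behavior, assuming the Ray 0.7-era PPOAgent API (the import path and environment name here are illustrative):

```python
import ray
from ray.rllib.agents.ppo import PPOAgent

ray.init()

# Opt back in to the pre-0.7.0 behavior by setting the filter explicitly.
agent = PPOAgent(
    env="Pendulum-v0",
    config={"observation_filter": "MeanStdFilter"},
)
result = agent.train()
print(result["episode_reward_mean"])
```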
@@ -99,6 +99,14 @@ class PPOAgent(Agent):
 
     @override(Agent)
     def _train(self):
+        if "observation_filter" not in self.raw_user_config:
+            # TODO(ekl) remove this message after a few releases
+            logger.info(
+                "Important! Since 0.7.0, observation normalization is no "
+                "longer enabled by default. To enable running-mean "
+                "normalization, set 'observation_filter': 'MeanStdFilter'. "
+                "You can ignore this message if your environment doesn't "
+                "require observation normalization.")
         prev_steps = self.optimizer.num_steps_sampled
         fetches = self.optimizer.step()
         if "kl" in fetches:
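This check is the reason `raw_user_config` was added in the Agent change above: the notice only appears when the user did not set `observation_filter` themselves. A standalone sketch of the same pattern (the function name and setup are illustrative, not RLlib's API):

```python
import logging

logger = logging.getLogger(__name__)


def train_step(raw_user_config):
    # Only nudge callers who are silently relying on the changed default.
    if "observation_filter" not in raw_user_config:
        logger.info(
            "Observation normalization is no longer enabled by default; "
            "set 'observation_filter': 'MeanStdFilter' to re-enable it.")
    # ... run one optimizer step here ...


logging.basicConfig(level=logging.INFO)
train_step({})                                  # notice is logged
train_step({"observation_filter": "NoFilter"})  # explicit choice, no notice
```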
@@ -139,7 +147,6 @@ class PPOAgent(Agent):
                 "{} iterations for your value ".format(rew_scale) +
                 "function to converge. If this is not intended, consider "
                 "increasing `vf_clip_param`.")
-
         return res
 
     def _validate_config(self):
@@ -159,13 +166,7 @@ class PPOAgent(Agent):
                 "In multi-agent mode, policies will be optimized sequentially "
                 "by the multi-GPU optimizer. Consider setting "
                 "simple_optimizer=True if this doesn't work for you.")
-        if self.config["observation_filter"] != "NoFilter":
-            logger.warning(
-                "By default, observations will be normalized with {}. ".format(
-                    self.config["observation_filter"]) +
-                "If you are using image or discrete type observations, "
-                "consider disabling this with observation_filter=NoFilter.")
         if not self.config["vf_share_layers"]:
             logger.warning(
-                "By default, the value function will NOT share layers with "
-                "the policy model (vf_share_layers=False).")
+                "FYI: By default, the value function will not share layers "
+                "with the policy model ('vf_share_layers': False).")
@@ -20,3 +20,4 @@ halfcheetah-ppo:
         num_envs_per_worker:
             grid_search: [16, 32]
         batch_mode: truncate_episodes
+        observation_filter: MeanStdFilter
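Each tuned example now pins `observation_filter: MeanStdFilter` explicitly, so these benchmarks keep their previous behavior despite the new default. A minimal sketch of running one of these experiment files programmatically (the file path and the Ray 0.7-era `run_experiments` call are assumptions):

```python
import yaml

import ray
from ray import tune

ray.init()

# Load a tuned-example spec (experiment name -> settings) and hand it to Tune.
with open("halfcheetah-ppo.yaml") as f:
    experiments = yaml.safe_load(f)

tune.run_experiments(experiments)
```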
@@ -11,3 +11,4 @@ hopper-ppo:
         num_workers: 64
         num_gpus: 4
         batch_mode: complete_episodes
+        observation_filter: MeanStdFilter
@@ -18,3 +18,4 @@ humanoid-ppo-gae:
         num_workers: 64
         num_gpus: 4
         batch_mode: complete_episodes
+        observation_filter: MeanStdFilter
@@ -16,3 +16,4 @@ humanoid-ppo:
         num_workers: 64
         num_gpus: 4
         batch_mode: complete_episodes
+        observation_filter: MeanStdFilter
@@ -11,3 +11,4 @@ cartpole-ppo:
             grid_search: [1, 4]
         sgd_minibatch_size:
             grid_search: [128, 256, 512]
+        observation_filter: MeanStdFilter
@@ -15,3 +15,4 @@ pendulum-ppo:
         model:
             fcnet_hiddens: [64, 64]
         batch_mode: complete_episodes
+        observation_filter: MeanStdFilter
@@ -7,3 +7,4 @@ cartpole-ppo:
     config:
         num_workers: 1
         batch_mode: complete_episodes
+        observation_filter: MeanStdFilter
@@ -17,3 +17,4 @@ pendulum-ppo:
         model:
             fcnet_hiddens: [64, 64]
         batch_mode: complete_episodes
+        observation_filter: MeanStdFilter
@@ -10,3 +10,4 @@ walker2d-v1-ppo:
         num_workers: 64
         num_gpus: 4
         batch_mode: complete_episodes
+        observation_filter: MeanStdFilter