[rllib] Use NoFilter instead of MeanStdFilter for PPO. (#1082)

* Make NoFilter the default observation filter for PPO.

* Make the reward filter NoFilter for PPO.
This commit is contained in:
Robert Nishihara 2017-10-04 21:31:17 -07:00 committed by Philipp Moritz
parent a72084c568
commit 971becc905
2 changed files with 2 additions and 2 deletions

View file

@@ -56,7 +56,7 @@ DEFAULT_CONFIG = {
# Config params to pass to the model
"model": {"free_log_std": False},
# Which observation filter to apply to the observation
-"observation_filter": "MeanStdFilter",
+"observation_filter": "NoFilter",
# If >1, adds frameskip
"extra_frameskip": 1,
# Number of timesteps collected in each outer loop

View file

@@ -148,7 +148,7 @@ class Runner(object):
else:
raise Exception("Unknown observation_filter: " +
str(config["observation_filter"]))
-self.reward_filter = MeanStdFilter((), clip=5.0)
+self.reward_filter = NoFilter()
self.sess.run(tf.global_variables_initializer())
def load_data(self, trajectories, full_trace):