Mirror of https://github.com/vale981/ray, synced 2025-03-06 18:41:40 -05:00
[rllib] Use NoFilter instead of MeanStdFilter for PPO. (#1082)
* Make NoFilter the default observation filter for PPO.
* Make reward filter NoFilter for PPO.
parent a72084c568
commit 971becc905
2 changed files with 2 additions and 2 deletions
@@ -56,7 +56,7 @@ DEFAULT_CONFIG = {
     # Config params to pass to the model
     "model": {"free_log_std": False},
     # Which observation filter to apply to the observation
-    "observation_filter": "MeanStdFilter",
+    "observation_filter": "NoFilter",
     # If >1, adds frameskip
     "extra_frameskip": 1,
     # Number of timesteps collected in each outer loop
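Since this hunk flips the shipped default, a run that relied on observation normalization now has to ask for it explicitly. A hypothetical usage sketch — only the "observation_filter" key and its two values come from this diff; the stand-in config dict and override pattern are illustrative:

# Hypothetical sketch: opting back in to normalization after this change.
DEFAULT_CONFIG = {"observation_filter": "NoFilter"}  # stand-in for the PPO defaults above

config = dict(DEFAULT_CONFIG)
config["observation_filter"] = "MeanStdFilter"  # restore the old per-run behavior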
@@ -148,7 +148,7 @@ class Runner(object):
         else:
             raise Exception("Unknown observation_filter: " +
                             str(config["observation_filter"]))
-        self.reward_filter = MeanStdFilter((), clip=5.0)
+        self.reward_filter = NoFilter()
         self.sess.run(tf.global_variables_initializer())

     def load_data(self, trajectories, full_trace):
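For context, a minimal sketch of what the two filters being swapped do. This is not the rllib source: the class names and the MeanStdFilter((), clip=5.0) / NoFilter() call signatures match the diff, but the internals (Welford-style running statistics, the epsilon in the denominator) are assumptions for illustration:

import numpy as np


class NoFilter(object):
    """Identity filter: passes values through unchanged."""

    def __call__(self, x, update=True):
        return np.asarray(x)


class MeanStdFilter(object):
    """Normalizes inputs by a running estimate of mean and std, then clips."""

    def __init__(self, shape, clip=5.0):
        self.clip = clip
        self.n = 0
        self.mean = np.zeros(shape)
        self.m2 = np.zeros(shape)  # running sum of squared deviations

    def __call__(self, x, update=True):
        x = np.asarray(x, dtype=np.float64)
        if update:
            # Welford's online update of the running mean and variance.
            self.n += 1
            delta = x - self.mean
            self.mean = self.mean + delta / self.n
            self.m2 = self.m2 + delta * (x - self.mean)
        std = np.sqrt(self.m2 / self.n) if self.n > 1 else np.ones_like(self.mean)
        out = (x - self.mean) / (std + 1e-8)
        if self.clip is not None:
            out = np.clip(out, -self.clip, self.clip)
        return out


# The line this commit removes created a scalar (shape ()) reward filter:
#     self.reward_filter = MeanStdFilter((), clip=5.0)
# and the line it adds replaces that with the identity:
#     self.reward_filter = NoFilter()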