[RLlib] De-flake 3 test cases; Fix config.simple_optimizer and SampleBatch.is_training warnings. (#17321)

Sven Mika, 2021-07-27 14:39:06 -04:00 (committed by GitHub)
parent e70d84953e
commit 90b21ce27e
8 changed files with 44 additions and 57 deletions

rllib/BUILD

@@ -157,20 +157,19 @@ py_test(
)
# CQL
-# Skipping due to high flakiness.
-#py_test(
-# name = "run_regression_tests_pendulum_cql_tf",
-# main = "tests/run_regression_tests.py",
-# tags = ["learning_tests_tf", "learning_tests_pendulum", "flaky"],
-# size = "large",
-# srcs = ["tests/run_regression_tests.py"],
-# # Include the zipped json data file as well.
-# data = [
-# "tuned_examples/cql/pendulum-cql.yaml",
-# "tests/data/pendulum/enormous.zip",
-# ],
-# args = ["--yaml-dir=tuned_examples/cql"]
-#)
+py_test(
+ name = "run_regression_tests_pendulum_cql_tf",
+ main = "tests/run_regression_tests.py",
+ tags = ["learning_tests_tf", "learning_tests_pendulum", "flaky"],
+ size = "large",
+ srcs = ["tests/run_regression_tests.py"],
+ # Include the zipped json data file as well.
+ data = [
+ "tuned_examples/cql/pendulum-cql.yaml",
+ "tests/data/pendulum/enormous.zip",
+ ],
+ args = ["--yaml-dir=tuned_examples/cql"]
+)
py_test(
name = "run_regression_tests_pendulum_cql_torch",
@@ -505,16 +504,15 @@ py_test(
args = ["--yaml-dir=tuned_examples/sac"]
)
-# Skipping due to high flakiness.
-#py_test(
-# name = "run_regression_tests_pendulum_sac_torch",
-# main = "tests/run_regression_tests.py",
-# tags = ["learning_tests_torch", "learning_tests_pendulum", "flaky"],
-# size = "large",
-# srcs = ["tests/run_regression_tests.py"],
-# data = ["tuned_examples/sac/pendulum-sac.yaml"],
-# args = ["--yaml-dir=tuned_examples/sac", "--framework=torch"]
-#)
+py_test(
+ name = "run_regression_tests_pendulum_sac_torch",
+ main = "tests/run_regression_tests.py",
+ tags = ["learning_tests_torch", "learning_tests_pendulum", "flaky"],
+ size = "large",
+ srcs = ["tests/run_regression_tests.py"],
+ data = ["tuned_examples/sac/pendulum-sac.yaml"],
+ args = ["--yaml-dir=tuned_examples/sac", "--framework=torch"]
+)
py_test(
name = "run_regression_tests_transformed_actions_pendulum_sac_tf",
@@ -1378,13 +1376,12 @@ py_test(
# Tag: utils
# --------------------------------------------------------------------
-# Skipping due to high flakiness.
-#py_test(
-# name = "test_curiosity",
-# tags = ["utils", "flaky"],
-# size = "large",
-# srcs = ["utils/exploration/tests/test_curiosity.py"]
-#)
+py_test(
+ name = "test_curiosity",
+ tags = ["utils", "flaky"],
+ size = "large",
+ srcs = ["utils/exploration/tests/test_curiosity.py"]
+)
py_test(
name = "test_explorations",

rllib/agents/ddpg/ddpg.py

@@ -5,7 +5,6 @@ from ray.rllib.agents.trainer import with_common_config
from ray.rllib.agents.dqn.dqn import GenericOffPolicyTrainer
from ray.rllib.agents.ddpg.ddpg_tf_policy import DDPGTFPolicy
from ray.rllib.policy.policy import Policy
-from ray.rllib.utils.deprecation import DEPRECATED_VALUE
from ray.rllib.utils.typing import TrainerConfigDict
logger = logging.getLogger(__name__)
@@ -188,11 +187,6 @@ def validate_config(config: TrainerConfigDict) -> None:
"'complete_episodes'. Setting batch_mode=complete_episodes.")
config["batch_mode"] = "complete_episodes"
-if config["simple_optimizer"] != DEPRECATED_VALUE or \
-config["simple_optimizer"] is False:
-logger.warning("`simple_optimizer` must be True (or unset) for DDPG!")
-config["simple_optimizer"] = True
def get_policy_class(config: TrainerConfigDict) -> Optional[Type[Policy]]:
"""Policy class picker function. Class is chosen based on DL-framework.

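Note on the deleted check above: because the two clauses are joined by `or`, it fired for any explicitly set `simple_optimizer` value, even the required value True, which is why the warning was spurious. Below is a minimal plain-Python sketch of the removed condition's behavior; `DEPRECATED_VALUE` is assumed to be an integer sentinel (as in `ray.rllib.utils.deprecation` of this era), and `old_check_warns` is a hypothetical helper for illustration only. The identical block is removed from SAC in the next file.

    # Stand-in for ray.rllib.utils.deprecation.DEPRECATED_VALUE (assumed to be
    # an integer sentinel in this Ray version).
    DEPRECATED_VALUE = -1

    def old_check_warns(simple_optimizer) -> bool:
        # The condition removed by this commit from DDPG's and SAC's
        # validate_config().
        return simple_optimizer != DEPRECATED_VALUE or simple_optimizer is False

    print(old_check_warns(True))              # True  -> warned even for the "correct" value
    print(old_check_warns(False))             # True  -> warned
    print(old_check_warns(DEPRECATED_VALUE))  # False -> silent only when left unset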
rllib/agents/sac/sac.py

@@ -182,11 +182,6 @@ def validate_config(config: TrainerConfigDict) -> None:
if config["grad_clip"] is not None and config["grad_clip"] <= 0.0:
raise ValueError("`grad_clip` value must be > 0.0!")
-if config["simple_optimizer"] != DEPRECATED_VALUE or \
-config["simple_optimizer"] is False:
-logger.warning("`simple_optimizer` must be True (or unset) for SAC!")
-config["simple_optimizer"] = True
def get_policy_class(config: TrainerConfigDict) -> Optional[Type[Policy]]:
"""Policy class picker function. Class is chosen based on DL-framework.

rllib/agents/sac/tests/test_sac.py

@@ -428,9 +428,9 @@ class TestSAC(unittest.TestCase):
check(
tf_var,
np.transpose(torch_var.detach().cpu()),
-rtol=0.1)
+atol=0.002)
else:
-check(tf_var, torch_var, rtol=0.1)
+check(tf_var, torch_var, atol=0.002)
# And alpha.
check(policy.model.log_alpha,
tf_weights["default_policy/log_alpha"])
@@ -445,9 +445,10 @@ class TestSAC(unittest.TestCase):
check(
tf_var,
np.transpose(torch_var.detach().cpu()),
-rtol=0.1)
+atol=0.002)
else:
-check(tf_var, torch_var, rtol=0.1)
+check(tf_var, torch_var, atol=0.002)
+trainer.stop()
def _get_batch_helper(self, obs_size, actions, batch_size):
return SampleBatch({

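Note on the tolerance switch above: the compared weights can sit near zero, where a relative tolerance is disproportionately strict, so an absolute tolerance de-flakes the comparison. A minimal NumPy sketch of the difference (not RLlib code; RLlib's `check()` helper is assumed to forward these keyword arguments to NumPy-style closeness tests, and the values `a`, `b` are made up for illustration):

    import numpy as np

    a, b = 1e-5, 3e-5  # tiny absolute gap, large relative gap
    print(np.isclose(a, b, rtol=0.1, atol=0.0))    # False: fails a 10% relative check
    print(np.isclose(a, b, rtol=0.0, atol=0.002))  # True: passes a 0.002 absolute check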
rllib/policy/dynamic_tf_policy.py

@@ -241,7 +241,7 @@ class DynamicTFPolicy(TFPolicy):
True, (), name="is_exploring")
# Placeholder for `is_training` flag.
-self._input_dict["is_training"] = self._get_is_training_placeholder()
+self._input_dict.is_training = self._get_is_training_placeholder()
# Multi-GPU towers do not need any action computing/exploration
# graphs.
@@ -266,7 +266,7 @@ class DynamicTFPolicy(TFPolicy):
prev_reward_batch=self._input_dict.get(
SampleBatch.PREV_REWARDS),
explore=explore,
-is_training=self._input_dict["is_training"])
+is_training=self._input_dict.is_training)
# Distribution generation is customized, e.g., DQN, DDPG.
else:
if action_distribution_fn:
@@ -284,7 +284,7 @@ class DynamicTFPolicy(TFPolicy):
seq_lens=self._seq_lens,
explore=explore,
timestep=timestep,
-is_training=in_dict["is_training"])
+is_training=in_dict.is_training)
# Trying the old way (to stay backward compatible).
# TODO: Remove in future.
except TypeError as e:
@@ -301,7 +301,7 @@ class DynamicTFPolicy(TFPolicy):
prev_reward_batch=in_dict.get(
SampleBatch.PREV_REWARDS),
explore=explore,
-is_training=in_dict["is_training"])
+is_training=in_dict.is_training)
else:
raise e
@@ -379,6 +379,9 @@ class DynamicTFPolicy(TFPolicy):
self.config.get("num_multi_gpu_tower_stacks", 1))
]
+# Initialize again after loss and tower init.
+self.get_session().run(tf1.global_variables_initializer())
@override(TFPolicy)
@DeveloperAPI
def copy(self,
@@ -693,9 +696,6 @@ class DynamicTFPolicy(TFPolicy):
if (v not in self._state_inputs and v != self._seq_lens)
}
-# Initialize again after loss init.
-self.get_session().run(tf1.global_variables_initializer())
def _do_loss_init(self, train_batch: SampleBatch):
loss = self._loss_fn(self, self.model, self.dist_class, train_batch)
if self._stats_fn:

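Note on the `is_training` edits above: dict-key access of the flag on a `SampleBatch` emits the deprecation warning mentioned in the commit title, while attribute access does not. A minimal usage sketch, assuming this era's `SampleBatch` API (the attribute assignment mirrors what the changed lines above do):

    from ray.rllib.policy.sample_batch import SampleBatch

    batch = SampleBatch({"obs": [[0.0, 0.0]]})
    # Old pattern, produces the warning this commit fixes:
    #   flag = batch["is_training"]
    # New pattern, mirroring the change above:
    batch.is_training = True
    print(batch.is_training)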
rllib/tuned_examples/cql/pendulum-cql.yaml

@@ -6,7 +6,7 @@ pendulum-cql:
env: Pendulum-v0
run: CQL
stop:
-evaluation/episode_reward_mean: -600
+evaluation/episode_reward_mean: -700
timesteps_total: 100000
config:
# Works for both torch and tf.

rllib/tuned_examples/sac/pendulum-sac.yaml

@@ -4,7 +4,7 @@ pendulum-sac:
env: Pendulum-v0
run: SAC
stop:
-episode_reward_mean: -500
+episode_reward_mean: -600
timesteps_total: 10000
config:
# Works for both torch and tf.
@@ -33,6 +33,6 @@ pendulum-sac:
entropy_learning_rate: 0.0003
num_workers: 0
num_gpus: 0
-clip_actions: False
+clip_actions: false
normalize_actions: true
metrics_smoothing_episodes: 5

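Note on the two tuned-example changes above: the relaxed reward thresholds (-700 for CQL evaluation, -600 for SAC) are the targets the regression runs are expected to reach within their time-step budgets, so lowering them makes the runs pass more reliably. A rough, hand-written Python equivalent of the SAC example's stop block, assuming Ray 1.x's `ray.tune` API; `run_regression_tests.py` is assumed to feed the YAML into Tune in a comparable way:

    from ray import tune

    tune.run(
        "SAC",
        stop={
            "episode_reward_mean": -600,  # relaxed target from the YAML above
            "timesteps_total": 10000,
        },
        config={
            "env": "Pendulum-v0",
            "num_workers": 0,
            "num_gpus": 0,
        },
    )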
rllib/utils/exploration/tests/test_curiosity.py

@@ -153,7 +153,7 @@ class TestCuriosity(unittest.TestCase):
config["lr"] = 0.001
num_iterations = 10
-for fw in framework_iterator(config):
+for _ in framework_iterator(config, frameworks=("tf", "torch")):
# W/ Curiosity. Expect to learn something.
config["exploration_config"] = {
"type": "Curiosity",