Mirror of https://github.com/vale981/ray (synced 2025-03-06 02:21:39 -05:00)
[RLlib] De-flake 3 test cases; Fix config.simple_optimizer and SampleBatch.is_training warnings. (#17321)

parent e70d84953e
commit 90b21ce27e
8 changed files with 44 additions and 57 deletions
rllib/BUILD (59 changed lines)

@@ -157,20 +157,19 @@ py_test(
 )
 
 # CQL
-# Skipping due to high flakiness.
-#py_test(
-#    name = "run_regression_tests_pendulum_cql_tf",
-#    main = "tests/run_regression_tests.py",
-#    tags = ["learning_tests_tf", "learning_tests_pendulum", "flaky"],
-#    size = "large",
-#    srcs = ["tests/run_regression_tests.py"],
-#    # Include the zipped json data file as well.
-#    data = [
-#        "tuned_examples/cql/pendulum-cql.yaml",
-#        "tests/data/pendulum/enormous.zip",
-#    ],
-#    args = ["--yaml-dir=tuned_examples/cql"]
-#)
+py_test(
+    name = "run_regression_tests_pendulum_cql_tf",
+    main = "tests/run_regression_tests.py",
+    tags = ["learning_tests_tf", "learning_tests_pendulum", "flaky"],
+    size = "large",
+    srcs = ["tests/run_regression_tests.py"],
+    # Include the zipped json data file as well.
+    data = [
+        "tuned_examples/cql/pendulum-cql.yaml",
+        "tests/data/pendulum/enormous.zip",
+    ],
+    args = ["--yaml-dir=tuned_examples/cql"]
+)
 
 py_test(
     name = "run_regression_tests_pendulum_cql_torch",

@@ -505,16 +504,15 @@ py_test(
     args = ["--yaml-dir=tuned_examples/sac"]
 )
 
-# Skipping due to high flakiness.
-#py_test(
-#    name = "run_regression_tests_pendulum_sac_torch",
-#    main = "tests/run_regression_tests.py",
-#    tags = ["learning_tests_torch", "learning_tests_pendulum", "flaky"],
-#    size = "large",
-#    srcs = ["tests/run_regression_tests.py"],
-#    data = ["tuned_examples/sac/pendulum-sac.yaml"],
-#    args = ["--yaml-dir=tuned_examples/sac", "--framework=torch"]
-#)
+py_test(
+    name = "run_regression_tests_pendulum_sac_torch",
+    main = "tests/run_regression_tests.py",
+    tags = ["learning_tests_torch", "learning_tests_pendulum", "flaky"],
+    size = "large",
+    srcs = ["tests/run_regression_tests.py"],
+    data = ["tuned_examples/sac/pendulum-sac.yaml"],
+    args = ["--yaml-dir=tuned_examples/sac", "--framework=torch"]
+)
 
 py_test(
     name = "run_regression_tests_transformed_actions_pendulum_sac_tf",

@@ -1378,13 +1376,12 @@ py_test(
 # Tag: utils
 # --------------------------------------------------------------------
 
-# Skipping due to high flakiness.
-#py_test(
-#    name = "test_curiosity",
-#    tags = ["utils", "flaky"],
-#    size = "large",
-#    srcs = ["utils/exploration/tests/test_curiosity.py"]
-#)
+py_test(
+    name = "test_curiosity",
+    tags = ["utils", "flaky"],
+    size = "large",
+    srcs = ["utils/exploration/tests/test_curiosity.py"]
+)
 
 py_test(
     name = "test_explorations",
DDPG trainer:

@@ -5,7 +5,6 @@ from ray.rllib.agents.trainer import with_common_config
 from ray.rllib.agents.dqn.dqn import GenericOffPolicyTrainer
 from ray.rllib.agents.ddpg.ddpg_tf_policy import DDPGTFPolicy
 from ray.rllib.policy.policy import Policy
-from ray.rllib.utils.deprecation import DEPRECATED_VALUE
 from ray.rllib.utils.typing import TrainerConfigDict
 
 logger = logging.getLogger(__name__)

@@ -188,11 +187,6 @@ def validate_config(config: TrainerConfigDict) -> None:
             "'complete_episodes'. Setting batch_mode=complete_episodes.")
         config["batch_mode"] = "complete_episodes"
 
-    if config["simple_optimizer"] != DEPRECATED_VALUE or \
-            config["simple_optimizer"] is False:
-        logger.warning("`simple_optimizer` must be True (or unset) for DDPG!")
-        config["simple_optimizer"] = True
-
 
 def get_policy_class(config: TrainerConfigDict) -> Optional[Type[Policy]]:
     """Policy class picker function. Class is chosen based on DL-framework.
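The removed condition fires for any explicitly set value, including the required True, which is presumably the source of the spurious `simple_optimizer` warnings named in the commit title; the same block is dropped from SAC's validate_config below. A minimal sketch of that logic (not RLlib code; it assumes DEPRECATED_VALUE is RLlib's "not set by the user" sentinel):

# Sketch only: why the removed check warned even for valid settings.
DEPRECATED_VALUE = -1  # assumed stand-in for RLlib's deprecation sentinel

def old_check_fires(simple_optimizer):
    # The removed condition: any value other than the sentinel satisfies the
    # first clause, so even the required value True triggers the warning.
    return simple_optimizer != DEPRECATED_VALUE or simple_optimizer is False

for value in (DEPRECATED_VALUE, True, False):
    print(value, old_check_fires(value))
# -1 False    <- left unset: no warning
# True True   <- already True: warning fires anyway (spurious)
# False True  <- warning fires (the intended case)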
SAC trainer:

@@ -182,11 +182,6 @@ def validate_config(config: TrainerConfigDict) -> None:
     if config["grad_clip"] is not None and config["grad_clip"] <= 0.0:
         raise ValueError("`grad_clip` value must be > 0.0!")
 
-    if config["simple_optimizer"] != DEPRECATED_VALUE or \
-            config["simple_optimizer"] is False:
-        logger.warning("`simple_optimizer` must be True (or unset) for SAC!")
-        config["simple_optimizer"] = True
-
 
 def get_policy_class(config: TrainerConfigDict) -> Optional[Type[Policy]]:
     """Policy class picker function. Class is chosen based on DL-framework.
SAC test (TestSAC):

@@ -428,9 +428,9 @@ class TestSAC(unittest.TestCase):
                 check(
                     tf_var,
                     np.transpose(torch_var.detach().cpu()),
-                    rtol=0.1)
+                    atol=0.002)
             else:
-                check(tf_var, torch_var, rtol=0.1)
+                check(tf_var, torch_var, atol=0.002)
         # And alpha.
         check(policy.model.log_alpha,
               tf_weights["default_policy/log_alpha"])

@@ -445,9 +445,10 @@ class TestSAC(unittest.TestCase):
                 check(
                     tf_var,
                     np.transpose(torch_var.detach().cpu()),
-                    rtol=0.1)
+                    atol=0.002)
             else:
-                check(tf_var, torch_var, rtol=0.1)
+                check(tf_var, torch_var, atol=0.002)
         trainer.stop()
 
     def _get_batch_helper(self, obs_size, actions, batch_size):
         return SampleBatch({
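Switching check() from a relative to an absolute tolerance matters because many of the compared tf and torch weights sit close to zero, where a 10% relative band is vanishingly narrow. A short illustration, assuming check() compares arrays the way numpy.isclose does (|a - b| <= atol + rtol * |b|):

import numpy as np

# Two near-zero weights that differ only by small numerical noise.
a, b = 1e-4, 3e-4

# Relative tolerance alone: 10% of 3e-4 is 3e-5, far smaller than the gap.
print(np.isclose(a, b, rtol=0.1, atol=0.0))    # False -> flaky assertion
# An absolute tolerance of 0.002 accepts any gap below 0.002.
print(np.isclose(a, b, rtol=0.0, atol=0.002))  # True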
DynamicTFPolicy:

@@ -241,7 +241,7 @@ class DynamicTFPolicy(TFPolicy):
                 True, (), name="is_exploring")
 
         # Placeholder for `is_training` flag.
-        self._input_dict["is_training"] = self._get_is_training_placeholder()
+        self._input_dict.is_training = self._get_is_training_placeholder()
 
         # Multi-GPU towers do not need any action computing/exploration
         # graphs.

@@ -266,7 +266,7 @@ class DynamicTFPolicy(TFPolicy):
                     prev_reward_batch=self._input_dict.get(
                         SampleBatch.PREV_REWARDS),
                     explore=explore,
-                    is_training=self._input_dict["is_training"])
+                    is_training=self._input_dict.is_training)
             # Distribution generation is customized, e.g., DQN, DDPG.
             else:
                 if action_distribution_fn:

@@ -284,7 +284,7 @@ class DynamicTFPolicy(TFPolicy):
                             seq_lens=self._seq_lens,
                             explore=explore,
                             timestep=timestep,
-                            is_training=in_dict["is_training"])
+                            is_training=in_dict.is_training)
                     # Trying the old way (to stay backward compatible).
                     # TODO: Remove in future.
                     except TypeError as e:

@@ -301,7 +301,7 @@ class DynamicTFPolicy(TFPolicy):
                                 prev_reward_batch=in_dict.get(
                                     SampleBatch.PREV_REWARDS),
                                 explore=explore,
-                                is_training=in_dict["is_training"])
+                                is_training=in_dict.is_training)
                         else:
                             raise e
 
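The four one-line changes above all swap dict-style lookups of "is_training" for attribute access, which is presumably what silences the SampleBatch.is_training warning from the commit title. A minimal, self-contained sketch of that access-pattern difference (not RLlib's actual SampleBatch implementation):

import warnings

class Batch(dict):
    """Toy dict subclass that exposes a flag as an attribute and complains
    about the legacy key-style access."""

    def __init__(self, *args, is_training=False, **kwargs):
        super().__init__(*args, **kwargs)
        self.is_training = is_training

    def __getitem__(self, key):
        if key == "is_training":
            warnings.warn(
                "use batch.is_training instead of batch['is_training']",
                DeprecationWarning)
            return self.is_training
        return super().__getitem__(key)

b = Batch({"obs": [1, 2, 3]}, is_training=True)
print(b.is_training)     # attribute access: no warning
print(b["is_training"])  # key access: emits the DeprecationWarning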
@@ -379,6 +379,9 @@ class DynamicTFPolicy(TFPolicy):
                 self.config.get("num_multi_gpu_tower_stacks", 1))
             ]
 
+            # Initialize again after loss and tower init.
+            self.get_session().run(tf1.global_variables_initializer())
+
     @override(TFPolicy)
     @DeveloperAPI
     def copy(self,

@@ -693,9 +696,6 @@ class DynamicTFPolicy(TFPolicy):
             if (v not in self._state_inputs and v != self._seq_lens)
         }
 
-        # Initialize again after loss init.
-        self.get_session().run(tf1.global_variables_initializer())
-
     def _do_loss_init(self, train_batch: SampleBatch):
         loss = self._loss_fn(self, self.model, self.dist_class, train_batch)
         if self._stats_fn:
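Moving the global_variables_initializer() call so it runs after the multi-GPU tower stacks are built matters in TF1 graph mode, because the initializer op only covers variables that already exist when it is created and run. A standalone sketch of that behavior (plain TF1, not RLlib code):

import tensorflow.compat.v1 as tf1

tf1.disable_eager_execution()
sess = tf1.Session()

v1 = tf1.get_variable("v1", shape=(), initializer=tf1.zeros_initializer())
sess.run(tf1.global_variables_initializer())  # covers v1 only

v2 = tf1.get_variable("v2", shape=(), initializer=tf1.zeros_initializer())
print(sess.run(v1))    # 0.0
# sess.run(v2) here would raise FailedPreconditionError: v2 was created after
# the initializer ran, so it is still uninitialized.
sess.run(tf1.global_variables_initializer())  # run again, now covers v2
print(sess.run(v2))    # 0.0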
tuned_examples/cql/pendulum-cql.yaml:

@@ -6,7 +6,7 @@ pendulum-cql:
     env: Pendulum-v0
     run: CQL
     stop:
-        evaluation/episode_reward_mean: -600
+        evaluation/episode_reward_mean: -700
         timesteps_total: 100000
     config:
         # Works for both torch and tf.
tuned_examples/sac/pendulum-sac.yaml:

@@ -4,7 +4,7 @@ pendulum-sac:
     env: Pendulum-v0
     run: SAC
     stop:
-        episode_reward_mean: -500
+        episode_reward_mean: -600
         timesteps_total: 10000
     config:
         # Works for both torch and tf.

@@ -33,6 +33,6 @@ pendulum-sac:
         entropy_learning_rate: 0.0003
         num_workers: 0
         num_gpus: 0
-        clip_actions: False
+        clip_actions: false
         normalize_actions: true
         metrics_smoothing_episodes: 5
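Both tuned-example changes simply relax the reward threshold in the stop block. In Ray Tune, a dict-style stop criterion ends a trial as soon as any one of its entries is reached, so a looser reward bound lets the regression test finish within the timestep budget more reliably. A hedged sketch of roughly the equivalent Python call for the SAC example, using the Tune API of that Ray version (values taken from the YAML above):

from ray import tune

# Runs SAC on Pendulum-v0 and stops a trial when EITHER criterion is met,
# mirroring the stop: block of pendulum-sac.yaml after this change.
tune.run(
    "SAC",
    config={"env": "Pendulum-v0"},
    stop={
        "episode_reward_mean": -600,  # relaxed from -500
        "timesteps_total": 10000,
    },
)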
utils/exploration/tests/test_curiosity.py (TestCuriosity):

@@ -153,7 +153,7 @@ class TestCuriosity(unittest.TestCase):
         config["lr"] = 0.001
 
         num_iterations = 10
-        for fw in framework_iterator(config):
+        for _ in framework_iterator(config, frameworks=("tf", "torch")):
            # W/ Curiosity. Expect to learn something.
            config["exploration_config"] = {
                "type": "Curiosity",
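Restricting framework_iterator to ("tf", "torch") means the curiosity test body now runs once per explicitly listed framework rather than once for every framework the helper supports by default. A rough sketch of what such a helper does (not RLlib's implementation; the default framework list here is an assumption):

def framework_iterator(config, frameworks=("tf2", "tf", "tfe", "torch")):
    """Toy stand-in: set config["framework"] to each entry and yield it."""
    for fw in frameworks:
        config["framework"] = fw
        yield fw

config = {}
for _ in framework_iterator(config, frameworks=("tf", "torch")):
    # The test body sees only the tf and torch passes.
    print("running with framework =", config["framework"])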