[tune] fix flaky test (#24037)

This commit is contained in:
xwjiang2010 2022-04-20 02:14:32 -07:00 committed by GitHub
parent 34fb092656
commit a34dcfce85
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 36 additions and 12 deletions

View file

@@ -76,7 +76,7 @@ def test_cluster_interrupt_searcher(start_connected_cluster, tmpdir, searcher):
# The trainable returns every 0.5 seconds, so this should not miss
# the checkpoint.
trials = []
for i in range(50):
for i in range(100):
if TrialRunner.checkpoint_exists(local_checkpoint_dir):
# Inspect the internal trialrunner
runner = TrialRunner(
@@ -105,7 +105,7 @@ def test_cluster_interrupt_searcher(start_connected_cluster, tmpdir, searcher):
register_trainable("trainable", MyTrainableClass)
reached = False
for i in range(50):
for i in range(100):
if TrialRunner.checkpoint_exists(local_checkpoint_dir):
# Inspect the internal trialrunner
runner = TrialRunner(

View file

@@ -233,14 +233,23 @@ EXPECTED_SORT_RESULT_DESC = """Number of trials: 5 (1 PENDING, 1 RUNNING, 3 TERM
VERBOSE_EXP_OUT_1 = "Number of trials: 3/3 (2 PENDING, 1 RUNNING)"
VERBOSE_EXP_OUT_2 = "Number of trials: 3/3 (3 TERMINATED)"
VERBOSE_TRIAL_NORM = (
"Trial train_xxxxx_00000 reported acc=5 with "
+ """parameters={'do': 'complete'}. This trial completed.
VERBOSE_TRIAL_NORM_1 = (
"Trial train_xxxxx_00000 reported acc=5 "
"with parameters={'do': 'complete'}. This trial completed.\n"
)
VERBOSE_TRIAL_NORM_2 = """
Trial train_xxxxx_00001 reported _metric=6 with parameters={'do': 'once'}.
Trial train_xxxxx_00001 completed. Last result: _metric=6
"""
VERBOSE_TRIAL_NORM_3 = """
Trial train_xxxxx_00002 reported acc=7 with parameters={'do': 'twice'}.
Trial train_xxxxx_00002 reported acc=8 with parameters={'do': 'twice'}. """
+ "This trial completed."
"""
VERBOSE_TRIAL_NORM_4 = (
"Trial train_xxxxx_00002 reported acc=8 "
"with parameters={'do': 'twice'}. This trial completed.\n"
)
VERBOSE_TRIAL_DETAIL = """+-------------------+----------+-------------------+----------+
@@ -602,7 +611,10 @@ class ProgressReporterTest(unittest.TestCase):
try:
self.assertNotIn(VERBOSE_EXP_OUT_1, output)
self.assertNotIn(VERBOSE_EXP_OUT_2, output)
self.assertNotIn(VERBOSE_TRIAL_NORM, output)
self.assertNotIn(VERBOSE_TRIAL_NORM_1, output)
self.assertNotIn(VERBOSE_TRIAL_NORM_2, output)
self.assertNotIn(VERBOSE_TRIAL_NORM_3, output)
self.assertNotIn(VERBOSE_TRIAL_NORM_4, output)
self.assertNotIn(VERBOSE_TRIAL_DETAIL, output)
except Exception:
print("*** BEGIN OUTPUT ***")
@@ -615,7 +627,10 @@ class ProgressReporterTest(unittest.TestCase):
try:
self.assertIn(VERBOSE_EXP_OUT_1, output)
self.assertIn(VERBOSE_EXP_OUT_2, output)
self.assertNotIn(VERBOSE_TRIAL_NORM, output)
self.assertNotIn(VERBOSE_TRIAL_NORM_1, output)
self.assertNotIn(VERBOSE_TRIAL_NORM_2, output)
self.assertNotIn(VERBOSE_TRIAL_NORM_3, output)
self.assertNotIn(VERBOSE_TRIAL_NORM_4, output)
self.assertNotIn(VERBOSE_TRIAL_DETAIL, output)
except Exception:
print("*** BEGIN OUTPUT ***")
@@ -628,7 +643,10 @@ class ProgressReporterTest(unittest.TestCase):
try:
self.assertIn(VERBOSE_EXP_OUT_1, output)
self.assertIn(VERBOSE_EXP_OUT_2, output)
self.assertIn(VERBOSE_TRIAL_NORM, output)
self.assertIn(VERBOSE_TRIAL_NORM_1, output)
self.assertIn(VERBOSE_TRIAL_NORM_2, output)
self.assertIn(VERBOSE_TRIAL_NORM_3, output)
self.assertIn(VERBOSE_TRIAL_NORM_4, output)
self.assertNotIn(VERBOSE_TRIAL_DETAIL, output)
except Exception:
print("*** BEGIN OUTPUT ***")
@@ -641,7 +659,10 @@ class ProgressReporterTest(unittest.TestCase):
try:
self.assertIn(VERBOSE_EXP_OUT_1, output)
self.assertIn(VERBOSE_EXP_OUT_2, output)
self.assertNotIn(VERBOSE_TRIAL_NORM, output)
self.assertNotIn(VERBOSE_TRIAL_NORM_1, output)
self.assertNotIn(VERBOSE_TRIAL_NORM_2, output)
self.assertNotIn(VERBOSE_TRIAL_NORM_3, output)
self.assertNotIn(VERBOSE_TRIAL_NORM_4, output)
self.assertIn(VERBOSE_TRIAL_DETAIL, output)
except Exception:
print("*** BEGIN OUTPUT ***")

View file

@@ -341,7 +341,10 @@ class RayTrialExecutorTest(unittest.TestCase):
os.environ["TUNE_FORCE_TRIAL_CLEANUP_S"] = "0"
self._simulate_starting_trial(trial)
self.assertEqual(Trial.RUNNING, trial.status)
time.sleep(1)
# This should be enough time for `trial._default_result_or_future`
# to return. Otherwise, PID won't show up in `trial.last_result`,
# which is asserted down below.
time.sleep(2)
print("Stop trial")
self.trial_executor.stop_trial(trial)
print("Start trial cleanup")