[tune] Fix empty CSV headers on trial restart (#23860)

What: Only open (create) CSV files when actually reporting results.
Why: When trials crash before they report first (e.g. on init), they will have created an empty CSV file. When results are subsequently written, the CSV header is then missing.
This commit is contained in:
Kai Fricke 2022-04-12 21:05:29 +01:00 committed by GitHub
parent ff60ebd4b3
commit 4cb6205726
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 43 additions and 9 deletions

View file

@ -130,13 +130,22 @@ class CSVLogger(Logger):
"""
def _init(self):
self._initialized = False
def _maybe_init(self):
"""CSV outputted with Headers as first set of results."""
progress_file = os.path.join(self.logdir, EXPR_PROGRESS_FILE)
self._continuing = os.path.exists(progress_file)
self._file = open(progress_file, "a")
self._csv_out = None
if not self._initialized:
progress_file = os.path.join(self.logdir, EXPR_PROGRESS_FILE)
self._continuing = (
os.path.exists(progress_file) and os.path.getsize(progress_file) > 0
)
self._file = open(progress_file, "a")
self._csv_out = None
self._initialized = True
def on_result(self, result: Dict):
self._maybe_init()
tmp = result.copy()
if "config" in tmp:
del tmp["config"]
@ -151,11 +160,12 @@ class CSVLogger(Logger):
self._file.flush()
def flush(self):
if not self._file.closed:
if self._initialized and not self._file.closed:
self._file.flush()
def close(self):
self._file.close()
if self._initialized:
self._file.close()
class TBXLogger(Logger):
@ -556,20 +566,22 @@ class CSVLoggerCallback(LoggerCallback):
self._trial_files: Dict["Trial", TextIO] = {}
self._trial_csv: Dict["Trial", csv.DictWriter] = {}
def log_trial_start(self, trial: "Trial"):
def _setup_trial(self, trial: "Trial"):
if trial in self._trial_files:
self._trial_files[trial].close()
# Make sure logdir exists
trial.init_logdir()
local_file = os.path.join(trial.logdir, EXPR_PROGRESS_FILE)
self._trial_continue[trial] = os.path.exists(local_file)
self._trial_continue[trial] = (
os.path.exists(local_file) and os.path.getsize(local_file) > 0
)
self._trial_files[trial] = open(local_file, "at")
self._trial_csv[trial] = None
def log_trial_result(self, iteration: int, trial: "Trial", result: Dict):
if trial not in self._trial_files:
self.log_trial_start(trial)
self._setup_trial(trial)
tmp = result.copy()
tmp.pop("config", None)

View file

@ -81,6 +81,28 @@ class LoggerSuite(unittest.TestCase):
logger.on_trial_complete(3, [], t)
self._validate_csv_result()
def testCSVEmptyHeader(self):
"""Test that starting a trial twice does not lead to empty CSV headers.
In a previous bug, the CSV header was sometimes missing when a trial
crashed before reporting results. See
https://github.com/ray-project/ray/issues/15106
"""
config = {"a": 2, "b": 5, "c": {"c": {"D": 123}, "e": None}}
t = Trial(evaluated_params=config, trial_id="csv", logdir=self.test_dir)
logger = CSVLoggerCallback()
logger.on_trial_start(0, [], t)
logger.on_trial_start(0, [], t)
logger.on_trial_result(1, [], t, result(1, 5))
with open(os.path.join(self.test_dir, "progress.csv"), "rt") as f:
csv_contents = f.read()
csv_lines = csv_contents.split("\n")
# Assert header has been written to progress.csv
assert "training_iteration" in csv_lines[0]
def _validate_csv_result(self):
results = []
result_file = os.path.join(self.test_dir, EXPR_PROGRESS_FILE)