From 445400d727d2e462bd2db89e82c1ab568bbd46e4 Mon Sep 17 00:00:00 2001
From: Antoni Baum
Date: Fri, 10 Jun 2022 02:03:53 +0200
Subject: [PATCH] [CI] Print a summary of broken links in LinkCheck (#25634)

---
 doc/source/conf.py              |  5 ++
 doc/source/custom_directives.py | 95 +++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/doc/source/conf.py b/doc/source/conf.py
index 9527ec8ca..9c17ae6bc 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -306,3 +306,8 @@ def setup(app):
     app.connect("builder-inited", github_docs.write_new_docs)
     # Restore original file content after build
     app.connect("build-finished", github_docs.write_original_docs)
+
+    # Hook into the logger used by linkcheck to display a summary at the end.
+    linkcheck_summarizer = LinkcheckSummarizer()
+    app.connect("builder-inited", linkcheck_summarizer.add_handler_to_linkcheck)
+    app.connect("build-finished", linkcheck_summarizer.summarize)
diff --git a/doc/source/custom_directives.py b/doc/source/custom_directives.py
index fab2efe7b..f8629cdc1 100644
--- a/doc/source/custom_directives.py
+++ b/doc/source/custom_directives.py
@@ -6,6 +6,12 @@ import mock
 import sys
 from preprocess_github_markdown import preprocess_github_markdown_file
 
+from sphinx.util import logging as sphinx_logging
+import logging
+import logging.handlers
+from queue import Queue
+from sphinx.util.console import red  # type: ignore
+
 # Note: the scipy import has to stay here, it's used implicitly down the line
 import scipy.stats  # noqa: F401
 import scipy.linalg  # noqa: F401
@@ -15,6 +21,7 @@ __all__ = [
     "DownloadAndPreprocessEcosystemDocs",
     "mock_modules",
     "update_context",
+    "LinkcheckSummarizer",
 ]
 
 try:
@@ -261,3 +268,91 @@ class DownloadAndPreprocessEcosystemDocs:
 
     def __call__(self):
         self.write_new_docs()
+
+
+class _BrokenLinksQueue(Queue):
+    """Queue that keeps only the log records describing broken links.
+
+    The linkcheck builder logs the "(<docname>: line <n>)" prefix as a
+    separate record right before the status record of the same link, so
+    the latest prefix is held back and only enqueued when the status
+    record that follows it reports a broken link.
+    """
+
+    def __init__(self, maxsize: int = 0) -> None:
+        # Latest line-number record that has not been matched to a status yet.
+        self._last_line_no = None
+        # Flipped by the first record, i.e. once linkcheck actually logged.
+        self.used = False
+        super().__init__(maxsize)
+
+    def put(self, item: logging.LogRecord, block=True, timeout=None):
+        """Enqueue ``item`` if it reports a broken link, else discard it."""
+        self.used = True
+        message = item.getMessage()
+        # Same formatting as in sphinx.builders.linkcheck to avoid false
+        # positives if "broken" is in the url. The padding must match exactly:
+        # with colors enabled red() appends a reset code right after the text.
+        if red("broken    ") in message or "broken link:" in message:
+            line_no, self._last_line_no = self._last_line_no, None
+            if line_no is not None:
+                super().put(line_no, block=block, timeout=timeout)
+            return super().put(item, block=block, timeout=timeout)
+        # Line numbers are separate records. Any other record is the status
+        # of a working link and consumes the pending line number, so it
+        # cannot leak over to a later broken link.
+        self._last_line_no = item if ": line" in message else None
+
+
+class _QueueHandler(logging.handlers.QueueHandler):
+    """QueueHandler without modifying the record."""
+
+    def prepare(self, record: logging.LogRecord) -> logging.LogRecord:
+        # The stock prepare() formats the message and drops the args; keep
+        # the record untouched so it can be re-emitted as is in the summary.
+        return record
+
+
+class LinkcheckSummarizer:
+    """Hook into the logger used by linkcheck to display a summary at the end.
+
+    Connect ``add_handler_to_linkcheck`` to "builder-inited" and
+    ``summarize`` to "build-finished".
+    """
+
+    def __init__(self) -> None:
+        self.logger = None
+        self.queue_handler = None
+        self.log_queue = _BrokenLinksQueue()
+
+    def add_handler_to_linkcheck(self, *args, **kwargs):
+        """Adds a handler to the linkcheck logger."""
+        self.logger = sphinx_logging.getLogger("sphinx.builders.linkcheck")
+        self.queue_handler = _QueueHandler(self.log_queue)
+        if not self.logger.hasHandlers():
+            # If there are no handlers, add the one that would
+            # be used anyway.
+            self.logger.logger.addHandler(logging.lastResort)
+        self.logger.logger.addHandler(self.queue_handler)
+
+    def summarize(self, *args, **kwargs):
+        """Summarizes broken links.
+
+        Prints nothing unless the handler was installed and linkcheck logged
+        at least one record, so builds using other builders stay quiet.
+        """
+        if self.logger is None:
+            # "build-finished" fired without a preceding "builder-inited".
+            return
+        self.logger.logger.removeHandler(self.queue_handler)
+        if not self.log_queue.used:
+            return
+
+        self.logger.info("\nBROKEN LINKS SUMMARY:\n")
+        has_broken_links = not self.log_queue.empty()
+        while not self.log_queue.empty():
+            record: logging.LogRecord = self.log_queue.get()
+            self.logger.handle(record)
+
+        if not has_broken_links:
+            self.logger.info("No broken links found!")