From a6772285f853468508ea616fd23068037c128993 Mon Sep 17 00:00:00 2001 From: Valentin Boettcher Date: Sun, 28 Jul 2024 15:54:35 -0400 Subject: [PATCH] fix: handle newlines correctly and add some more logging now, we at least get notified if something doesn't match expectations --- kindle_fetch/kindle_fetch.py | 13 ++++++++++--- pyproject.toml | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/kindle_fetch/kindle_fetch.py b/kindle_fetch/kindle_fetch.py index 3bc7510..7c55d2b 100644 --- a/kindle_fetch/kindle_fetch.py +++ b/kindle_fetch/kindle_fetch.py @@ -44,7 +44,9 @@ class Options: def get_document_title(header_string): """Get the title of the document from the email header.""" - m = re.search(r'"(.*?)" from your Kindle', header_string) + m = re.search( + r'"(.*?)" from your Kindle', header_string.replace("\n", " ").replace("\r", "") + ) if not m: return None @@ -57,9 +59,11 @@ def get_download_link(text): Get the download link and whether the file is the full document or just `page` pages from the email body. """ - m = re.search(r"\[Download PDF\]\((.*?)\)", text) + m = re.search( + r"\[Download PDF\]\((.*?)\)", text.replace("\n", " ").replace("\r", "") + ) - if not m: + if not m: return None, None p = re.search(r"([0-9]+) page", text) @@ -164,11 +168,14 @@ async def wait_for_new_message(imap_client, options: Options): doc_title = get_document_title(head.as_string()) if doc_title is None: + logger.info(f"No document title found in '{head.as_string()}'.") continue link, page = get_download_link(str(body)) if link is None: + logger.info("No pdf download link found.") + logger.debug(str(body)) continue filename = f"{doc_title.replace(' ','')}" diff --git a/pyproject.toml b/pyproject.toml index d04174a..bb849d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "kindle_fetch" -version = "0.1.0" +version = "0.1.1" description = "" authors = ["Valentin Boettcher "] license = "GPL3"