From 0d5dd16f7fd54a8b758da7af93f50214a71669de Mon Sep 17 00:00:00 2001
From: Valentin Boettcher
Date: Sat, 8 Jun 2024 12:23:22 -0400
Subject: [PATCH] basic version

---
 .direnv/bin/nix-direnv-reload |  19 +++
 .envrc                        |   1 +
 .gitignore                    |   1 +
 fetch_latest_kindle.py        | 210 ++++++++++++++++++++++++++++++++++
 flake.lock                    | 175 ++++++++++++++++++++++++++++
 flake.nix                     |  55 +++++++++
 poetry.lock                   |  17 +++
 pyproject.toml                |  16 +++
 8 files changed, 494 insertions(+)
 create mode 100755 .direnv/bin/nix-direnv-reload
 create mode 100644 .envrc
 create mode 100644 .gitignore
 create mode 100644 fetch_latest_kindle.py
 create mode 100644 flake.lock
 create mode 100644 flake.nix
 create mode 100644 poetry.lock
 create mode 100644 pyproject.toml

diff --git a/.direnv/bin/nix-direnv-reload b/.direnv/bin/nix-direnv-reload
new file mode 100755
index 0000000..29d97de
--- /dev/null
+++ b/.direnv/bin/nix-direnv-reload
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+set -e
+if [[ ! -d "/home/hiro/.scripts/kindlehack" ]]; then
+  echo "Cannot find source directory; Did you move it?"
+  echo "(Looking for "/home/hiro/.scripts/kindlehack")"
+  echo 'Cannot force reload with this script - use "direnv reload" manually and then try again'
+  exit 1
+fi
+
+# rebuild the cache forcefully
+_nix_direnv_force_reload=1 direnv exec "/home/hiro/.scripts/kindlehack" true
+
+# Update the mtime for .envrc.
+# This will cause direnv to reload again - but without re-building.
+touch "/home/hiro/.scripts/kindlehack/.envrc"
+
+# Also update the timestamp of whatever profile_rc we have.
+# This makes sure that we know we are up to date.
+touch -r "/home/hiro/.scripts/kindlehack/.envrc" "/home/hiro/.scripts/kindlehack/.direnv"/*.rc
diff --git a/.envrc b/.envrc
new file mode 100644
index 0000000..3550a30
--- /dev/null
+++ b/.envrc
@@ -0,0 +1 @@
+use flake
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7ad6275
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/.direnv
diff --git a/fetch_latest_kindle.py b/fetch_latest_kindle.py
new file mode 100644
index 0000000..e2c41ea
--- /dev/null
+++ b/fetch_latest_kindle.py
@@ -0,0 +1,210 @@
+#! /usr/bin/env python
+"""Watch an IMAP folder for Kindle export mails and download the linked PDF."""
+import glob
+import os
+from pathlib import Path
+import re
+import time
+import shutil
+import urllib.request
+import asyncio
+from aioimaplib import aioimaplib
+from collections import namedtuple
+import re
+from asyncio import run, wait_for
+from collections import namedtuple
+from email.message import Message
+from email.parser import BytesHeaderParser, BytesParser
+from typing import Collection
+from contextlib import suppress
+
+KINDLE_DIR = Path.home() / "kindle_dump/"
+
+LATEST_PATH = KINDLE_DIR / "latest.pdf"
+
+# Environment variable that supplies the IMAP password, so the secret is
+# not stored in the source tree.
+# NOTE(review): a literal password used to be committed here - rotate it.
+PASSWORD_ENV_VAR = "KINDLE_IMAP_PASSWORD"
+
+
+def get_document_title(header_string):
+    """Return the quoted title preceding "from your Kindle", or None.
+
+    ``header_string`` is searched for ``"TITLE" from your Kindle``.
+    """
+    m = re.search(r'"(.*?)" from your Kindle', header_string)
+
+    if not m:
+        return None
+
+    return m.group(1)
+
+
+def get_download_link(text):
+    """Return ``(link, page)`` parsed from a mail body.
+
+    ``link`` is the target of the first ``[Download PDF](...)`` markdown link
+    and ``page`` the digits in front of the first " page" (a string, or None
+    if absent).  ``(None, None)`` is returned when there is no link.
+    """
+    m = re.search(r"\[Download PDF\]\((.*?)\)", text)
+
+    if not m:
+        return None, None
+
+    p = re.search(r"([0-9]+) page", text)
+    page = p.group(1) if p else None
+    return m.group(1), page
+
+
+ID_HEADER_SET = {
+    "Subject",
+}
+# The group names must match the .group("uid") / .group("seqnum") /
+# .group("flags") lookups in fetch_messages_headers.
+FETCH_MESSAGE_DATA_UID = re.compile(rb".*UID (?P<uid>\d+).*")
+FETCH_MESSAGE_DATA_SEQNUM = re.compile(rb"(?P<seqnum>\d+) FETCH.*")
+FETCH_MESSAGE_DATA_FLAGS = re.compile(rb".*FLAGS \((?P<flags>.*?)\).*")
+MessageAttributes = namedtuple("MessageAttributes", "uid flags sequence_number")
+
+
+async def fetch_messages_headers(imap_client: aioimaplib.IMAP4_SSL, max_uid: int):
+    """Fetch the ``ID_HEADER_SET`` headers of the messages after ``max_uid``.
+
+    Returns ``(new_max_uid, message_headers)``.  Both describe the last
+    returned message whose UID is greater than ``max_uid``; without such a
+    message (or when the server does not answer ``OK``) the result is
+    ``(max_uid, "")``.
+    """
+    response = await imap_client.uid(
+        "fetch",
+        "%d:*" % (max_uid + 1),
+        "(UID FLAGS BODY.PEEK[HEADER.FIELDS (%s)])" % " ".join(ID_HEADER_SET),
+    )
+    new_max_uid = max_uid
+    message_headers = ""
+    if response.result == "OK":
+        # Lines are consumed in groups of three: lines i and i + 2 carry the
+        # FETCH attributes, line i + 1 the raw header bytes.
+        for i in range(0, len(response.lines) - 1, 3):
+            fetch_command_without_literal = b"%s %s" % (
+                response.lines[i],
+                response.lines[i + 2],
+            )
+
+            uid = int(
+                FETCH_MESSAGE_DATA_UID.match(fetch_command_without_literal).group("uid")
+            )
+            flags = FETCH_MESSAGE_DATA_FLAGS.match(fetch_command_without_literal).group(
+                "flags"
+            )
+            seqnum = FETCH_MESSAGE_DATA_SEQNUM.match(
+                fetch_command_without_literal
+            ).group("seqnum")
+            # these attributes could be used for local state management
+            message_attrs = MessageAttributes(uid, flags, seqnum)
+
+            # uid fetch always includes the UID of the last message in the mailbox
+            # cf https://tools.ietf.org/html/rfc3501#page-61
+            if uid > max_uid:
+                message_headers = BytesHeaderParser().parsebytes(response.lines[i + 1])
+                new_max_uid = uid
+    else:
+        print("error %s" % response)
+    return new_max_uid, message_headers
+
+
+async def fetch_message_body(imap_client: aioimaplib.IMAP4_SSL, uid: int):
+    """Fetch message ``uid`` with ``BODY.PEEK[]`` and return it parsed."""
+    dwnld_resp = await imap_client.uid("fetch", str(uid), "BODY.PEEK[]")
+    return BytesParser().parsebytes(dwnld_resp.lines[1])
+
+
+async def wait_for_new_message(imap_client):
+    """Idle on the mailbox and download the PDF of every new Kindle mail.
+
+    Loops until cancelled or an exception escapes.  After each server push
+    containing an ``EXISTS`` line the newest message is examined; when its
+    headers name a document and its body has a download link, the PDF is
+    stored as ``LATEST_PATH`` and copied to a file named after the document.
+    """
+    persistent_max_uid = 1
+    persistent_max_uid, head = await fetch_messages_headers(
+        imap_client, persistent_max_uid
+    )
+    while True:
+        idle_task = await imap_client.idle_start(timeout=60)
+        msg = await imap_client.wait_server_push()
+        print(msg)
+        imap_client.idle_done()
+        await wait_for(idle_task, timeout=5)
+
+        for message in msg:
+            # NOTE(review): aioimaplib appears to return a str sentinel
+            # instead of a list of bytes lines when the idle timeout fires;
+            # str.endswith(bytes) would raise TypeError, so skip non-bytes.
+            if not isinstance(message, bytes):
+                continue
+
+            if message.endswith(b"EXISTS"):
+                persistent_max_uid, head = await fetch_messages_headers(
+                    imap_client, persistent_max_uid
+                )
+
+                if not head:
+                    continue
+
+                body = await fetch_message_body(imap_client, persistent_max_uid)
+
+                doc_title = get_document_title(head.as_string())
+
+                if doc_title is None:
+                    continue
+
+                link, page = get_download_link(str(body))
+
+                if link is None:
+                    continue
+
+                # The title comes from the mail, so keep it from introducing
+                # path components into the target file name.
+                filename = doc_title.replace(" ", "").replace("/", "_")
+
+                if page:
+                    filename += f"_{page}_pages"
+
+                filename += ".pdf"
+
+                print(f"Got '{doc_title}'")
+                KINDLE_DIR.mkdir(parents=True, exist_ok=True)
+                urllib.request.urlretrieve(link, LATEST_PATH)
+                shutil.copy(LATEST_PATH, KINDLE_DIR / filename)
+
+
+async def make_client(host, user, password):
+    """Connect to ``host`` over TLS, log in and select the "Kindle" folder."""
+    imap_client = aioimaplib.IMAP4_SSL(host=host)
+    await imap_client.wait_hello_from_server()
+    await imap_client.login(user, password)
+
+    await imap_client.select("Kindle")
+
+    return imap_client
+
+
+async def main():
+    """Log in, watch the mailbox until interrupted and log out again."""
+    password = os.environ.get(PASSWORD_ENV_VAR)
+    if not password:
+        raise SystemExit(f"Set {PASSWORD_ENV_VAR} to the IMAP password.")
+
+    client = await make_client("protagon.space", "hiro@protagon.space", password)
+    try:
+        await wait_for_new_message(client)
+    finally:
+        await client.logout()
+
+
+if __name__ == "__main__":
+    run(main())
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..04337bc
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,175 @@
+{
+  "nodes": {
+    "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1710146030,
+        "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "flake-utils_2": {
+      "inputs": {
+        "systems": "systems_2"
+      },
+      "locked": {
+        "lastModified": 1710146030,
+        "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "nix-github-actions": {
+      "inputs": {
+        "nixpkgs": [
+          "poetry2nix",
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1703863825,
+        "narHash": "sha256-rXwqjtwiGKJheXB43ybM8NwWB8rO2dSRrEqes0S7F5Y=",
+        "owner": "nix-community",
+        "repo": "nix-github-actions",
+        "rev": "5163432afc817cf8bd1f031418d1869e4c9d5547",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-community",
+        "repo": "nix-github-actions",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1717839683,
+        "narHash": "sha256-kC0eyEsfpXbtQ2Ee5kgjmLFSVsLgnqpj10LjEi+fK+g=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "3f3a01219b4a1b6ee63039a3aa63948fbf37c0dd",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-unstable-small",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "poetry2nix": {
+      "inputs": {
+        "flake-utils": "flake-utils_2",
+        "nix-github-actions": "nix-github-actions",
+        "nixpkgs": [
+          "nixpkgs"
+        ],
+        "systems": "systems_3",
+        "treefmt-nix": "treefmt-nix"
+      },
+      "locked": {
+        "lastModified": 1717774136,
+        "narHash": "sha256-comOhXDFUrbVba47gPenVBKy2foM3m3qOqpcP8umWDA=",
+        "owner": "nix-community",
+        "repo": "poetry2nix",
+        "rev": "370da3b6fefc6c11367463b68d010f9950aaa80c",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-community",
+        "repo": "poetry2nix",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "flake-utils": "flake-utils",
+        "nixpkgs": "nixpkgs",
+        "poetry2nix": "poetry2nix"
+      }
+    },
+    "systems": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    },
+    "systems_2": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    },
+    "systems_3": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "id": "systems",
+        "type": "indirect"
+      }
+    },
+    "treefmt-nix": {
+      "inputs": {
+        "nixpkgs": [
+          "poetry2nix",
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1717278143,
+        "narHash": "sha256-u10aDdYrpiGOLoxzY/mJ9llST9yO8Q7K/UlROoNxzDw=",
+        "owner": "numtide",
+        "repo": "treefmt-nix",
+        "rev": "3eb96ca1ae9edf792a8e0963cc92fddfa5a87706",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "treefmt-nix",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..ba31860
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,55 @@
+{
+  description = "A little python tool to automatically fetch my current kindle notes.";
+
+  inputs = {
+    flake-utils.url = "github:numtide/flake-utils";
+    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable-small";
+    poetry2nix = {
+      url = "github:nix-community/poetry2nix";
+      inputs.nixpkgs.follows = "nixpkgs";
+    };
+  };
+
+  outputs = inputs @ { self, nixpkgs, flake-utils, ... }:
+    flake-utils.lib.eachDefaultSystem (system:
+      let
+        pkgs = nixpkgs.legacyPackages.${system};
+        poetry2nix = inputs.poetry2nix.lib.mkPoetry2Nix { inherit pkgs; };
+      in
+      {
+        packages = {
+          kindleFetch = poetry2nix.mkPoetryApplication {
+            projectDir = self;
+            preferWheels = true;
+          };
+          default = self.packages.${system}.kindleFetch;
+        };
+
+        # Shell for app dependencies.
+        #
+        #     nix develop
+        #
+        # Use this shell for developing your app.
+        devShells.default = pkgs.mkShell {
+          inputsFrom = [ self.packages.${system}.kindleFetch ];
+          packages = with pkgs; [
+            ruff
+            pyright
+            python3Packages.jupyter
+          ];
+
+          shellHook = ''
+            export PYTHONPATH=$(pwd)/src:$PYTHONPATH
+          '';
+        };
+
+        # Shell for poetry.
+        #
+        #     nix develop .#poetry
+        #
+        # Use this shell for changes to pyproject.toml and poetry.lock.
+        devShells.poetry = pkgs.mkShell {
+          packages = [ pkgs.poetry ];
+        };
+      });
+}
diff --git a/poetry.lock b/poetry.lock
new file mode 100644
index 0000000..4718dd2
--- /dev/null
+++ b/poetry.lock
@@ -0,0 +1,17 @@
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+
+[[package]]
+name = "aioimaplib"
+version = "1.0.1"
+description = "Python asyncio IMAP4rev1 client library"
+optional = false
+python-versions = "*"
+files = [
+    {file = "aioimaplib-1.0.1-py3-none-any.whl", hash = "sha256:ebf8387a8febf4d4ae21b35a54cc7c9f4b77c3a7132e3e50c7d6a14993b07bf3"},
+    {file = "aioimaplib-1.0.1.tar.gz", hash = "sha256:287fd8a2386f2ff301ac4f8479633292dae80e223e8e40ce2f38038784822afa"},
+]
+
+[metadata]
+lock-version = "2.0"
+python-versions = "^3.11"
+content-hash = "2bd870880d7a21b2c10ae350af75db6f5287116caaa33d0f55d048c4c28dce36"
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..b7575d9
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,16 @@
+[tool.poetry]
+name = "kindlefetch"
+version = "0.1.0"
+description = ""
+authors = ["Valentin Boettcher "]
+license = "GPL3"
+readme = "README.md"
+
+[tool.poetry.dependencies]
+python = "^3.11"
+aioimaplib = "^1.0.1"
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"