basic version

This commit is contained in:
Valentin Boettcher 2024-06-08 12:23:22 -04:00
commit 0d5dd16f7f
No known key found for this signature in database
GPG key ID: E034E12B7AF56ACE
8 changed files with 456 additions and 0 deletions

19
.direnv/bin/nix-direnv-reload Executable file
View file

@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Force nix-direnv to rebuild its cached environment for this project.
set -e

project_dir="/home/hiro/.scripts/kindlehack"

# Bail out early when the project directory is no longer where we expect it.
if ! [[ -d "$project_dir" ]]; then
    echo "Cannot find source directory; Did you move it?"
    echo "(Looking for $project_dir)"
    echo 'Cannot force reload with this script - use "direnv reload" manually and then try again'
    exit 1
fi

# rebuild the cache forcefully
_nix_direnv_force_reload=1 direnv exec "$project_dir" true

# Bump the mtime of .envrc: direnv reloads once more, this time without
# re-building.
touch "$project_dir/.envrc"

# Give every profile rc the same timestamp as .envrc so it counts as
# up to date.
touch -r "$project_dir/.envrc" "$project_dir/.direnv"/*.rc

1
.envrc Normal file
View file

@ -0,0 +1 @@
use flake

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/.direnv

172
fetch_latest_kindle.py Normal file
View file

@ -0,0 +1,172 @@
#! /usr/bin/env python
import glob
import os
from pathlib import Path
import re
import time
import shutil
import urllib.request
import asyncio
from aioimaplib import aioimaplib
from collections import namedtuple
import re
from asyncio import run, wait_for
from collections import namedtuple
from email.message import Message
from email.parser import BytesHeaderParser, BytesParser
from typing import Collection
from contextlib import suppress
# Directory inside the user's home where fetched documents are stored.
KINDLE_DIR = Path.home().joinpath("kindle_dump/")
# Fixed path that always holds the most recently downloaded PDF.
LATEST_PATH = KINDLE_DIR.joinpath("latest.pdf")
def get_document_title(header_string):
    """Extract the quoted document title from a Kindle notification header.

    Searches ``header_string`` for text of the form
    ``"<title>" from your Kindle`` and returns ``<title>``. Returns ``None``
    when the pattern does not occur.
    """
    title_match = re.search(r'"(.*?)" from your Kindle', header_string)
    return title_match.group(1) if title_match else None
def get_download_link(text):
    """Find the PDF download link and the page count in a message body.

    Returns a ``(link, page)`` tuple. ``link`` is the target of the first
    ``[Download PDF](...)`` markdown-style link, and ``page`` is the digit
    string preceding the first occurrence of `` page`` (or ``None`` if there
    is none). When no download link is present the result is
    ``(None, None)``, regardless of any page count in the text.
    """
    link_match = re.search(r"\[Download PDF\]\((.*?)\)", text)
    if link_match is None:
        return None, None

    page_match = re.search(r"([0-9]+) page", text)
    page_count = None if page_match is None else page_match.group(1)
    return link_match.group(1), page_count
# LAST_LINK = None
# while True:
# current_link = monitor_kindle()
# if current_link != LAST_LINK and current_link is not None:
# LAST_LINK = current_link
# print("Downloading:", LAST_LINK)
#
# time.sleep(5)
# Header fields requested from the server for every message.
ID_HEADER_SET = {"Subject"}

# Patterns that pull individual attributes out of a FETCH response line.
FETCH_MESSAGE_DATA_UID = re.compile(rb".*UID (?P<uid>\d+).*")
FETCH_MESSAGE_DATA_SEQNUM = re.compile(rb"(?P<seqnum>\d+) FETCH.*")
FETCH_MESSAGE_DATA_FLAGS = re.compile(rb".*FLAGS \((?P<flags>.*?)\).*")

# Record type for the per-message attributes parsed from a FETCH response.
MessageAttributes = namedtuple(
    "MessageAttributes", ["uid", "flags", "sequence_number"]
)
async def fetch_messages_headers(imap_client: aioimaplib.IMAP4_SSL, max_uid: int):
    """Fetch the headers of messages whose UID is greater than ``max_uid``.

    Issues ``UID FETCH <max_uid + 1>:*`` asking for UID, FLAGS and the header
    fields named in ``ID_HEADER_SET``.

    Args:
        imap_client: Client on which the ``UID FETCH`` command is issued.
        max_uid: Highest UID already processed by the caller.

    Returns:
        A ``(new_max_uid, message_headers)`` tuple. ``new_max_uid`` is the UID
        of the last returned message that is newer than ``max_uid`` (or
        ``max_uid`` itself if there is none). ``message_headers`` holds the
        parsed headers of that message, or ``""`` when no newer message was
        found or the server did not answer ``OK``.
    """
    response = await imap_client.uid(
        "fetch",
        "%d:*" % (max_uid + 1),
        "(UID FLAGS BODY.PEEK[HEADER.FIELDS (%s)])" % " ".join(ID_HEADER_SET),
    )

    new_max_uid = max_uid
    message_headers = ""

    if response.result != "OK":
        print("error %s" % response)
        return new_max_uid, message_headers

    lines = response.lines
    # Assumes every message contributes three lines (FETCH start, header
    # literal, closing part) followed by one trailing status line - TODO
    # confirm against the server in use.
    for i in range(0, len(lines) - 1, 3):
        if i + 2 >= len(lines):
            # Incomplete trailing group: nothing sensible left to parse.
            break

        fetch_command_without_literal = b"%s %s" % (lines[i], lines[i + 2])
        uid_match = FETCH_MESSAGE_DATA_UID.match(fetch_command_without_literal)
        if uid_match is None:
            # Not a FETCH line carrying a UID; skip it instead of crashing.
            continue
        uid = int(uid_match.group("uid"))

        # uid fetch always includes the UID of the last message in the mailbox
        # cf https://tools.ietf.org/html/rfc3501#page-61
        if uid > max_uid:
            message_headers = BytesHeaderParser().parsebytes(lines[i + 1])
            new_max_uid = uid

    return new_max_uid, message_headers
async def fetch_message_body(imap_client: aioimaplib.IMAP4_SSL, uid: int):
    """Fetch the message with the given UID and return it parsed.

    Issues ``UID FETCH <uid> BODY.PEEK[]`` and feeds the second line of the
    response to ``BytesParser``; the resulting message object is returned.
    """
    response = await imap_client.uid("fetch", str(uid), "BODY.PEEK[]")
    raw_message = response.lines[1]
    return BytesParser().parsebytes(raw_message)
def _kindle_filename(doc_title, page):
    """Build the archive file name for a document title and optional page count."""
    # The title comes from an e-mail subject, so treat it as untrusted:
    # replace path separators and NUL bytes so the name stays inside
    # KINDLE_DIR.
    stem = re.sub(r"[/\\\x00]", "_", doc_title.replace(" ", ""))
    if page:
        stem += f"_{page}_pages"
    return f"{stem}.pdf"


async def wait_for_new_message(imap_client):
    """Watch the selected mailbox and download every new Kindle document.

    Records the current highest UID, then loops forever: enters IDLE, waits
    for a server push, leaves IDLE and, for every pushed line ending in
    ``EXISTS``, fetches the newest message. When its headers carry a Kindle
    document title and its body a download link, the PDF is downloaded to
    ``LATEST_PATH`` and copied into ``KINDLE_DIR`` under a name derived from
    the title and page count.

    Args:
        imap_client: Logged-in client with the mailbox already selected.

    This coroutine never returns normally.
    """
    persistent_max_uid = 1
    persistent_max_uid, _ = await fetch_messages_headers(
        imap_client, persistent_max_uid
    )

    while True:
        idle_task = await imap_client.idle_start(timeout=60)
        msg = await imap_client.wait_server_push()
        print(msg)
        imap_client.idle_done()
        await wait_for(idle_task, timeout=5)

        # NOTE(review): aioimaplib appears to deliver a plain-string sentinel
        # instead of a list of byte lines when the idle timeout elapses.
        # Iterating that string and calling .endswith(b"EXISTS") would raise
        # TypeError, so skip it and re-enter IDLE.
        if isinstance(msg, str):
            continue

        for message in msg:
            if not isinstance(message, (bytes, bytearray)):
                continue
            if not message.endswith(b"EXISTS"):
                continue

            persistent_max_uid, head = await fetch_messages_headers(
                imap_client, persistent_max_uid
            )
            if not head:
                continue

            body = await fetch_message_body(imap_client, persistent_max_uid)
            doc_title = get_document_title(head.as_string())
            if doc_title is None:
                continue

            link, page = get_download_link(str(body))
            if link is None:
                continue

            filename = _kindle_filename(doc_title, page)
            print(f"Got '{doc_title}'")

            # Make sure the target directory exists before the first download.
            KINDLE_DIR.mkdir(parents=True, exist_ok=True)
            urllib.request.urlretrieve(link, LATEST_PATH)
            shutil.copy(LATEST_PATH, KINDLE_DIR / filename)
async def make_client(host, user, password, mailbox="Kindle"):
    """Connect to ``host`` over IMAP4-SSL, log in and select ``mailbox``.

    Args:
        host: IMAP server host name.
        user: Login name.
        password: Login password.
        mailbox: Mailbox to select after login; defaults to ``"Kindle"``.

    Returns:
        The connected client with ``mailbox`` selected.

    Raises:
        RuntimeError: If the server does not answer ``OK`` to the login or
            to the mailbox selection.
    """
    imap_client = aioimaplib.IMAP4_SSL(host=host)
    await imap_client.wait_hello_from_server()

    # Fail here with a clear message rather than on a later command.
    login_response = await imap_client.login(user, password)
    if login_response.result != "OK":
        raise RuntimeError(
            f"IMAP login failed for {user!r}: {login_response.lines}"
        )

    select_response = await imap_client.select(mailbox)
    if select_response.result != "OK":
        raise RuntimeError(
            f"Cannot select mailbox {mailbox!r}: {select_response.lines}"
        )

    return imap_client
async def _main() -> None:
    """Connect with credentials from the environment and watch the mailbox.

    Reads ``KINDLE_IMAP_HOST`` and ``KINDLE_IMAP_USER`` (both optional, with
    defaults) and ``KINDLE_IMAP_PASSWORD`` (required) from the environment.

    Raises:
        SystemExit: If ``KINDLE_IMAP_PASSWORD`` is unset or empty.
    """
    host = os.environ.get("KINDLE_IMAP_HOST", "protagon.space")
    user = os.environ.get("KINDLE_IMAP_USER", "hiro@protagon.space")
    password = os.environ.get("KINDLE_IMAP_PASSWORD")
    if not password:
        raise SystemExit(
            "KINDLE_IMAP_PASSWORD is not set; refusing to start without credentials"
        )

    client = await make_client(host, user, password)
    try:
        await wait_for_new_message(client)
    finally:
        # wait_for_new_message only ends via an exception or cancellation;
        # log out in either case.
        await client.logout()


if __name__ == "__main__":
    # SECURITY: the password used to be hard-coded on this line and is part of
    # the repository history - rotate it and supply the new one through
    # KINDLE_IMAP_PASSWORD.
    asyncio.run(_main())

175
flake.lock generated Normal file
View file

@ -0,0 +1,175 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1710146030,
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"flake-utils_2": {
"inputs": {
"systems": "systems_2"
},
"locked": {
"lastModified": 1710146030,
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nix-github-actions": {
"inputs": {
"nixpkgs": [
"poetry2nix",
"nixpkgs"
]
},
"locked": {
"lastModified": 1703863825,
"narHash": "sha256-rXwqjtwiGKJheXB43ybM8NwWB8rO2dSRrEqes0S7F5Y=",
"owner": "nix-community",
"repo": "nix-github-actions",
"rev": "5163432afc817cf8bd1f031418d1869e4c9d5547",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "nix-github-actions",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1717839683,
"narHash": "sha256-kC0eyEsfpXbtQ2Ee5kgjmLFSVsLgnqpj10LjEi+fK+g=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "3f3a01219b4a1b6ee63039a3aa63948fbf37c0dd",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-unstable-small",
"repo": "nixpkgs",
"type": "github"
}
},
"poetry2nix": {
"inputs": {
"flake-utils": "flake-utils_2",
"nix-github-actions": "nix-github-actions",
"nixpkgs": [
"nixpkgs"
],
"systems": "systems_3",
"treefmt-nix": "treefmt-nix"
},
"locked": {
"lastModified": 1717774136,
"narHash": "sha256-comOhXDFUrbVba47gPenVBKy2foM3m3qOqpcP8umWDA=",
"owner": "nix-community",
"repo": "poetry2nix",
"rev": "370da3b6fefc6c11367463b68d010f9950aaa80c",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "poetry2nix",
"type": "github"
}
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs",
"poetry2nix": "poetry2nix"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
},
"systems_2": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
},
"systems_3": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"id": "systems",
"type": "indirect"
}
},
"treefmt-nix": {
"inputs": {
"nixpkgs": [
"poetry2nix",
"nixpkgs"
]
},
"locked": {
"lastModified": 1717278143,
"narHash": "sha256-u10aDdYrpiGOLoxzY/mJ9llST9yO8Q7K/UlROoNxzDw=",
"owner": "numtide",
"repo": "treefmt-nix",
"rev": "3eb96ca1ae9edf792a8e0963cc92fddfa5a87706",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "treefmt-nix",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

55
flake.nix Normal file
View file

@ -0,0 +1,55 @@
{
  description = "A little python tool to automatically fetch my current kindle notes.";

  inputs = {
    flake-utils.url = "github:numtide/flake-utils";
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable-small";
    poetry2nix = {
      url = "github:nix-community/poetry2nix";
      # Build poetry2nix against the same nixpkgs as this flake.
      inputs.nixpkgs.follows = "nixpkgs";
    };
  };

  outputs = inputs @ { self, nixpkgs, flake-utils, ... }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = nixpkgs.legacyPackages.${system};
        poetry2nix = inputs.poetry2nix.lib.mkPoetry2Nix { inherit pkgs; };
      in
      {
        packages = {
          # The application, built from pyproject.toml / poetry.lock.
          kindleFetch = poetry2nix.mkPoetryApplication {
            projectDir = self;
            preferWheels = true;
          };
          default = self.packages.${system}.kindleFetch;
        };

        # Shell for app dependencies.
        #
        #     nix develop
        #
        # Use this shell for developing your app.
        devShells.default = pkgs.mkShell {
          inputsFrom = [ self.packages.${system}.kindleFetch ];
          # The attribute must be `packages`: mkShell does not recognise
          # `package`, so the tools would not be added to the shell.
          packages = with pkgs; [
            ruff
            pyright
            python3Packages.jupyter
          ];
          shellHook = ''
            export PYTHONPATH=$(pwd)/src:$PYTHONPATH
          '';
        };

        # Shell for poetry.
        #
        #     nix develop .#poetry
        #
        # Use this shell for changes to pyproject.toml and poetry.lock.
        devShells.poetry = pkgs.mkShell {
          packages = [ pkgs.poetry ];
        };
      });
}

17
poetry.lock generated Normal file
View file

@ -0,0 +1,17 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]]
name = "aioimaplib"
version = "1.0.1"
description = "Python asyncio IMAP4rev1 client library"
optional = false
python-versions = "*"
files = [
{file = "aioimaplib-1.0.1-py3-none-any.whl", hash = "sha256:ebf8387a8febf4d4ae21b35a54cc7c9f4b77c3a7132e3e50c7d6a14993b07bf3"},
{file = "aioimaplib-1.0.1.tar.gz", hash = "sha256:287fd8a2386f2ff301ac4f8479633292dae80e223e8e40ce2f38038784822afa"},
]
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "2bd870880d7a21b2c10ae350af75db6f5287116caaa33d0f55d048c4c28dce36"

16
pyproject.toml Normal file
View file

@ -0,0 +1,16 @@
# Poetry project metadata for the Kindle fetch tool.
[tool.poetry]
name = "kindlefetch"
version = "0.1.0"
description = ""
authors = ["Valentin Boettcher <hiro@protagon.space>"]
# NOTE(review): "GPL3" is not an SPDX identifier - confirm whether
# "GPL-3.0-only" or "GPL-3.0-or-later" is intended.
license = "GPL3"
# NOTE(review): confirm that README.md exists next to this file.
readme = "README.md"
# Runtime dependencies.
[tool.poetry.dependencies]
python = "^3.11"
aioimaplib = "^1.0.1"
# Build backend: poetry-core.
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"