mirror of
https://github.com/vale981/kindle_fetch
synced 2025-03-04 08:31:38 -05:00
basic version
This commit is contained in:
commit
0d5dd16f7f
8 changed files with 456 additions and 0 deletions
19
.direnv/bin/nix-direnv-reload
Executable file
19
.direnv/bin/nix-direnv-reload
Executable file
|
@ -0,0 +1,19 @@
|
||||||
|
#!/usr/bin/env bash
# Force nix-direnv to rebuild the cached environment of this project.
set -e

# Project directory this reload script belongs to.
src_dir="/home/hiro/.scripts/kindlehack"

if [[ ! -d "$src_dir" ]]; then
  echo "Cannot find source directory; Did you move it?"
  echo "(Looking for $src_dir)"
  echo 'Cannot force reload with this script - use "direnv reload" manually and then try again'
  exit 1
fi

# rebuild the cache forcefully
_nix_direnv_force_reload=1 direnv exec "$src_dir" true

# Update the mtime for .envrc.
# This will cause direnv to reload again - but without re-building.
touch "$src_dir/.envrc"

# Also update the timestamp of whatever profile_rc we have.
# This makes sure that we know we are up to date.
touch -r "$src_dir/.envrc" "$src_dir/.direnv"/*.rc
|
1
.envrc
Normal file
1
.envrc
Normal file
|
@ -0,0 +1 @@
|
||||||
|
use flake
|
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
||||||
|
/.direnv
|
172
fetch_latest_kindle.py
Normal file
172
fetch_latest_kindle.py
Normal file
|
@ -0,0 +1,172 @@
|
||||||
|
#! /usr/bin/env python
|
||||||
|
import glob
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import shutil
|
||||||
|
import urllib.request
|
||||||
|
import asyncio
|
||||||
|
from aioimaplib import aioimaplib
|
||||||
|
from collections import namedtuple
|
||||||
|
import re
|
||||||
|
from asyncio import run, wait_for
|
||||||
|
from collections import namedtuple
|
||||||
|
from email.message import Message
|
||||||
|
from email.parser import BytesHeaderParser, BytesParser
|
||||||
|
from typing import Collection
|
||||||
|
from contextlib import suppress
|
||||||
|
|
||||||
|
# Directory that receives every downloaded Kindle PDF.
KINDLE_DIR = Path.home().joinpath("kindle_dump/")

# Fixed location that always holds the most recently downloaded PDF.
LATEST_PATH = KINDLE_DIR.joinpath("latest.pdf")
|
||||||
|
|
||||||
|
|
||||||
|
def get_document_title(header_string):
    """Extract the quoted document title from a Kindle notification header.

    Searches *header_string* for the first occurrence of
    ``"<title>" from your Kindle`` and returns ``<title>``.  Returns
    ``None`` when the text does not contain that pattern.
    """
    found = re.search(r'"(.*?)" from your Kindle', header_string)
    return found.group(1) if found else None
|
||||||
|
|
||||||
|
|
||||||
|
def get_download_link(text):
    """Locate the PDF download URL and the page count in a message text.

    The URL is taken from a markdown-style ``[Download PDF](<url>)`` link
    and the page count from the first ``<digits> page`` occurrence.

    Returns:
        ``(url, pages)`` where *pages* is the digit string or ``None`` if
        no page count is present; ``(None, None)`` when there is no
        download link at all.
    """
    link_match = re.search(r"\[Download PDF\]\((.*?)\)", text)
    if link_match is None:
        return None, None

    page_match = re.search(r"([0-9]+) page", text)
    if page_match is None:
        return link_match.group(1), None
    return link_match.group(1), page_match.group(1)
|
||||||
|
|
||||||
|
|
||||||
|
# LAST_LINK = None
|
||||||
|
# while True:
|
||||||
|
# current_link = monitor_kindle()
|
||||||
|
|
||||||
|
# if current_link != LAST_LINK and current_link is not None:
|
||||||
|
# LAST_LINK = current_link
|
||||||
|
# print("Downloading:", LAST_LINK)
|
||||||
|
#
|
||||||
|
|
||||||
|
# time.sleep(5)
|
||||||
|
|
||||||
|
|
||||||
|
# Header fields requested from the server for every new message.
ID_HEADER_SET = {"Subject"}

# Patterns that pull the individual attributes out of a FETCH response line.
FETCH_MESSAGE_DATA_UID = re.compile(rb".*UID (?P<uid>\d+).*")
FETCH_MESSAGE_DATA_SEQNUM = re.compile(rb"(?P<seqnum>\d+) FETCH.*")
FETCH_MESSAGE_DATA_FLAGS = re.compile(rb".*FLAGS \((?P<flags>.*?)\).*")

# Per-message attributes parsed from a FETCH response.
MessageAttributes = namedtuple("MessageAttributes", "uid flags sequence_number")
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_messages_headers(imap_client: aioimaplib.IMAP4_SSL, max_uid: int):
    """Fetch the identifying headers of messages newer than *max_uid*.

    Issues ``UID FETCH <max_uid + 1>:*`` for the header fields listed in
    ``ID_HEADER_SET`` and scans the response for messages whose UID is
    greater than *max_uid*.

    Args:
        imap_client: Connected client with a mailbox selected.
        max_uid: Highest UID that has already been processed.

    Returns:
        ``(new_max_uid, message_headers)``.  *new_max_uid* is the highest
        UID seen (or *max_uid* if nothing newer exists) and
        *message_headers* the parsed headers of that message.  When the
        command fails or no newer message exists, *message_headers* is
        the empty string.
    """
    response = await imap_client.uid(
        "fetch",
        "%d:*" % (max_uid + 1),
        "(UID FLAGS BODY.PEEK[HEADER.FIELDS (%s)])" % " ".join(ID_HEADER_SET),
    )
    new_max_uid = max_uid
    message_headers = ""

    if response.result != "OK":
        print("error %s" % response)
        return new_max_uid, message_headers

    lines = response.lines
    # Every message is expected as three consecutive lines (FETCH prefix,
    # header literal, closing part).  Stopping at len - 2 guarantees that
    # lines[i + 2] exists even if the response is not a multiple of three.
    for i in range(0, len(lines) - 2, 3):
        fetch_command_without_literal = b"%s %s" % (lines[i], lines[i + 2])

        uid_match = FETCH_MESSAGE_DATA_UID.match(fetch_command_without_literal)
        if uid_match is None:
            # Not a FETCH item carrying a UID; nothing to extract from it.
            continue
        uid = int(uid_match.group("uid"))

        # uid fetch always includes the UID of the last message in the mailbox
        # cf https://tools.ietf.org/html/rfc3501#page-61
        # so only messages strictly newer than what we already have count.
        if uid > new_max_uid:
            message_headers = BytesHeaderParser().parsebytes(lines[i + 1])
            new_max_uid = uid

    return new_max_uid, message_headers
|
||||||
|
|
||||||
|
|
||||||
|
async def fetch_message_body(imap_client: aioimaplib.IMAP4_SSL, uid: int):
    """Download the complete message with the given UID and parse it.

    The message is requested with ``BODY.PEEK[]``; the second line of the
    response is taken as the raw message and parsed into an
    ``email.message.Message``.
    """
    response = await imap_client.uid("fetch", str(uid), "BODY.PEEK[]")
    raw_message = response.lines[1]
    return BytesParser().parsebytes(raw_message)
|
||||||
|
|
||||||
|
|
||||||
|
async def wait_for_new_message(imap_client):
    """Watch the selected mailbox and download the PDF of each new Kindle mail.

    Loops forever: enters IMAP IDLE, waits for a server push and, for every
    ``EXISTS`` notification, fetches the newest message.  If its headers
    contain a Kindle document title and its body a ``[Download PDF](...)``
    link, the PDF is stored as ``LATEST_PATH`` and copied to
    ``KINDLE_DIR / <title>[_<pages>_pages].pdf``.

    Args:
        imap_client: Logged-in client with the mailbox already selected.
    """
    # The initial fetch only serves to advance the UID counter past the
    # messages that are already in the mailbox.
    persistent_max_uid = 1
    persistent_max_uid, head = await fetch_messages_headers(
        imap_client, persistent_max_uid
    )

    while True:
        idle_task = await imap_client.idle_start(timeout=60)
        msg = await imap_client.wait_server_push()
        print(msg)
        imap_client.idle_done()
        await wait_for(idle_task, timeout=5)

        # When the IDLE timeout expires aioimaplib hands back a plain string
        # sentinel instead of a list of byte lines.  Iterating it would call
        # str.endswith with a bytes argument and raise TypeError.
        if isinstance(msg, str):
            continue

        for message in msg:
            if not isinstance(message, (bytes, bytearray)):
                continue
            if not message.endswith(b"EXISTS"):
                continue

            persistent_max_uid, head = await fetch_messages_headers(
                imap_client, persistent_max_uid
            )
            if not head:
                continue

            doc_title = get_document_title(head.as_string())
            if doc_title is None:
                continue

            body = await fetch_message_body(imap_client, persistent_max_uid)
            link, page = get_download_link(str(body))
            if link is None:
                continue

            # The link comes from an e-mail body, so only plain web URLs are
            # followed (urlretrieve would also accept e.g. file:// URLs).
            if not link.startswith(("http://", "https://")):
                print(f"Ignoring unexpected download link: {link!r}")
                continue

            # The title comes from the mail subject; neutralise characters
            # that would let it escape KINDLE_DIR or break the file name.
            filename = re.sub(r"[/\\\x00]", "_", doc_title.replace(" ", ""))
            if page:
                filename += f"_{page}_pages"
            filename += ".pdf"

            print(f"Got '{doc_title}'")
            try:
                KINDLE_DIR.mkdir(parents=True, exist_ok=True)
                # Run the blocking download in a worker thread so the event
                # loop keeps servicing the IMAP connection.
                await asyncio.to_thread(
                    urllib.request.urlretrieve, link, LATEST_PATH
                )
                shutil.copy(LATEST_PATH, KINDLE_DIR / filename)
            except OSError as err:
                # URLError/HTTPError are OSError subclasses; a failed
                # download must not terminate the watcher.
                print(f"Download of '{doc_title}' failed: {err}")
|
||||||
|
|
||||||
|
# await asyncio.wait_for(idle_task, timeout=5)
|
||||||
|
# print("ending idle")
|
||||||
|
|
||||||
|
|
||||||
|
async def make_client(host, user, password, mailbox="Kindle"):
    """Open an authenticated IMAP-over-TLS session with a mailbox selected.

    Args:
        host: Host name of the IMAP server.
        user: Login name.
        password: Login password.
        mailbox: Name of the mailbox to select; defaults to ``"Kindle"``.

    Returns:
        The connected ``aioimaplib.IMAP4_SSL`` client.
    """
    imap_client = aioimaplib.IMAP4_SSL(host=host)
    await imap_client.wait_hello_from_server()
    await imap_client.login(user, password)
    await imap_client.select(mailbox)
    return imap_client
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Connect to the IMAP server and watch for new Kindle mails.

    Connection settings are read from the environment so that no secret
    has to live in the source tree:

    * ``KINDLE_IMAP_HOST``     (default ``protagon.space``)
    * ``KINDLE_IMAP_USER``     (default ``hiro@protagon.space``)
    * ``KINDLE_IMAP_PASSWORD`` (required)

    Raises:
        SystemExit: If ``KINDLE_IMAP_PASSWORD`` is unset or empty.
    """
    host = os.environ.get("KINDLE_IMAP_HOST", "protagon.space")
    user = os.environ.get("KINDLE_IMAP_USER", "hiro@protagon.space")
    password = os.environ.get("KINDLE_IMAP_PASSWORD")
    if not password:
        raise SystemExit(
            "KINDLE_IMAP_PASSWORD is not set; refusing to start without credentials."
        )

    client = await make_client(host, user, password)
    try:
        await wait_for_new_message(client)
    finally:
        # The watcher loops forever, so this is only reached on an error or
        # interrupt; log out so the session is not left dangling.
        await client.logout()


if __name__ == "__main__":
    asyncio.run(main())
|
175
flake.lock
generated
Normal file
175
flake.lock
generated
Normal file
|
@ -0,0 +1,175 @@
|
||||||
|
{
|
||||||
|
"nodes": {
|
||||||
|
"flake-utils": {
|
||||||
|
"inputs": {
|
||||||
|
"systems": "systems"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1710146030,
|
||||||
|
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"flake-utils_2": {
|
||||||
|
"inputs": {
|
||||||
|
"systems": "systems_2"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1710146030,
|
||||||
|
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "flake-utils",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nix-github-actions": {
|
||||||
|
"inputs": {
|
||||||
|
"nixpkgs": [
|
||||||
|
"poetry2nix",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1703863825,
|
||||||
|
"narHash": "sha256-rXwqjtwiGKJheXB43ybM8NwWB8rO2dSRrEqes0S7F5Y=",
|
||||||
|
"owner": "nix-community",
|
||||||
|
"repo": "nix-github-actions",
|
||||||
|
"rev": "5163432afc817cf8bd1f031418d1869e4c9d5547",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nix-community",
|
||||||
|
"repo": "nix-github-actions",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nixpkgs": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1717839683,
|
||||||
|
"narHash": "sha256-kC0eyEsfpXbtQ2Ee5kgjmLFSVsLgnqpj10LjEi+fK+g=",
|
||||||
|
"owner": "NixOS",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"rev": "3f3a01219b4a1b6ee63039a3aa63948fbf37c0dd",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "NixOS",
|
||||||
|
"ref": "nixos-unstable-small",
|
||||||
|
"repo": "nixpkgs",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"poetry2nix": {
|
||||||
|
"inputs": {
|
||||||
|
"flake-utils": "flake-utils_2",
|
||||||
|
"nix-github-actions": "nix-github-actions",
|
||||||
|
"nixpkgs": [
|
||||||
|
"nixpkgs"
|
||||||
|
],
|
||||||
|
"systems": "systems_3",
|
||||||
|
"treefmt-nix": "treefmt-nix"
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1717774136,
|
||||||
|
"narHash": "sha256-comOhXDFUrbVba47gPenVBKy2foM3m3qOqpcP8umWDA=",
|
||||||
|
"owner": "nix-community",
|
||||||
|
"repo": "poetry2nix",
|
||||||
|
"rev": "370da3b6fefc6c11367463b68d010f9950aaa80c",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nix-community",
|
||||||
|
"repo": "poetry2nix",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": {
|
||||||
|
"inputs": {
|
||||||
|
"flake-utils": "flake-utils",
|
||||||
|
"nixpkgs": "nixpkgs",
|
||||||
|
"poetry2nix": "poetry2nix"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systems": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1681028828,
|
||||||
|
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systems_2": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1681028828,
|
||||||
|
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"systems_3": {
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1681028828,
|
||||||
|
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
|
||||||
|
"owner": "nix-systems",
|
||||||
|
"repo": "default",
|
||||||
|
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"id": "systems",
|
||||||
|
"type": "indirect"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"treefmt-nix": {
|
||||||
|
"inputs": {
|
||||||
|
"nixpkgs": [
|
||||||
|
"poetry2nix",
|
||||||
|
"nixpkgs"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"locked": {
|
||||||
|
"lastModified": 1717278143,
|
||||||
|
"narHash": "sha256-u10aDdYrpiGOLoxzY/mJ9llST9yO8Q7K/UlROoNxzDw=",
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "treefmt-nix",
|
||||||
|
"rev": "3eb96ca1ae9edf792a8e0963cc92fddfa5a87706",
|
||||||
|
"type": "github"
|
||||||
|
},
|
||||||
|
"original": {
|
||||||
|
"owner": "numtide",
|
||||||
|
"repo": "treefmt-nix",
|
||||||
|
"type": "github"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"root": "root",
|
||||||
|
"version": 7
|
||||||
|
}
|
55
flake.nix
Normal file
55
flake.nix
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
{
  description = "A little python tool to automatically fetch my current kindle notes.";

  inputs = {
    flake-utils.url = "github:numtide/flake-utils";
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable-small";
    poetry2nix = {
      url = "github:nix-community/poetry2nix";
      inputs.nixpkgs.follows = "nixpkgs";
    };
  };

  outputs = inputs @ { self, nixpkgs, flake-utils, ... }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        pkgs = nixpkgs.legacyPackages.${system};
        poetry2nix = inputs.poetry2nix.lib.mkPoetry2Nix { inherit pkgs; };
      in
      {
        packages = {
          kindleFetch = poetry2nix.mkPoetryApplication {
            projectDir = self;
            preferWheels = true;
          };
          default = self.packages.${system}.kindleFetch;
        };

        # Shell for app dependencies.
        #
        #     nix develop
        #
        # Use this shell for developing your app.
        devShells.default = pkgs.mkShell {
          inputsFrom = [ self.packages.${system}.kindleFetch ];

          # The attribute must be spelled `packages`: mkShell does not know
          # `package`, so the tools listed under it never reach the shell.
          packages = with pkgs; [
            ruff
            pyright
            python3Packages.jupyter
          ];

          shellHook = ''
            export PYTHONPATH=$(pwd)/src:$PYTHONPATH
          '';
        };

        # Shell for poetry.
        #
        #     nix develop .#poetry
        #
        # Use this shell for changes to pyproject.toml and poetry.lock.
        devShells.poetry = pkgs.mkShell {
          packages = [ pkgs.poetry ];
        };
      });
}
|
17
poetry.lock
generated
Normal file
17
poetry.lock
generated
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aioimaplib"
|
||||||
|
version = "1.0.1"
|
||||||
|
description = "Python asyncio IMAP4rev1 client library"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "aioimaplib-1.0.1-py3-none-any.whl", hash = "sha256:ebf8387a8febf4d4ae21b35a54cc7c9f4b77c3a7132e3e50c7d6a14993b07bf3"},
|
||||||
|
{file = "aioimaplib-1.0.1.tar.gz", hash = "sha256:287fd8a2386f2ff301ac4f8479633292dae80e223e8e40ce2f38038784822afa"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[metadata]
|
||||||
|
lock-version = "2.0"
|
||||||
|
python-versions = "^3.11"
|
||||||
|
content-hash = "2bd870880d7a21b2c10ae350af75db6f5287116caaa33d0f55d048c4c28dce36"
|
16
pyproject.toml
Normal file
16
pyproject.toml
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
[tool.poetry]
|
||||||
|
name = "kindlefetch"
|
||||||
|
version = "0.1.0"
|
||||||
|
description = ""
|
||||||
|
authors = ["Valentin Boettcher <hiro@protagon.space>"]
|
||||||
|
license = "GPL3"
|
||||||
|
readme = "README.md"
|
||||||
|
|
||||||
|
[tool.poetry.dependencies]
|
||||||
|
python = "^3.11"
|
||||||
|
aioimaplib = "^1.0.1"
|
||||||
|
|
||||||
|
|
||||||
|
[build-system]
|
||||||
|
requires = ["poetry-core"]
|
||||||
|
build-backend = "poetry.core.masonry.api"
|
Loading…
Add table
Reference in a new issue