From fe24620afcbdd6b7d6e25d3baecbae9231c31003 Mon Sep 17 00:00:00 2001 From: Valentin Boettcher Date: Mon, 12 Aug 2019 23:02:00 +0200 Subject: [PATCH] Generating methods works. --- Makefile | 2 + generate-api.ros | 11 +++++ scrape.lisp | 107 +++++++++++++++++++++++++++++++++++++---------- utils.lisp | 1 + 4 files changed, 98 insertions(+), 23 deletions(-) create mode 100644 Makefile create mode 100755 generate-api.ros diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b4bbf27 --- /dev/null +++ b/Makefile @@ -0,0 +1,2 @@ +build generate-api.ros: + ros build generate-api.ros diff --git a/generate-api.ros b/generate-api.ros new file mode 100755 index 0000000..02497bf --- /dev/null +++ b/generate-api.ros @@ -0,0 +1,11 @@ +#!/bin/sh +#|-*- mode:lisp -*-|# +#| +exec ros -Q -- "$0" "$@" +|# + +(load "./utils.lisp") +(load "./scrape.lisp") +(in-package :space.protagon.cl-telegram-scrape) +(defun main (out-file &optional (api-url *url*)) + (scrape-to-disk :out-file out-file :url api-url)) diff --git a/scrape.lisp b/scrape.lisp index a249d56..8a3993b 100644 --- a/scrape.lisp +++ b/scrape.lisp @@ -5,19 +5,22 @@ ;; Load the api spec (defvar *url* "https://core.telegram.org/bots/api") -(defvar *request* (dex:get *url*)) -(defvar *parsed-content* (plump:parse *request*)) +(defvar *request* "") ; response body, bound by scrape-to-disk +(defvar *parsed-content* nil) ; plump parse of *request*, bound by scrape-to-disk +(defvar *out-package* :space.protagon.cl-telegram) ; package written into the generated in-package form +(defvar *out-file* "out.lisp") ;; Unimportant categories -(defconstant *unimportant-categories* #("Recent Changes" - "Authorizing your bot" - "Making requests" - "Getting updates" - "Available Types")) +(defparameter unimportant-categories* #("Recent Changes" + "Authorizing your bot" + "Making requests" + "Getting updates" + "Available Types" + "Payments")) ; defparameter, not defconstant: a vector literal is not EQL across reloads -(defconstant *method-categories* #("Available methods" - "Updating messages" - "Stickers")) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Structures ; 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defstruct tg-param (name "" :type string) @@ -34,7 +37,6 @@ (defun param->keyword (param) (-> (tg-param-name param) (string-upcase) (make-keyword))) - (defstruct (tg-method (:constructor create-tg-method (name parameters doc anchor))) (name "" :type string) @@ -42,34 +44,61 @@ (doc "" :type string) (anchor "" :type string)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Scraping ; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + (defun find-categories () "Returns an alist of categories from the telegram api spec." (let ((cat-data (remove-if #'(lambda (el) - (find (lquery-funcs:text el) *unimportant-categories* :test 'string-equal)) + (find (lquery-funcs:text el) unimportant-categories* :test 'string-equal)) (lquery:$ *parsed-content* "#dev_page_content h3")))) (map 'list #'(lambda (el) (cons (lquery-funcs:text el) el)) cat-data))) (defun parse-parameters (param-table) "Creates a vector of th-parameter from an apropriate table element." 
+ (->> (lquery:$ param-table "tr") - (map 'vector #'(lambda (el) - (lquery:$ el "td" (text)))) - (remove-if #'(lambda (el) (not (= (length el) 4)))) - (map 'vector #'make-tg-param-from-vec)) - ) + (map 'vector #'(lambda (el) + (lquery:$ el "td" (text)))) + (remove-if #'(lambda (el) (not (= (length el) 4)))) + (map 'vector #'make-tg-param-from-vec))) (defun h4->tg-method (h4) (declare (type plump-dom:element h4)) + (if (not (lquery:$ h4 (is "h4"))) + (return-from h4->tg-method nil)) + (let* ((name (lquery:$1 h4 (text))) (anchor (lquery:$1 h4 "a" (attr :href))) (doc-elt (lquery:$1 h4 (next))) - (doc (lquery:$1 doc-elt (text))) - (param-elt (lquery:$1 doc-elt (next))) - (params (parse-parameters param-elt))) - (print doc-elt) - (create-tg-method name params doc anchor))) + (param-elt (lquery:$ h4 (next-until "table") (next)))) + (if (and (lquery:$ h4 (next) (is "p")) + (or (lquery:$ doc-elt (next) (is "table")) (lquery:$ doc-elt (next) (next) (is "table")))) + (let ((doc (lquery:$1 doc-elt (text))) + (params (parse-parameters param-elt))) + (create-tg-method name params doc anchor)) + nil))) ; heading is not followed by a <p> with a table one or two siblings later + +(defun parse-categories (categories) + "Parses the `h4` siblings following each category heading into `tg-method` structs, returning an alist of (name . list of parsed methods)." 
+ (mapcar #'(lambda (it) + (let ((name (car it)) + (element (cdr it)) + (parsed nil)) + (do ((el (lquery:$1 element (next)) (lquery:$1 el (next)))) + ((or (not (lquery:$1 el (next))) (lquery:$ el (is "h3")))) + (when (lquery:$ el (is "h4")) + (let ((meth (h4->tg-method el))) + (when meth (push meth parsed))))) ; reuse METH rather than parsing the heading a second time + (cons name (nreverse parsed)))) + categories)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Code Generator ; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defun separate-params (params) "Separates a sequence of `tg-param` into (required optional)" @@ -82,6 +111,10 @@ :initial-value (list nil nil) :from-end t)) +(defun param->arg (param) + "Converts a TG-PARAM into a symbol to be used as argument." + (-> (tg-param-name param) (snake->symbol))) + (defun tg-method->function (method) "Creates a function for use in `cl-telegram-bot` from a cl-method-object." (let* ((name (tg-method-name method)) @@ -89,7 +122,7 @@ (params (separate-params (tg-method-parameters method))) (req-args (first params)) (opt-args (second params))) - `(defun ,name-sym (bot ,@(map 'list #'param->arg req-args) &key ,@(map 'list #'param->arg opt-args)) + `(defun ,name-sym (bot ,@(map 'list #'param->arg req-args) ,@(if opt-args `(&key ,@(map 'list #'param->arg opt-args)))) ,(format nil "~a~a~%~a" *url* (tg-method-anchor method) (tg-method-doc method)) (let ((options (list @@ -103,3 +136,31 @@ (nconc options (list (cons ,(param->keyword param) ,(param->arg param)))))) opt-args) (make-request bot ,name options))))) + + +(defun write-file-header (stream) + (write `(in-package ,*out-package*) :stream stream)) + +(defun print-methods (parsed-cats stream) + "Takes parsed categories and writes each method to STREAM as a function definition." 
+ (dolist (item parsed-cats) + (let ((name (car item)) + (methods (cdr item))) + (format stream "~%;----~a----~%" name) + (dolist (method methods) + (write (tg-method->function method) :stream stream) + (format stream "~%~%"))))) + +(defun generate-and-write-functions () + "Discovers and generates methods from the telegram api as functions and writes them to a file." + (with-open-file (out *out-file* :direction :output :if-exists :supersede) + (write-file-header out) + (-> (find-categories) (parse-categories) (print-methods out)))) + +(defun scrape-to-disk (&key (url *url*) (out-file *out-file*) (out-package *out-package*)) + "Main entry. Makes the web request and scrapes the telegram api docs." + (let* ((*request* (dex:get url)) + (*parsed-content* (plump:parse *request*)) + (*out-package* out-package) + (*out-file* out-file)) + (generate-and-write-functions))) diff --git a/utils.lisp b/utils.lisp index 6eab7a1..bd8d987 100644 --- a/utils.lisp +++ b/utils.lisp @@ -1,3 +1,4 @@ +(ql:quickload '(:alexandria :cl-arrows :cl-json :cl-ppcre)) (defpackage :space.protagon.cl-telegram-scrape.utils (:use :common-lisp :alexandria :cl-arrows) (:export :lispify