(in-package :space.protagon.cl-telegram-scrape) ;; Load the api spec (defvar *url* "https://core.telegram.org/bots/api") (defvar *request* "") (defvar *parsed-content* nil) (defvar *out-package* :space.protagon.cl-telegram) (defvar *out-file* "out/out.lisp") ;; Unimportant categories (defconstant +unimportant-categories+ #("Recent Changes" "Authorizing your bot" "Making requests" "Getting updates" "Payments")) (defconstant +type-map+ '(("Integer" . integer) ("String" . string) ("Boolean" . boolean))) (defconstant +blacklist+ #("Formatting options" "Inline mode methods" "CallbackGame") "Headers not to be mistaken for methods or types.") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Structures ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defstruct tg-param (name "" :type string) (type nil :type (or list symbol)) (optional t :type boolean) (desc "" :type string)) (defun param->keyword (param) (-> (tg-param-name param) (string-upcase) (make-keyword))) (defstruct (tg-object (:constructor create-tg-object (name parameters doc anchor type))) (name "" :type string) (type "" :type keyword) (parameters nil :type vector) (doc "" :type string) (anchor "" :type string)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Scraping ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defun find-categories () "Returns an alist of categories from the telegram api spec." (let ((cat-data (remove-if #'(lambda (el) (find (lquery-funcs:text el) +unimportant-categories+ :test 'string-equal)) (lquery:$ *parsed-content* "#dev_page_content h3")))) (map 'list #'(lambda (el) (cons (lquery-funcs:text el) el)) cat-data))) ;; TODO: MAYBE REMOVE (defun tg-type->lisp-type (type-str) (let ((types (ppcre:split " or " type-str))) (mapcar #'(lambda (single-type) (let* ((is-array (search "Array of " single-type)) (type (assoc single-type +type-map+ :test #'string=))) (if is-array nil ;todo support arrays (if type (cdr type) (camel->symbol single-type))))) types))) (defun parse-parameters (param-table) "Creates a vector of th-parameter from an apropriate table element (description of a method)." (->> (lquery:$ param-table "tr") (map 'vector #'(lambda (el) (lquery:$ el "td" (text)))) (remove-if #'(lambda (el) (not (= (length el) 4)))) (map 'vector #'(lambda (el) (match el ((array :rank 1 :contents (name type optional doc)) (make-tg-param :name name :type (tg-type->lisp-type type) :optional (string= "Optional" optional) :desc doc))))))) (defun parse-fields (field-table) "Creates a vector of th-parameter from an apropriate table element (description of a method)." (->> (lquery:$ field-table "tr") (map 'vector #'(lambda (el) (lquery:$ el "td" (text)))) (remove-if #'(lambda (el) (not (= (length el) 3)))) (map 'vector #'(lambda (el) (match el ((array :rank 1 :contents (name type desc)) (multiple-value-bind (doc optional) (ppcre:regex-replace "Optional\\. " desc "") (make-tg-param :name name :type (tg-type->lisp-type type) :optional optional :desc doc)))))))) (defun detect-api-type (table) "Detects the type of the declaration in the api. 3 collumns => field, 4 collumns => parameter." (case (lquery:$ table "tr" (first) (children) (length)) (3 :object) (4 :method) (otherwise :method))) (defun parse-h4 (h4) "Parses an H4 into a method or a struct." (declare (type plump-dom:element h4)) (let ((name (lquery:$1 h4 (text)))) (when (or (not (and (lquery:$ h4 (is "h4")) (lquery:$ h4 (next) (is "p")))) (find name +blacklist+ :test #'string=)) (return-from parse-h4 nil)) (let* ((anchor (lquery:$1 h4 "a" (attr :href))) (doc-elt (lquery:$1 h4 (next))) (doc (lquery:$1 doc-elt (text)))) (h4->tg-object h4 name anchor doc)))) (defun has-table-p (h4) "Checks if there is a corresponding table to a h4." (emptyp (lquery:$ h4 (next-until "table") (filter "h4")))) (defun h4->tg-object (h4 name anchor doc) (declare (type plump-dom:element h4)) (let* ((table (lquery:$ h4 (next-until "table") (next))) (type (detect-api-type table)) (params (if (has-table-p h4) (case type (:method (parse-parameters table)) (:object (parse-fields table))) #()))) (create-tg-object name params doc anchor type))) (defun parse-categories (categories) "Parses the given categorues into a `tg-method` returning an alist of (name . (vektor of parsed))." (mapcar #'(lambda (it) (let ((name (car it)) (element (cdr it)) (parsed nil)) (do ((el (lquery:$1 element (next)) (lquery:$1 el (next)))) ((or (not (lquery:$1 el (next))) (lquery:$ el (is "h3")))) (when (lquery:$ el (is "h4")) (let ((meth (parse-h4 el))) (when meth (push meth parsed))))) (cons name (nreverse parsed)))) categories)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Code Generator ; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defun separate-params (params) "Separates a sequence of `tg-param` into (required optional)" (reduce (lambda (a b) (if (tg-param-optional a) (push a (second b)) (push a (first b))) b) params :initial-value (list nil nil) :from-end t)) (defun param->arg (param) "Converts a TG-PARAM into a symbol to be used as argument.n" (-> (tg-param-name param) (snake->symbol))) (defun tg-object->lisp (object) (let* ((name (tg-object-name object)) (name-sym (camel->symbol name)) (params (tg-object-parameters object)) (sep-params (separate-params params)) (req-args (first sep-params)) (opt-args (second sep-params)) (type (tg-object-type object)) (docstring (make-docstring-from-tg-object object))) (case type (:method (tg-object->function name name-sym req-args opt-args docstring)) (:object (tg-object->clos name name-sym params docstring))))) (defun make-docstring-from-tg-object (object) (format nil "~a~a~%~a" *url* (tg-object-anchor object) (tg-object-doc object))) (defun make-argument-list (req-args opt-args) `(,@(map 'list #'param->arg req-args) ,@(if opt-args `(&key ,@(map 'list #'param->arg opt-args))))) (defun tg-object->function (name name-sym req-args opt-args docstring) "Creates a function for use in `cl-telegram-bot` from a tg-object." `(defun ,name-sym (bot ,@(make-argument-list req-args opt-args)) ,docstring ,@(mapcar #'(lambda (opt) (when (tg-param-type opt) `(check-type ,(param->arg opt) (or ,@(tg-param-type opt))))) req-args) (let ((options (list ,@(mapcar #'(lambda (opt) `(cons ,(param->keyword opt) ,(param->arg opt))) req-args)))) ,@(mapcar #'(lambda (param) `(when ,(param->arg param) (nconc options (list (cons ,(param->keyword param) ,(param->arg param)))))) opt-args) (make-request bot ,name options)))) ;; TODO: convert name to kw, symbol directly in class (defun tg-object->clos (name name-sym params docstring) "Creates clos object from a tg-object." `(defclass ,name-sym () ((type-name :allocation :class :reader name :initform ,name) ,@(map 'list #'(lambda (param) (let* ((pname-sym (param->arg param)) (pname-kw (make-keyword pname-sym)) (optional (tg-param-optional param)) (type (tg-param-type param)) (doc (tg-param-desc param))) `(,pname-sym :initarg ,pname-kw ,@(when optional '(:initform nil)) :accessor ,pname-sym :type (or ,@type) :documentation ,doc))) params)) (:documentation ,docstring))) (defun write-file-header (stream) (write `(in-package ,*out-package*) :stream stream)) (defun print-objects (parsed-cats stream) "Takes parsed categories and prints them out to functions." (dolist (item parsed-cats) (let ((name (car item)) (objects (cdr item))) (format stream "~%;----~a----~%" name) (dolist (object objects) (write (tg-object->lisp object) :stream stream) ; nicer with generics (format stream "~%~%") )))) (defun generate-and-write-functions () "Discovers and generates methods from the telegram api as funcions and writes them to a file." (with-open-file (out *out-file* :direction :output :if-exists :supersede) (write-file-header out) (-> (find-categories) (parse-categories) (print-objects out)))) (defun scrape-to-disk (&key (url *url*) (out-file *out-file*) (out-package *out-package*)) "Main entry. Makes the web request and scrapes the telegram api docs." (let* ((*request* (dex:get url)) (*parsed-content* (plump:parse *request*)) (*out-package* out-package) (*out-file* out-file)) (generate-and-write-functions)))