melpazoid/melpazoid.py
2020-03-01 12:51:00 -08:00

654 lines
24 KiB
Python

# -*- coding: utf-8 -*-
"""
For checking MELPA recipe pull-requests, building docker containers
that run the checks against the package pointed to by the recipe, and
for running a handful of other miscellaneous checks that are easier to
write here than in elisp.
Test this file:
pytest --doctest-modules
Use this file a script:
python <this_file>.py
"""
from __future__ import print_function
import functools
import glob
import io # noqa: F401 -- used by doctests
import os
import random
import re
import requests
import subprocess
import sys
import tempfile
import time
from typing import Iterator, TextIO, Tuple
DEBUG = False # eagerly load installed packages, etc.
_RETURN_CODE = 0
# define the colors of the report (or none), per https://no-color.org
# https://misc.flogisoft.com/bash/tip_colors_and_formatting
NO_COLOR = os.environ.get('NO_COLOR')
CLR_OFF = '' if NO_COLOR else '\033[0m'
CLR_ERROR = '' if NO_COLOR else '\033[31m'
CLR_WARN = '' if NO_COLOR else '\033[33m'
CLR_INFO = '' if NO_COLOR else '\033[32m'
CLR_ULINE = '' if NO_COLOR else '\033[4m'
GITHUB_API = 'https://api.github.com/repos'
MELPA_PR = r'https://github.com/melpa/melpa/pull/([0-9]+)'
MELPA_PULL_API = f"{GITHUB_API}/melpa/melpa/pulls"
MELPA_RECIPES = f"{GITHUB_API}/melpa/melpa/contents/recipes"
# Valid licenses and their names according to the GitHub API
# TODO: complete this list!
VALID_LICENSES_GITHUB = {
'Apache License 2.0',
'GNU Affero General Public License v3.0',
'GNU General Public License v2.0',
'GNU General Public License v3.0',
'GNU Lesser General Public License v3.0',
'ISC License',
'MIT License',
'The Unlicense',
}
def run_checks(
recipe: str, # e.g. of the form (shx :repo ...)
elisp_dir: str, # where the package is on this machine
clone_address: str = None, # optional repo address
pr_data: dict = None, # optional data from the PR
):
"""Entrypoint for running all checks."""
return_code(0)
if not validate_recipe(recipe):
return
files: list = _files_in_recipe(recipe, elisp_dir)
subprocess.check_output(['rm', '-rf', '_elisp'])
os.makedirs('_elisp')
for ii, recipe_file in enumerate(files):
subprocess.check_output(['cp', '-r', recipe_file, '_elisp/'])
files[ii] = os.path.join('_elisp', os.path.basename(recipe_file))
_write_requirements(files, recipe)
check_containerized_build(_package_name(recipe))
check_license(files, elisp_dir, clone_address)
check_packaging(files, recipe)
print_related_packages(recipe) # could throw ConnectionError
print_details(recipe, files, pr_data, clone_address)
def return_code(return_code: int = None) -> int:
"""
Return (and optionally set) the current return code.
If return_code matches env var EXPECT_ERROR, return 0 --
this is useful for running CI checks on melpazoid itself.
"""
global _RETURN_CODE
if return_code is not None:
_RETURN_CODE = return_code
expect_error = int(os.environ.get('EXPECT_ERROR', 0))
return 0 if _RETURN_CODE == expect_error else _RETURN_CODE
def validate_recipe(recipe: str) -> bool:
tokenized_recipe = _tokenize_lisp_list(recipe)
valid = (
tokenized_recipe[0] == '('
and tokenized_recipe[-1] == ')'
and len([pp for pp in tokenized_recipe if pp == '('])
== len([pp for pp in tokenized_recipe if pp == ')'])
)
if not valid:
_fail(f"Recipe '{recipe}' appears to be invalid")
return valid
def _note(message: str, color: str = None, highlight: str = None):
"""Print a note, possibly in color, possibly highlighting specific text."""
color = color or ''
if highlight:
print(re.sub(f"({highlight})", f"{color}\\g<1>{CLR_OFF}", message))
else:
print(f"{color}{message}{CLR_OFF}")
def _fail(message: str, color: str = CLR_ERROR, highlight: str = None):
_note(message, color, highlight)
return_code(2)
def check_containerized_build(package_name):
print(f"Building container for {package_name}... 🐳")
output = subprocess.check_output(['make', 'test', f"PACKAGE_NAME={package_name}"])
for output_line in output.decode().strip().split('\n'):
# byte-compile-file writes ":Error: ", package-lint ": error: "
if ':Error: ' in output_line or ': error: ' in output_line:
_fail(output_line, highlight=r' ?[Ee]rror:')
elif ':Warning: ' in output_line or ': warning: ' in output_line:
_note(output_line, CLR_WARN, highlight=r' ?[Ww]arning:')
elif output_line.startswith('### '):
_note(output_line, CLR_INFO)
else:
print(output_line)
def _files_in_recipe(recipe: str, elisp_dir: str) -> list:
files: list = subprocess.check_output(['find', elisp_dir]).decode().split()
recipe_tokens: list = _tokenize_lisp_list(recipe)
if ':files' in recipe_tokens:
files_inc, files_exc = _apply_recipe(recipe, elisp_dir)
else:
files_inc, files_exc = _apply_default_recipe(elisp_dir)
return list(set(files) & set(files_inc) - set(files_exc))
def _tokenize_lisp_list(recipe: str) -> list:
"""
>>> _tokenize_lisp_list('(shx :repo "riscy/shx-for-emacs" :fetcher github)')
['(', 'shx', ':repo', '"riscy/shx-for-emacs"', ':fetcher', 'github', ')']
"""
recipe = ' '.join(recipe.split())
recipe = recipe.replace('(', ' ( ')
recipe = recipe.replace(')', ' ) ')
tokenized_lisp_list: list = recipe.split()
return tokenized_lisp_list
def _apply_recipe(recipe: str, elisp_dir: str) -> Tuple[list, list]:
# TODO: this could possibly use the MELPA machinery instead
files_inc: list = []
files_exc: list = []
excluding = False
nesting = 0
recipe_tokens = _tokenize_lisp_list(recipe)
for token in recipe_tokens[recipe_tokens.index(':files') + 1 :]:
if token == '(':
nesting += 1
elif token == ')':
excluding = False
nesting -= 1
if not nesting:
break
elif token == ':defaults':
include, exclude = _apply_default_recipe(elisp_dir)
files_inc += include
files_exc += exclude
elif token == ':exclude':
excluding = True
elif excluding:
files_exc += glob.glob(os.path.join(elisp_dir, token.strip('"')))
else:
files_inc += glob.glob(os.path.join(elisp_dir, token.strip('"')))
return files_inc, files_exc
def _apply_default_recipe(elisp_dir: str) -> Tuple[list, list]:
# TODO: this could possibly use the MELPA machinery instead
files_inc = glob.glob(os.path.abspath(os.path.join(elisp_dir, '*.el')))
files_exc = (
glob.glob(os.path.abspath(os.path.join(elisp_dir, 'test.el')))
+ glob.glob(os.path.abspath(os.path.join(elisp_dir, 'tests.el')))
+ glob.glob(os.path.abspath(os.path.join(elisp_dir, '*-test.el')))
+ glob.glob(os.path.abspath(os.path.join(elisp_dir, '*-tests.el')))
)
return files_inc, files_exc
@functools.lru_cache()
def _package_name(recipe: str) -> str:
"""
>>> _package_name('(shx :files ...)')
'shx'
"""
return _tokenize_lisp_list(recipe)[1] if recipe else ''
def _main_file(recipe_files: list, recipe: str) -> str:
"""
>>> _main_file(['_elisp/a.el', '_elisp/b.el'], '(a :files ...)')
'_elisp/a.el'
>>> _main_file(['a.el', 'b.el'], '(b :files ...)')
'b.el'
>>> _main_file(['a.el', 'a-pkg.el'], '(a :files ...)')
'a-pkg.el'
"""
package_name = _package_name(recipe)
try:
return next(
el
for el in sorted(recipe_files)
if os.path.basename(el) == f"{package_name}-pkg.el"
or os.path.basename(el) == f"{package_name}.el"
)
except StopIteration:
return ''
def _write_requirements(recipe_files: list, recipe: str):
"""Create a little elisp script that Docker will run as setup."""
with open('_requirements.el', 'w') as requirements_el:
requirements_el.write(
'''
(require 'package)
(package-initialize)
(setq package-archives nil)
;; FIXME: is it still necessary to use GNU elpa mirror?
(add-to-list 'package-archives '("gnu" . "http://mirrors.163.com/elpa/gnu/"))
(add-to-list 'package-archives '("melpa" . "http://melpa.org/packages/"))
(add-to-list 'package-archives '("org" . "http://orgmode.org/elpa/"))
(package-refresh-contents)
(package-reinstall 'package-lint)
'''
)
for req in _requirements(recipe_files, recipe):
if req != 'emacs':
# TODO check if we need to reinstall outdated package?
# e.g. (package-installed-p 'map (version-to-list "2.0"))
requirements_el.write(f"(package-install '{req})\n")
if DEBUG:
requirements_el.write(f"(require '{req})\n")
def _requirements(
recipe_files: list, recipe: str = None, with_versions: bool = False
) -> set:
reqs: list = []
if recipe:
main_file = _main_file(recipe_files, recipe)
if main_file:
recipe_files = [main_file]
for filename in recipe_files:
if not os.path.isfile(filename):
continue
elif filename.endswith('-pkg.el'):
with open(filename, 'r') as pkg_el:
reqs.append(_reqs_from_pkg_el(pkg_el))
elif filename.endswith('.el'):
with open(filename, 'r') as el_file:
reqs.append(_reqs_from_el_file(el_file))
reqs = sum([req.split('(')[1:] for req in reqs], [])
reqs = [req.replace(')', '').strip().lower() for req in reqs if req.strip()]
if with_versions:
return set(reqs)
return {req.split('"')[0].strip() for req in reqs}
def _reqs_from_pkg_el(pkg_el: TextIO) -> str:
"""
>>> _reqs_from_pkg_el(io.StringIO('''(define-package "x" "1.2" "A pkg." '((emacs "31.5") (xyz "123.4"))'''))
'( ( emacs "31.5" ) ( xyz "123.4" ) )'
"""
reqs = pkg_el.read()
reqs = ' '.join(_tokenize_lisp_list(reqs))
reqs = reqs[reqs.find('( (') :]
reqs = reqs[: reqs.find(') )') + 3]
return reqs
def _reqs_from_el_file(el_file: TextIO) -> str:
"""
>>> _reqs_from_el_file(io.StringIO(';; x y z\\n ;; package-requires: ((emacs "24.4"))'))
';; package-requires: ((emacs "24.4"))'
"""
for line in el_file.readlines():
if re.match('[; ]*Package-Requires:', line, re.I):
return line.strip()
return ''
def check_license(recipe_files: list, elisp_dir: str, clone_address: str = None):
_note('\n### License ###\n', CLR_INFO)
repo_licensed = False
if clone_address:
repo_licensed = _check_license_github_api(clone_address)
if not repo_licensed:
repo_licensed = _check_license_file(elisp_dir)
individual_files_licensed = _check_license_in_files(recipe_files)
if not repo_licensed and not individual_files_licensed:
_fail('- Use a GPL-compatible license.')
print(
' See: https://www.gnu.org/licenses/license-list.en.html#GPLCompatibleLicenses'
)
def _check_license_github_api(clone_address: str) -> bool:
"""
>>> _check_license_github_api('https://github.com/magit/magit.git')
- GitHub API found `GNU General Public License v3.0`
True
"""
# TODO: gitlab also has a license API -- support it?
# e.g. https://gitlab.com/api/v4/users/jagrg/projects ?
if clone_address.endswith('.git'):
clone_address = clone_address[:-4]
match = re.search(r'github.com/([^"]*)', clone_address, flags=re.I)
if not match:
return False
repo_suffix = match.groups()[0].rstrip('/')
license_ = requests.get(f"{GITHUB_API}/{repo_suffix}").json().get('license')
if license_ and license_.get('name') in VALID_LICENSES_GITHUB:
print(f"- GitHub API found `{license_.get('name')}`")
return True
if license_:
_note(f"- GitHub API found `{license_.get('name')}`", CLR_WARN)
if license_.get('name') == 'Other':
_fail(' - Use a GitHub-compatible format for your license file.')
print(' See: https://github.com/licensee/licensee')
return False
_fail('- Use a LICENSE file that GitHub can detect (e.g. no markup).')
print(' See: https://github.com/licensee/licensee')
return False
def _check_license_file(elisp_dir: str) -> bool:
"""Scan any COPYING or LICENSE files."""
for license_ in glob.glob(os.path.join(elisp_dir, '*')):
license_ = os.path.basename(license_)
if license_.startswith('LICENSE') or license_.startswith('COPYING'):
with open(os.path.join(elisp_dir, license_)) as stream:
print(f"- {license_} excerpt: `{stream.readline().strip()}...`")
return True
_fail('- No LICENSE or COPYING file found in repository')
return False
def _check_license_in_files(elisp_files: list) -> bool:
"""Check the elisp files themselves."""
individual_files_licensed = True
for elisp_file in elisp_files:
if not elisp_file.endswith('.el'):
continue
license_ = _check_license_in_file(elisp_file)
basename = os.path.basename(elisp_file)
if not license_:
_fail(f"- {basename} has no detectable license text")
individual_files_licensed = False
else:
print(f"- {basename} has {license_} license text")
return individual_files_licensed
def _check_license_in_file(elisp_file: str) -> str:
"""Scan the elisp file for some recognized license text."""
# TODO: this function could be more comprehensive; don't use grep
licenses = {
'GPL': r'GNU.* General Public License',
'ISC': r'Permission to use, copy, modify, and/or',
'MIT': r'Permission is hereby granted, free of charge, to any person',
'Unlicense': 'This is free and unencumbered software released into the public domain',
}
for license_key, license_txt in licenses.items():
try:
subprocess.check_output(['grep', '-i', license_txt, elisp_file])
return license_key
except subprocess.CalledProcessError:
pass
return ''
def check_packaging(recipe_files: list, recipe: str):
if ':branch "master"' in recipe:
_fail('- No need to specify `:branch "master"` in recipe')
if 'gitlab' in recipe and (':repo' not in recipe or ':url' in recipe):
_fail('- With the GitLab fetcher you MUST set :repo and you MUST NOT set :url')
# MELPA looks for a -pkg.el file and if it finds it, it uses that. It is
# okay to have a -pkg.el file, but doing it incorrectly can break the build.
# If it can't find a -pkg.el file, it looks in <your-package-name>.el. If
# you put your package info in your main file then we can use package-lint
# to catch mistakes and enforce consistency.
if not _main_file(recipe_files, recipe):
package_name = _package_name(recipe)
_fail(f"- There is no .el file matching the package name '{package_name}'")
# In fact, if you have different Package-Requires among your source files,
# the Package-Requires that aren't in <your-package-name>.el are ignored,
# and there is at least one package in MELPA that accidentally does this.
all_requirements = set(_requirements(recipe_files, recipe))
for el in recipe_files:
el_requirements = set(_requirements([el]))
if el_requirements and el_requirements != all_requirements:
basename = os.path.basename(el)
_fail(f"- Package-Requires mismatch between {basename} and another file!")
def print_details(
recipe: str, recipe_files: list, pr_data: dict = None, clone_address: str = None
):
_note('\n### Details ###\n', CLR_INFO)
print(f"- `{recipe}`")
if ':files' in recipe:
_note(' - Prefer the default recipe, especially for small packages', CLR_WARN)
print('- Package-Requires: ', end='')
if _requirements(recipe_files):
print(', '.join(req for req in _requirements(recipe_files, with_versions=True)))
else:
print('n/a')
for recipe_file in recipe_files:
if os.path.isdir(recipe_file):
print(f"- {recipe_file}: (directory)")
continue
with open(recipe_file) as stream:
try:
header = stream.readline()
header = header.split('-*-')[0]
header = header.split(' --- ')[1]
header = header.strip()
except (IndexError, UnicodeDecodeError):
header = '(no elisp header)'
print(
f"- {CLR_ULINE}{recipe_file}{CLR_OFF}"
f" ({_check_license_in_file(recipe_file) or 'unknown license'})"
+ (f": {header}" if header else "")
)
if recipe_file.endswith('-pkg.el'):
_note(f" - Consider excluding this file; MELPA will create one", CLR_WARN)
if pr_data and clone_address:
# Check the maintainer
print(f"- PR by {pr_data['user']['login']}: {clone_address}")
if pr_data['user']['login'].lower() not in clone_address.lower():
_note(" - NOTE: Repo and recipe owner don't match", CLR_WARN)
def print_related_packages(recipe: str):
"""
Print list of potentially related packages.
Could throw ConnectionError.
"""
# TODO: can this be made more useful?
package_tokens = {
token for token in _package_name(recipe).split('-') if token != 'mode'
}
related_packages = [
f"- https://melpa.org/#/{_melpa_archive()[other_package_tokens]}"
for other_package_tokens in _melpa_archive()
if package_tokens & other_package_tokens
]
if related_packages:
_note('\n### Similarly named packages ###\n', CLR_INFO)
print('\n'.join(related_packages[:5]))
@functools.lru_cache()
def _melpa_archive() -> dict:
return {
frozenset(package.split('-')): package
for package in requests.get('http://melpa.org/archive.json').json()
}
def yes_p(text: str) -> bool:
while True:
keep = input(f"{text} [y/n] ").strip().lower()
if keep.startswith('y') or keep.startswith('n'):
break
return not keep.startswith('n')
def check_remote_package(recipe: str = ''):
"""Check a remotely-hosted package."""
name = _tokenize_lisp_list(recipe)[1]
with tempfile.TemporaryDirectory() as elisp_dir:
clone_address = _clone_address(name, recipe)
_clone(clone_address, _branch(recipe), into=elisp_dir)
run_checks(recipe, elisp_dir, clone_address)
def _clone(repo: str, branch: str, into: str):
"""
Raises RuntimeError if repo doesn't exist, and
subprocess.CalledProcessError if git clone fails.
"""
print(f"Cloning {repo}")
if not requests.get(repo).ok:
_fail(f"Unable to locate '{repo}'")
raise RuntimeError
subprocess.check_output(['mkdir', '-p', into])
# git clone prints to stderr, oddly enough:
subprocess.check_output(
['git', 'clone', '-b', branch, repo, into], stderr=subprocess.STDOUT
)
@functools.lru_cache()
def _branch(recipe: str) -> str:
"""
>>> _branch('(shx :branch "develop" ...)')
'develop'
"""
match = re.search(':branch "([^"]*)"', recipe)
if match:
return match.groups()[0]
return 'master'
def check_local_package(elisp_dir: str = None, package_name: str = None):
"""Check a locally-hosted package."""
elisp_dir = elisp_dir or input('Path: ').strip()
assert os.path.isdir(elisp_dir)
package_name = package_name or input(f"Name of package at {elisp_dir}: ")
recipe = f'({package_name or "NONAME"} :repo "N/A")'
run_checks(recipe, elisp_dir)
def check_melpa_pr(pr_url: str):
"""Check a PR on MELPA."""
match = re.search(MELPA_PR, pr_url) # MELPA_PR's 0th group has the number
assert match
pr_data = requests.get(f"{MELPA_PULL_API}/{match.groups()[0]}").json()
if 'changed_files' not in pr_data:
_note(f"{pr_url} does not appear to be a GitHub PR", CLR_ERROR)
return
if int(pr_data['changed_files']) != 1:
_note('Please only add one recipe per pull request', CLR_ERROR)
return
name, recipe = _name_and_recipe(pr_data['diff_url'])
if not name or not recipe:
_note(f"Unable to build the pull request at {pr_url}", CLR_ERROR)
return
clone_address: str = _clone_address(name, recipe)
with tempfile.TemporaryDirectory() as elisp_dir:
_clone(clone_address, _branch(recipe), into=elisp_dir)
return run_checks(recipe, elisp_dir, clone_address, pr_data)
@functools.lru_cache()
def _name_and_recipe(pr_data_diff_url: str) -> Tuple[str, str]:
"""Determine the filename and the contents of the user's recipe."""
# TODO: use https://developer.github.com/v3/repos/contents/ instead of 'patch'
with tempfile.TemporaryDirectory() as tmpdir:
try:
diff_text = requests.get(pr_data_diff_url).text
if (
'new file mode' not in diff_text
or 'a/recipes' not in diff_text
or 'b/recipes' not in diff_text
):
_note('This does not appear to add a new recipe', CLR_WARN)
return '', ''
recipe_name = diff_text.split('\n')[0].split('/')[-1]
diff_filename = os.path.join(tmpdir, 'diff')
recipe_filename = os.path.join(tmpdir, recipe_name)
with open(diff_filename, 'w') as diff_file:
diff_file.write(diff_text)
subprocess.check_output(
f"patch {recipe_filename} < {diff_filename}", shell=True
)
with open(recipe_filename) as recipe_file:
recipe = re.sub(r'\s+', ' ', recipe_file.read())
return recipe_name, recipe.strip()
except subprocess.CalledProcessError as err:
_note(str(err), CLR_WARN)
return '', ''
def _clone_address(name: str, recipe: str) -> str:
"""
This is a HACK to get the clone address from the
filename/recipe pair using the builtin MELPA machinery. As a
bonus, it validates the recipe.
>>> _clone_address('shx', '(shx :repo "riscy/shx-for-emacs" :fetcher github)')
'https://github.com/riscy/shx-for-emacs.git'
>>> _clone_address('pmdm', '(pmdm :fetcher hg :url "https://hg.serna.eu/emacs/pmdm")')
'https://hg.serna.eu/emacs/pmdm'
"""
with tempfile.TemporaryDirectory() as tmpdir:
with open(os.path.join(tmpdir, name), 'w') as recipe_file:
recipe_file.write(recipe)
with open(os.path.join(tmpdir, 'script.el'), 'w') as script:
script.write(
requests.get(
'https://raw.githubusercontent.com/melpa/melpa/master/'
'package-build/package-recipe.el'
).text
+ f"""(let ((package-build-recipes-dir "{tmpdir}"))
(send-string-to-terminal
(package-recipe--upstream-url
(package-recipe-lookup "{name}"))))"""
)
return subprocess.check_output(['emacs', '--script', script.name]).decode()
def check_melpa_pr_loop():
"""Check MELPA pull requests in a loop."""
for pr_url in _fetch_pull_requests():
print(f"Found MELPA PR {pr_url}")
check_melpa_pr(pr_url)
print('-' * 79)
def _fetch_pull_requests() -> Iterator[str]:
"""Repeatedly yield PR URL's."""
# TODO: only supports macOS (needs pbpaste or equivalents)
previous_pr_url = None
while True:
while True:
match = re.search(MELPA_PR, subprocess.check_output('pbpaste').decode())
pr_url = match.string[: match.end()] if match else None
if match and pr_url and pr_url != previous_pr_url:
break
print(
'Watching clipboard for MELPA PR... '
+ ('😐' if random.randint(0, 2) else '🤨'),
end='\r',
)
time.sleep(1)
previous_pr_url = pr_url
yield pr_url
if __name__ == '__main__':
if 'MELPA_PR_URL' in os.environ:
check_melpa_pr(os.environ['MELPA_PR_URL'])
sys.exit(return_code())
elif 'PKG_PATH' in os.environ and 'PKG_NAME' in os.environ:
check_local_package(os.environ['PKG_PATH'], os.environ['PKG_NAME'])
sys.exit(return_code())
elif 'RECIPE' in os.environ:
check_remote_package(os.environ['RECIPE'])
sys.exit(return_code())
else:
check_melpa_pr_loop()