Rename the project as recommonmark

This commit is contained in:
Luca Barbato 2014-10-17 22:29:41 +02:00
parent f5ddea5f9d
commit fec4719f9f
7 changed files with 64 additions and 570 deletions

View file

@ -1,31 +1,35 @@
# remarkdown
# recommonmark
A markdown parser based on `docutils`
A `docutils`-compatibility bridge to [CommonMark][cm] derived from
[remarkdown][rmd].
**Note that this code is still alpha; some markdown features might not work yet.**
## Why a bridge?
## Why another markdown library?
Many Python tools (mostly for documentation creation) rely on `docutils`,
but [docutils][dc] only supports the reStructuredText syntax.
remarkdown is not just another markdown library. It mostly contains a parser
that outputs a [`docutils` document tree][docutils]. The different scripts
bundled then use `docutils` for generation of different types of documents.
For instance [this issue][sphinx-issue] and [this StackOverflow
question][so-question] show that there is an interest in allowing `docutils`
to use markdown as an alternative syntax.
Why is this important? Many python tools (mostly for documentation creation)
rely on `docutils`. But `docutils` only supports a ReStructuredText syntax. For
instance [this issue][sphinx-issue] and [this StackOverflow
question][so-question] show that there is an interest in allowing `docutils` to
use markdown as an alternative syntax.
## Why another bridge to docutils?
[docutils]: http://docutils.sourceforge.net/docs/ref/doctree.html
[sphinx-issue]: https://bitbucket.org/birkenfeld/sphinx/issue/825/markdown-capable-sphinx
[so-question]: http://stackoverflow.com/questions/2471804/using-sphinx-with-markdown-instead-of-rst
recommonmark uses the [python implementation][pcm] of [CommonMark][cm] while
[remarkdown][rmd] implements a stand-alone parser leveraging [parsley][prs].
Both output a [`docutils` document tree][dc] and provide scripts
that leverage `docutils` for generation of different types of documents.
## Acknowledgement
The remarkdown PEG is heavily inspired by [peg-markdown by John
MacFarlane][peg-md].
[peg-md]: https://github.com/jgm/peg-markdown
recommonmark is mainly derived from [remarkdown][rmd] by Steve Genoud and
leverages the python CommonMark implementation.
[cm]: http://commonmark.org
[pcm]: https://github.com/rolandshoemaker/CommonMark-py
[rmd]: https://github.com/sgenoud/remarkdown
[prs]: https://github.com/python-parsley/parsley
[dc]: http://docutils.sourceforge.net/docs/ref/doctree.html
[sphinx-issue]: https://bitbucket.org/birkenfeld/sphinx/issue/825/markdown-capable-sphinx
[so-question]: http://stackoverflow.com/questions/2471804/using-sphinx-with-markdown-instead-of-rst

View file

@ -6,7 +6,7 @@ from docutils import parsers, nodes
from CommonMark import DocParser
from warnings import warn
__all__ = ['MarkdownParser']
__all__ = ['CommonMarkParser']
def flatten(iterator):
    """Lazily concatenate the sub-iterables produced by ``iterator``.

    Returns an ``itertools.chain`` that yields every element of every
    sub-iterable, in order.
    """
    chain_all = itertools.chain.from_iterable
    return chain_all(iterator)
@ -33,7 +33,7 @@ class _SectionHandler(object):
self._prune_levels(level)
class MarkdownParser(object, parsers.Parser):
class CommonMarkParser(object, parsers.Parser):
supported = ('md', 'markdown')
def convert_blocks(self, blocks):
@ -153,7 +153,7 @@ class MarkdownParser(object, parsers.Parser):
target_node['refuri'] = block.destination
if title:
if block.title:
target_node['title'] = block.title
self.current_node.append(target_node)

View file

@ -15,40 +15,53 @@ except:
pass
from docutils.core import publish_cmdline, default_description
from remarkdown.parser import MarkdownParser
from recommonmark.parser import CommonMarkParser
def md2html():
def cm2html():
    """Run the docutils command-line publisher with the ``html`` writer.

    Input is parsed with ``CommonMarkParser``.
    """
    usage = 'Generate html document from markdown sources. ' + default_description
    publish_cmdline(
        writer_name='html',
        parser=CommonMarkParser(),
        description=usage,
    )
def md2xml():
def cm2man():
    """Run the docutils command-line publisher with the ``manpage`` writer.

    Input is parsed with ``CommonMarkParser``.
    """
    usage = 'Generate a manpage from markdown sources. ' + default_description
    publish_cmdline(
        writer_name='manpage',
        parser=CommonMarkParser(),
        description=usage,
    )
def cm2xml():
    """Run the docutils command-line publisher with the ``xml`` writer.

    Input is parsed with ``CommonMarkParser``.
    """
    usage = 'Generate XML document from markdown sources. ' + default_description
    publish_cmdline(
        writer_name='xml',
        parser=CommonMarkParser(),
        description=usage,
    )
def md2pseudoxml():
def cm2pseudoxml():
    """Run the docutils command-line publisher with the ``pseudoxml`` writer.

    Input is parsed with ``CommonMarkParser``.
    """
    usage = ('Generate pseudo-XML document from markdown sources. '
             + default_description)
    publish_cmdline(
        writer_name='pseudoxml',
        parser=CommonMarkParser(),
        description=usage,
    )
def md2latex():
def cm2latex():
    """Run the docutils command-line publisher with the ``latex`` writer.

    Input is parsed with ``CommonMarkParser``.
    """
    usage = 'Generate latex document from markdown sources. ' + default_description
    publish_cmdline(
        writer_name='latex',
        parser=CommonMarkParser(),
        description=usage,
    )
def md2xetex():
def cm2xetex():
    """Run the docutils command-line publisher with the ``xetex`` writer.

    Input is parsed with ``CommonMarkParser``.
    """
    description = ('Generate xetex document from markdown sources. '
                   + default_description)
    # Use the dedicated xetex writer; 'latex' would make this command an
    # exact duplicate of cm2latex.
    publish_cmdline(writer_name='xetex',
                    parser=CommonMarkParser(),
                    description=description)

View file

@ -1,314 +0,0 @@
# Basic Elements
## Spaces
spacechar = ' ' | '\t'
sp = spacechar*
newline = '\n' | '\r' '\n'?
blank_line = sp newline
spnl = sp (newline sp)?
## Characters
nonspacechar = ~spacechar ~newline anything
special_char = '~' | '*' | '_' | '`' | '&' | '[' | ']' | '(' | ')' | '<' | '!' | '#' | '\\' | '\'' | '"'
normal_char = ~(special_char | spacechar | newline) anything
escapable_char = :x ?(x in "-\\`|*_{}[\]()#+.!><") -> x
ascii_char = :x ?(x in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") -> x
## HTML entities
hex_entity = <'&' '#' ('X' | 'x') hexdigit+ ';'>
dec_entity = <'&' '#' digit+ ';'>
char_entity = <'&' letterOrDigit+ ';'>
## Indentation
indent = ' '{4} | '\t'
nonindent_space = ' '{0,3}
## End of Line
endline = line_break | terminal_endline | normal_endline
normal_endline = sp newline
~blank_line
~'>'
~(line ('='+ | '-'+) newline)
~atx_start
-> '\n'
terminal_endline = sp newline end -> '\n'
line_break = " " normal_endline -> dict(id_='line_break')
## Line definitions
line = <(~'\r' ~'\n' anything)* newline> | (<anything+> end)
indented_line = indent line
optionally_indented_line = indent? line
non_blank_indented_line = ~blank_line indented_line
# Inline parsing
inline = text
| endline
| ul_or_star_line
| space
| emph
| strong
| image
| link
| code
| raw_html
| entity
| escaped_char
| symbol
inlines = inline*
space = spacechar+ -> ' '
text = <normal_char+ text_chunk*>
text_chunk = <normal_char | ('_' ~~letterOrDigit)+> | apos_chunk
# should be part of the smart extension
apos_chunk = '\'' ~~letterOrDigit
-> dict(id_='apostrophe')
escaped_char = '\\' ~newline <escapable_char>
entity = (hex_entity | dec_entity | char_entity):en
-> dict(id_='html', children=en)
symbol = <special_char>
# This keeps the parser from getting bogged down on long strings of '*' or '_',
# or strings of '*' or '_' with space on each side:
ul_or_star_line = (ul_line | star_line)
star_line = <'****' '*'*> | <spacechar '*'+ ~~spacechar>
ul_line = <'____' '_'*> | <spacechar '_'+ ~~spacechar>
whitespace = spacechar | newline
emph = (emph_star | emph_ul):child
-> builder.emph(child)
emph_star = '*' ~whitespace (~'*' inline | strong_star)+:txt '*'
-> txt
emph_ul = '_' ~whitespace (~'_' inline | strong_ul)+:txt '_'
-> txt
strong = (strong_star | strong_ul):child
-> builder.strong(child)
strong_star = '**' ~whitespace (~'**' inline)+:txt '**'
-> txt
strong_ul = '__' ~whitespace (~'__' inline)+:txt '__'
-> txt
#TODO: make the ~^ part of the notes extension
label = '[' (~'^')
(~']' inline)*:label_elements
']'
-> label_elements
image = '!' (explicit_link | reference_link):link
-> builder.image(**link)
link = (explicit_link | reference_link | auto_link):link
-> builder.reference(**link)
explicit_link = label:link_label '(' sp source:url spnl title?:title sp ')'
-> dict(content=link_label, uri=url, title=title)
source = '<' <source_contents>:so '>' -> so
| <source_contents>
source_contents = (( ~'(' ~')' ~'>' nonspacechar)+ | '(' source_contents ')')*
reference_link = reference_link_double | reference_link_single
reference_link_double = label:label spnl ~"[]" label:description
-> dict(content=label, label=description)
reference_link_single = label:label (spnl "[]")?
-> dict(content=label)
title = ('\'' | '"'):quote
<(~(exactly(quote) sp ( ')' | newline)) anything)*>:title
exactly(quote)
-> title
auto_link = auto_link_url | auto_link_email
auto_link_url = '<' <ascii_char+ "://" (~newline ~'>' anything)+>:url '>'
-> dict(uri=url, content=url)
email_special_char = '-' | '+' | '_' | '.' | '/' | '!' | '%'| '~' | '$'
auto_link_email = '<' ( "mailto:" )?
<(email_special_char | letterOrDigit)+
'@' ( ~newline ~'>' anything )+>:address '>'
-> dict(uri='mailto:' + address, content=address)
target = nonindent_space ~'[]' label:label
':' spnl ref_src:src ref_title?:title blank_line+
-> builder.target(label=label, uri=src, title=title)
ref_src = <nonspacechar+>
ref_title = ref_title_quote | ref_title_parens
ref_title_quote = spnl ('\'' | '\"'):quote
<(~(exactly(quote) sp newline | newline) anything)*>:title
exactly(quote)
-> title
ref_title_parens = spnl '(' <(~(')' sp newline | newline) anything)*>:title ')'
-> title
references = (target | skip_block)*
code = (<'`'+ ~'`'>:t sp
<((~'`' nonspacechar)+
| ~(exactly(t) ~'`') '`'+
| ~(sp exactly(t) ~'`') (spacechar | newline ~blank_line))+>:code
sp exactly(t) ~'`')
-> builder.literal(code)
quoted = '"' (~'"' anything)* '"' | '\'' (~'\'' anything)* '\''
html_attribute = <(letterOrDigit | '-')+ spnl ('=' spnl (quoted | (~'>' nonspacechar)+))? spnl>
html_comment = <"<!--" (~"-->" anything)* "-->">
html_tag = <'<' spnl '/'? letter+ spnl html_attribute* '/'? spnl '>'>
#TODO: Add html_block_script
raw_html = (html_comment | html_tag):html
-> dict(id_='html_tag', data=html)
# Blocks definitions
## Block Quote
quote = quote_lines+:q
-> builder.quote(''.join(q))
quote_line = '>' ' '? line:quote
-> quote
lazy_quote_line = ~'>' ~blank_line line:quote
-> quote
quote_lines = quote_line:first lazy_quote_line*:rest blank_line*:blank
-> first + ''.join(rest) + ('\n' if blank else '')
## Verbatim
verbatim = verbatim_chunk+:chunks
-> builder.verbatim(chunks)
verbatim_chunk = blank_line*:blank
non_blank_indented_line+:nbil
-> (['\n'] * len(blank) if blank else []) + nbil
## Horizontal Rule
horizontal_rule = nonindent_space
(('*' sp){3} ('*' sp)*
| ('-' sp){3} ('-' sp)*
| ('_' sp){3} ('_' sp)*)
sp newline blank_line+
-> builder.horizontal_rule()
## Headings
heading = setext_heading | atx_heading
atx_heading = atx_start:level sp atx_inline+:txt (sp '#'* sp)? newline
-> builder.section(txt, level)
atx_inline = ~newline ~(sp '#'* sp newline) inline
atx_start = '#'{1,6}:x -> len(x)
setext_heading = (setext_heading1 | setext_heading2):(txt, level)
-> builder.section(txt, level)
settext_bottom1 = '='+ newline
settext_bottom2 = '-'+ newline
settext_inline = (~endline inline)+:txt sp newline -> txt
setext_heading1 = ~~(line settext_bottom1) settext_inline:txt settext_bottom1 -> txt, 1
setext_heading2 = ~~(line settext_bottom2) settext_inline:txt settext_bottom2 -> txt, 2
## Bullet and Ordered lists
bullet = ~horizontal_rule nonindent_space ('*' | '-' | '+') spacechar+
enumerator = nonindent_space digit+ '.' spacechar+
bullet_list = ~~bullet (list_tight | list_loose):list_items
-> builder.bullet_list(list_items)
ordered_list = ~~enumerator (list_tight | list_loose):list_items
-> builder.ordered_list(list_items)
list_loose = (list_item_loose:it blank_line* -> it)+:items
-> items
list_item_loose = (bullet | enumerator)
list_block:item_block
(list_continuation_block)*:continuation
-> item_block + ''.join(continuation)
list_tight = list_item_tight+:items blank_line* ~(bullet | enumerator)
-> items
list_item_tight = (bullet | enumerator)
list_block:item_block
(~blank_line list_continuation_block)*:continuation
~list_continuation_block
-> item_block + ''.join(continuation)
list_block = ~blank_line line:first list_block_line*:rest
-> first + '\n'.join(rest)
list_block_line = ~blank_line
~(indent? (bullet | enumerator))
~horizontal_rule
optionally_indented_line:line
-> line
# TODO add block separator when blankline is not empty
list_continuation_block = blank_line*:blanks
(indent list_block:b -> b)+:block
-> ''.join(blanks) + ''.join(block)
## HTML blocks
tag_name = "address" | "blockquote" | "center" | "dir" | "div" | "dl"
| "fieldset" | "form" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "hr"
| "isindex" | "menu" | "noframes" | "noscript" | "ol" | "p" | "pre" | "table"
| "ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th"
| "thead" | "tr" | "script" | "ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR"
| "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" | "H4" | "H5" | "H6"
| "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "P" | "PRE"
| "TABLE" | "UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT"
| "TH" | "THEAD" | "TR" | "SCRIPT"
#TODO: make the tags case insensitive
html_block_in_tags = <'<' spnl tag_name:my_tag spnl html_attribute* '>'
( ~('<' '/' spnl exactly(my_tag) spnl '>') anything)*
~('<' '/' spnl exactly(my_tag) spnl '>')>
html_block_self_closing = '<' spnl tag_name spnl html_attribute* '/' spnl '>'
html_block = < ( html_block_in_tags | html_comment | html_block_self_closing ) >
## Paragraph
paragraph = nonindent_space inlines:d ~~blank_line
-> builder.paragraph(d)
## Being extension ready
do_nothing = ~'.' '.'
begin_hook = do_nothing
before_verbatim = do_nothing
before_horizontal_rule = do_nothing
before_lists = do_nothing
before_paragraph = do_nothing
before_plain = do_nothing
before_heading = do_nothing
block = blank_line* (
begin_hook
| quote
| before_verbatim
| verbatim
| target
| before_horizontal_rule
| horizontal_rule
| before_heading
| heading
| before_lists
| bullet_list
| ordered_list
| before_paragraph
| html_block
| paragraph
| before_plain
)
document = block*:p blank_line* -> p

View file

@ -1,207 +0,0 @@
from contextlib import contextmanager
import itertools
import os.path
from docutils import parsers, nodes
import parsley
__all__ = ['MarkdownParser']
def flatten(iterator):
    """Lazily concatenate the sub-iterables produced by ``iterator``.

    Returns an ``itertools.chain`` that yields every element of every
    sub-iterable, in order.
    """
    chain_all = itertools.chain.from_iterable
    return chain_all(iterator)
class _SectionHandler(object):
def __init__(self, document):
self._level_to_elem = {0: document}
def _parent_elem(self, child_level):
parent_level = max(level for level in self._level_to_elem
if child_level > level)
return self._level_to_elem[parent_level]
def _prune_levels(self, limit_level):
self._level_to_elem = dict((level, elem)
for level, elem in self._level_to_elem.items()
if level <= limit_level)
def add_new_section(self, section, level):
parent = self._parent_elem(level)
parent.append(section)
self._level_to_elem[level] = section
self._prune_levels(level)
class MarkdownParser(object, parsers.Parser):
    """docutils parser that builds a document tree from markdown input.

    The grammar is read from ``markdown.parsley`` (located next to this
    module) and compiled with parsley. This instance is handed to the grammar
    as ``builder``, so grammar actions call the block and inline methods
    defined below.
    """
    # NOTE(review): listing ``object`` before ``parsers.Parser`` cannot yield
    # a consistent MRO if Parser is itself a new-style class -- confirm
    # against the docutils/Python versions that must be supported.

    supported = ('md', 'markdown')

    def parse(self, inputstring, document):
        """Parse ``inputstring`` and add the resulting nodes to ``document``."""
        self.setup_parse(inputstring, document)

        self.document = document
        self.current_node = document
        self.section_handler = _SectionHandler(document)

        base = os.path.dirname(os.path.abspath(__file__))
        filename = os.path.join(base, "markdown.parsley")

        with open(filename) as pry_file:
            self.grammar_raw = pry_file.read()

        self.grammar = parsley.makeGrammar(
            self.grammar_raw,
            dict(builder=self),
            name='Markdown'
        )

        # A newline is appended to the input before it reaches the grammar.
        self.grammar(inputstring + '\n').document()
        self.finish_parse()

    @contextmanager
    def _temp_current_node(self, current_node):
        """Temporarily make ``current_node`` the node new blocks attach to."""
        saved_node = self.current_node
        self.current_node = current_node
        try:
            yield
        finally:
            # Restore the previous node even when the nested parse raises,
            # so the parser is not left pointing at a detached node.
            self.current_node = saved_node

    # Blocks
    def section(self, text, level):
        """Open a new section of depth ``level`` titled with the inlines ``text``."""
        new_section = nodes.section()
        new_section['level'] = level

        title_node = nodes.title()
        append_inlines(title_node, text)
        new_section.append(title_node)

        self.section_handler.add_new_section(new_section, level)
        # Blocks that follow are appended inside the new section.
        self.current_node = new_section

    def verbatim(self, text):
        """Append a literal block built from the nested line chunks ``text``."""
        verbatim_node = nodes.literal_block()
        text = ''.join(flatten(text))
        # Drop a single trailing newline.
        if text.endswith('\n'):
            text = text[:-1]
        verbatim_node.append(nodes.Text(text))
        self.current_node.append(verbatim_node)

    def paragraph(self, text):
        """Append a paragraph holding the inlines ``text``."""
        p = nodes.paragraph()
        append_inlines(p, text)
        self.current_node.append(p)

    def quote(self, text):
        """Append a block quote whose content is ``text`` parsed as a document."""
        q = nodes.block_quote()

        with self._temp_current_node(q):
            self.grammar(text).document()

        self.current_node.append(q)

    def _build_list(self, items, node):
        """Parse each string in ``items`` into a list item appended to ``node``.

        Returns ``node``.
        """
        for item in items:
            list_item = nodes.list_item()
            with self._temp_current_node(list_item):
                self.grammar(item + "\n\n").document()
            node.append(list_item)
        return node

    def bullet_list(self, items):
        """Append a bullet list built from ``items``."""
        bullet_list = nodes.bullet_list()
        self._build_list(items, bullet_list)
        self.current_node.append(bullet_list)

    def ordered_list(self, items):
        """Append an enumerated list built from ``items``."""
        ordered_list = nodes.enumerated_list()
        self._build_list(items, ordered_list)
        self.current_node.append(ordered_list)

    def horizontal_rule(self):
        """Append a transition node."""
        self.current_node.append(nodes.transition())

    def target(self, label, uri, title):
        """Append a target node named after ``label`` pointing at ``uri``.

        ``title`` is stored on the node only when it is truthy.
        """
        target_node = nodes.target()

        target_node['names'].append(make_refname(label))

        target_node['refuri'] = uri

        if title:
            target_node['title'] = title

        self.current_node.append(target_node)

    # Inlines
    def emph(self, inlines):
        """Return an emphasis node wrapping ``inlines``."""
        emph_node = nodes.emphasis()
        append_inlines(emph_node, inlines)
        return emph_node

    def strong(self, inlines):
        """Return a strong node wrapping ``inlines``."""
        strong_node = nodes.strong()
        append_inlines(strong_node, inlines)
        return strong_node

    def literal(self, inlines):
        """Return an inline literal node wrapping ``inlines``."""
        literal_node = nodes.literal()
        append_inlines(literal_node, inlines)
        return literal_node

    def reference(self, content, label=None, uri=None, title=None):
        """Return a reference node whose children are the inlines ``content``.

        With ``uri`` the node carries it as ``refuri``; otherwise the node is
        registered on the document under a ``refname`` derived from ``label``
        (or from ``content`` when ``label`` is None).
        """
        ref_node = nodes.reference()
        label = make_refname(content if label is None else label)

        ref_node['name'] = label
        if uri is not None:
            ref_node['refuri'] = uri
        else:
            ref_node['refname'] = label
            self.document.note_refname(ref_node)

        if title:
            ref_node['title'] = title

        append_inlines(ref_node, content)
        return ref_node

    def image(self, content, label=None, uri=None, title=None):
        """Return an image node, or a substitution reference when ``uri`` is None.

        The plain text of ``content`` is stored as the ``alt`` attribute.
        """
        label = make_refname(content if label is None else label)
        if uri is not None:
            img_node = nodes.image()
            img_node['uri'] = uri
        else:
            img_node = nodes.substitution_reference()
            img_node['refname'] = label
            self.document.note_refname(img_node)

        if title:
            img_node['title'] = title

        img_node['alt'] = text_only(content)
        return img_node
def _is_string(val):
return isinstance(val, basestring)
def make_refname(label):
    """Return the plain text of ``label``, lower-cased."""
    plain = text_only(label)
    return plain.lower()
def text_only(nodes):
    """Concatenate the text contained in ``nodes``.

    String items are used as-is; for any other item the text of its
    ``children`` is collected recursively.
    """
    pieces = []
    for item in nodes:
        if _is_string(item):
            pieces.append(item)
        else:
            pieces.append(text_only(item.children))
    return "".join(pieces)
def append_inlines(parent_node, inlines):
    """Append ``inlines`` to ``parent_node``, merging adjacent strings.

    Each run of consecutive string items is joined into a single
    ``nodes.Text``; every other item is appended unchanged, in order.
    """
    for is_text, elem_group in itertools.groupby(inlines, _is_string):
        if is_text:
            parent_node.append(nodes.Text("".join(elem_group)))
        else:
            # Explicit loop: ``map`` is lazy on Python 3, so calling it only
            # for its side effect would silently append nothing.
            for elem in elem_group:
                parent_node.append(elem)

View file

@ -2,27 +2,25 @@
# -*- coding: utf-8 -*-
'''
File: setup.py
Author: Steve Genoud
Date: 2013-08-25
Author: Steve Genoud and Luca Barbato
Date: 2014-10-17
'''
from setuptools import setup
import remarkdown
import recommonmark
setup(name='remarkdown',
version=remarkdown.__version__,
setup(name='recommonmark',
version=recommonmark.__version__,
install_requires=[
'Parsley>= 1.2',
'commonmark>= 0.5.4',
'docutils>=0.11'
],
entry_points={'console_scripts': [
'md2html = remarkdown.scripts:md2html',
'md2xml = remarkdown.scripts:md2xml',
'md2pseudoxml = remarkdown.scripts:md2pseudoxml',
'md2latex = remarkdown.scripts:md2latex',
'md2xetex = remarkdown.scripts:md2xetex',
'cm2html = recommonmark.scripts:cm2html',
'cm2latex = recommonmark.scripts:cm2latex',
'cm2man = recommonmark.scripts:cm2man',
'cm2pseudoxml = recommonmark.scripts:cm2pseudoxml',
'cm2xetex = recommonmark.scripts:cm2xetex',
'cm2xml = recommonmark.scripts:cm2xml',
]},
package_data = {
'': ['*.parsley']
},
packages=['remarkdown']
packages=['recommonmark']
)