#!/usr/bin/env python # Copyright (c) 2012 Trent Mick. # Copyright (c) 2007-2008 ActiveState Corp. # License: MIT (http://www.opensource.org/licenses/mit-license.php) r"""A fast and complete Python implementation of Markdown. [from http://daringfireball.net/projects/markdown/] > Markdown is a text-to-HTML filter; it translates an easy-to-read / > easy-to-write structured text format into HTML. Markdown's text > format is most similar to that of plain text email, and supports > features such as headers, *emphasis*, code blocks, blockquotes, and > links. > > Markdown's syntax is designed not as a generic markup language, but > specifically to serve as a front-end to (X)HTML. You can use span-level > HTML tags anywhere in a Markdown document, and you can use block level > HTML tags (like
tags.
"""
yield 0, ""
for tup in inner:
yield tup
yield 0, "
"
def wrap(self, source, outfile):
"""Return the source with a code, pre, and div."""
return self._wrap_div(self._wrap_pre(self._wrap_code(source)))
formatter_opts.setdefault("cssclass", "codehilite")
formatter = HtmlCodeFormatter(**formatter_opts)
return pygments.highlight(codeblock, lexer, formatter)
def _code_block_sub(self, match, is_fenced_code_block=False):
lexer_name = None
if is_fenced_code_block:
lexer_name = match.group(1)
if lexer_name:
formatter_opts = self.extras['fenced-code-blocks'] or {}
codeblock = match.group(2)
codeblock = codeblock[:-1] # drop one trailing newline
else:
codeblock = match.group(1)
codeblock = self._outdent(codeblock)
codeblock = self._detab(codeblock)
codeblock = codeblock.lstrip('\n') # trim leading newlines
codeblock = codeblock.rstrip() # trim trailing whitespace
# Note: "code-color" extra is DEPRECATED.
if "code-color" in self.extras and codeblock.startswith(":::"):
lexer_name, rest = codeblock.split('\n', 1)
lexer_name = lexer_name[3:].strip()
codeblock = rest.lstrip("\n") # Remove lexer declaration line.
formatter_opts = self.extras['code-color'] or {}
# Use pygments only if not using the highlightjs-lang extra
if lexer_name and "highlightjs-lang" not in self.extras:
def unhash_code(codeblock):
for key, sanitized in list(self.html_spans.items()):
codeblock = codeblock.replace(key, sanitized)
replacements = [
("&", "&"),
("<", "<"),
(">", ">")
]
for old, new in replacements:
codeblock = codeblock.replace(old, new)
return codeblock
lexer = self._get_pygments_lexer(lexer_name)
if lexer:
codeblock = unhash_code( codeblock )
colored = self._color_with_pygments(codeblock, lexer,
**formatter_opts)
return "\n\n%s\n\n" % colored
codeblock = self._encode_code(codeblock)
pre_class_str = self._html_class_str_from_tag("pre")
if "highlightjs-lang" in self.extras and lexer_name:
code_class_str = ' class="%s"' % lexer_name
else:
code_class_str = self._html_class_str_from_tag("code")
return "\n\n%s\n
\n\n" % (
pre_class_str, code_class_str, codeblock)
def _html_class_str_from_tag(self, tag):
"""Get the appropriate ' class="..."' string (note the leading
space), if any, for the given tag.
"""
if "html-classes" not in self.extras:
return ""
try:
html_classes_from_tag = self.extras["html-classes"]
except TypeError:
return ""
else:
if tag in html_classes_from_tag:
return ' class="%s"' % html_classes_from_tag[tag]
return ""
def _do_code_blocks(self, text):
"""Process Markdown `` blocks."""
code_block_re = re.compile(r'''
(?:\n\n|\A\n?)
( # $1 = the code block -- one or more lines, starting with a space/tab
(?:
(?:[ ]{%d} | \t) # Lines must start with a tab or a tab-width of spaces
.*\n+
)+
)
((?=^[ ]{0,%d}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
# Lookahead to make sure this block isn't already in a code block.
# Needed when syntax highlighting is being used.
(?![^<]*\
)
''' % (self.tab_width, self.tab_width),
re.M | re.X)
return code_block_re.sub(self._code_block_sub, text)
_fenced_code_block_re = re.compile(r'''
(?:\n+|\A\n?)
^```\s*?([\w+-]+)?\s*?\n # opening fence, $1 = optional lang
(.*?) # $2 = code block content
^```[ \t]*\n # closing fence
''', re.M | re.X | re.S)
def _fenced_code_block_sub(self, match):
return self._code_block_sub(match, is_fenced_code_block=True)
def _do_fenced_code_blocks(self, text):
"""Process ```-fenced unindented code blocks ('fenced-code-blocks' extra)."""
return self._fenced_code_block_re.sub(self._fenced_code_block_sub, text)
# Rules for a code span:
# - backslash escapes are not interpreted in a code span
# - to include one or or a run of more backticks the delimiters must
# be a longer run of backticks
# - cannot start or end a code span with a backtick; pad with a
# space and that space will be removed in the emitted HTML
# See `test/tm-cases/escapes.text` for a number of edge-case
# examples.
_code_span_re = re.compile(r'''
(?%s
" % c
def _do_code_spans(self, text):
# * Backtick quotes are used for
spans.
#
# * You can use multiple backticks as the delimiters if you want to
# include literal backticks in the code span. So, this input:
#
# Just type ``foo `bar` baz`` at the prompt.
#
# Will translate to:
#
# Just type foo `bar` baz
at the prompt.
`bar`
...
return self._code_span_re.sub(self._code_span_sub, text)
def _encode_code(self, text):
"""Encode/escape certain characters inside Markdown code runs.
The point is that in code, these characters are literals,
and lose their special Markdown meanings.
"""
replacements = [
# Encode all ampersands; HTML entities are not
# entities within a Markdown code span.
('&', '&'),
# Do the angle bracket song and dance:
('<', '<'),
('>', '>'),
]
for before, after in replacements:
text = text.replace(before, after)
hashed = _hash_text(text)
self._escape_table[text] = hashed
return hashed
_strike_re = re.compile(r"~~(?=\S)(.+?)(?<=\S)~~", re.S)
def _do_strike(self, text):
text = self._strike_re.sub(r".+?)', re.S) def _dedent_two_spaces_sub(self, match): return re.sub(r'(?m)^ ', '', match.group(1)) def _block_quote_sub(self, match): bq = match.group(1) is_spoiler = 'spoiler' in self.extras and self._bq_all_lines_spoilers.match(bq) # trim one level of quoting if is_spoiler: bq = self._bq_one_level_re_spoiler.sub('', bq) else: bq = self._bq_one_level_re.sub('', bq) # trim whitespace-only lines bq = self._ws_only_line_re.sub('', bq) bq = self._run_block_gamut(bq) # recurse bq = re.sub('(?m)^', ' ', bq) # These leading spaces screw with
content, so we need to fix that: bq = self._html_pre_block_re.sub(self._dedent_two_spaces_sub, bq) if is_spoiler: return '\n%s\n\n\n' % bq else: return '\n%s\n\n\n' % bq def _do_block_quotes(self, text): if '>' not in text: return text if 'spoiler' in self.extras: return self._block_quote_re_spoiler.sub(self._block_quote_sub, text) else: return self._block_quote_re.sub(self._block_quote_sub, text) def _form_paragraphs(self, text): # Strip leading and trailing lines: text = text.strip('\n') # Wraptags. grafs = [] for i, graf in enumerate(re.split(r"\n{2,}", text)): if graf in self.html_blocks: # Unhashify HTML blocks grafs.append(self.html_blocks[graf]) else: cuddled_list = None if "cuddled-lists" in self.extras: # Need to put back trailing '\n' for `_list_item_re` # match at the end of the paragraph. li = self._list_item_re.search(graf + '\n') # Two of the same list marker in this paragraph: a likely # candidate for a list cuddled to preceding paragraph # text (issue 33). Note the `[-1]` is a quick way to # consider numeric bullets (e.g. "1." and "2.") to be # equal. if (li and len(li.group(2)) <= 3 and ( (li.group("next_marker") and li.group("marker")[-1] == li.group("next_marker")[-1]) or li.group("next_marker") is None ) ): start = li.start() cuddled_list = self._do_lists(graf[start:]).rstrip("\n") assert cuddled_list.startswith("
tags. graf = self._run_span_gamut(graf) grafs.append("
" % self._html_class_str_from_tag('p') + graf.lstrip(" \t") + "
") if cuddled_list: grafs.append(cuddled_list) return "\n\n".join(grafs) def _add_footnotes(self, text): if self.footnotes: footer = [ '%s
" % backlink) footer.append('