[Python-modules-commits] [commonmark-bkrs] 01/03: Import commonmark-bkrs_0.5.4+ds.orig.tar.xz
Jerome Benoit
calculus-guest at moszumanska.debian.org
Tue Oct 4 00:32:29 UTC 2016
This is an automated email from the git hooks/post-receive script.
calculus-guest pushed a commit to branch master
in repository commonmark-bkrs.
commit 8e7c099cf02a2bfa86c3308a4962fc72220663db
Author: Jerome Benoit <calculus at rezozer.net>
Date: Tue Oct 4 01:00:09 2016 +0100
Import commonmark-bkrs_0.5.4+ds.orig.tar.xz
---
CommonMark/CommonMark.py | 1379 ++++++++
CommonMark/__init__.py | 6 +
CommonMark/entitytrans.py | 2349 +++++++++++++
CommonMark/test/test-CommonMark.py | 157 +
LICENSE | 32 +
MANIFEST.in | 8 +
README.md | 77 +
README.rst | 111 +
bin/cmark.py | 32 +
docs/CommonMark.py.html | 6676 ++++++++++++++++++++++++++++++++++++
setup.py | 30 +
spec.txt | 6149 +++++++++++++++++++++++++++++++++
12 files changed, 17006 insertions(+)
diff --git a/CommonMark/CommonMark.py b/CommonMark/CommonMark.py
new file mode 100755
index 0000000..31ca501
--- /dev/null
+++ b/CommonMark/CommonMark.py
@@ -0,0 +1,1379 @@
+#!/usr/bin/env python
+# 2014 - Bibek Kafle & Roland Shoemaker
+# Port of @jgm's JavaScript stmd.js implementation of the CommonMark spec
+
+# Basic usage:
+#
+# import CommonMark
+# parser = CommonMark.DocParser()
+# renderer = CommonMark.HtmlRenderer()
+# print(renderer.render(parser.parse('Hello *world*')))
import re, sys, argparse, json

# Pick the entity-unescaping and URL helpers that exist on the running
# interpreter: html / urllib.parse on Python 3, the bundled entity table and
# urllib / urlparse on Python 2.
if sys.version_info >= (3, 0):
    import urllib.parse
    if sys.version_info >= (3, 4):
        # HTMLParser().unescape was deprecated in 3.4 and removed in 3.9;
        # html.unescape is the supported replacement and exists since 3.4.
        import html.parser
        HTMLunescape = html.unescape
    else:
        from .entitytrans import _unescape
        HTMLunescape = _unescape
    HTMLquote = urllib.parse.quote
    HTMLunquote = urllib.parse.unquote
    URLparse = urllib.parse.urlparse
else:
    import urllib, urlparse
    import entitytrans
    HTMLunescape = entitytrans._unescape
    HTMLquote = urllib.quote
    HTMLunquote = urllib.unquote
    URLparse = urlparse.urlparse
+
+# Some of the regexps used in inline parser :<
+
# --- Fragments shared by the patterns below --------------------------------

# Character class of the punctuation that a backslash may escape.
ESCAPABLE = '[!"#$%&\'()*+,./:;<=>?@[\\\\\\]^_`{|}~-]'
# A backslash followed by one escapable character.
ESCAPED_CHAR = '\\\\' + ESCAPABLE

# The three delimiter styles accepted for a link title.
IN_DOUBLE_QUOTES = '"(' + ESCAPED_CHAR + '|[^"\\x00])*"'
IN_SINGLE_QUOTES = '\'(' + ESCAPED_CHAR + '|[^\'\\x00])*\''
IN_PARENS = '\\((' + ESCAPED_CHAR + '|[^)\\x00])*\\)'

# Pieces of an unbracketed link destination.
REG_CHAR = '[^\\\\()\\x00-\\x20]'
IN_PARENS_NOSP = '\\((' + REG_CHAR + '|' + ESCAPED_CHAR + ')*\\)'

# --- Raw HTML ---------------------------------------------------------------

TAGNAME = '[A-Za-z][A-Za-z0-9]*'
BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'
ATTRIBUTENAME = '[a-zA-Z_:][a-zA-Z0-9:._-]*'
UNQUOTEDVALUE = "[^\"'=<>`\\x00-\\x20]+"
SINGLEQUOTEDVALUE = "'[^']*'"
DOUBLEQUOTEDVALUE = '"[^"]*"'
ATTRIBUTEVALUE = (
    "(?:" + UNQUOTEDVALUE + "|" +
    SINGLEQUOTEDVALUE + "|" + DOUBLEQUOTEDVALUE + ")")
ATTRIBUTEVALUESPEC = "(?:" + "\\s*=" + "\\s*" + ATTRIBUTEVALUE + ")"
ATTRIBUTE = "(?:" + "\\s+" + ATTRIBUTENAME + ATTRIBUTEVALUESPEC + "?)"
OPENTAG = "<" + TAGNAME + ATTRIBUTE + "*" + "\\s*/?>"
CLOSETAG = "</" + TAGNAME + "\\s*[>]"
OPENBLOCKTAG = "<" + BLOCKTAGNAME + ATTRIBUTE + "*" + "\\s*/?>"
CLOSEBLOCKTAG = "</" + BLOCKTAGNAME + "\\s*[>]"
HTMLCOMMENT = "<!--([^-]+|[-][^-]+)*-->"
PROCESSINGINSTRUCTION = "[<][?].*?[?][>]"
DECLARATION = "<![A-Z]+" + "\\s+[^>]*>"
CDATA = "<!\\[CDATA\\[([^\\]]+|\\][^\\]]|\\]\\][^>])*\\]\\]>"
# Any inline HTML construct: tag, comment, processing instruction,
# declaration or CDATA section.
HTMLTAG = (
    "(?:" + OPENTAG + "|" + CLOSETAG + "|" + HTMLCOMMENT +
    "|" + PROCESSINGINSTRUCTION + "|" + DECLARATION + "|" + CDATA + ")")
# Opening of a block tag, a closing block tag, or "<?" / "<!".
HTMLBLOCKOPEN = (
    "<(?:" + BLOCKTAGNAME +
    "[\\s/>]" + "|" + "/" +
    BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])")

# --- Patterns consumed by the parsers ---------------------------------------

reHtmlTag = re.compile('^' + HTMLTAG, re.IGNORECASE)
reHtmlBlockOpen = re.compile('^' + HTMLBLOCKOPEN, re.IGNORECASE)
# Same pattern text as spelling the three title forms out inline.
reLinkTitle = re.compile(
    '^(?:' + IN_DOUBLE_QUOTES + '|' + IN_SINGLE_QUOTES + '|' + IN_PARENS + ')')
reLinkDestinationBraces = re.compile(
    '^(?:[<](?:[^<>\\n\\\\\\x00]' + '|' + ESCAPED_CHAR + '|' + '\\\\)*[>])')
reLinkDestination = re.compile(
    '^(?:' + REG_CHAR + '+|' + ESCAPED_CHAR + '|' + IN_PARENS_NOSP + ')*')
reEscapable = re.compile(ESCAPABLE)
# Deliberately left as pattern *strings*: callers pass them to re.sub /
# re.search themselves, in one case together with extra flags.
reAllEscapedChar = '\\\\(' + ESCAPABLE + ')'
reEscapedChar = re.compile('^\\\\(' + ESCAPABLE + ')')
reAllTab = re.compile("\t")
reHrule = re.compile(r"^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$")

# Matches a character with a special meaning in markdown,
# or a string of non-special characters.
reMain = r"^(?:[\n`\[\]\\!<&*_]|[^\n`\[\]\\!<&*_]+)"
+
+# Utility functions
+
def ASTtoJSON(block):
    """ Serialize a block and everything beneath it to a JSON string.

    This is destructive: the passed block (and its descendants) lose their
    'parent' reference and every empty attribute, and 'isOpen' is renamed
    to 'open'. Calling it again on an already-prepared block is safe.

    block -- root node; any object whose instance attributes follow the
             Block layout.
    Returns the JSON text produced by json.dumps.
    """
    empty_values = ("", [], None, {})
    # Attributes that may hold a list of nested nodes.
    child_fields = ('children', 'inline_content', 'label', 'c')

    def prepare(node):
        """ Strip the circular 'parent' link and trim empty attributes."""
        fields = node.__dict__
        # 'parent' points back up the tree and would make json.dumps recurse
        # forever, so it is always dropped.
        fields.pop('parent', None)
        if fields.get('isOpen') is not None:
            fields['open'] = fields.pop('isOpen')
        # Only instance attributes are serialized, so only those need
        # trimming; iterate over a copy because entries are deleted.
        for name in list(fields):
            if fields[name] in empty_values:
                del fields[name]
        for name in child_fields:
            nested = fields.get(name)
            # 'c' (and in principle 'label') can also be a plain string;
            # only real lists contain nodes to descend into.
            if isinstance(nested, list):
                for child in nested:
                    prepare(child)
        return node

    return json.dumps(prepare(block), default=lambda o: o.__dict__)
+
def dumpAST(obj, ind=0):
    """ Print a human-readable dump of a block and, recursively, of the
    blocks nested inside it.

    obj -- node to print; expected to expose the Block attributes.
    ind -- nesting depth, used only to indent the output.
    Returns None; everything goes to standard output.
    """
    indChar = ("\t" * ind) + "-> " if ind else ""
    field = "\t" + indChar

    print(indChar + "[" + obj.t + "]")
    # Truthiness (rather than != "") also skips a title of None, which a
    # link without a title can carry.
    if obj.title:
        print(field + "Title: " + obj.title)
    if obj.info:
        print(field + "Info: " + obj.info)
    if obj.destination:
        print(field + "Destination: " + obj.destination)
    if obj.isOpen:
        print(field + "Open: " + str(obj.isOpen))
    if obj.last_line_blank:
        print(field + "Last line blank: " + str(obj.last_line_blank))
    if obj.start_line:
        print(field + "Start line: " + str(obj.start_line))
    if obj.start_column:
        print(field + "Start Column: " + str(obj.start_column))
    if obj.end_line:
        print(field + "End line: " + str(obj.end_line))
    if obj.string_content:
        print(field + "String content: " + obj.string_content)
    if obj.strings:
        print(field + "Strings: ['" + "', '".join(obj.strings) + "']")
    if obj.c:
        if isinstance(obj.c, list):
            print(field + "c:")
            for b in obj.c:
                dumpAST(b, ind + 2)
        else:
            print(field + "c: " + obj.c)
    if obj.label:
        print(field + "Label:")
        for b in obj.label:
            dumpAST(b, ind + 2)
    # list_data is a dict, so its entries are looked up by key; several of
    # them ('start', 'padding', 'marker_offset') are integers.
    if obj.list_data:
        print(field + "List Data: ")
        for key in ("type", "bullet_char", "start", "delimiter",
                    "padding", "marker_offset"):
            if key in obj.list_data:
                print("\t\t" + indChar + "[" + key + "] = " +
                      str(obj.list_data[key]))
    if len(obj.inline_content) > 0:
        print(field + "Inline content:")
        for b in obj.inline_content:
            dumpAST(b, ind + 2)
    if len(obj.children) > 0:
        print(field + "Children:")
        for b in obj.children:
            dumpAST(b, ind + 2)
+
def unescape(s):
    """ Drop the backslash from every backslash-escaped punctuation
    character in s and return the resulting string."""
    def keep_escaped_char(found):
        # Group 1 is the character that followed the backslash.
        return found.group(1)
    return re.sub(reAllEscapedChar, keep_escaped_char, s)
+
+
def isBlank(s):
    """ Return True if s is empty or consists only of whitespace
    characters, False otherwise."""
    # Raw string: "\s" in a normal literal is an invalid escape sequence.
    return re.match(r"^\s*$", s) is not None
+
+
def normalizeReference(s):
    """ Build the lookup key for a reference label: surrounding whitespace
    is removed, every internal whitespace run becomes one space, and the
    result is upper-cased."""
    trimmed = s.strip()
    collapsed = re.sub(r'\s+', ' ', trimmed)
    return collapsed.upper()
+
+
def matchAt(pattern, s, offset):
    """ Search for a regex in string s, starting at offset.

    The search runs on s[offset:], so '^' in the pattern anchors at offset.
    Returns the index (relative to the start of s) where the match begins,
    or None if there is no match.
    """
    matched = re.search(pattern, s[offset:])
    if matched is None:
        return None
    # Use the match position itself; looking the matched text up with
    # str.index() can land on an earlier, unrelated occurrence.
    return offset + matched.start()
+
+
def detabLine(text):
    """ Expand every tab in text to spaces using a 4-column tab stop.

    Each tab is replaced by one to four spaces, whichever brings the
    following character to the next multiple of four columns. Text without
    tabs is returned unchanged.
    """
    if "\t" not in text:
        return text
    tab_stop = 4
    segments = text.split("\t")
    expanded = []
    column = 0
    # Every segment except the last was followed by a tab in the input.
    for segment in segments[:-1]:
        column += len(segment)
        padding = tab_stop - (column % tab_stop)
        expanded.append(segment)
        expanded.append(" " * padding)
        column += padding
    expanded.append(segments[-1])
    return "".join(expanded)
+
+
class Block(object):
    """ One node of the parsed document; the same class is used for
    block-level containers and for inline elements."""

    def __init__(self, t="", c="", destination="", label="", start_line="", start_column="", title=""):
        """ Create an open node of type t with no parent and no children.

        end_line starts out equal to start_line.
        """
        # Assignment order is kept as-is: ASTtoJSON serializes __dict__,
        # so it determines the order of the keys in the JSON output.
        self.t = t
        self.c = c
        self.destination = destination
        self.label = label
        self.isOpen = True
        self.last_line_blank = False
        self.start_line = start_line
        self.start_column = start_column
        self.end_line = start_line
        self.children = []
        self.parent = None
        self.string_content = ""
        self.strings = []
        self.inline_content = []
        self.list_data = {}
        self.title = title
        self.info = ""
        self.tight = False

    @staticmethod
    def makeBlock(tag, start_line, start_column):
        """ Convenience constructor taking type and start position only."""
        return Block(t=tag, start_line=start_line, start_column=start_column)
+
+class InlineParser(object):
+
+ """ INLINE PARSER
+
+ These are methods of an InlineParser class, defined below.
+ An InlineParser keeps track of a subject (a string to be
+ parsed) and a position in that subject.
+
+ If re matches at current position in the subject, advance
+ position in subject and return the match; otherwise return null."""
+
+ def __init__(self):
+ self.subject = ""
+ self.label_nest_level = 0
+ self.pos = 0
+ self.refmap = {}
+
+ def match(self, regexString, reCompileFlags=0):
+ """ If re matches at current position in the subject, advance
+ position in subject and return the match; otherwise return null."""
+ match = re.search(
+ regexString, self.subject[self.pos:], flags=reCompileFlags)
+ if match:
+ self.pos += match.end(0)
+ return match.group()
+ else:
+ return None
+
+ def peek(self):
+ """ Returns the character at the current subject position, or null if
+ there are no more characters."""
+ try:
+ return self.subject[self.pos]
+ except IndexError:
+ return None
+
+ def spnl(self):
+ """ Parse zero or more space characters, including at most one newline."""
+ self.match(r"^ *(?:\n *)?")
+ return 1
+
+ # All of the parsers below try to match something at the current position
+ # in the subject. If they succeed in matching anything, they
+ # push an inline element onto the 'inlines' list. They return the
+ # number of characters parsed (possibly 0).
+
+ def parseBackticks(self, inlines):
+ """ Attempt to parse backticks, adding either a backtick code span or a
+ literal sequence of backticks to the 'inlines' list."""
+ startpos = self.pos
+ ticks = self.match(r"^`+")
+ if not ticks:
+ return 0
+ afterOpenTicks = self.pos
+ foundCode = False
+ match = self.match(r"`+", re.MULTILINE)
+ while (not foundCode) and (not match is None):
+ if (match == ticks):
+ c = self.subject[afterOpenTicks:(self.pos - len(ticks))]
+ c = re.sub(r"[ \n]+", ' ', c)
+ c = c.strip()
+ inlines.append(Block(t="Code", c=c))
+ return (self.pos - startpos)
+ match = self.match(r"`+", re.MULTILINE)
+ inlines.append(Block(t="Str", c=ticks))
+ self.pos = afterOpenTicks
+ return (self.pos - startpos)
+
+ def parseEscaped(self, inlines):
+ """ Parse a backslash-escaped special character, adding either the escaped
+ character, a hard line break (if the backslash is followed by a newline),
+ or a literal backslash to the 'inlines' list."""
+ subj = self.subject
+ pos = self.pos
+ if (subj[pos] == "\\"):
+ if len(subj) > pos + 1 and (subj[pos + 1] == "\n"):
+ inlines.append(Block(t="Hardbreak"))
+ self.pos += 2
+ return 2
+ elif (reEscapable.search(subj[pos + 1:pos + 2])):
+ inlines.append(Block(t="Str", c=subj[pos + 1:pos + 2]))
+ self.pos += 2
+ return 2
+ else:
+ self.pos += 1
+ inlines.append(Block(t="Str", c="\\"))
+ return 1
+ else:
+ return 0
+
+ def parseAutoLink(self, inlines):
+ """ Attempt to parse an autolink (URL or email in pointy brackets)."""
+ m = self.match(
+ "^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>")
+ m2 = self.match(
+ "^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|a [...]
+ if m:
+ # email
+ dest = m[1:-1]
+ inlines.append(
+ Block(t="Link", label=[Block(t="Str", c=dest)], destination="mailto:" + dest))
+ return len(m)
+ elif m2:
+ # link
+ dest2 = m2[1:-1]
+ inlines.append(
+ Block(t="Link", label=[Block(t="Str", c=dest2)], destination=dest2))
+ return len(m2)
+ else:
+ return 0
+
+ def parseHtmlTag(self, inlines):
+ """ Attempt to parse a raw HTML tag."""
+ m = self.match(reHtmlTag)
+ if (m):
+ inlines.append(Block(t="Html", c=m))
+ return len(m)
+ else:
+ return 0
+
+ def scanDelims(self, c):
+ """ Scan a sequence of characters == c, and return information about
+ the number of delimiters and whether they are positioned such that
+ they can open and/or close emphasis or strong emphasis. A utility
+ function for strong/emph parsing."""
+ numdelims = 0
+ first_close_delims = 0
+ char_before = char_after = None
+ startpos = self.pos
+
+ char_before = '\n' if self.pos == 0 else self.subject[self.pos - 1]
+
+ while (self.peek() == c):
+ numdelims += 1
+ self.pos += 1
+
+ a = self.peek()
+ char_after = a if a else "\\n"
+
+ can_open = (numdelims > 0) and (
+ numdelims <= 3) and (not re.match("\s", char_after))
+ can_close = (numdelims > 0) and (
+ numdelims <= 3) and (not re.match("\s", char_before))
+
+ if (c == "_"):
+ can_open = can_open and (
+ not re.match("[a-z0-9]", char_before, re.IGNORECASE))
+ can_close = can_close and (
+ not re.match("[a-z0-9]", char_after, re.IGNORECASE))
+ self.pos = startpos
+ return {
+ "numdelims": numdelims,
+ "can_open": can_open,
+ "can_close": can_close
+ }
+
+ def parseEmphasis(self, inlines):
+ """ Attempt to parse emphasis or strong emphasis in an efficient way,
+ with no backtracking."""
+ startpos = self.pos
+ first_close = 0
+ nxt = self.peek()
+ if ((nxt == "*") or (nxt == "_")):
+ c = nxt
+ else:
+ return 0
+
+ res = self.scanDelims(c)
+ numdelims = res["numdelims"]
+ self.pos += numdelims
+ if startpos > 0:
+ inlines.append(
+ Block(t="Str", c=self.subject[self.pos - numdelims:numdelims + startpos]))
+ else:
+ inlines.append(
+ Block(t="Str", c=self.subject[self.pos - numdelims:numdelims]))
+ delimpos = len(inlines) - 1
+
+ if ((not res["can_open"]) or (numdelims == 0)):
+ return 0
+
+ first_close_delims = 0
+
+ if (numdelims == 1):
+ while (True):
+ res = self.scanDelims(c)
+ if (res["numdelims"] >= 1 and res["can_close"]):
+ self.pos += 1
+ inlines[delimpos].t = "Emph"
+ inlines[delimpos].c = inlines[delimpos + 1:]
+ if len(inlines) > 1:
+ for x in range(delimpos + 1, len(inlines)):
+ inlines.pop(len(inlines) - 1)
+ break
+ else:
+ if (self.parseInline(inlines) == 0):
+ break
+ return (self.pos - startpos)
+ elif (numdelims == 2):
+ while (True):
+ res = self.scanDelims(c)
+ if (res["numdelims"] >= 2 and res["can_close"]):
+ self.pos += 2
+ inlines[delimpos].t = "Strong"
+ inlines[delimpos].c = inlines[delimpos + 1:]
+ if len(inlines) > 1:
+ for x in range(delimpos + 1, len(inlines)):
+ inlines.pop(len(inlines) - 1)
+ break
+ else:
+ if (self.parseInline(inlines) == 0):
+ break
+ return (self.pos - startpos)
+ elif (numdelims == 3):
+ while (True):
+ res = self.scanDelims(c)
+ if (res["numdelims"] >= 1 and res["numdelims"] <= 3 and res["can_close"] and not res["numdelims"] == first_close_delims):
+ if first_close_delims == 1 and numdelims > 2:
+ res["numdelims"] = 2
+ elif first_close_delims == 2:
+ res['numdelims'] = 1
+ elif res['numdelims'] == 3:
+ res['numdelims'] = 1
+ self.pos += res['numdelims']
+
+ if first_close > 0:
+ inlines[
+ delimpos].t = "Strong" if first_close_delims == 1 else "Emph"
+ temp = "Emph" if first_close_delims == 1 else "Strong"
+ inlines[delimpos].c = [Block(t=temp, c=inlines[delimpos + 1:first_close])] + inlines[
+ first_close + 1:] # error on 362?
+ if len(inlines) > 1:
+ for x in range(delimpos + 1, len(inlines)):
+ inlines.pop(len(inlines) - 1)
+ break
+ else:
+ inlines.append(
+ Block(t="Str", c=self.subject[self.pos - res["numdelims"]:self.pos]))
+ first_close = len(inlines) - 1
+ first_close_delims = res["numdelims"]
+ else:
+ if self.parseInline(inlines) == 0:
+ break
+ return (self.pos - startpos)
+ else:
+ return res
+
+ return 0
+
+ def parseLinkTitle(self):
+ """ Attempt to parse link title (sans quotes), returning the string
+ or null if no match."""
+ title = self.match(reLinkTitle)
+ if title:
+ return unescape(title[1:len(title)-1])
+ else:
+ return None
+
+ def parseLinkDestination(self):
+ """ Attempt to parse link destination, returning the string or
+ null if no match."""
+ res = self.match(reLinkDestinationBraces)
+ if not res is None:
+ return unescape(res[1:len(res) - 1])
+ else:
+ res2 = self.match(reLinkDestination)
+ if not res2 is None:
+ return unescape(res2)
+ else:
+ return None
+
+ def parseLinkLabel(self):
+ """ Attempt to parse a link label, returning number of characters parsed."""
+ if not self.peek() == "[":
+ return 0
+ startpos = self.pos
+ nest_level = 0
+ if self.label_nest_level > 0:
+ self.label_nest_level -= 1
+ return 0
+ self.pos += 1
+ c = self.peek()
+ while ((not c == "]") or (nest_level > 0)) and not c is None:
+ if c == "`":
+ self.parseBackticks([])
+ elif c == "<":
+ self.parseAutoLink([]) or self.parseHtmlTag(
+ []) or self.parseString([])
+ elif c == "[":
+ nest_level += 1
+ self.pos += 1
+ elif c == "]":
+ nest_level -= 1
+ self.pos += 1
+ elif c == "\\":
+ self.parseEscaped([])
+ else:
+ self.parseString([])
+ c = self.peek()
+ if c == "]":
+ self.label_nest_level = 0
+ self.pos += 1
+ return self.pos - startpos
+ else:
+ if c is None:
+ self.label_nest_level = nest_level
+ self.pos = startpos
+ return 0
+
+ def parseRawLabel(self, s):
+ """ Parse raw link label, including surrounding [], and return
+ inline contents. (Note: this is not a method of InlineParser.)"""
+ return InlineParser().parse(s[1:-1])
+
+ def parseLink(self, inlines):
+ """ Attempt to parse a link. If successful, add the link to
+ inlines."""
+ startpos = self.pos
+ n = self.parseLinkLabel()
+
+ if n == 0:
+ return 0
+
+ afterlabel = self.pos
+ rawlabel = self.subject[startpos:n+startpos]
+
+ if self.peek() == "(":
+ self.pos += 1
+ if self.spnl():
+ dest = self.parseLinkDestination()
+ if not dest is None and self.spnl():
+ if re.match(r"^\s", self.subject[self.pos - 1]):
+ title = self.parseLinkTitle()
+ else:
+ title = ""
+ if self.spnl() and self.match(r"^\)"):
+ inlines.append(
+ Block(t="Link", destination=dest, title=title, label=self.parseRawLabel(rawlabel)))
+ return self.pos - startpos
+ else:
+ self.pos = startpos
+ return 0
+ else:
+ self.pos = startpos
+ return 0
+ else:
+ self.pos = startpos
+ return 0
+
+ savepos = self.pos
+ self.spnl()
+ beforelabel = self.pos
+ n = self.parseLinkLabel()
+ if n == 2:
+ reflabel = rawlabel
+ elif n > 0:
+ reflabel = self.subject[beforelabel:beforelabel + n]
+ else:
+ self.pos = savepos
+ reflabel = rawlabel
+ if normalizeReference(reflabel) in self.refmap:
+ link = self.refmap[normalizeReference(reflabel)]
+ else:
+ link = None
+ if link:
+ if link.get("title", None):
+ title = link['title']
+ else:
+ title = ""
+ if link.get("destination", None):
+ destination = link['destination']
+ else:
+ destination = ""
+ inlines.append(
+ Block(t="Link", destination=destination, title=title, label=self.parseRawLabel(rawlabel)))
+ return self.pos - startpos
+ else:
+ self.pos = startpos
+ return 0
+ self.pos = startpos
+ return 0
+
+ def parseEntity(self, inlines):
+ """ Attempt to parse an entity, adding to inlines if successful."""
+ m = self.match(
+ r"^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});", re.IGNORECASE)
+ if m:
+ inlines.append(Block(t="Entity", c=m))
+ return len(m)
+ else:
+ return 0
+
+ def parseString(self, inlines):
+ """ Parse a run of ordinary characters, or a single character with
+ a special meaning in markdown, as a plain string, adding to inlines."""
+ m = self.match(reMain, re.MULTILINE)
+ if m:
+ inlines.append(Block(t="Str", c=m))
+ return len(m)
+ else:
+ return 0
+
+ def parseNewline(self, inlines):
+ """ Parse a newline. If it was preceded by two spaces, return a hard
+ line break; otherwise a soft line break."""
+ if (self.peek() == '\n'):
+ self.pos += 1
+ last = inlines[len(inlines) - 1]
+ if last and last.t == "Str" and last.c[-2:] == " ":
+ last.c = re.sub(r' *$', '', last.c)
+ inlines.append(Block(t="Hardbreak"))
+ else:
+ if last and last.t == "Str" and last.c[-1:] == " ":
+ last.c = last.c[0:-1]
+ inlines.append(Block(t="Softbreak"))
+ return 1
+ else:
+ return 0
+
+ def parseImage(self, inlines):
+ """ Attempt to parse an image. If the opening '!' is not followed
+ by a link, add a literal '!' to inlines."""
+ if (self.match("^!")):
+ n = self.parseLink(inlines)
+ if (n == 0):
+ inlines.append(Block(t="Str", c="!"))
+ return 1
+ elif (inlines[len(inlines) - 1] and
+ (inlines[len(inlines) - 1].t == "Link")):
+ inlines[len(inlines) - 1].t = "Image"
+ return n + 1
+ else:
+ raise Exception("Shouldn't happen")
+ else:
+ return 0
+
+ def parseReference(self, s, refmap):
+ """ Attempt to parse a link reference, modifying refmap."""
+ self.subject = s
+ self.pos = 0
+ startpos = self.pos
+
+ matchChars = self.parseLinkLabel()
+ if (matchChars == 0):
+ return 0
+ else:
+ rawlabel = self.subject[:matchChars]
+
+ test = self.peek()
+ if (test == ":"):
+ self.pos += 1
+ else:
+ self.pos = startpos
+ return 0
+ self.spnl()
+
+ dest = self.parseLinkDestination()
+ if (dest is None or len(dest) == 0):
+ self.pos = startpos
+ return 0
+
+ beforetitle = self.pos
+ self.spnl()
+ title = self.parseLinkTitle()
+ if (title is None):
+ title = ""
+ self.pos = beforetitle
+
+ if (self.match(r"^ *(?:\n|$)") is None):
+ self.pos = startpos
+ return 0
+
+ normlabel = normalizeReference(rawlabel)
+ if (not refmap.get(normlabel, None)):
+ refmap[normlabel] = {
+ "destination": dest,
+ "title": title
+ }
+ return (self.pos - startpos)
+
+ def parseInline(self, inlines):
+ """ Parse the next inline element in subject, advancing subject position
+ and adding the result to 'inlines'."""
+ c = self.peek()
+ res = None
+ if (c == '\n'):
+ res = self.parseNewline(inlines)
+ elif (c == "\\"):
+ res = self.parseEscaped(inlines)
+ elif (c == "`"):
+ res = self.parseBackticks(inlines)
+ elif ((c == "*") or (c == "_")):
+ res = self.parseEmphasis(inlines)
+ elif (c == "["):
+ res = self.parseLink(inlines)
+ elif (c == "!"):
+ res = self.parseImage(inlines)
+ elif (c == "<"):
+ res = self.parseAutoLink(inlines) or self.parseHtmlTag(inlines)
+ elif (c == "&"):
+ res = self.parseEntity(inlines)
+ return res or self.parseString(inlines)
+
+ def parseInlines(self, s, refmap={}):
+ """ Parse s as a list of inlines, using refmap to resolve references."""
+ self.subject = s
+ self.pos = 0
+ self.refmap = refmap
+ inlines = []
+ while (self.parseInline(inlines)):
+ pass
+ return inlines
+
+ def parse(self, s, refmap={}):
+ """ Pass through to parseInlines."""
+ return self.parseInlines(s, refmap)
+
+
+class DocParser:
+
+ def __init__(self, subject=None, pos=0):
+ self.doc = Block.makeBlock("Document", 1, 1)
+ self.subject = subject
+ self.pos = pos
+ self.tip = self.doc
+ self.refmap = {}
+ self.inlineParser = InlineParser()
+
+ def acceptsLines(self, block_type):
+ """ Returns true if block type can accept lines of text."""
+ return block_type == "Paragraph" or block_type == "IndentedCode" or block_type == "FencedCode"
+
+ def endsWithBlankLine(self, block):
+ """ Returns true if block ends with a blank line, descending if needed
+ into lists and sublists."""
+ if block.last_line_blank:
+ return True
+ if (block.t == "List" or block.t == "ListItem") and len(block.children) > 0:
+ return self.endsWithBlankLine(block.children[len(block.children) - 1])
+ else:
+ return False
+
+ def breakOutOfLists(self, block, line_number):
+ """ Break out of all containing lists, resetting the tip of the
+ document to the parent of the highest list, and finalizing
+ all the lists. (This is used to implement the "two blank lines
+ break of of all lists" feature.)"""
+ b = block
+ last_list = None
+ while True:
+ if (b.t == "List"):
+ last_list = b
+ b = b.parent
+ if not b:
+ break
+
+ if (last_list):
+ while (not block == last_list):
+ self.finalize(block, line_number)
+ block = block.parent
+ self.finalize(last_list, line_number)
+ self.tip = last_list.parent
+
+ def addLine(self, ln, offset):
+ """ Add a line to the block at the tip. We assume the tip
+ can accept lines -- that check should be done before calling this."""
+ s = ln[offset:]
+ if not self.tip.isOpen:
+ raise Exception(
+ "Attempted to add line (" + ln + ") to closed container.")
+ self.tip.strings.append(s)
+
+ def addChild(self, tag, line_number, offset):
+ """ Add block of type tag as a child of the tip. If the tip can't
+ accept children, close and finalize it and try its parent,
+ and so on til we find a block that can accept children."""
+ while not (self.tip.t == "Document" or self.tip.t == "BlockQuote" or self.tip.t == "ListItem" or (self.tip.t == "List" and tag == "ListItem")):
+ self.finalize(self.tip, line_number)
+ column_number = offset + 1
+ newBlock = Block.makeBlock(tag, line_number, column_number)
+ self.tip.children.append(newBlock)
+ newBlock.parent = self.tip
+ self.tip = newBlock
+ return newBlock
+
+ def listsMatch(self, list_data, item_data):
+ """ Returns true if the two list items are of the same type,
+ with the same delimiter and bullet character. This is used
+ in agglomerating list items into lists."""
+ return (list_data.get("type", None) == item_data.get("type", None) and
+ list_data.get("delimiter", None) == item_data.get("delimiter", None) and
+ list_data.get("bullet_char", None) == item_data.get("bullet_char", None))
+
+ def parseListMarker(self, ln, offset):
+ """ Parse a list marker and return data on the marker (type,
+ start, delimiter, bullet character, padding) or null."""
+ rest = ln[offset:]
+ data = {}
+ blank_item = bool()
+ if re.match(reHrule, rest):
+ return None
+ match = re.search(r'^[*+-]( +|$)', rest)
+ match2 = re.search(r'^(\d+)([.)])( +|$)', rest)
+ if match:
+ spaces_after_marker = len(match.group(1))
+ data['type'] = 'Bullet'
+ data['bullet_char'] = match.group(0)[0]
+ blank_item = match.group(0) == len(rest)
+ elif match2:
+ spaces_after_marker = len(match2.group(3))
+ data['type'] = 'Ordered'
+ data['start'] = int(match2.group(1))
+ data['delimiter'] = match2.group(2)
+ blank_item = match2.group(0) == len(rest)
+ else:
+ return None
+ if spaces_after_marker >= 5 or spaces_after_marker < 1 or blank_item:
+ if match:
+ data['padding'] = len(match.group(0)) - spaces_after_marker + 1
+ elif match2:
+ data['padding'] = len(
+ match2.group(0)) - spaces_after_marker + 1
+ else:
+ if match:
+ data['padding'] = len(match.group(0))
+ elif match2:
+ data['padding'] = len(match2.group(0))
+ return data
+
+ def incorporateLine(self, ln, line_number):
+ """ Analyze a line of text and update the document appropriately.
+ We parse markdown text by calling this on each line of input,
+ then finalizing the document."""
+ all_matched = True
+ offset = 0
+ CODE_INDENT = 4
+ blank = None
+ already_done = False
+
+ container = self.doc
+ oldtip = self.tip
+
+ ln = detabLine(ln)
+
+ while len(container.children) > 0:
+ last_child = container.children[-1]
+ if not last_child.isOpen:
+ break
+ container = last_child
+
+ match = matchAt(r"[^ ]", ln, offset)
+ if match is None:
+ first_nonspace = len(ln)
+ blank = True
+ else:
+ first_nonspace = match
+ blank = False
+ indent = first_nonspace - offset
+ if container.t == "BlockQuote":
+ matched = bool()
+ if len(ln) > first_nonspace and len(ln) > 0:
+ matched = ln[first_nonspace] == ">"
+ matched = indent <= 3 and matched
+ if matched:
+ offset = first_nonspace + 1
+ try:
+ if ln[offset] == " ":
+ offset += 1
+ except IndexError:
+ pass
+ else:
+ all_matched = False
+ elif container.t == "ListItem":
+ if (indent >= container.list_data['marker_offset'] +
+ container.list_data['padding']):
+ offset += container.list_data[
+ 'marker_offset'] + container.list_data['padding']
+ elif blank:
+ offset = first_nonspace
+ else:
+ all_matched = False
+ elif container.t == "IndentedCode":
+ if indent >= CODE_INDENT:
+ offset += CODE_INDENT
+ elif blank:
+ offset = first_nonspace
+ else:
+ all_matched = False
+ elif container.t in ["ATXHeader", "SetextHeader", "HorizontalRule"]:
+ all_matched = False
+ elif container.t == "FencedCode":
+ i = container.fence_offset
+ while i > 0 and len(ln) > offset and ln[offset] == " ":
+ offset += 1
+ i -= 1
+ elif container.t == "HtmlBlock":
+ if blank:
+ all_matched = False
+ elif container.t == "Paragraph":
+ if blank:
+ container.last_line_blank = True
+ all_matched = False
+ if not all_matched:
+ container = container.parent
+ break
+ last_matched_container = container
+
+ def closeUnmatchedBlocks(self, already_done, oldtip):
+ """ This function is used to finalize and close any unmatched
+ blocks. We aren't ready to do this now, because we might
+ have a lazy paragraph continuation, in which case we don't
+ want to close unmatched blocks. So we store this closure for
+ use later, when we have more information."""
+ while not already_done and not oldtip == last_matched_container:
+ self.finalize(oldtip, line_number)
+ oldtip = oldtip.parent
+ return True, oldtip
+
+ if blank and container.last_line_blank:
+ self.breakOutOfLists(container, line_number)
+ while not container.t == "FencedCode" and not container.t == "IndentedCode" and not container.t == "HtmlBlock" and not matchAt(r"^[ #`~*+_=<>0-9-]", ln, offset) is None:
+ match = matchAt("[^ ]", ln, offset)
+ if match is None:
+ first_nonspace = len(ln)
+ blank = True
+ else:
+ first_nonspace = match
+ blank = False
+ ATXmatch = re.search(r"^#{1,6}(?: +|$)", ln[first_nonspace:])
+ FENmatch = re.search(
... 16100 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/commonmark-bkrs.git
More information about the Python-modules-commits
mailing list