[Python-modules-commits] [mistune] 01/04: Import mistune_0.7.2.orig.tar.gz

Wed Apr 6 20:07:26 UTC 2016

This is an automated email from the git hooks/post-receive script.

mithrandi pushed a commit to branch master
in repository mistune.

commit 5eb916e36f8d8a0310a6ba330a11300cc0098170
Author: Tristan Seligmann <mithrandi at debian.org>
Date:   Wed Apr 6 22:05:04 2016 +0200

    Import mistune_0.7.2.orig.tar.gz
---
 CHANGES.rst                                        | 12 ++++++
 PKG-INFO                                           |  6 +--
 README.rst                                         |  4 +-
 mistune.egg-info/PKG-INFO                          |  6 +--
 mistune.py                                         | 49 ++++++++++++++--------
 .../fixtures/normal/amps_and_angles_encoding.html  |  6 +--
 tests/test_cases.py                                |  7 +---
 tests/test_extra.py                                | 32 ++++++++++++--
 8 files changed, 83 insertions(+), 39 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 6621357..8dbadb4 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -3,6 +3,18 @@ Changelog
 
 Here is the full history of mistune.
 
+Version 0.7.2
+~~~~~~~~~~~~~
+
+* Fix `hard_wrap` options on renderer.
+* Fix emphasis regex pattern
+* Fix base64 image link `#80`_.
+* Fix link security per `#87`_.
+
+.. _`#80`: https://github.com/lepture/mistune/issues/80
+.. _`#87`: https://github.com/lepture/mistune/issues/87
+
+
 Version 0.7.1
 ~~~~~~~~~~~~~
 
diff --git a/PKG-INFO b/PKG-INFO
index 21bf5bc..3ded548 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: mistune
-Version: 0.7.1
+Version: 0.7.2
 Summary: The fastest markdown parser in pure Python
 Home-page: https://github.com/lepture/mistune
 Author: Hsiaoming Yang
@@ -119,7 +119,7 @@ Description: Mistune
             import mistune
             from pygments import highlight
             from pygments.lexers import get_lexer_by_name
-            from pygments.formatters import HtmlFormatter
+            from pygments.formatters import html
         
             class HighlightRenderer(mistune.Renderer):
                 def block_code(self, code, lang):
@@ -127,7 +127,7 @@ Description: Mistune
                         return '\n<pre><code>%s</code></pre>\n' % \
                             mistune.escape(code)
                     lexer = get_lexer_by_name(lang, stripall=True)
-                    formatter = HtmlFormatter()
+                    formatter = html.HtmlFormatter()
                     return highlight(code, lexer, formatter)
         
             renderer = HighlightRenderer()
diff --git a/README.rst b/README.rst
index 04664bc..d4017d4 100644
--- a/README.rst
+++ b/README.rst
@@ -111,7 +111,7 @@ Here is an example of code highlighting:
     import mistune
     from pygments import highlight
     from pygments.lexers import get_lexer_by_name
-    from pygments.formatters import HtmlFormatter
+    from pygments.formatters import html
 
     class HighlightRenderer(mistune.Renderer):
         def block_code(self, code, lang):
@@ -119,7 +119,7 @@ Here is an example of code highlighting:
                 return '\n<pre><code>%s</code></pre>\n' % \
                     mistune.escape(code)
             lexer = get_lexer_by_name(lang, stripall=True)
-            formatter = HtmlFormatter()
+            formatter = html.HtmlFormatter()
             return highlight(code, lexer, formatter)
 
     renderer = HighlightRenderer()
diff --git a/mistune.egg-info/PKG-INFO b/mistune.egg-info/PKG-INFO
index 21bf5bc..3ded548 100644
--- a/mistune.egg-info/PKG-INFO
+++ b/mistune.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: mistune
-Version: 0.7.1
+Version: 0.7.2
 Summary: The fastest markdown parser in pure Python
 Home-page: https://github.com/lepture/mistune
 Author: Hsiaoming Yang
@@ -119,7 +119,7 @@ Description: Mistune
             import mistune
             from pygments import highlight
             from pygments.lexers import get_lexer_by_name
-            from pygments.formatters import HtmlFormatter
+            from pygments.formatters import html
         
             class HighlightRenderer(mistune.Renderer):
                 def block_code(self, code, lang):
@@ -127,7 +127,7 @@ Description: Mistune
                         return '\n<pre><code>%s</code></pre>\n' % \
                             mistune.escape(code)
                     lexer = get_lexer_by_name(lang, stripall=True)
-                    formatter = HtmlFormatter()
+                    formatter = html.HtmlFormatter()
                     return highlight(code, lexer, formatter)
         
             renderer = HighlightRenderer()
diff --git a/mistune.py b/mistune.py
index a8e177d..1e5f963 100644
--- a/mistune.py
+++ b/mistune.py
@@ -11,7 +11,7 @@
 import re
 import inspect
 
-__version__ = '0.7.1'
+__version__ = '0.7.2'
 __author__ = 'Hsiaoming Yang <me at lepture.com>'
 __all__ = [
     'BlockGrammar', 'BlockLexer',
@@ -22,10 +22,11 @@ __all__ = [
 
 
 _key_pattern = re.compile(r'\s+')
+_nonalpha_pattern = re.compile(r'\W')
 _escape_pattern = re.compile(r'&(?!#?\w+;)')
 _newline_pattern = re.compile(r'\r\n|\r')
 _block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M)
-_block_code_leadning_pattern = re.compile(r'^ {4}', re.M)
+_block_code_leading_pattern = re.compile(r'^ {4}', re.M)
 _inline_tags = [
     'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
     'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark',
@@ -36,6 +37,7 @@ _pre_tags = ['pre', 'script', 'style']
 _valid_end = r'(?!:/|[^\w\s@]*@)\b'
 _valid_attr = r'''"[^"]*"|'[^']*'|[^'">]'''
 _block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end)
+_scheme_blacklist = ('javascript', 'data', 'vbscript')
 
 
 def _pure_pattern(regex):
@@ -70,6 +72,19 @@ def escape(text, quote=False, smart_amp=True):
     return text
 
 
+def escape_link(url, **kwargs):
+    """Remove dangerous URL schemes like javascript: and escape afterwards."""
+    if ':' in url:
+        scheme, _ = url.split(':', 1)
+        scheme = _nonalpha_pattern.sub('', scheme)
+        # whitelist would be better but mistune's use case is too general
+        if scheme.lower() in _scheme_blacklist:
+            return ''
+    # escape &entities; to &amp;entities;
+    kwargs['smart_amp'] = False
+    return escape(url, **kwargs)
+
+
 def preprocessing(text, tab=4):
     text = _newline_pattern.sub('\n', text)
     text = text.replace('\t', ' ' * tab)
@@ -224,7 +239,7 @@ class BlockLexer(object):
 
     def parse_block_code(self, m):
         # clean leading whitespace
-        code = _block_code_leadning_pattern.sub('', m.group(0))
+        code = _block_code_leading_pattern.sub('', m.group(0))
         self.tokens.append({
             'type': 'code',
             'lang': None,
@@ -285,7 +300,7 @@ class BlockLexer(object):
                 pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
                 item = pattern.sub('', item)
 
-            # determin whether item is loose or not
+            # determine whether item is loose or not
             loose = _next
             if not loose and re.search(r'\n\n(?!\s*$)', item):
                 loose = True
@@ -459,9 +474,9 @@ class InlineGrammar(object):
         r'^\*{2}([\s\S]+?)\*{2}(?!\*)'  # **word**
     )
     emphasis = re.compile(
-        r'^\b_((?:__|[\s\S])+?)_\b'  # _word_
+        r'^\b_((?:__|[^_])+?)_\b'  # _word_
         r'|'
-        r'^\*((?:\*\*|[\s\S])+?)\*(?!\*)'  # *word*
+        r'^\*((?:\*\*|[^\*])+?)\*(?!\*)'  # *word*
     )
     code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)')  # `code`
     linebreak = re.compile(r'^ {2,}\n(?!\s*$)')
@@ -504,12 +519,14 @@ class InlineLexer(object):
         if not rules:
             rules = self.grammar_class()
 
+        kwargs.update(self.renderer.options)
+        if kwargs.get('hard_wrap'):
+            rules.hard_wrap()
+
         self.rules = rules
 
         self._in_link = False
         self._in_footnote = False
-
-        kwargs.update(self.renderer.options)
         self._parse_inline_html = kwargs.get('parse_inline_html')
 
     def __call__(self, text, rules=None):
@@ -836,8 +853,7 @@ class Renderer(object):
         :param title: title content for `title` attribute.
         :param text: text content for description.
         """
-        if link.startswith('javascript:'):
-            link = ''
+        link = escape_link(link, quote=True)
         if not title:
             return '<a href="%s">%s</a>' % (link, text)
         title = escape(title, quote=True)
@@ -850,8 +866,7 @@ class Renderer(object):
         :param title: title text of the image.
         :param text: alt text of the image.
         """
-        if src.startswith('javascript:'):
-            src = ''
+        src = escape_link(src, quote=True)
         text = escape(text, quote=True)
         if title:
             title = escape(title, quote=True)
@@ -923,6 +938,8 @@ class Markdown(object):
     def __init__(self, renderer=None, inline=None, block=None, **kwargs):
         if not renderer:
             renderer = Renderer(**kwargs)
+        else:
+            kwargs.update(renderer.options)
 
         self.renderer = renderer
 
@@ -934,13 +951,9 @@ class Markdown(object):
         if inline:
             self.inline = inline
         else:
-            rules = InlineGrammar()
-            if kwargs.get('hard_wrap'):
-                rules.hard_wrap()
-            self.inline = InlineLexer(renderer, rules=rules)
+            self.inline = InlineLexer(renderer, **kwargs)
 
         self.block = block or BlockLexer(BlockGrammar())
-        self.options = kwargs
         self.footnotes = []
         self.tokens = []
 
@@ -1134,7 +1147,7 @@ def markdown(text, escape=True, **kwargs):
     :param text: markdown formatted text content.
     :param escape: if set to False, all html tags will not be escaped.
     :param use_xhtml: output with xhtml tags.
-    :param hard_wrap: if set to True, it will has GFM line breaks feature.
+    :param hard_wrap: if set to True, it will use the GFM line breaks feature.
     :param parse_block_html: parse text only in block level html.
     :param parse_inline_html: parse text only in inline level html.
     """
diff --git a/tests/fixtures/normal/amps_and_angles_encoding.html b/tests/fixtures/normal/amps_and_angles_encoding.html
index 138f4d5..483f8ff 100644
--- a/tests/fixtures/normal/amps_and_angles_encoding.html
+++ b/tests/fixtures/normal/amps_and_angles_encoding.html
@@ -8,10 +8,10 @@
 
 <p>6 &gt; 5.</p>
 
-<p>Here's a <a href="http://example.com/?foo=1&bar=2">link</a> with an ampersand in the URL.</p>
+<p>Here's a <a href="http://example.com/?foo=1&amp;bar=2">link</a> with an ampersand in the URL.</p>
 
 <p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&amp;T">AT&amp;T</a>.</p>
 
-<p>Here's an inline <a href="/script?foo=1&bar=2">link</a>.</p>
+<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>
 
-<p>Here's an inline <a href="/script?foo=1&bar=2">link</a>.</p>
+<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>
diff --git a/tests/test_cases.py b/tests/test_cases.py
index 6daa32a..7e68d2e 100644
--- a/tests/test_cases.py
+++ b/tests/test_cases.py
@@ -5,12 +5,7 @@ import mistune
 root = os.path.dirname(__file__)
 
 known = []
-
-rules = [
-    'table', 'fenced_code', 'footnotes',
-    'autolink', 'strikethrough',
-]
-m = mistune.Markdown(rules=rules)
+m = mistune.Markdown()
 
 
 def render(folder, name):
diff --git a/tests/test_extra.py b/tests/test_extra.py
index 76cf716..7318444 100644
--- a/tests/test_extra.py
+++ b/tests/test_extra.py
@@ -18,10 +18,27 @@ def test_linebreak():
 
 
 def test_safe_links():
-    ret = mistune.markdown('javascript ![foo](<javascript:alert>) alert')
-    assert 'src=""' in ret
-    ret = mistune.markdown('javascript [foo](<javascript:alert>) alert')
-    assert 'href=""' in ret
+    attack_vectors = (
+        # "standard" javascript pseudo protocol
+        ('javascript:alert`1`', ''),
+        # bypass attempt
+        ('jAvAsCrIpT:alert`1`', ''),
+        # javascript pseudo protocol with entities
+        ('javascript&colon;alert`1`', 'javascript&amp;colon;alert`1`'),
+        # javascript pseudo protocol with prefix (dangerous in Chrome)
+        ('\x1Ajavascript:alert`1`', ''),
+        # data-URI (dangerous in Firefox)
+        ('data:text/html,<script>alert`1`</script>', ''),
+        # vbscript-URI (dangerous in Internet Explorer)
+        ('vbscript:msgbox', ''),
+        # breaking out of the attribute
+        ('"<>', '&quot;&lt;&gt;'),
+    )
+    for vector, expected in attack_vectors:
+        # image
+        assert 'src="%s"' % expected in mistune.markdown('![atk](%s)' % vector)
+        # link
+        assert 'href="%s"' % expected in mistune.markdown('[atk](%s)' % vector)
 
 
 def test_skip_style():
@@ -89,3 +106,10 @@ def test_not_escape_block_tags():
 def test_not_escape_inline_tags():
     text = '<a name="top"></a>'
     assert text in mistune.markdown(text, escape=False)
+
+
+def test_hard_wrap_renderer():
+    text = 'foo\nnewline'
+    renderer = mistune.Renderer(hard_wrap=True)
+    func = mistune.Markdown(renderer=renderer)
+    assert '<br>' in func(text)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/mistune.git