[Python-modules-commits] [mistune] 01/04: Import mistune_0.7.2.orig.tar.gz
Tristan Seligmann
mithrandi at moszumanska.debian.org
Wed Apr 6 20:07:26 UTC 2016
This is an automated email from the git hooks/post-receive script.
mithrandi pushed a commit to branch master
in repository mistune.
commit 5eb916e36f8d8a0310a6ba330a11300cc0098170
Author: Tristan Seligmann <mithrandi at debian.org>
Date: Wed Apr 6 22:05:04 2016 +0200
Import mistune_0.7.2.orig.tar.gz
---
CHANGES.rst | 12 ++++++
PKG-INFO | 6 +--
README.rst | 4 +-
mistune.egg-info/PKG-INFO | 6 +--
mistune.py | 49 ++++++++++++++--------
.../fixtures/normal/amps_and_angles_encoding.html | 6 +--
tests/test_cases.py | 7 +---
tests/test_extra.py | 32 ++++++++++++--
8 files changed, 83 insertions(+), 39 deletions(-)
diff --git a/CHANGES.rst b/CHANGES.rst
index 6621357..8dbadb4 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -3,6 +3,18 @@ Changelog
Here is the full history of mistune.
+Version 0.7.2
+~~~~~~~~~~~~~
+
+* Fix `hard_wrap` options on renderer.
+* Fix emphasis regex pattern
+* Fix base64 image link `#80`_.
+* Fix link security per `#87`_.
+
+.. _`#80`: https://github.com/lepture/mistune/issues/80
+.. _`#87`: https://github.com/lepture/mistune/issues/87
+
+
Version 0.7.1
~~~~~~~~~~~~~
diff --git a/PKG-INFO b/PKG-INFO
index 21bf5bc..3ded548 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: mistune
-Version: 0.7.1
+Version: 0.7.2
Summary: The fastest markdown parser in pure Python
Home-page: https://github.com/lepture/mistune
Author: Hsiaoming Yang
@@ -119,7 +119,7 @@ Description: Mistune
import mistune
from pygments import highlight
from pygments.lexers import get_lexer_by_name
- from pygments.formatters import HtmlFormatter
+ from pygments.formatters import html
class HighlightRenderer(mistune.Renderer):
def block_code(self, code, lang):
@@ -127,7 +127,7 @@ Description: Mistune
return '\n<pre><code>%s</code></pre>\n' % \
mistune.escape(code)
lexer = get_lexer_by_name(lang, stripall=True)
- formatter = HtmlFormatter()
+ formatter = html.HtmlFormatter()
return highlight(code, lexer, formatter)
renderer = HighlightRenderer()
diff --git a/README.rst b/README.rst
index 04664bc..d4017d4 100644
--- a/README.rst
+++ b/README.rst
@@ -111,7 +111,7 @@ Here is an example of code highlighting:
import mistune
from pygments import highlight
from pygments.lexers import get_lexer_by_name
- from pygments.formatters import HtmlFormatter
+ from pygments.formatters import html
class HighlightRenderer(mistune.Renderer):
def block_code(self, code, lang):
@@ -119,7 +119,7 @@ Here is an example of code highlighting:
return '\n<pre><code>%s</code></pre>\n' % \
mistune.escape(code)
lexer = get_lexer_by_name(lang, stripall=True)
- formatter = HtmlFormatter()
+ formatter = html.HtmlFormatter()
return highlight(code, lexer, formatter)
renderer = HighlightRenderer()
diff --git a/mistune.egg-info/PKG-INFO b/mistune.egg-info/PKG-INFO
index 21bf5bc..3ded548 100644
--- a/mistune.egg-info/PKG-INFO
+++ b/mistune.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: mistune
-Version: 0.7.1
+Version: 0.7.2
Summary: The fastest markdown parser in pure Python
Home-page: https://github.com/lepture/mistune
Author: Hsiaoming Yang
@@ -119,7 +119,7 @@ Description: Mistune
import mistune
from pygments import highlight
from pygments.lexers import get_lexer_by_name
- from pygments.formatters import HtmlFormatter
+ from pygments.formatters import html
class HighlightRenderer(mistune.Renderer):
def block_code(self, code, lang):
@@ -127,7 +127,7 @@ Description: Mistune
return '\n<pre><code>%s</code></pre>\n' % \
mistune.escape(code)
lexer = get_lexer_by_name(lang, stripall=True)
- formatter = HtmlFormatter()
+ formatter = html.HtmlFormatter()
return highlight(code, lexer, formatter)
renderer = HighlightRenderer()
diff --git a/mistune.py b/mistune.py
index a8e177d..1e5f963 100644
--- a/mistune.py
+++ b/mistune.py
@@ -11,7 +11,7 @@
import re
import inspect
-__version__ = '0.7.1'
+__version__ = '0.7.2'
__author__ = 'Hsiaoming Yang <me at lepture.com>'
__all__ = [
'BlockGrammar', 'BlockLexer',
@@ -22,10 +22,11 @@ __all__ = [
_key_pattern = re.compile(r'\s+')
+_nonalpha_pattern = re.compile(r'\W')
_escape_pattern = re.compile(r'&(?!#?\w+;)')
_newline_pattern = re.compile(r'\r\n|\r')
_block_quote_leading_pattern = re.compile(r'^ *> ?', flags=re.M)
-_block_code_leadning_pattern = re.compile(r'^ {4}', re.M)
+_block_code_leading_pattern = re.compile(r'^ {4}', re.M)
_inline_tags = [
'a', 'em', 'strong', 'small', 's', 'cite', 'q', 'dfn', 'abbr', 'data',
'time', 'code', 'var', 'samp', 'kbd', 'sub', 'sup', 'i', 'b', 'u', 'mark',
@@ -36,6 +37,7 @@ _pre_tags = ['pre', 'script', 'style']
_valid_end = r'(?!:/|[^\w\s@]*@)\b'
_valid_attr = r'''"[^"]*"|'[^']*'|[^'">]'''
_block_tag = r'(?!(?:%s)\b)\w+%s' % ('|'.join(_inline_tags), _valid_end)
+_scheme_blacklist = ('javascript', 'data', 'vbscript')
def _pure_pattern(regex):
@@ -70,6 +72,19 @@ def escape(text, quote=False, smart_amp=True):
return text
+def escape_link(url, **kwargs):
+ """Remove dangerous URL schemes like javascript: and escape afterwards."""
+ if ':' in url:
+ scheme, _ = url.split(':', 1)
+ scheme = _nonalpha_pattern.sub('', scheme)
+ # whitelist would be better but mistune's use case is too general
+ if scheme.lower() in _scheme_blacklist:
+ return ''
+ # escape &entities; to &amp;entities;
+ kwargs['smart_amp'] = False
+ return escape(url, **kwargs)
+
+
def preprocessing(text, tab=4):
text = _newline_pattern.sub('\n', text)
text = text.replace('\t', ' ' * tab)
@@ -224,7 +239,7 @@ class BlockLexer(object):
def parse_block_code(self, m):
# clean leading whitespace
- code = _block_code_leadning_pattern.sub('', m.group(0))
+ code = _block_code_leading_pattern.sub('', m.group(0))
self.tokens.append({
'type': 'code',
'lang': None,
@@ -285,7 +300,7 @@ class BlockLexer(object):
pattern = re.compile(r'^ {1,%d}' % space, flags=re.M)
item = pattern.sub('', item)
- # determin whether item is loose or not
+ # determine whether item is loose or not
loose = _next
if not loose and re.search(r'\n\n(?!\s*$)', item):
loose = True
@@ -459,9 +474,9 @@ class InlineGrammar(object):
r'^\*{2}([\s\S]+?)\*{2}(?!\*)' # **word**
)
emphasis = re.compile(
- r'^\b_((?:__|[\s\S])+?)_\b' # _word_
+ r'^\b_((?:__|[^_])+?)_\b' # _word_
r'|'
- r'^\*((?:\*\*|[\s\S])+?)\*(?!\*)' # *word*
+ r'^\*((?:\*\*|[^\*])+?)\*(?!\*)' # *word*
)
code = re.compile(r'^(`+)\s*([\s\S]*?[^`])\s*\1(?!`)') # `code`
linebreak = re.compile(r'^ {2,}\n(?!\s*$)')
@@ -504,12 +519,14 @@ class InlineLexer(object):
if not rules:
rules = self.grammar_class()
+ kwargs.update(self.renderer.options)
+ if kwargs.get('hard_wrap'):
+ rules.hard_wrap()
+
self.rules = rules
self._in_link = False
self._in_footnote = False
-
- kwargs.update(self.renderer.options)
self._parse_inline_html = kwargs.get('parse_inline_html')
def __call__(self, text, rules=None):
@@ -836,8 +853,7 @@ class Renderer(object):
:param title: title content for `title` attribute.
:param text: text content for description.
"""
- if link.startswith('javascript:'):
- link = ''
+ link = escape_link(link, quote=True)
if not title:
return '<a href="%s">%s</a>' % (link, text)
title = escape(title, quote=True)
@@ -850,8 +866,7 @@ class Renderer(object):
:param title: title text of the image.
:param text: alt text of the image.
"""
- if src.startswith('javascript:'):
- src = ''
+ src = escape_link(src, quote=True)
text = escape(text, quote=True)
if title:
title = escape(title, quote=True)
@@ -923,6 +938,8 @@ class Markdown(object):
def __init__(self, renderer=None, inline=None, block=None, **kwargs):
if not renderer:
renderer = Renderer(**kwargs)
+ else:
+ kwargs.update(renderer.options)
self.renderer = renderer
@@ -934,13 +951,9 @@ class Markdown(object):
if inline:
self.inline = inline
else:
- rules = InlineGrammar()
- if kwargs.get('hard_wrap'):
- rules.hard_wrap()
- self.inline = InlineLexer(renderer, rules=rules)
+ self.inline = InlineLexer(renderer, **kwargs)
self.block = block or BlockLexer(BlockGrammar())
- self.options = kwargs
self.footnotes = []
self.tokens = []
@@ -1134,7 +1147,7 @@ def markdown(text, escape=True, **kwargs):
:param text: markdown formatted text content.
:param escape: if set to False, all html tags will not be escaped.
:param use_xhtml: output with xhtml tags.
- :param hard_wrap: if set to True, it will has GFM line breaks feature.
+ :param hard_wrap: if set to True, it will use the GFM line breaks feature.
:param parse_block_html: parse text only in block level html.
:param parse_inline_html: parse text only in inline level html.
"""
diff --git a/tests/fixtures/normal/amps_and_angles_encoding.html b/tests/fixtures/normal/amps_and_angles_encoding.html
index 138f4d5..483f8ff 100644
--- a/tests/fixtures/normal/amps_and_angles_encoding.html
+++ b/tests/fixtures/normal/amps_and_angles_encoding.html
@@ -8,10 +8,10 @@
<p>6 &gt; 5.</p>
-<p>Here's a <a href="http://example.com/?foo=1&bar=2">link</a> with an ampersand in the URL.</p>
+<p>Here's a <a href="http://example.com/?foo=1&amp;bar=2">link</a> with an ampersand in the URL.</p>
<p>Here's a link with an amersand in the link text: <a href="http://att.com/" title="AT&amp;T">AT&amp;T</a>.</p>
-<p>Here's an inline <a href="/script?foo=1&bar=2">link</a>.</p>
+<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>
-<p>Here's an inline <a href="/script?foo=1&bar=2">link</a>.</p>
+<p>Here's an inline <a href="/script?foo=1&amp;bar=2">link</a>.</p>
diff --git a/tests/test_cases.py b/tests/test_cases.py
index 6daa32a..7e68d2e 100644
--- a/tests/test_cases.py
+++ b/tests/test_cases.py
@@ -5,12 +5,7 @@ import mistune
root = os.path.dirname(__file__)
known = []
-
-rules = [
- 'table', 'fenced_code', 'footnotes',
- 'autolink', 'strikethrough',
-]
-m = mistune.Markdown(rules=rules)
+m = mistune.Markdown()
def render(folder, name):
diff --git a/tests/test_extra.py b/tests/test_extra.py
index 76cf716..7318444 100644
--- a/tests/test_extra.py
+++ b/tests/test_extra.py
@@ -18,10 +18,27 @@ def test_linebreak():
def test_safe_links():
- ret = mistune.markdown('javascript ![foo](<javascript:alert>) alert')
- assert 'src=""' in ret
- ret = mistune.markdown('javascript [foo](<javascript:alert>) alert')
- assert 'href=""' in ret
+ attack_vectors = (
+ # "standard" javascript pseudo protocol
+ ('javascript:alert`1`', ''),
+ # bypass attempt
+ ('jAvAsCrIpT:alert`1`', ''),
+ # javascript pseudo protocol with entities
+ ('javascript&colon;alert`1`', 'javascript&amp;colon;alert`1`'),
+ # javascript pseudo protocol with prefix (dangerous in Chrome)
+ ('\x1Ajavascript:alert`1`', ''),
+ # data-URI (dangerous in Firefox)
+ ('data:text/html,<script>alert`1`</script>', ''),
+ # vbscript-URI (dangerous in Internet Explorer)
+ ('vbscript:msgbox', ''),
+ # breaking out of the attribute
+ ('"<>', '&quot;&lt;&gt;'),
+ )
+ for vector, expected in attack_vectors:
+ # image
+ assert 'src="%s"' % expected in mistune.markdown('![atk](%s)' % vector)
+ # link
+ assert 'href="%s"' % expected in mistune.markdown('[atk](%s)' % vector)
def test_skip_style():
@@ -89,3 +106,10 @@ def test_not_escape_block_tags():
def test_not_escape_inline_tags():
text = '<a name="top"></a>'
assert text in mistune.markdown(text, escape=False)
+
+
+def test_hard_wrap_renderer():
+ text = 'foo\nnewline'
+ renderer = mistune.Renderer(hard_wrap=True)
+ func = mistune.Markdown(renderer=renderer)
+ assert '<br>' in func(text)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/mistune.git
More information about the Python-modules-commits
mailing list