[Python-modules-commits] [python-textile] 01/01: New upstream version 3.0.0
Dmitry Shachnev
mitya57 at moszumanska.debian.org
Sat Nov 25 11:41:38 UTC 2017
This is an automated email from the git hooks/post-receive script.
mitya57 pushed a commit to branch upstream
in repository python-textile.
commit 5dd17ed9c0389da71c3b80298ea4efd2de0ae4e3
Author: Dmitry Shachnev <mitya57 at gmail.com>
Date: Sat Nov 25 14:37:30 2017 +0300
New upstream version 3.0.0
---
.travis.yml | 7 +--
CHANGELOG.textile | 26 ++++++++
README.textile | 2 +-
requirements.txt | 5 +-
setup.py | 9 ++-
tests/fixtures/README.txt | 2 +-
tests/test_block.py | 27 +++++++++
tests/test_github_issues.py | 132 ++++++++++++++++++++++++++++++++++++++++
tests/test_glyphs.py | 4 --
tests/test_textile.py | 28 ++++-----
tests/test_urls.py | 8 +++
tests/test_values.py | 2 +-
textile/__init__.py | 9 ---
textile/core.py | 143 +++++++++++++++++++++++++++++---------------
textile/objects/block.py | 6 +-
textile/objects/table.py | 16 ++---
textile/regex_strings.py | 1 -
textile/tools/sanitizer.py | 19 ++----
textile/utils.py | 33 ++++------
textile/version.py | 2 +-
20 files changed, 344 insertions(+), 137 deletions(-)
diff --git a/.travis.yml b/.travis.yml
index e01eb38..3602a98 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,21 +3,18 @@ env:
- REQUIREMENTS=true
- REQUIREMENTS=false
python:
- - "2.6"
- "2.7"
- - "3.2"
- "3.3"
- "3.4"
- "3.5"
- "3.6"
- - "pypy"
+ - "pypy-5.4"
# command to install dependencies
install:
- if [[ $REQUIREMENTS == true ]] ; then pip install -r requirements.txt ; fi
- - if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]] ; then pip install coverage==3.7.1; fi
- pip install coveralls pytest pytest-cov coverage codecov
- pip install -e .
- - if [[ ! $TRAVIS_PYTHON_VERSION == 'pypy' ]] ; then pip install regex; fi
+ - if [[ ! $TRAVIS_PYTHON_VERSION == 'pypy-5.4' ]] ; then pip install regex; fi
# command to run tests
script: py.test
sudo: false
diff --git a/CHANGELOG.textile b/CHANGELOG.textile
index 1150246..c998dd0 100644
--- a/CHANGELOG.textile
+++ b/CHANGELOG.textile
@@ -1,5 +1,31 @@
h1. Textile Changelog
+h2. Version 3.0.0
+* Drop support for Python 2.6 and 3.2.
+* Update to the current version of html5lib
+* Bugfixes:
+** Fix handling of HTML entities in extended pre blocks. ("#55":https://github.com/textile/python-textile/issues/55)
+** Empty definitions in definition lists raised an exception ("#56":https://github.com/textile/python-textile/issues/56)
+** Fix handling of unicode in img attributes ("#58":https://github.com/textile/python-textile/issues/58)
+
+h2. Version 2.3.16
+* Bugfixes:
+** Fix processing of extended code blocks ("#50":https://github.com/textile/python-textile/issues/50)
+** Don't break when links fail to include "http:" ("#51":https://github.com/textile/python-textile/issues/51)
+** Better handling of poorly-formatted tables ("#52":https://github.com/textile/python-textile/issues/52)
+
+h2. Version 2.3.15
+* Bugfix: Don't break on unicode characters in the fragment of a url.
+
+h2. Version 2.3.14
+* Bugfix: Fix textile on Python 2.6 ("#48":https://github.com/textile/python-textile/issues/48)
+
+h2. Version 2.3.13
+* Remove extraneous arguments from textile method. These were originally added long ago to work with django, but markup languages are long gone from django.
+* Bugfix: Don't mangle percent-encoded URLs so much. ("#45":https://github.com/textile/python-textile/issues/45)
+* Bugfix: More fixes for poorly-formatted lists. ("#46":https://github.com/textile/python-textile/issues/46)
+* Bugfix: Improve handling of whitespace in pre-formatted blocks. This now matches php-textile's handling of pre blocks much more closely. ("#47":https://github.com/textile/python-textile/issues/47)
+
h2. Version 2.3.12
* Bugfix: Don't die on pre blocks with unicode characters. ("#43":https://github.com/textile/python-textile/issues/43)
* Bugfix: Fix regressions introduced into the code between 2.2.2 and 2.3.11. (Special thanks to "@adam-iris":https://github.com/adam-iris for providing pull request "#44":https://github.com/textile/python-textile/pull/44)
diff --git a/README.textile b/README.textile
index a363721..5615550 100644
--- a/README.textile
+++ b/README.textile
@@ -39,4 +39,4 @@ bc.. import textile
h3. Notes:
-* Active development supports Python 2.6 or later (including Python 3.2+).
+* Active development supports Python 2.7 or later (including Python 3.3+).
diff --git a/requirements.txt b/requirements.txt
index 5cfb442..a477d4b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
-html5lib==0.999
-Pillow==3.0.0
\ No newline at end of file
+html5lib>=1.0b10
+Pillow==3.0.0
+regex
diff --git a/setup.py b/setup.py
index c12d3e0..e2f49ba 100644
--- a/setup.py
+++ b/setup.py
@@ -55,10 +55,15 @@ setup(
'Programming Language :: Python :: 3.6',
],
keywords='textile,text,html markup',
- install_requires=['six',],
+ install_requires=[
+ 'six',
+ 'html5lib>=0.999999999',
+ ],
extras_require={
':python_version=="2.6"': ['ordereddict>=1.1'],
- 'develop': ['regex', 'pytest', 'pytest-cov'],
+ 'develop': ['pytest', 'pytest-cov'],
+ 'imagesize': ['Pillow>=3.0.0'],
+ 'regex': ['regex'],
},
entry_points={'console_scripts': ['pytextile=textile.__main__:main']},
setup_requires=['pytest-runner'],
diff --git a/tests/fixtures/README.txt b/tests/fixtures/README.txt
index 426c9aa..ba86730 100644
--- a/tests/fixtures/README.txt
+++ b/tests/fixtures/README.txt
@@ -42,5 +42,5 @@
<h3>Notes:</h3>
<ul>
- <li>Active development supports Python 2.6 or later (including Python 3.2+).</li>
+ <li>Active development supports Python 2.7 or later (including Python 3.3+).</li>
</ul>
\ No newline at end of file
diff --git a/tests/test_block.py b/tests/test_block.py
index c69105c..44f3ea2 100644
--- a/tests/test_block.py
+++ b/tests/test_block.py
@@ -69,3 +69,30 @@ def test_blockcode_comment():
t = textile.Textile()
result = t.parse(input)
assert result == expect
+
+def test_extended_pre_block_with_many_newlines():
+ """Extra newlines in an extended pre block should not get cut down to only
+ two."""
+ text = '''pre.. word
+
+another
+
+word
+
+
+yet anothe word'''
+ expect = '''<pre>word
+
+another
+
+word
+
+
+yet anothe word</pre>'''
+ result = textile.textile(text)
+ assert result == expect
+
+ text = 'p. text text\n\n\nh1. Hello\n'
+ expect = '\t<p>text text</p>\n\n\n\t<h1>Hello</h1>'
+ result = textile.textile(text)
+ assert result == expect
diff --git a/tests/test_github_issues.py b/tests/test_github_issues.py
index 012ee5d..27befff 100644
--- a/tests/test_github_issues.py
+++ b/tests/test_github_issues.py
@@ -86,6 +86,11 @@ def test_github_issue_30():
expect = '\t<p><a href="http://lala.com" title="Tëxtíle">Tëxtíle</a></p>'
assert result == expect
+ text ='!http://lala.com/lol.gif(♡ imáges)!'
+ result = textile.textile(text)
+ expect = '\t<p><img alt="♡ imáges" src="http://lala.com/lol.gif" title="♡ imáges" /></p>'
+ assert result == expect
+
def test_github_issue_36():
text = '"Chögyam Trungpa":https://www.google.com/search?q=Chögyam+Trungpa'
result = textile.textile(text)
@@ -130,3 +135,130 @@ def test_github_issue_43():
result = textile.textile(text)
expect = '<pre>smart ‘quotes’ are not smart!</pre>'
assert result == expect
+
+def test_github_issue_45():
+ """Incorrect transform unicode url"""
+ text = '"test":https://myabstractwiki.ru/index.php/%D0%97%D0%B0%D0%B3%D0%BB%D0%B0%D0%B2%D0%BD%D0%B0%D1%8F_%D1%81%D1%82%D1%80%D0%B0%D0%BD%D0%B8%D1%86%D0%B0'
+ result = textile.textile(text)
+ expect = '\t<p><a href="https://myabstractwiki.ru/index.php/%D0%97%D0%B0%D0%B3%D0%BB%D0%B0%D0%B2%D0%BD%D0%B0%D1%8F_%D1%81%D1%82%D1%80%D0%B0%D0%BD%D0%B8%D1%86%D0%B0">test</a></p>'
+ assert result == expect
+
+def test_github_issue_46():
+ """Key error on mal-formed numbered lists. CAUTION: both the input and the
+ ouput are ugly."""
+ text = '# test\n### test\n## test'
+ expect = ('\t<ol>\n\t\t<li>test\n\t\t\t<ol>\n\t\t\t\t<li>test</li>'
+ '\n\t\t\t</ol></li>\n\t\t<ol>\n\t\t\t<li>test</li>'
+ '\n\t\t</ol></li>\n\t\t</ol>')
+ result = textile.textile(text)
+ assert result == expect
+
+def test_github_issue_47():
+ """Incorrect wrap pre-formatted value"""
+ text = '''pre.. word
+
+another
+
+word
+
+yet anothe word'''
+ result = textile.textile(text)
+ expect = '''<pre>word
+
+another
+
+word
+
+yet anothe word</pre>'''
+ assert result == expect
+
+def test_github_issue_49():
+ """Key error on russian hash-route link"""
+ s = '"link":https://ru.vuejs.org/v2/guide/components.html#Входные-параметры'
+ result = textile.textile(s)
+ expect = '\t<p><a href="https://ru.vuejs.org/v2/guide/components.html#Входные-параметры">link</a></p>'
+ assert result == expect
+
+def test_github_issue_50():
+ """Incorrect wrap code with Java generics in pre"""
+ test = ('pre.. public class Tynopet<T extends Framework> {}\n\nfinal '
+ 'List<List<String>> multipleList = new ArrayList<>();')
+ result = textile.textile(test)
+ expect = ('<pre>public class Tynopet&lt;T extends Framework&gt; {}\n\n'
+ 'final List&lt;List&lt;String&gt;&gt; multipleList = new '
+ 'ArrayList&lt;&gt;();</pre>')
+ assert result == expect
+
+def test_github_issue_51():
+ """Link build with $ sign without "http" prefix broken."""
+ test = '"$":www.google.com.br'
+ result = textile.textile(test)
+ expect = '\t<p><a href="www.google.com.br">www.google.com.br</a></p>'
+ assert result == expect
+
+def test_github_issue_52():
+ """Table build without space after aligment raise a AttributeError."""
+ test = '|=.First Header |=. Second Header |'
+ result = textile.textile(test)
+ expect = ('\t<table>\n\t\t<tr>\n\t\t\t<td>=.First Header '
+ '</td>\n\t\t\t<td style="text-align:center;">Second Header </td>'
+ '\n\t\t</tr>\n\t</table>')
+ assert result == expect
+
+def test_github_issue_55():
+ """Incorrect handling of quote entities in extended pre block"""
+ test = ('pre.. this is the first line\n\nbut "quotes" in an extended pre '
+ 'block need to be handled properly.')
+ result = textile.textile(test)
+ expect = ('<pre>this is the first line\n\nbut &quot;quotes&quot; in an '
+ 'extended pre block need to be handled properly.</pre>')
+ assert result == expect
+
+ # supplied input
+ test = ('pre.. import org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;'
+ '\nimport ru.onyma.job.Context;\nimport ru.onyma.job.'
+ 'RescheduleTask;\n\nimport java.util.concurrent.'
+ 'ScheduledExecutorService;\nimport java.util.concurrent.TimeUnit;'
+ '\n\n/**\n* @author ustits\n*/\npublic abstract class '
+ 'MainService<T> extends RescheduleTask implements Context<T> {\n\n'
+ 'private static final Logger log = LoggerFactory.getLogger('
+ 'MainService.class);\nprivate final ScheduledExecutorService '
+ 'scheduler;\n\nprivate boolean isFirstRun = true;\nprivate T '
+ 'configs;\n\npublic MainService(final ScheduledExecutorService '
+ 'scheduler) {\nsuper(scheduler);\nthis.scheduler = scheduler;\n}\n'
+ '\n@Override\npublic void setConfig(final T configs) {\nthis.'
+ 'configs = configs;\nif (isFirstRun) {\nscheduler.schedule(this, '
+ '0, TimeUnit.SECONDS);\nisFirstRun = false;\n}\n}\n\n@Override\n'
+ 'public void stop() {\nsuper.stop();\nscheduler.shutdown();\ntry {'
+ '\nscheduler.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);\n} '
+ 'catch (InterruptedException ie) {\nlog.warn("Unable to wait for '
+ 'syncs termination", ie);\nThread.currentThread().interrupt();\n}'
+ '\n}\n\nprotected final T getConfigs() {\nreturn configs;\n}\n}')
+ result = textile.textile(test)
+ expect = ('<pre>import org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;'
+ '\nimport ru.onyma.job.Context;\nimport ru.onyma.job.'
+ 'RescheduleTask;\n\nimport java.util.concurrent.'
+ 'ScheduledExecutorService;\nimport java.util.concurrent.'
+ 'TimeUnit;\n\n/**\n* @author ustits\n*/\npublic abstract class '
+ 'MainService&lt;T&gt; extends RescheduleTask implements '
+ 'Context&lt;T&gt; {\n\nprivate static final Logger log = '
+ 'LoggerFactory.getLogger(MainService.class);\nprivate final '
+ 'ScheduledExecutorService scheduler;\n\nprivate boolean '
+ 'isFirstRun = true;\nprivate T configs;\n\npublic MainService('
+ 'final ScheduledExecutorService scheduler) {\nsuper(scheduler);'
+ '\nthis.scheduler = scheduler;\n}\n\n@Override\npublic void '
+ 'setConfig(final T configs) {\nthis.configs = configs;\nif ('
+ 'isFirstRun) {\nscheduler.schedule(this, 0, TimeUnit.SECONDS);'
+ '\nisFirstRun = false;\n}\n}\n\n@Override\npublic void stop() {'
+ '\nsuper.stop();\nscheduler.shutdown();\ntry {\nscheduler.'
+ 'awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);\n} catch '
+ '(InterruptedException ie) {\nlog.warn(&quot;Unable to wait '
+ 'for syncs termination&quot;, ie);\nThread.currentThread().'
+ 'interrupt();\n}\n}\n\nprotected final T getConfigs() {\n'
+ 'return configs;\n}\n}</pre>')
+ assert result == expect
+
+def test_issue_56():
+ result = textile.textile("- :=\n-")
+ expect = '<dl>\n</dl>'
+ assert result == expect
diff --git a/tests/test_glyphs.py b/tests/test_glyphs.py
index fcf2636..56b0d27 100644
--- a/tests/test_glyphs.py
+++ b/tests/test_glyphs.py
@@ -26,7 +26,3 @@ def test_glyphs():
result = t.glyphs("<p><cite>Cat's Cradle</cite> by Vonnegut</p>")
expect = '<p><cite>Cat’s Cradle</cite> by Vonnegut</p>'
assert result == expect
-
- result = t.glyphs('test"')
- expect = 'test” '
- assert result == expect
diff --git a/tests/test_textile.py b/tests/test_textile.py
index dd069fb..86a7d85 100644
--- a/tests/test_textile.py
+++ b/tests/test_textile.py
@@ -67,22 +67,18 @@ def test_autolinking():
def test_sanitize():
test = "a paragraph of benign text"
result = "\t<p>a paragraph of benign text</p>"
- try:
- expect = textile.Textile().parse(test, sanitize=True)
- assert result == expect
-
- test = """<p style="width: expression(alert('evil'));">a paragraph of evil text</p>"""
- result = '<p style="">a paragraph of evil text</p>'
- expect = textile.Textile().parse(test, sanitize=True)
- assert result == expect
-
- test = """<p>a paragraph of benign text<br />and more text</p>"""
- result = '<p>a paragraph of benign text<br />\nand more text</p>'
- expect = textile.Textile(html_type='html5').parse(test, sanitize=True)
- assert result == expect
- except Exception as e:
- message = '{0}'.format(e)
- assert "html5lib not available" in message
+ expect = textile.Textile().parse(test, sanitize=True)
+ assert result == expect
+
+ test = """<p style="width: expression(alert('evil'));">a paragraph of evil text</p>"""
+ result = '<p style="">a paragraph of evil text</p>'
+ expect = textile.Textile().parse(test, sanitize=True)
+ assert result == expect
+
+ test = """<p>a paragraph of benign text<br />and more text</p>"""
+ result = '<p>a paragraph of benign text<br />\nand more text</p>'
+ expect = textile.Textile(html_type='html5').parse(test, sanitize=True)
+ assert result == expect
def test_imagesize():
PIL = pytest.importorskip('PIL')
diff --git a/tests/test_urls.py b/tests/test_urls.py
index 0ae78e6..7a9798e 100644
--- a/tests/test_urls.py
+++ b/tests/test_urls.py
@@ -59,3 +59,11 @@ def test_rel_attribute():
result = t.parse('"$":http://domain.tld')
expect = '\t<p><a href="http://domain.tld" rel="nofollow">domain.tld</a></p>'
assert result == expect
+
+def test_quotes_in_link_text():
+ """quotes in link text are tricky."""
+ test = '""this is a quote in link text"":url'
+ t = Textile()
+ result = t.parse(test)
+ expect = '\t<p><a href="url">&#8220;this is a quote in link text&#8221;</a></p>'
+ assert result == expect
diff --git a/tests/test_values.py b/tests/test_values.py
index 68a2dc1..063ed3e 100644
--- a/tests/test_values.py
+++ b/tests/test_values.py
@@ -178,7 +178,7 @@ xhtml_known_values = (
('@monospaced text@, followed by text',
'\t<p><code>monospaced text</code>, followed by text</p>'),
- ('h2. A header\n\n\n\n\n\nsome text', '\t<h2>A header</h2>\n\n\t<p>some text</p>'),
+ ('h2. A header\n\n\n\n\n\nsome text', '\t<h2>A header</h2>\n\n\n\n\n\n\t<p>some text</p>'),
('pre.. foo bar baz\nquux', '<pre>foo bar baz\nquux</pre>'),
diff --git a/textile/__init__.py b/textile/__init__.py
index c019f41..bb7829f 100644
--- a/textile/__init__.py
+++ b/textile/__init__.py
@@ -9,12 +9,3 @@ from .version import VERSION
__all__ = ['textile', 'textile_restricted']
__version__ = VERSION
-
-
-if sys.version_info[:2] == (2, 6):
- warnings.warn(
- "Python 2.6 is no longer supported by the Python core team, please "
- "upgrade your Python. A future version of textile will drop support "
- "for Python 2.6",
- DeprecationWarning
- )
diff --git a/textile/core.py b/textile/core.py
index 695b7e7..7572a46 100644
--- a/textile/core.py
+++ b/textile/core.py
@@ -20,25 +20,20 @@ Additions and fixes Copyright (c) 2006 Alex Shiels http://thresholdstate.com/
import uuid
import six
+from six.moves.urllib_parse import (urlparse, urlsplit, urlunsplit, quote,
+ unquote)
from textile.tools import sanitizer, imagesize
-from textile.regex_strings import (align_re_s, cls_re_s, halign_re_s,
- pnct_re_s, regex_snippets, syms_re_s, table_span_re_s, valign_re_s)
+from textile.regex_strings import (align_re_s, cls_re_s, pnct_re_s,
+ regex_snippets, syms_re_s, table_span_re_s)
from textile.utils import (decode_high, encode_high, encode_html, generate_tag,
has_raw_text, is_rel_url, is_valid_url, list_type, normalize_newlines,
parse_attributes, pba)
from textile.objects import Block, Table
-try:
- from collections import OrderedDict
-except ImportError:
- from ordereddict import OrderedDict
+from collections import OrderedDict
-from six.moves import urllib
-urlparse, urlsplit, urlunsplit, quote, unquote = (urllib.parse.urlparse,
- urllib.parse.urlsplit, urllib.parse.urlunsplit, urllib.parse.quote,
- urllib.parse.unquote)
try:
import regex as re
@@ -277,6 +272,8 @@ class Textile(object):
# a newline, replace it with a new style break tag and a newline.
text = re.sub(r'<br( /)?>(?!\n)', '<br />\n', text)
+ text = text.rstrip('\n')
+
return text
def table(self, text):
@@ -346,7 +343,14 @@ class Textile(object):
# This will only increment the count for list items, not
# definition items
if showitem:
- self.olstarts[tl] = self.olstarts[tl] + 1
+ # Assume properly formatted input
+ try:
+ self.olstarts[tl] = self.olstarts[tl] + 1
+ # if we get here, we've got some poor textile formatting.
+ # add this type of list to olstarts and assume we'll start
+ # it at 1. expect screwy output.
+ except KeyError:
+ self.olstarts[tl] = 1
nm = re.match("^(?P<nextlistitem>[#\*;:]+)(_|[\d]+)?{0}"
"[ .].*".format(cls_re_s), nextline)
@@ -420,15 +424,29 @@ class Textile(object):
tre = '|'.join(self.btag)
else:
tre = '|'.join(self.btag_lite)
- text = text.split('\n\n')
+
+ # split the text by two or more newlines, retaining the newlines in the
+ # split list
+ text = re.split(r'(\n{2,})', text)
+
+ # some blocks, when processed, will ask us to output nothing, if that's
+ # the case, we'd want to drop the whitespace which comes after it.
+ eat_whitespace = False
tag = 'p'
- atts = cite = graf = ext = ''
+ atts = cite = ext = ''
- last_item_is_a_shelf = False
out = []
for line in text:
+ # the line is just whitespace, add it to the output, and move on
+ if not line.strip():
+ if not eat_whitespace:
+ out.append(line)
+ continue
+
+ eat_whitespace = False
+
pattern = (r'^(?P<tag>{0})(?P<atts>{1}{2})\.(?P<ext>\.?)'
r'(?::(?P<cite>\S+))? (?P<content>.*)$'.format(tre,
align_re_s, cls_re_s))
@@ -437,14 +455,15 @@ class Textile(object):
if match:
# if we had a previous extended tag but not this time, close up
# the tag
- if out:
- last_item_is_a_shelf = out[-1] in self.shelf
- if ext and match.group('tag') and last_item_is_a_shelf:
- content = out.pop()
+ if ext and out:
+ # it's out[-2] because the last element in out is the
+ # whitespace that preceded this line
+ content = encode_html(out[-2], quotes=True)
content = generate_tag(block.inner_tag, content,
block.inner_atts)
- out.append(generate_tag(block.outer_tag, content,
- block.outer_atts))
+ content = generate_tag(block.outer_tag, content,
+ block.outer_atts)
+ out[-2] = content
tag, atts, ext, cite, content = match.groups()
block = Block(self, **match.groupdict())
inner_block = generate_tag(block.inner_tag, block.content,
@@ -463,40 +482,64 @@ class Textile(object):
# no tag specified
else:
# if we're inside an extended block, add the text from the
- # previous extension to the front
+ # previous line to the front
if ext and out:
- line = '{0}\n\n{1}'.format(out.pop(), line)
- whitespace = ' \t\n\r\f\v'
- if ext or not line[0] in whitespace:
+ line = '{0}{1}'.format(out.pop(), line)
+ # the logic in the if statement below is a bit confusing in
+ # php-textile. I'm still not sure I understand what the php
+ # code is doing. Something tells me it's a phpsadness. Anyway,
+ # this works, and is much easier to understand: if we're not in
+ # an extension, and the line doesn't begin with a space, treat
+ # it like a block to insert. Lines that begin with a space are
+ # not processed as a block.
+ if not ext and not line[0] == ' ':
block = Block(self, tag, atts, ext, cite, line)
+ # if the block contains html tags, generate_tag would
+ # mangle it, so process as is.
if block.tag == 'p' and not has_raw_text(block.content):
line = block.content
else:
line = generate_tag(block.outer_tag, block.content,
block.outer_atts)
- if block.inner_tag == 'code':
- line = block.content
- if block.outer_tag != 'pre' and not has_raw_text(line):
- line = "\t{0}".format(line)
+ line = "\t{0}".format(line)
else:
- line = self.graf(line)
+ if block.tag == 'pre':
+ line = self.shelve(encode_html(line, quotes=True))
+ else:
+ line = self.graf(line)
line = self.doPBr(line)
line = line.replace('<br>', '<br />')
- if line.strip():
+ # if we're in an extended block, and we haven't specified a new
+ # tag, join this line to the last item of the output
+ if ext and not match:
+ last_item = out.pop()
+ out.append('{0}{1}'.format(last_item, line))
+ elif not block.eat:
+ # or if it's a type of block which indicates we shouldn't drop
+ # it, add it to the output.
out.append(line)
if not ext:
tag = 'p'
atts = ''
cite = ''
- graf = ''
+ # if it's a block we should drop, don't keep the whitespace which
+ # will come after it.
+ if block.eat:
+ eat_whitespace = True
+
+ # at this point, we've gone through all the lines, and if there's still
+ # an extension in effect, we close it here.
if ext and out:
- out.append(generate_tag(block.outer_tag, out.pop(),
- block.outer_atts))
- return '\n\n'.join(out)
+ block.content = out.pop()
+ block.process()
+ final = generate_tag(block.outer_tag, block.content,
+ block.outer_atts)
+ out.append(final)
+ return ''.join(out)
def footnoteRef(self, text):
# somehow php-textile gets away with not capturing the space.
@@ -537,10 +580,6 @@ class Textile(object):
So, for the first pass, we use the glyph_search_initial set of
regexes. For all remaining passes, we use glyph_search
"""
- # fix: hackish
- if text.endswith('"'):
- text = '{0} '.format(text)
-
text = text.rstrip('\n')
result = []
searchlist = self.glyph_search_initial
@@ -689,7 +728,7 @@ class Textile(object):
try:
possibility = possible_start_quotes.pop()
- except IndexError:
+ except IndexError: # pragma: no cover
# If out of possible starting segments we back the
# last one from the linkparts array
linkparts.pop()
@@ -881,7 +920,7 @@ class Textile(object):
text = url
if "://" in text:
text = text.split("://")[1]
- else:
+ elif ":" in text:
text = text.split(":")[1]
text = text.strip()
@@ -942,11 +981,19 @@ class Textile(object):
quote(netloc_parsed['password']))
host = netloc_parsed['host']
port = netloc_parsed['port'] and netloc_parsed['port']
- path = '/'.join( # could be encoded slashes!
- quote(unquote(pce).encode('utf8'), b'')
- for pce in parsed.path.split('/')
- )
- fragment = quote(unquote(parsed.fragment))
+ # the below splits the path portion of the url by slashes, translates
+ # percent-encoded characters back into strings, then re-percent-encodes
+ # what's necessary. Sounds screwy, but the url could include encoded
+ # slashes, and this is a way to clean that up. It branches for PY2/3
+ # because the quote and unquote functions expects different input
+ # types: unicode strings for PY2 and str for PY3.
+ if six.PY2:
+ path_parts = (quote(unquote(pce.encode('utf8')), b'') for pce in
+ parsed.path.split('/'))
+ else:
+ path_parts = (quote(unquote(pce), b'') for pce in
+ parsed.path.split('/'))
+ path = '/'.join(path_parts)
# put it back together
netloc = ''
@@ -958,7 +1005,7 @@ class Textile(object):
netloc = '{0}{1}'.format(netloc, host)
if port:
netloc = '{0}:{1}'.format(netloc, port)
- return urlunsplit((scheme, netloc, path, parsed.query, fragment))
+ return urlunsplit((scheme, netloc, path, parsed.query, parsed.fragment))
def span(self, text):
qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__',
@@ -1137,6 +1184,8 @@ class Textile(object):
# parse the attributes and content
m = re.match(r'^[-]+({0})[ .](.*)$'.format(cls_re_s), line,
flags=re.M | re.S)
+ if not m:
+ continue
atts, content = m.groups()
# cleanup
@@ -1356,7 +1405,7 @@ class Textile(object):
return self.linkIndex
-def textile(text, html_type='xhtml', encoding=None, output=None):
+def textile(text, html_type='xhtml'):
"""
Apply Textile to a block of text.
diff --git a/textile/objects/block.py b/textile/objects/block.py
index 89e6b2e..7002eca 100644
--- a/textile/objects/block.py
+++ b/textile/objects/block.py
@@ -1,10 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
-try:
- from collections import OrderedDict
-except ImportError:
- from ordereddict import OrderedDict
+from collections import OrderedDict
try:
import regex as re
except ImportError:
@@ -49,6 +46,7 @@ class Block(object):
# It will be empty if the regex matched and ate it.
if '' == notedef:
self.content = notedef
+ self.eat = True
fns = re.search(r'fn(?P<fnid>{0}+)'.format(regex_snippets['digit']),
self.tag, flags=re.U)
diff --git a/textile/objects/table.py b/textile/objects/table.py
index f694098..4796afc 100644
--- a/textile/objects/table.py
+++ b/textile/objects/table.py
@@ -38,15 +38,17 @@ class Table(object):
# as a normal center-aligned cell.
if i == 0 and row[:2] == '|=':
captionpattern = (r"^\|\=(?P<capts>{s}{a}{c})\. "
- r"(?P<cap>[^\n]*)(?P<row>.*)".format(**{'s':
- table_span_re_s, 'a': align_re_s, 'c': cls_re_s}))
+ r"(?P<cap>[^\n]*)(?P<row>.*)".format(**{
+ 's': table_span_re_s, 'a': align_re_s,
+ 'c': cls_re_s}))
caption_re = re.compile(captionpattern, re.S)
cmtch = caption_re.match(row)
- caption = Caption(**cmtch.groupdict())
- self.caption = '\n{0}'.format(caption.caption)
- row = cmtch.group('row').lstrip()
- if row == '':
- continue
+ if cmtch:
+ caption = Caption(**cmtch.groupdict())
+ self.caption = '\n{0}'.format(caption.caption)
+ row = cmtch.group('row').lstrip()
+ if row == '':
+ continue
# Colgroup -- A colgroup row will not necessarily end with a |.
# Hence it may include the next row of actual table data.
diff --git a/textile/regex_strings.py b/textile/regex_strings.py
index a152072..f7c6f12 100644
--- a/textile/regex_strings.py
+++ b/textile/regex_strings.py
@@ -19,7 +19,6 @@ try:
'char': r'(?:[^\p{Zs}\v])',
}
except ImportError:
- import re
from sys import maxunicode
upper_re_s = "".join(
[six.unichr(c) for c in six.moves.range(maxunicode) if six.unichr(
diff --git a/textile/tools/sanitizer.py b/textile/tools/sanitizer.py
index 4fc8fb2..3c7209c 100644
--- a/textile/tools/sanitizer.py
+++ b/textile/tools/sanitizer.py
@@ -3,18 +3,9 @@ def sanitize(string):
Ensure that the text does not contain any malicious HTML code which might
break the page.
"""
- try:
- import html5lib
- from html5lib import sanitizer, serializer, treewalkers
- except ImportError:
- raise Exception("html5lib not available")
+ from html5lib import parseFragment, serialize
- p = html5lib.HTMLParser(tokenizer=sanitizer.HTMLSanitizer)
- tree = p.parseFragment(string)
-
- walker = treewalkers.getTreeWalker("etree")
- stream = walker(tree)
-
- s = serializer.htmlserializer.HTMLSerializer(omit_optional_tags=False,
- quote_attr_values=True)
- return s.render(stream)
+ parsed = parseFragment(string)
+ clean = serialize(parsed, sanitize=True, omit_optional_tags=False,
+ quote_attr_values='always')
+ return clean
diff --git a/textile/utils.py b/textile/utils.py
index 00da831..fa21f05 100644
--- a/textile/utils.py
+++ b/textile/utils.py
@@ -10,10 +10,7 @@ from six.moves import urllib, html_parser
urlparse = urllib.parse.urlparse
HTMLParser = html_parser.HTMLParser
-try:
- from collections import OrderedDict
-except ImportError:
- from ordereddict import OrderedDict
+from collections import OrderedDict
from xml.etree import ElementTree
@@ -50,6 +47,8 @@ def generate_tag(tag, content, attributes=None):
content are strings, the attributes argument is a dictionary. As
a convenience, if the content is ' /', a self-closing tag is generated."""
content = six.text_type(content)
+ # In PY2, ElementTree tostringlist only works with bytes, not with
+ # unicode().
enc = 'unicode'
if six.PY2:
enc = 'UTF-8'
@@ -60,21 +59,12 @@ def generate_tag(tag, content, attributes=None):
# adding text by assigning it to element_tag.text. That results in
# non-ascii text being html-entity encoded. Not bad, but not entirely
# matching php-textile either.
- try:
- element_tag = ElementTree.tostringlist(element, encoding=enc,
- method='html')
- element_tag.insert(len(element_tag) - 1, content)
- element_text = ''.join(element_tag)
- except AttributeError:
- # Python 2.6 doesn't have the tostringlist method, so we have to treat
- # it differently.
- element_tag = ElementTree.tostring(element, encoding=enc)
- element_text = re.sub(r"<\?xml version='1.0' encoding='UTF-8'\?>\n",
- '', element_tag)
- if content != six.text_type(' /'):
- element_text = element_text.rstrip(' />')
- element_text = six.text_type('{0}>{1}</{2}>').format(six.text_type(
- element_text), content, tag)
+ element_tag = ElementTree.tostringlist(element, encoding=enc,
+ method='html')
+ if six.PY2:
+ element_tag = [v.decode(enc) for v in element_tag]
+ element_tag.insert(len(element_tag) - 1, content)
+ element_text = ''.join(element_tag)
return element_text
def has_raw_text(text):
@@ -110,9 +100,8 @@ def list_type(list_string):
def normalize_newlines(string):
out = string.strip()
- out = re.sub(r'\r\n', '\n', out)
- out = re.sub(r'\n{3,}', '\n\n', out)
- out = re.sub(r'\n\s*\n', '\n\n', out)
+ out = re.sub(r'\r\n?', '\n', out)
+ out = re.compile(r'^[ \t]*\n', flags=re.M).sub('\n', out)
out = re.sub(r'"$', '" ', out)
return out
diff --git a/textile/version.py b/textile/version.py
index ce59314..aaa4264 100644
--- a/textile/version.py
+++ b/textile/version.py
@@ -1 +1 @@
-VERSION = '2.3.12'
+VERSION = '3.0.0'
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-textile.git
More information about the Python-modules-commits
mailing list