[Python-modules-commits] [python-textile] 01/01: New upstream version 3.0.0

Dmitry Shachnev mitya57 at moszumanska.debian.org
Sat Nov 25 11:41:38 UTC 2017


This is an automated email from the git hooks/post-receive script.

mitya57 pushed a commit to branch upstream
in repository python-textile.

commit 5dd17ed9c0389da71c3b80298ea4efd2de0ae4e3
Author: Dmitry Shachnev <mitya57 at gmail.com>
Date:   Sat Nov 25 14:37:30 2017 +0300

    New upstream version 3.0.0
---
 .travis.yml                 |   7 +--
 CHANGELOG.textile           |  26 ++++++++
 README.textile              |   2 +-
 requirements.txt            |   5 +-
 setup.py                    |   9 ++-
 tests/fixtures/README.txt   |   2 +-
 tests/test_block.py         |  27 +++++++++
 tests/test_github_issues.py | 132 ++++++++++++++++++++++++++++++++++++++++
 tests/test_glyphs.py        |   4 --
 tests/test_textile.py       |  28 ++++-----
 tests/test_urls.py          |   8 +++
 tests/test_values.py        |   2 +-
 textile/__init__.py         |   9 ---
 textile/core.py             | 143 +++++++++++++++++++++++++++++---------------
 textile/objects/block.py    |   6 +-
 textile/objects/table.py    |  16 ++---
 textile/regex_strings.py    |   1 -
 textile/tools/sanitizer.py  |  19 ++----
 textile/utils.py            |  33 ++++------
 textile/version.py          |   2 +-
 20 files changed, 344 insertions(+), 137 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index e01eb38..3602a98 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,21 +3,18 @@ env:
   - REQUIREMENTS=true
   - REQUIREMENTS=false
 python:
-  - "2.6"
   - "2.7"
-  - "3.2"
   - "3.3"
   - "3.4"
   - "3.5"
   - "3.6"
-  - "pypy"
+  - "pypy-5.4"
 # command to install dependencies
 install:
   - if [[ $REQUIREMENTS == true ]] ; then pip install -r requirements.txt ; fi
-  - if [[ $TRAVIS_PYTHON_VERSION == '3.2' ]] ; then pip install coverage==3.7.1; fi
   - pip install coveralls pytest pytest-cov coverage codecov
   - pip install -e .
-  - if [[ ! $TRAVIS_PYTHON_VERSION == 'pypy' ]] ; then pip install regex; fi
+  - if [[ ! $TRAVIS_PYTHON_VERSION == 'pypy-5.4' ]] ; then pip install regex; fi
 # command to run tests
 script: py.test
 sudo: false
diff --git a/CHANGELOG.textile b/CHANGELOG.textile
index 1150246..c998dd0 100644
--- a/CHANGELOG.textile
+++ b/CHANGELOG.textile
@@ -1,5 +1,31 @@
 h1. Textile Changelog
 
+h2. Version 3.0.0
+* Drop support for Python 2.6 and 3.2.
+* Update to the current version of html5lib
+* Bugfixes:
+** Fix handling of HTML entities in extended pre blocks. ("#55":https://github.com/textile/python-textile/issues/55)
+** Empty definitions in definition lists raised an exception ("#56":https://github.com/textile/python-textile/issues/56)
+** Fix handling of unicode in img attributes ("#58":https://github.com/textile/python-textile/issues/58)
+
+h2. Version 2.3.16
+* Bugfixes:
+** Fix processing of extended code blocks ("#50":https://github.com/textile/python-textile/issues/50)
+** Don't break when links fail to include "http:" ("#51":https://github.com/textile/python-textile/issues/51)
+** Better handling of poorly-formatted tables ("#52":https://github.com/textile/python-textile/issues/52)
+
+h2. Version 2.3.15
+* Bugfix: Don't break on unicode characters in the fragment of a url.
+
+h2. Version 2.3.14
+* Bugfix: Fix textile on Python 2.6 ("#48":https://github.com/textile/python-textile/issues/48)
+
+h2. Version 2.3.13
+* Remove extraneous arguments from textile method. These were originally added long ago to work with django, but markup languages are long gone from django.
+* Bugfix: Don't mangle percent-encoded URLs so much. ("#45":https://github.com/textile/python-textile/issues/45)
+* Bugfix: More fixes for poorly-formatted lists. ("#46":https://github.com/textile/python-textile/issues/46)
+* Bugfix: Improve handling of whitespace in pre-formatted blocks. This now matches php-textile's handling of pre blocks much more closely. ("#47":https://github.com/textile/python-textile/issues/47)
+
 h2. Version 2.3.12
 * Bugfix: Don't die on pre blocks with unicode characters. ("#43":https://github.com/textile/python-textile/issues/43)
 * Bugfix: Fix regressions introduced into the code between 2.2.2 and 2.3.11. (Special thanks to "@adam-iris":https://github.com/adam-iris for providing pull request "#44":https://github.com/textile/python-textile/pull/44)
diff --git a/README.textile b/README.textile
index a363721..5615550 100644
--- a/README.textile
+++ b/README.textile
@@ -39,4 +39,4 @@ bc.. import textile
 
 h3. Notes:
 
-* Active development supports Python 2.6 or later (including Python 3.2+).
+* Active development supports Python 2.7 or later (including Python 3.3+).
diff --git a/requirements.txt b/requirements.txt
index 5cfb442..a477d4b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
-html5lib==0.999
-Pillow==3.0.0
\ No newline at end of file
+html5lib>=1.0b10
+Pillow==3.0.0
+regex
diff --git a/setup.py b/setup.py
index c12d3e0..e2f49ba 100644
--- a/setup.py
+++ b/setup.py
@@ -55,10 +55,15 @@ setup(
         'Programming Language :: Python :: 3.6',
     ],
     keywords='textile,text,html markup',
-    install_requires=['six',],
+    install_requires=[
+        'six',
+        'html5lib>=0.999999999',
+        ],
     extras_require={
         ':python_version=="2.6"': ['ordereddict>=1.1'],
-        'develop': ['regex', 'pytest', 'pytest-cov'],
+        'develop': ['pytest', 'pytest-cov'],
+        'imagesize': ['Pillow>=3.0.0'],
+        'regex': ['regex'],
     },
     entry_points={'console_scripts': ['pytextile=textile.__main__:main']},
     setup_requires=['pytest-runner'],
diff --git a/tests/fixtures/README.txt b/tests/fixtures/README.txt
index 426c9aa..ba86730 100644
--- a/tests/fixtures/README.txt
+++ b/tests/fixtures/README.txt
@@ -42,5 +42,5 @@
 	<h3>Notes:</h3>
 
 	<ul>
-		<li>Active development supports Python 2.6 or later (including Python 3.2+).</li>
+		<li>Active development supports Python 2.7 or later (including Python 3.3+).</li>
 	</ul>
\ No newline at end of file
diff --git a/tests/test_block.py b/tests/test_block.py
index c69105c..44f3ea2 100644
--- a/tests/test_block.py
+++ b/tests/test_block.py
@@ -69,3 +69,30 @@ def test_blockcode_comment():
     t = textile.Textile()
     result = t.parse(input)
     assert result == expect
+
+def test_extended_pre_block_with_many_newlines():
+    """Extra newlines in an extended pre block should not get cut down to only
+    two."""
+    text = '''pre.. word
+
+another
+
+word
+
+
+yet anothe word'''
+    expect = '''<pre>word
+
+another
+
+word
+
+
+yet anothe word</pre>'''
+    result = textile.textile(text)
+    assert result == expect
+
+    text = 'p. text text\n\n\nh1. Hello\n'
+    expect = '\t<p>text text</p>\n\n\n\t<h1>Hello</h1>'
+    result = textile.textile(text)
+    assert result == expect
diff --git a/tests/test_github_issues.py b/tests/test_github_issues.py
index 012ee5d..27befff 100644
--- a/tests/test_github_issues.py
+++ b/tests/test_github_issues.py
@@ -86,6 +86,11 @@ def test_github_issue_30():
     expect = '\t<p><a href="http://lala.com" title="Tëxtíle">Tëxtíle</a></p>'
     assert result == expect
 
+    text ='!http://lala.com/lol.gif(♡ imáges)!'
+    result = textile.textile(text)
+    expect = '\t<p><img alt="♡ imáges" src="http://lala.com/lol.gif" title="♡ imáges" /></p>'
+    assert result == expect
+
 def test_github_issue_36():
     text = '"Chögyam Trungpa":https://www.google.com/search?q=Chögyam+Trungpa'
     result = textile.textile(text)
@@ -130,3 +135,130 @@ def test_github_issue_43():
     result = textile.textile(text)
     expect = '<pre>smart ‘quotes’ are not smart!</pre>'
     assert result == expect
+
+def test_github_issue_45():
+    """Incorrect transform unicode url"""
+    text = '"test":https://myabstractwiki.ru/index.php/%D0%97%D0%B0%D0%B3%D0%BB%D0%B0%D0%B2%D0%BD%D0%B0%D1%8F_%D1%81%D1%82%D1%80%D0%B0%D0%BD%D0%B8%D1%86%D0%B0'
+    result = textile.textile(text)
+    expect = '\t<p><a href="https://myabstractwiki.ru/index.php/%D0%97%D0%B0%D0%B3%D0%BB%D0%B0%D0%B2%D0%BD%D0%B0%D1%8F_%D1%81%D1%82%D1%80%D0%B0%D0%BD%D0%B8%D1%86%D0%B0">test</a></p>'
+    assert result == expect
+
+def test_github_issue_46():
+    """Key error on mal-formed numbered lists. CAUTION: both the input and the
+    ouput are ugly."""
+    text = '# test\n### test\n## test'
+    expect = ('\t<ol>\n\t\t<li>test\n\t\t\t<ol>\n\t\t\t\t<li>test</li>'
+              '\n\t\t\t</ol></li>\n\t\t<ol>\n\t\t\t<li>test</li>'
+              '\n\t\t</ol></li>\n\t\t</ol>')
+    result = textile.textile(text)
+    assert result == expect
+
+def test_github_issue_47():
+    """Incorrect wrap pre-formatted value"""
+    text = '''pre.. word
+
+another
+
+word
+
+yet anothe word'''
+    result = textile.textile(text)
+    expect = '''<pre>word
+
+another
+
+word
+
+yet anothe word</pre>'''
+    assert result == expect
+
+def test_github_issue_49():
+    """Key error on russian hash-route link"""
+    s = '"link":https://ru.vuejs.org/v2/guide/components.html#Входные-параметры'
+    result = textile.textile(s)
+    expect = '\t<p><a href="https://ru.vuejs.org/v2/guide/components.html#Входные-параметры">link</a></p>'
+    assert result == expect
+
+def test_github_issue_50():
+    """Incorrect wrap code with Java generics in pre"""
+    test = ('pre.. public class Tynopet<T extends Framework> {}\n\nfinal '
+            'List<List<String>> multipleList = new ArrayList<>();')
+    result = textile.textile(test)
+    expect = ('<pre>public class Tynopet&lt;T extends Framework&gt; {}\n\n'
+              'final List&lt;List&lt;String&gt;&gt; multipleList = new '
+              'ArrayList&lt;&gt;();</pre>')
+    assert result == expect
+
+def test_github_issue_51():
+    """Link build with $ sign without "http" prefix broken."""
+    test = '"$":www.google.com.br'
+    result = textile.textile(test)
+    expect = '\t<p><a href="www.google.com.br">www.google.com.br</a></p>'
+    assert result == expect
+
+def test_github_issue_52():
+    """Table build without space after aligment raise a AttributeError."""
+    test = '|=.First Header |=. Second Header |'
+    result = textile.textile(test)
+    expect = ('\t<table>\n\t\t<tr>\n\t\t\t<td>=.First Header '
+              '</td>\n\t\t\t<td style="text-align:center;">Second Header </td>'
+              '\n\t\t</tr>\n\t</table>')
+    assert result == expect
+
+def test_github_issue_55():
+    """Incorrect handling of quote entities in extended pre block"""
+    test = ('pre.. this is the first line\n\nbut "quotes" in an extended pre '
+            'block need to be handled properly.')
+    result = textile.textile(test)
+    expect = ('<pre>this is the first line\n\nbut &quot;quotes&quot; in an '
+              'extended pre block need to be handled properly.</pre>')
+    assert result == expect
+
+    # supplied input
+    test = ('pre.. import org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;'
+            '\nimport ru.onyma.job.Context;\nimport ru.onyma.job.'
+            'RescheduleTask;\n\nimport java.util.concurrent.'
+            'ScheduledExecutorService;\nimport java.util.concurrent.TimeUnit;'
+            '\n\n/**\n* @author ustits\n*/\npublic abstract class '
+            'MainService<T> extends RescheduleTask implements Context<T> {\n\n'
+            'private static final Logger log = LoggerFactory.getLogger('
+            'MainService.class);\nprivate final ScheduledExecutorService '
+            'scheduler;\n\nprivate boolean isFirstRun = true;\nprivate T '
+            'configs;\n\npublic MainService(final ScheduledExecutorService '
+            'scheduler) {\nsuper(scheduler);\nthis.scheduler = scheduler;\n}\n'
+            '\n@Override\npublic void setConfig(final T configs) {\nthis.'
+            'configs = configs;\nif (isFirstRun) {\nscheduler.schedule(this, '
+            '0, TimeUnit.SECONDS);\nisFirstRun = false;\n}\n}\n\n@Override\n'
+            'public void stop() {\nsuper.stop();\nscheduler.shutdown();\ntry {'
+            '\nscheduler.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);\n} '
+            'catch (InterruptedException ie) {\nlog.warn("Unable to wait for '
+            'syncs termination", ie);\nThread.currentThread().interrupt();\n}'
+            '\n}\n\nprotected final T getConfigs() {\nreturn configs;\n}\n}')
+    result = textile.textile(test)
+    expect = ('<pre>import org.slf4j.Logger;\nimport org.slf4j.LoggerFactory;'
+              '\nimport ru.onyma.job.Context;\nimport ru.onyma.job.'
+              'RescheduleTask;\n\nimport java.util.concurrent.'
+              'ScheduledExecutorService;\nimport java.util.concurrent.'
+              'TimeUnit;\n\n/**\n* @author ustits\n*/\npublic abstract class '
+              'MainService&lt;T&gt; extends RescheduleTask implements '
+              'Context&lt;T&gt; {\n\nprivate static final Logger log = '
+              'LoggerFactory.getLogger(MainService.class);\nprivate final '
+              'ScheduledExecutorService scheduler;\n\nprivate boolean '
+              'isFirstRun = true;\nprivate T configs;\n\npublic MainService('
+              'final ScheduledExecutorService scheduler) {\nsuper(scheduler);'
+              '\nthis.scheduler = scheduler;\n}\n\n@Override\npublic void '
+              'setConfig(final T configs) {\nthis.configs = configs;\nif ('
+              'isFirstRun) {\nscheduler.schedule(this, 0, TimeUnit.SECONDS);'
+              '\nisFirstRun = false;\n}\n}\n\n@Override\npublic void stop() {'
+              '\nsuper.stop();\nscheduler.shutdown();\ntry {\nscheduler.'
+              'awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);\n} catch '
+              '(InterruptedException ie) {\nlog.warn(&quot;Unable to wait '
+              'for syncs termination&quot;, ie);\nThread.currentThread().'
+              'interrupt();\n}\n}\n\nprotected final T getConfigs() {\n'
+              'return configs;\n}\n}</pre>')
+    assert result == expect
+
+def test_issue_56():
+    result = textile.textile("- :=\n-")
+    expect = '<dl>\n</dl>'
+    assert result == expect
diff --git a/tests/test_glyphs.py b/tests/test_glyphs.py
index fcf2636..56b0d27 100644
--- a/tests/test_glyphs.py
+++ b/tests/test_glyphs.py
@@ -26,7 +26,3 @@ def test_glyphs():
     result = t.glyphs("<p><cite>Cat's Cradle</cite> by Vonnegut</p>")
     expect = '<p><cite>Cat’s Cradle</cite> by Vonnegut</p>'
     assert result == expect
-
-    result = t.glyphs('test"')
-    expect = 'test” '
-    assert result == expect
diff --git a/tests/test_textile.py b/tests/test_textile.py
index dd069fb..86a7d85 100644
--- a/tests/test_textile.py
+++ b/tests/test_textile.py
@@ -67,22 +67,18 @@ def test_autolinking():
 def test_sanitize():
     test = "a paragraph of benign text"
     result = "\t<p>a paragraph of benign text</p>"
-    try:
-        expect = textile.Textile().parse(test, sanitize=True)
-        assert result == expect
-
-        test = """<p style="width: expression(alert('evil'));">a paragraph of evil text</p>"""
-        result = '<p style="">a paragraph of evil text</p>'
-        expect = textile.Textile().parse(test, sanitize=True)
-        assert result == expect
-
-        test = """<p>a paragraph of benign text<br />and more text</p>"""
-        result = '<p>a paragraph of benign text<br />\nand more text</p>'
-        expect = textile.Textile(html_type='html5').parse(test, sanitize=True)
-        assert result == expect
-    except Exception as e:
-        message = '{0}'.format(e)
-        assert "html5lib not available" in message
+    expect = textile.Textile().parse(test, sanitize=True)
+    assert result == expect
+
+    test = """<p style="width: expression(alert('evil'));">a paragraph of evil text</p>"""
+    result = '<p style="">a paragraph of evil text</p>'
+    expect = textile.Textile().parse(test, sanitize=True)
+    assert result == expect
+
+    test = """<p>a paragraph of benign text<br />and more text</p>"""
+    result = '<p>a paragraph of benign text<br />\nand more text</p>'
+    expect = textile.Textile(html_type='html5').parse(test, sanitize=True)
+    assert result == expect
 
 def test_imagesize():
     PIL = pytest.importorskip('PIL')
diff --git a/tests/test_urls.py b/tests/test_urls.py
index 0ae78e6..7a9798e 100644
--- a/tests/test_urls.py
+++ b/tests/test_urls.py
@@ -59,3 +59,11 @@ def test_rel_attribute():
     result = t.parse('"$":http://domain.tld')
     expect = '\t<p><a href="http://domain.tld" rel="nofollow">domain.tld</a></p>'
     assert result == expect
+
+def test_quotes_in_link_text():
+    """quotes in link text are tricky."""
+    test = '""this is a quote in link text"":url'
+    t = Textile()
+    result = t.parse(test)
+    expect = '\t<p><a href="url">“this is a quote in link text”</a></p>'
+    assert result == expect
diff --git a/tests/test_values.py b/tests/test_values.py
index 68a2dc1..063ed3e 100644
--- a/tests/test_values.py
+++ b/tests/test_values.py
@@ -178,7 +178,7 @@ xhtml_known_values = (
     ('@monospaced text@, followed by text',
      '\t<p><code>monospaced text</code>, followed by text</p>'),
 
-    ('h2. A header\n\n\n\n\n\nsome text', '\t<h2>A header</h2>\n\n\t<p>some text</p>'),
+    ('h2. A header\n\n\n\n\n\nsome text', '\t<h2>A header</h2>\n\n\n\n\n\n\t<p>some text</p>'),
 
     ('pre.. foo bar baz\nquux', '<pre>foo bar baz\nquux</pre>'),
 
diff --git a/textile/__init__.py b/textile/__init__.py
index c019f41..bb7829f 100644
--- a/textile/__init__.py
+++ b/textile/__init__.py
@@ -9,12 +9,3 @@ from .version import VERSION
 __all__ = ['textile', 'textile_restricted']
 
 __version__ = VERSION
-
-
-if sys.version_info[:2] == (2, 6):
-    warnings.warn(
-        "Python 2.6 is no longer supported by the Python core team, please "
-        "upgrade your Python. A future version of textile will drop support "
-        "for Python 2.6",
-        DeprecationWarning
-    )
diff --git a/textile/core.py b/textile/core.py
index 695b7e7..7572a46 100644
--- a/textile/core.py
+++ b/textile/core.py
@@ -20,25 +20,20 @@ Additions and fixes Copyright (c) 2006 Alex Shiels http://thresholdstate.com/
 
 import uuid
 import six
+from six.moves.urllib_parse import (urlparse, urlsplit, urlunsplit, quote,
+        unquote)
 
 from textile.tools import sanitizer, imagesize
-from textile.regex_strings import (align_re_s, cls_re_s, halign_re_s,
-        pnct_re_s, regex_snippets, syms_re_s, table_span_re_s, valign_re_s)
+from textile.regex_strings import (align_re_s, cls_re_s, pnct_re_s,
+        regex_snippets, syms_re_s, table_span_re_s)
 from textile.utils import (decode_high, encode_high, encode_html, generate_tag,
         has_raw_text, is_rel_url, is_valid_url, list_type, normalize_newlines,
         parse_attributes, pba)
 from textile.objects import Block, Table
 
 
-try:
-    from collections import OrderedDict
-except ImportError:
-    from ordereddict import OrderedDict
+from collections import OrderedDict
 
-from six.moves import urllib
-urlparse, urlsplit, urlunsplit, quote, unquote = (urllib.parse.urlparse,
-        urllib.parse.urlsplit, urllib.parse.urlunsplit, urllib.parse.quote,
-        urllib.parse.unquote)
 
 try:
     import regex as re
@@ -277,6 +272,8 @@ class Textile(object):
         # a newline, replace it with a new style break tag and a newline.
         text = re.sub(r'<br( /)?>(?!\n)', '<br />\n', text)
 
+        text = text.rstrip('\n')
+
         return text
 
     def table(self, text):
@@ -346,7 +343,14 @@ class Textile(object):
                 # This will only increment the count for list items, not
                 # definition items
                 if showitem:
-                    self.olstarts[tl] = self.olstarts[tl] + 1
+                    # Assume properly formatted input
+                    try:
+                        self.olstarts[tl] = self.olstarts[tl] + 1
+                    # if we get here, we've got some poor textile formatting.
+                    # add this type of list to olstarts and assume we'll start
+                    # it at 1. expect screwy output.
+                    except KeyError:
+                        self.olstarts[tl] = 1
 
             nm = re.match("^(?P<nextlistitem>[#\*;:]+)(_|[\d]+)?{0}"
                     "[ .].*".format(cls_re_s), nextline)
@@ -420,15 +424,29 @@ class Textile(object):
             tre = '|'.join(self.btag)
         else:
             tre = '|'.join(self.btag_lite)
-        text = text.split('\n\n')
+
+        # split the text by two or more newlines, retaining the newlines in the
+        # split list
+        text = re.split(r'(\n{2,})', text)
+
+        # some blocks, when processed, will ask us to output nothing, if that's
+        # the case, we'd want to drop the whitespace which comes after it.
+        eat_whitespace = False
 
         tag = 'p'
-        atts = cite = graf = ext = ''
+        atts = cite = ext = ''
 
-        last_item_is_a_shelf = False
         out = []
 
         for line in text:
+            # the line is just whitespace, add it to the output, and move on
+            if not line.strip():
+                if not eat_whitespace:
+                    out.append(line)
+                continue
+
+            eat_whitespace = False
+
             pattern = (r'^(?P<tag>{0})(?P<atts>{1}{2})\.(?P<ext>\.?)'
                     r'(?::(?P<cite>\S+))? (?P<content>.*)$'.format(tre,
                         align_re_s, cls_re_s))
@@ -437,14 +455,15 @@ class Textile(object):
             if match:
                 # if we had a previous extended tag but not this time, close up
                 # the tag
-                if out:
-                    last_item_is_a_shelf = out[-1] in self.shelf
-                if ext and match.group('tag') and last_item_is_a_shelf:
-                    content = out.pop()
+                if ext and out:
+                    # it's out[-2] because the last element in out is the
+                    # whitespace that preceded this line
+                    content = encode_html(out[-2], quotes=True)
                     content = generate_tag(block.inner_tag, content,
                             block.inner_atts)
-                    out.append(generate_tag(block.outer_tag, content,
-                        block.outer_atts))
+                    content = generate_tag(block.outer_tag, content,
+                        block.outer_atts)
+                    out[-2] = content
                 tag, atts, ext, cite, content = match.groups()
                 block = Block(self, **match.groupdict())
                 inner_block = generate_tag(block.inner_tag, block.content,
@@ -463,40 +482,64 @@ class Textile(object):
             # no tag specified
             else:
                 # if we're inside an extended block, add the text from the
-                # previous extension to the front
+                # previous line to the front
                 if ext and out:
-                    line = '{0}\n\n{1}'.format(out.pop(), line)
-                whitespace = ' \t\n\r\f\v'
-                if ext or not line[0] in whitespace:
+                    line = '{0}{1}'.format(out.pop(), line)
+                # the logic in the if statement below is a bit confusing in
+                # php-textile. I'm still not sure I understand what the php
+                # code is doing. Something tells me it's a phpsadness. Anyway,
+                # this works, and is much easier to understand: if we're not in
+                # an extension, and the line doesn't begin with a space, treat
+                # it like a block to insert. Lines that begin with a space are
+                # not processed as a block.
+                if not ext and not line[0] == ' ':
                     block = Block(self, tag, atts, ext, cite, line)
+                    # if the block contains html tags, generate_tag would
+                    # mangle it, so process as is.
                     if block.tag == 'p' and not has_raw_text(block.content):
                         line = block.content
                     else:
                         line = generate_tag(block.outer_tag, block.content,
                                 block.outer_atts)
-                        if block.inner_tag == 'code':
-                            line = block.content
-                        if block.outer_tag != 'pre' and not has_raw_text(line):
-                            line = "\t{0}".format(line)
+                        line = "\t{0}".format(line)
                 else:
-                    line = self.graf(line)
+                    if block.tag == 'pre':
+                        line = self.shelve(encode_html(line, quotes=True))
+                    else:
+                        line = self.graf(line)
 
             line = self.doPBr(line)
             line = line.replace('<br>', '<br />')
 
-            if line.strip():
+            # if we're in an extended block, and we haven't specified a new
+            # tag, join this line to the last item of the output
+            if ext and not match:
+                last_item = out.pop()
+                out.append('{0}{1}'.format(last_item, line))
+            elif not block.eat:
+                # or if it's a type of block which indicates we shouldn't drop
+                # it, add it to the output.
                 out.append(line)
 
             if not ext:
                 tag = 'p'
                 atts = ''
                 cite = ''
-                graf = ''
 
+            # if it's a block we should drop, don't keep the whitespace which
+            # will come after it.
+            if block.eat:
+                eat_whitespace = True
+
+        # at this point, we've gone through all the lines, and if there's still
+        # an extension in effect, we close it here.
         if ext and out:
-            out.append(generate_tag(block.outer_tag, out.pop(),
-                block.outer_atts))
-        return '\n\n'.join(out)
+            block.content = out.pop()
+            block.process()
+            final = generate_tag(block.outer_tag, block.content,
+                                 block.outer_atts)
+            out.append(final)
+        return ''.join(out)
 
     def footnoteRef(self, text):
         # somehow php-textile gets away with not capturing the space.
@@ -537,10 +580,6 @@ class Textile(object):
         So, for the first pass, we use the glyph_search_initial set of
         regexes.  For all remaining passes, we use glyph_search
         """
-        # fix: hackish
-        if text.endswith('"'):
-            text = '{0} '.format(text)
-
         text = text.rstrip('\n')
         result = []
         searchlist = self.glyph_search_initial
@@ -689,7 +728,7 @@ class Textile(object):
 
                         try:
                             possibility = possible_start_quotes.pop()
-                        except IndexError:
+                        except IndexError: # pragma: no cover
                             # If out of possible starting segments we back the
                             # last one from the linkparts array
                             linkparts.pop()
@@ -881,7 +920,7 @@ class Textile(object):
             text = url
             if "://" in text:
                 text = text.split("://")[1]
-            else:
+            elif ":" in text:
                 text = text.split(":")[1]
 
         text = text.strip()
@@ -942,11 +981,19 @@ class Textile(object):
                     quote(netloc_parsed['password']))
         host = netloc_parsed['host']
         port = netloc_parsed['port'] and netloc_parsed['port']
-        path = '/'.join(  # could be encoded slashes!
-            quote(unquote(pce).encode('utf8'), b'')
-            for pce in parsed.path.split('/')
-        )
-        fragment = quote(unquote(parsed.fragment))
+        # the below splits the path portion of the url by slashes, translates
+        # percent-encoded characters back into strings, then re-percent-encodes
+        # what's necessary. Sounds screwy, but the url could include encoded
+        # slashes, and this is a way to clean that up. It branches for PY2/3
+        # because the quote and unquote functions expects different input
+        # types: unicode strings for PY2 and str for PY3.
+        if six.PY2:
+            path_parts = (quote(unquote(pce.encode('utf8')), b'') for pce in
+                    parsed.path.split('/'))
+        else:
+            path_parts = (quote(unquote(pce), b'') for pce in
+                    parsed.path.split('/'))
+        path = '/'.join(path_parts)
 
         # put it back together
         netloc = ''
@@ -958,7 +1005,7 @@ class Textile(object):
         netloc = '{0}{1}'.format(netloc, host)
         if port:
             netloc = '{0}:{1}'.format(netloc, port)
-        return urlunsplit((scheme, netloc, path, parsed.query, fragment))
+        return urlunsplit((scheme, netloc, path, parsed.query, parsed.fragment))
 
     def span(self, text):
         qtags = (r'\*\*', r'\*', r'\?\?', r'\-', r'__',
@@ -1137,6 +1184,8 @@ class Textile(object):
             # parse the attributes and content
             m = re.match(r'^[-]+({0})[ .](.*)$'.format(cls_re_s), line,
                     flags=re.M | re.S)
+            if not m:
+                continue
 
             atts, content = m.groups()
             # cleanup
@@ -1356,7 +1405,7 @@ class Textile(object):
         return self.linkIndex
 
 
-def textile(text, html_type='xhtml', encoding=None, output=None):
+def textile(text, html_type='xhtml'):
     """
     Apply Textile to a block of text.
 
diff --git a/textile/objects/block.py b/textile/objects/block.py
index 89e6b2e..7002eca 100644
--- a/textile/objects/block.py
+++ b/textile/objects/block.py
@@ -1,10 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals
 
-try:
-    from collections import OrderedDict
-except ImportError:
-    from ordereddict import OrderedDict
+from collections import OrderedDict
 try:
     import regex as re
 except ImportError:
@@ -49,6 +46,7 @@ class Block(object):
             # It will be empty if the regex matched and ate it.
             if '' == notedef:
                 self.content = notedef
+                self.eat = True
 
         fns = re.search(r'fn(?P<fnid>{0}+)'.format(regex_snippets['digit']),
                 self.tag, flags=re.U)
diff --git a/textile/objects/table.py b/textile/objects/table.py
index f694098..4796afc 100644
--- a/textile/objects/table.py
+++ b/textile/objects/table.py
@@ -38,15 +38,17 @@ class Table(object):
             # as a normal center-aligned cell.
             if i == 0 and row[:2] == '|=':
                 captionpattern = (r"^\|\=(?P<capts>{s}{a}{c})\. "
-                        r"(?P<cap>[^\n]*)(?P<row>.*)".format(**{'s':
-                            table_span_re_s, 'a': align_re_s, 'c': cls_re_s}))
+                                  r"(?P<cap>[^\n]*)(?P<row>.*)".format(**{
+                                      's': table_span_re_s, 'a': align_re_s,
+                                      'c': cls_re_s}))
                 caption_re = re.compile(captionpattern, re.S)
                 cmtch = caption_re.match(row)
-                caption = Caption(**cmtch.groupdict())
-                self.caption = '\n{0}'.format(caption.caption)
-                row = cmtch.group('row').lstrip()
-                if row == '':
-                    continue
+                if cmtch:
+                    caption = Caption(**cmtch.groupdict())
+                    self.caption = '\n{0}'.format(caption.caption)
+                    row = cmtch.group('row').lstrip()
+                    if row == '':
+                        continue
 
             # Colgroup -- A colgroup row will not necessarily end with a |.
             # Hence it may include the next row of actual table data.
diff --git a/textile/regex_strings.py b/textile/regex_strings.py
index a152072..f7c6f12 100644
--- a/textile/regex_strings.py
+++ b/textile/regex_strings.py
@@ -19,7 +19,6 @@ try:
         'char': r'(?:[^\p{Zs}\v])',
         }
 except ImportError:
-    import re
     from sys import maxunicode
     upper_re_s = "".join(
             [six.unichr(c) for c in six.moves.range(maxunicode) if six.unichr(
diff --git a/textile/tools/sanitizer.py b/textile/tools/sanitizer.py
index 4fc8fb2..3c7209c 100644
--- a/textile/tools/sanitizer.py
+++ b/textile/tools/sanitizer.py
@@ -3,18 +3,9 @@ def sanitize(string):
     Ensure that the text does not contain any malicious HTML code which might
     break the page.
     """
-    try:
-        import html5lib
-        from html5lib import sanitizer, serializer, treewalkers
-    except ImportError:
-        raise Exception("html5lib not available")
+    from html5lib import parseFragment, serialize
 
-    p = html5lib.HTMLParser(tokenizer=sanitizer.HTMLSanitizer)
-    tree = p.parseFragment(string)
-
-    walker = treewalkers.getTreeWalker("etree")
-    stream = walker(tree)
-
-    s = serializer.htmlserializer.HTMLSerializer(omit_optional_tags=False,
-            quote_attr_values=True)
-    return s.render(stream)
+    parsed = parseFragment(string)
+    clean = serialize(parsed, sanitize=True, omit_optional_tags=False,
+                      quote_attr_values='always')
+    return clean
diff --git a/textile/utils.py b/textile/utils.py
index 00da831..fa21f05 100644
--- a/textile/utils.py
+++ b/textile/utils.py
@@ -10,10 +10,7 @@ from six.moves import urllib, html_parser
 urlparse = urllib.parse.urlparse
 HTMLParser = html_parser.HTMLParser
 
-try:
-    from collections import OrderedDict
-except ImportError:
-    from ordereddict import OrderedDict
+from collections import OrderedDict
 
 from xml.etree import ElementTree
 
@@ -50,6 +47,8 @@ def generate_tag(tag, content, attributes=None):
     content are strings, the attributes argument is a dictionary.  As
     a convenience, if the content is ' /', a self-closing tag is generated."""
     content = six.text_type(content)
+    # In PY2, ElementTree tostringlist only works with bytes, not with
+    # unicode().
     enc = 'unicode'
     if six.PY2:
         enc = 'UTF-8'
@@ -60,21 +59,12 @@ def generate_tag(tag, content, attributes=None):
     # adding text by assigning it to element_tag.text.  That results in
     # non-ascii text being html-entity encoded.  Not bad, but not entirely
     # matching php-textile either.
-    try:
-        element_tag = ElementTree.tostringlist(element, encoding=enc,
-                method='html')
-        element_tag.insert(len(element_tag) - 1, content)
-        element_text = ''.join(element_tag)
-    except AttributeError:
-        # Python 2.6 doesn't have the tostringlist method, so we have to treat
-        # it differently.
-        element_tag = ElementTree.tostring(element, encoding=enc)
-        element_text = re.sub(r"<\?xml version='1.0' encoding='UTF-8'\?>\n",
-                '', element_tag)
-        if content != six.text_type(' /'):
-            element_text = element_text.rstrip(' />')
-            element_text = six.text_type('{0}>{1}</{2}>').format(six.text_type(
-                element_text), content, tag)
+    element_tag = ElementTree.tostringlist(element, encoding=enc,
+            method='html')
+    if six.PY2:
+        element_tag = [v.decode(enc) for v in element_tag]
+    element_tag.insert(len(element_tag) - 1, content)
+    element_text = ''.join(element_tag)
     return element_text
 
 def has_raw_text(text):
@@ -110,9 +100,8 @@ def list_type(list_string):
 
 def normalize_newlines(string):
     out = string.strip()
-    out = re.sub(r'\r\n', '\n', out)
-    out = re.sub(r'\n{3,}', '\n\n', out)
-    out = re.sub(r'\n\s*\n', '\n\n', out)
+    out = re.sub(r'\r\n?', '\n', out)
+    out = re.compile(r'^[ \t]*\n', flags=re.M).sub('\n', out)
     out = re.sub(r'"$', '" ', out)
     return out
 
diff --git a/textile/version.py b/textile/version.py
index ce59314..aaa4264 100644
--- a/textile/version.py
+++ b/textile/version.py
@@ -1 +1 @@
-VERSION = '2.3.12'
+VERSION = '3.0.0'

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-textile.git



More information about the Python-modules-commits mailing list