[Python-modules-commits] [python-tidylib] 01/08: Import python-tidylib_0.3.0~dfsg.orig.tar.gz

Dmitry Shachnev mitya57 at moszumanska.debian.org
Sun Sep 25 10:58:35 UTC 2016


This is an automated email from the git hooks/post-receive script.

mitya57 pushed a commit to branch master
in repository python-tidylib.

commit 678ef50477d8d385a9da3da4e2a327e5a8388aff
Author: Dmitry Shachnev <mitya57 at gmail.com>
Date:   Sun Sep 25 13:36:10 2016 +0300

    Import python-tidylib_0.3.0~dfsg.orig.tar.gz
---
 PKG-INFO              |  18 ++--
 README                |  20 ++---
 setup.py              |  45 +++++-----
 tests/test_docs.py    |  34 +++++---
 tests/threadsafety.py |   9 +-
 tidylib/__init__.py   | 204 +------------------------------------------
 tidylib/tidy.py       | 234 ++++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 307 insertions(+), 257 deletions(-)

diff --git a/PKG-INFO b/PKG-INFO
index a1ac0e5..8a64af2 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: pytidylib
-Version: 0.2.4
+Version: 0.3.0
 Summary: Python wrapper for HTML Tidy (tidylib) on Python 2 and 3
 Home-page: http://countergram.com/open-source/pytidylib/
 Author: Jason Stitt
@@ -18,12 +18,18 @@ Description: `PyTidyLib`_ is a Python package that wraps the `HTML Tidy`_ librar
         * Indent the output, including proper (i.e. no) indenting for ``pre`` elements,
           which some (X)HTML indenting code overlooks.
         
-        Version usage
-        =============
+        Changes
+        =======
         
-        * Windows: 0.2.0 and later
-        * Python 3: Tests pass on 0.2.3
-        * tidylib itself is not actively updated and may have problems with newer HTML
+        * 0.3.0: Refactored to use Tidy and PersistentTidy classes while keeping the
+        functional interface (which will lazily create a global Tidy() object) for
+        backward compatibility. You can now pass a list of library names and base
+        options when instantiating Tidy. The keep_doc argument is now deprecated
+        and does nothing; use PersistentTidy.
+        
+        * 0.2.4: Bugfix for a strange memory allocation corner case in Tidy.
+        
+        * 0.2.3: Python 3 support (2 + 3 cross compatible) with passing Tox tests.
         
         Small example of use
         ====================
diff --git a/README b/README
index a471b26..1b16576 100644
--- a/README
+++ b/README
@@ -1,14 +1,10 @@
-For documentation, see docs/html/index.html in this distribution, or
-http://countergram.com/open-source/pytidylib/
+This is a Python wrapper around the HTML Tidy library. Quick start example:
 
-Small example of use:
+from tidylib import Tidy
+tidy = Tidy()
+document, errors = tidy.tidy_document('<p>fõo <img src="bar.jpg">',
+    options={'alt-text': 'baz'})
+print(document)
+print(errors)
 
-from tidylib import tidy_document
-document, errors = tidy_document('''<p>fõo <img src="bar.jpg">''',
-    options={'numeric-entities':1})
-print document
-print errors
-
-NOTE: HTML Tidy itself has currently not been updated for a long time, and may
-not be, and it may have trouble with newer HTML. This is just a thin Python
-wrapper around HTML Tidy, which is a separate project.
+For full documentation, see the docs/ directory.
diff --git a/setup.py b/setup.py
index 49e1d71..ceadc75 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-# Copyright 2009 Jason Stitt
+# Copyright 2009-2015 Jason Stitt
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -33,12 +33,18 @@ library's many capabilities include:
 * Indent the output, including proper (i.e. no) indenting for ``pre`` elements,
   which some (X)HTML indenting code overlooks.
 
-Version usage
-=============
+Changes
+=======
 
-* Windows: 0.2.0 and later
-* Python 3: Tests pass on 0.2.3
-* tidylib itself is not actively updated and may have problems with newer HTML
+* 0.3.0: Refactored to use Tidy and PersistentTidy classes while keeping the
+functional interface (which will lazily create a global Tidy() object) for
+backward compatibility. You can now pass a list of library names and base
+options when instantiating Tidy. The keep_doc argument is now deprecated
+and does nothing; use PersistentTidy.
+
+* 0.2.4: Bugfix for a strange memory allocation corner case in Tidy.
+
+* 0.2.3: Python 3 support (2 + 3 cross compatible) with passing Tox tests.
 
 Small example of use
 ====================
@@ -61,7 +67,7 @@ the `PyTidyLib`_ web page.
 .. _`PyTidyLib`: http://countergram.com/open-source/pytidylib/
 """
 
-VERSION = "0.2.4"
+VERSION = "0.3.0"
 
 setup(
     name="pytidylib",
@@ -73,16 +79,15 @@ setup(
     url="http://countergram.com/open-source/pytidylib/",
     packages=['tidylib'],
     classifiers=[
-          'Development Status :: 5 - Production/Stable',
-          'Environment :: Other Environment',
-          'Intended Audience :: Developers',
-          'License :: OSI Approved :: MIT License',
-          'Programming Language :: Python',
-          'Programming Language :: Python :: 3',
-          'Natural Language :: English',
-          'Topic :: Utilities',
-          'Topic :: Text Processing :: Markup :: HTML',
-          'Topic :: Text Processing :: Markup :: XML',
-          ],
-    )
-
+        'Development Status :: 5 - Production/Stable',
+        'Environment :: Other Environment',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: MIT License',
+        'Programming Language :: Python',
+        'Programming Language :: Python :: 3',
+        'Natural Language :: English',
+        'Topic :: Utilities',
+        'Topic :: Text Processing :: Markup :: HTML',
+        'Topic :: Text Processing :: Markup :: XML',
+    ],
+)
diff --git a/tests/test_docs.py b/tests/test_docs.py
index 45ced58..adcffe4 100644
--- a/tests/test_docs.py
+++ b/tests/test_docs.py
@@ -22,7 +22,7 @@
 from __future__ import unicode_literals
 
 import unittest
-from tidylib import tidy_document, release_tidy_doc, thread_local_doc
+from tidylib import Tidy, PersistentTidy, tidy_document
 
 DOC = u'''<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
 <html>
@@ -76,6 +76,28 @@ class TestDocs1(unittest.TestCase):
         doc, err = tidy_document(h)
         self.assertEqual(doc, expected)
 
+    def test_can_use_two_tidy_instances(self):
+        t1 = Tidy()
+        t2 = Tidy()
+        self.assertEqual(t1.tidy_document(DOC % 'a')[0], DOC % 'a')
+        self.assertEqual(t2.tidy_document(DOC % 'b')[0], DOC % 'b')
+
+    def test_tidy_doesnt_persist_options(self):
+        tidy = Tidy()
+        # This option makes it a fragment
+        doc, err = tidy.tidy_document(DOC % 'a', {'show-body-only': 1})
+        self.assertEqual(doc, 'a\n')
+        doc, err = tidy.tidy_document(DOC % 'a')
+        self.assertEqual(doc, DOC % 'a')
+
+    def test_persistent_tidy_does_persist_options(self):
+        tidy = PersistentTidy()
+        # This option makes it a fragment
+        doc, err = tidy.tidy_document(DOC % 'a', {'show-body-only': 1})
+        self.assertEqual(doc, 'a\n')
+        doc, err = tidy.tidy_document(DOC % 'a')
+        self.assertEqual(doc, 'a\n')
+
     def test_xmlns_large_document_xml_corner_case(self):
         # Test for a super weird edge case in Tidy that can cause it to return
         # the wrong required buffer size.
@@ -84,16 +106,6 @@ class TestDocs1(unittest.TestCase):
         doc, err = tidy_document(html, {'output-xml': 1})
         self.assertEqual(doc.strip()[-7:], "</html>")
 
-    def test_keep_document(self):
-        h = "hello"
-        expected = DOC % h
-        for i in range(4):
-            doc, err = tidy_document(h, keep_doc=True)
-            self.assertEqual(doc, expected)
-        assert hasattr(thread_local_doc, 'doc')
-        release_tidy_doc()
-        assert not hasattr(thread_local_doc, 'doc')
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/threadsafety.py b/tests/threadsafety.py
index cc2a128..85f7e68 100644
--- a/tests/threadsafety.py
+++ b/tests/threadsafety.py
@@ -24,9 +24,8 @@ from tidylib import tidy_document
 
 error_queue = Queue()
 
-DOC = '''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
-    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
+DOC = '''<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
+<html>
   <head>
     <title></title>
   </head>
@@ -63,5 +62,5 @@ def run_test():
 if __name__ == '__main__':
     run_test()
     if not error_queue.empty():
-        print "About %s errors out of %s" % (error_queue.qsize(), NUM_THREADS * NUM_TRIES)
-        print error_queue.get()
+        print("About %s errors out of %s" % (error_queue.qsize(), NUM_THREADS * NUM_TRIES))
+        print(error_queue.get())
diff --git a/tidylib/__init__.py b/tidylib/__init__.py
index 5a3864c..db089cd 100644
--- a/tidylib/__init__.py
+++ b/tidylib/__init__.py
@@ -1,203 +1 @@
-# Copyright 2009-2014 Jason Stitt
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-import ctypes
-import threading
-import platform
-from tidylib.sink import create_sink, destroy_sink
-
-__all__ = ['tidy_document', 'tidy_fragment', 'release_tidy_doc']
-
-# -------------------------------------------------------------------------- #
-# Constants
-
-LIB_NAMES = ['libtidy', 'libtidy.so', 'libtidy-0.99.so.0', 'cygtidy-0-99-0',
-             'tidylib', 'libtidy.dylib', 'tidy']
-ENOMEM = -12
-BASE_OPTIONS = {
-    "indent": 1,           # Pretty; not too much of a performance hit
-    "tidy-mark": 0,        # No tidy meta tag in output
-    "wrap": 0,             # No wrapping
-    "alt-text": "",        # Help ensure validation
-    "doctype": 'strict',   # Little sense in transitional for tool-generated markup...
-    "force-output": 1,     # May not get what you expect but you will get something
-}
-
-# Note: These are meant as sensible defaults. If you don't like these being
-# applied by default, just set tidylib.BASE_OPTIONS = {} after importing.
-# You can of course override any of these options when you call the
-# tidy_document() or tidy_fragment() function
-
-# -------------------------------------------------------------------------- #
-# Globals
-
-tidy = None
-thread_local_doc = threading.local()
-
-# Fix for Windows b/c tidy uses stdcall on Windows
-if "Windows" == platform.system():
-    load_library = ctypes.windll.LoadLibrary
-else:
-    load_library = ctypes.cdll.LoadLibrary
-
-for name in LIB_NAMES:
-    try:
-        tidy = load_library(name)
-        break
-    except OSError:
-        pass
-
-if tidy is None:
-    raise OSError("Could not load libtidy using any of these names: %s" % (",".join(LIB_NAMES)))
-
-tidy.tidyCreate.restype = ctypes.POINTER(ctypes.c_void_p)  # Fix for 64-bit systems
-
-# -------------------------------------------------------------------------- #
-# 3.x/2.x cross-compatibility
-
-try:
-    unicode  # 2.x
-
-    def is_unicode(obj):
-        return isinstance(obj, unicode)
-
-    def encode_key_value(k, v):
-        return unicode(k).encode('utf-8'), unicode(v).encode('utf-8')
-except NameError:
-    # 3.x
-    def is_unicode(obj):
-        return isinstance(obj, str)
-
-    def encode_key_value(k, v):
-        return str(k).encode('utf-8'), str(v).encode('utf-8')
-
-# -------------------------------------------------------------------------- #
-# Functions
-
-
-def tidy_document(text, options=None, keep_doc=False):
-    """ Run a string with markup through HTML Tidy; return the corrected one.
-
-    text: The markup, which may be anything from an empty string to a complete
-    (X)HTML document. If you pass in a unicode type (py3 str, py2 unicode) you
-    get one back out, and tidy will have some options set that may affect
-    behavior (e.g. named entities converted to plain unicode characters). If
-    you pass in a bytes type (py3 bytes, py2 str) you will get one of those
-    back.
-
-    options (dict): Options passed directly to HTML Tidy; see the HTML Tidy docs
-    (http://tidy.sourceforge.net/docs/quickref.html) or run tidy -help-config
-    from the command line.
-
-    keep_doc (boolean): If True, store 1 document object per thread and re-use
-    it, for a slight performance boost especially when tidying very large numbers
-    of very short documents.
-
-    returns (str, str): The tidied markup and unparsed warning/error messages.
-    Warnings and errors are returned just as tidylib returns them.
-    """
-    global tidy, option_names
-
-    # Unicode approach is to encode as string, then decode libtidy output
-    use_unicode = False
-    if is_unicode(text):
-        use_unicode = True
-        text = text.encode('utf-8')
-
-    # Manage thread-local storage of persistent document object
-    if keep_doc:
-        if not hasattr(thread_local_doc, 'doc'):
-            thread_local_doc.doc = tidy.tidyCreate()
-        doc = thread_local_doc.doc
-    else:
-        doc = tidy.tidyCreate()
-
-    # This is where error messages are sent by libtidy
-    sink = create_sink()
-    tidy.tidySetErrorSink(doc, sink)
-
-    try:
-        # Set options on the document
-        # If keep_doc=True, options will persist between calls, but they can
-        # be overridden, and the BASE_OPTIONS will be set each time
-        tidy_options = dict(BASE_OPTIONS)
-        if options:
-            tidy_options.update(options)
-        if use_unicode:
-            tidy_options['input-encoding'] = 'utf8'
-            tidy_options['output-encoding'] = 'utf8'
-        for key in tidy_options:
-            value = tidy_options[key]
-            key = key.replace('_', '-')
-            if value is None:
-                value = ''
-            key, value = encode_key_value(key, value)
-            tidy.tidyOptParseValue(doc, key, value)
-            error = str(sink)
-            if error:
-                raise ValueError("(tidylib) " + error)
-
-        # The point of the whole thing
-        tidy.tidyParseString(doc, text)
-        tidy.tidyCleanAndRepair(doc)
-
-        # Guess at buffer size; tidy returns ENOMEM if the buffer is too
-        # small and puts the required size into out_length
-        out_length = ctypes.c_int(8192)
-        out = ctypes.c_buffer(out_length.value)
-        while ENOMEM == tidy.tidySaveString(doc, out, ctypes.byref(out_length)):
-            out = ctypes.c_buffer(out_length.value)
-
-        document = out.value
-        if use_unicode:
-            document = document.decode('utf-8')
-        errors = str(sink)
-    finally:
-        destroy_sink(sink)
-        if not keep_doc:
-            tidy.tidyRelease(doc)
-
-    return (document, errors)
-
-
-def tidy_fragment(text, options=None, keep_doc=False):
-    """ Tidy a string with markup and return only the <body> contents.
-
-    HTML Tidy normally returns a full (X)HTML document; this function returns only
-    the contents of the <body> element and is meant to be used for snippets.
-    Calling tidy_fragment on elements that don't go in the <body>, like <title>,
-    will produce incorrect behavior.
-
-    Arguments and return value are the same as tidy_document. Note that HTML
-    Tidy will always complain about the lack of a doctype and <title> element
-    in fragments, and these errors are not stripped out for you. """
-    options = dict(options) if options else dict()
-    options["show-body-only"] = 1
-    document, errors = tidy_document(text, options, keep_doc)
-    document = document.strip()
-    return document, errors
-
-
-def release_tidy_doc():
-    """ Release the stored document object in the current thread. Only useful
-    if you have called tidy_document or tidy_fragament with keep_doc=True. """
-    if hasattr(thread_local_doc, 'doc'):
-        tidy.tidyRelease(thread_local_doc.doc)
-        del thread_local_doc.doc
+from .tidy import Tidy, PersistentTidy, tidy_document, tidy_fragment, release_tidy_doc
diff --git a/tidylib/tidy.py b/tidylib/tidy.py
new file mode 100644
index 0000000..a71ae9f
--- /dev/null
+++ b/tidylib/tidy.py
@@ -0,0 +1,234 @@
+# Copyright 2009-2015 Jason Stitt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+import ctypes
+import threading
+import platform
+import warnings
+from contextlib import contextmanager
+from .sink import create_sink, destroy_sink
+
+__all__ = ['Tidy', 'PersistentTidy']
+
+# Default search order for library names if nothing is passed in
+LIB_NAMES = ['libtidy', 'libtidy.so', 'libtidy-0.99.so.0', 'cygtidy-0-99-0',
+             'tidylib', 'libtidy.dylib', 'tidy']
+
+# Error code from library
+ENOMEM = -12
+
+# Default options; can be overridden with argument to Tidy()
+BASE_OPTIONS = {
+    "indent": 1,           # Pretty; not too much of a performance hit
+    "tidy-mark": 0,        # No tidy meta tag in output
+    "wrap": 0,             # No wrapping
+    "alt-text": "",        # Help ensure validation
+    "doctype": 'strict',   # Little sense in transitional for tool-generated markup...
+    "force-output": 1,     # May not get what you expect but you will get something
+}
+
+KEEP_DOC_WARNING = "keep_doc and release_tidy_doc are no longer used. Create a PersistentTidy object instead."
+
+# Fix for Windows b/c tidy uses stdcall on Windows
+if "Windows" == platform.system():
+    load_library = ctypes.windll.LoadLibrary
+else:
+    load_library = ctypes.cdll.LoadLibrary
+
+# -------------------------------------------------------------------------- #
+# 3.x/2.x cross-compatibility
+
+try:
+    unicode  # 2.x
+
+    def is_unicode(obj):
+        return isinstance(obj, unicode)
+
+    def encode_key_value(k, v):
+        return unicode(k).encode('utf-8'), unicode(v).encode('utf-8')
+except NameError:
+    # 3.x
+    def is_unicode(obj):
+        return isinstance(obj, str)
+
+    def encode_key_value(k, v):
+        return str(k).encode('utf-8'), str(v).encode('utf-8')
+
+# -------------------------------------------------------------------------- #
+# The main python interface
+
+
+class Tidy(object):
+
+    """ Wrapper around the HTML Tidy library for cleaning up possibly invalid
+    HTML and XHTML. """
+
+    def __init__(self, lib_names=LIB_NAMES):
+        lib_names = lib_names if isinstance(lib_names, list) else [lib_names]
+        for name in lib_names:
+            try:
+                self._tidy = load_library(name)
+                break
+            except OSError:
+                continue
+        if self._tidy is None:
+            raise OSError(
+                "Could not load libtidy using any of these names: "
+                + ",".join(lib_names))
+        self._tidy.tidyCreate.restype = ctypes.POINTER(ctypes.c_void_p)  # Fix for 64-bit systems
+
+    @contextmanager
+    def _doc_and_sink(self):
+        " Create and cleanup a Tidy document and error sink "
+        doc = self._tidy.tidyCreate()
+        sink = create_sink()
+        self._tidy.tidySetErrorSink(doc, sink)
+        yield (doc, sink)
+        destroy_sink(sink)
+        self._tidy.tidyRelease(doc)
+
+    def tidy_document(self, text, options=None):
+        """ Run a string with markup through HTML Tidy; return the corrected one
+        and any error output.
+
+        text: The markup, which may be anything from an empty string to a complete
+        (X)HTML document. If you pass in a unicode type (py3 str, py2 unicode) you
+        get one back out, and tidy will have some options set that may affect
+        behavior (e.g. named entities converted to plain unicode characters). If
+        you pass in a bytes type (py3 bytes, py2 str) you will get one of those
+        back.
+
+        options (dict): Options passed directly to HTML Tidy; see the HTML Tidy docs
+        (http://tidy.sourceforge.net/docs/quickref.html) or run tidy -help-config
+        from the command line.
+
+        returns (str, str): The tidied markup and unparsed warning/error messages.
+        Warnings and errors are returned just as tidylib returns them.
+        """
+
+        # Unicode approach is to encode as string, then decode libtidy output
+        use_unicode = False
+        if is_unicode(text):
+            use_unicode = True
+            text = text.encode('utf-8')
+
+        with self._doc_and_sink() as (doc, sink):
+            tidy_options = dict(BASE_OPTIONS)
+            if options:
+                tidy_options.update(options)
+            if use_unicode:
+                tidy_options['input-encoding'] = 'utf8'
+                tidy_options['output-encoding'] = 'utf8'
+            for key in tidy_options:
+                value = tidy_options[key]
+                key = key.replace('_', '-')
+                if value is None:
+                    value = ''
+                key, value = encode_key_value(key, value)
+                self._tidy.tidyOptParseValue(doc, key, value)
+                error = str(sink)
+                if error:
+                    raise ValueError("(tidylib) " + error)
+
+            self._tidy.tidyParseString(doc, text)
+            self._tidy.tidyCleanAndRepair(doc)
+
+            # Guess at buffer size; tidy returns ENOMEM if the buffer is too
+            # small and puts the required size into out_length
+            out_length = ctypes.c_int(8192)
+            out = ctypes.c_buffer(out_length.value)
+            while ENOMEM == self._tidy.tidySaveString(doc, out, ctypes.byref(out_length)):
+                out = ctypes.c_buffer(out_length.value)
+
+            document = out.value
+            if use_unicode:
+                document = document.decode('utf-8')
+            errors = str(sink)
+
+        return (document, errors)
+
+    def tidy_fragment(self, text, options=None):
+        """ Tidy a string with markup and return only the <body> contents.
+
+        HTML Tidy normally returns a full (X)HTML document; this function returns only
+        the contents of the <body> element and is meant to be used for snippets.
+        Calling tidy_fragment on elements that don't go in the <body>, like <title>,
+        will produce incorrect behavior.
+
+        Arguments and return value are the same as tidy_document. Note that HTML
+        Tidy will always complain about the lack of a doctype and <title> element
+        in fragments, and these errors are not stripped out for you. """
+        options = dict(options) if options else dict()
+        options["show-body-only"] = 1
+        document, errors = self.tidy_document(text, options)
+        document = document.strip()
+        return document, errors
+
+
+class PersistentTidy(Tidy):
+
+    """ Functions the same as the Tidy class but keeps a persistent reference
+    to one Tidy document object. This increases performance slightly when
+    tidying many documents in a row. It also persists all options (not just
+    the base options) between runs, which could lead to unexpected behavior.
+    If you plan to use different options on each run with PersistentTidy, set
+    all options that could change on every call. Note that passing in unicode
+    text will result in the input-encoding and output-encoding options being
+    automatically set. Thread-local storage is used for the document object
+    (one document per thread). """
+
+    def __init__(self, lib_names=LIB_NAMES):
+        Tidy.__init__(self, lib_names)
+        self._local = threading.local()
+        self._local.doc = self._tidy.tidyCreate()
+
+    def __del__(self):
+        self._tidy.tidyRelease(self._local.doc)
+
+    @contextmanager
+    def _doc_and_sink(self):
+        " Create and cleanup an error sink but use the persistent doc object "
+        sink = create_sink()
+        self._tidy.tidySetErrorSink(self._local.doc, sink)
+        yield (self._local.doc, sink)
+        destroy_sink(sink)
+
+
+def tidy_document(text, options=None, keep_doc=False):
+    if keep_doc:
+        warnings.warn(KEEP_DOC_WARNING, DeprecationWarning, stacklevel=2)
+    return get_module_tidy().tidy_document(text, options)
+
+
+def tidy_fragment(text, options=None, keep_doc=False):
+    if keep_doc:
+        warnings.warn(KEEP_DOC_WARNING, DeprecationWarning, stacklevel=2)
+    return get_module_tidy().tidy_fragment(text, options)
+
+
+def get_module_tidy():
+    global _tidy
+    if '_tidy' not in globals():
+        _tidy = Tidy()
+    return _tidy
+
+
+def release_tidy_doc():
+    warnings.warn(KEEP_DOC_WARNING, DeprecationWarning, stacklevel=2)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-tidylib.git



More information about the Python-modules-commits mailing list