[Python-modules-commits] [python-tidylib] 01/08: Import python-tidylib_0.3.0~dfsg.orig.tar.gz
Dmitry Shachnev
mitya57 at moszumanska.debian.org
Sun Sep 25 10:58:35 UTC 2016
This is an automated email from the git hooks/post-receive script.
mitya57 pushed a commit to branch master
in repository python-tidylib.
commit 678ef50477d8d385a9da3da4e2a327e5a8388aff
Author: Dmitry Shachnev <mitya57 at gmail.com>
Date: Sun Sep 25 13:36:10 2016 +0300
Import python-tidylib_0.3.0~dfsg.orig.tar.gz
---
PKG-INFO | 18 ++--
README | 20 ++---
setup.py | 45 +++++-----
tests/test_docs.py | 34 +++++---
tests/threadsafety.py | 9 +-
tidylib/__init__.py | 204 +------------------------------------------
tidylib/tidy.py | 234 ++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 307 insertions(+), 257 deletions(-)
diff --git a/PKG-INFO b/PKG-INFO
index a1ac0e5..8a64af2 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: pytidylib
-Version: 0.2.4
+Version: 0.3.0
Summary: Python wrapper for HTML Tidy (tidylib) on Python 2 and 3
Home-page: http://countergram.com/open-source/pytidylib/
Author: Jason Stitt
@@ -18,12 +18,18 @@ Description: `PyTidyLib`_ is a Python package that wraps the `HTML Tidy`_ librar
* Indent the output, including proper (i.e. no) indenting for ``pre`` elements,
which some (X)HTML indenting code overlooks.
- Version usage
- =============
+ Changes
+ =======
- * Windows: 0.2.0 and later
- * Python 3: Tests pass on 0.2.3
- * tidylib itself is not actively updated and may have problems with newer HTML
+ * 0.3.0: Refactored to use Tidy and PersistentTidy classes while keeping the
+ functional interface (which will lazily create a global Tidy() object) for
+ backward compatibility. You can now pass a list of library names and base
+ options when instantiating Tidy. The keep_doc argument is now deprecated
+ and does nothing; use PersistentTidy.
+
+ * 0.2.4: Bugfix for a strange memory allocation corner case in Tidy.
+
+ * 0.2.3: Python 3 support (2 + 3 cross compatible) with passing Tox tests.
Small example of use
====================
diff --git a/README b/README
index a471b26..1b16576 100644
--- a/README
+++ b/README
@@ -1,14 +1,10 @@
-For documentation, see docs/html/index.html in this distribution, or
-http://countergram.com/open-source/pytidylib/
+This is a Python wrapper around the HTML Tidy library. Quick start example:
-Small example of use:
+from tidylib import Tidy
+tidy = Tidy()
+document, errors = tidy.tidy_document('<p>fõo <img src="bar.jpg">',
+ options={'alt-text': 'baz'})
+print(document)
+print(errors)
-from tidylib import tidy_document
-document, errors = tidy_document('''<p>fõo <img src="bar.jpg">''',
- options={'numeric-entities':1})
-print document
-print errors
-
-NOTE: HTML Tidy itself has currently not been updated for a long time, and may
-not be, and it may have trouble with newer HTML. This is just a thin Python
-wrapper around HTML Tidy, which is a separate project.
+For full documentation, see the docs/ directory.
diff --git a/setup.py b/setup.py
index 49e1d71..ceadc75 100644
--- a/setup.py
+++ b/setup.py
@@ -1,4 +1,4 @@
-# Copyright 2009 Jason Stitt
+# Copyright 2009-2015 Jason Stitt
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -33,12 +33,18 @@ library's many capabilities include:
* Indent the output, including proper (i.e. no) indenting for ``pre`` elements,
which some (X)HTML indenting code overlooks.
-Version usage
-=============
+Changes
+=======
-* Windows: 0.2.0 and later
-* Python 3: Tests pass on 0.2.3
-* tidylib itself is not actively updated and may have problems with newer HTML
+* 0.3.0: Refactored to use Tidy and PersistentTidy classes while keeping the
+  functional interface (which will lazily create a global Tidy() object) for
+  backward compatibility. You can now pass a list of library names and base
+  options when instantiating Tidy. The keep_doc argument is now deprecated
+  and does nothing; use PersistentTidy.
+
+* 0.2.4: Bugfix for a strange memory allocation corner case in Tidy.
+
+* 0.2.3: Python 3 support (2 + 3 cross compatible) with passing Tox tests.
Small example of use
====================
@@ -61,7 +67,7 @@ the `PyTidyLib`_ web page.
.. _`PyTidyLib`: http://countergram.com/open-source/pytidylib/
"""
-VERSION = "0.2.4"
+VERSION = "0.3.0"
setup(
name="pytidylib",
@@ -73,16 +79,15 @@ setup(
url="http://countergram.com/open-source/pytidylib/",
packages=['tidylib'],
classifiers=[
- 'Development Status :: 5 - Production/Stable',
- 'Environment :: Other Environment',
- 'Intended Audience :: Developers',
- 'License :: OSI Approved :: MIT License',
- 'Programming Language :: Python',
- 'Programming Language :: Python :: 3',
- 'Natural Language :: English',
- 'Topic :: Utilities',
- 'Topic :: Text Processing :: Markup :: HTML',
- 'Topic :: Text Processing :: Markup :: XML',
- ],
- )
-
+ 'Development Status :: 5 - Production/Stable',
+ 'Environment :: Other Environment',
+ 'Intended Audience :: Developers',
+ 'License :: OSI Approved :: MIT License',
+ 'Programming Language :: Python',
+ 'Programming Language :: Python :: 3',
+ 'Natural Language :: English',
+ 'Topic :: Utilities',
+ 'Topic :: Text Processing :: Markup :: HTML',
+ 'Topic :: Text Processing :: Markup :: XML',
+ ],
+)
diff --git a/tests/test_docs.py b/tests/test_docs.py
index 45ced58..adcffe4 100644
--- a/tests/test_docs.py
+++ b/tests/test_docs.py
@@ -22,7 +22,7 @@
from __future__ import unicode_literals
import unittest
-from tidylib import tidy_document, release_tidy_doc, thread_local_doc
+from tidylib import Tidy, PersistentTidy, tidy_document
DOC = u'''<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
<html>
@@ -76,6 +76,28 @@ class TestDocs1(unittest.TestCase):
doc, err = tidy_document(h)
self.assertEqual(doc, expected)
+ def test_can_use_two_tidy_instances(self):
+ t1 = Tidy()
+ t2 = Tidy()
+ self.assertEqual(t1.tidy_document(DOC % 'a')[0], DOC % 'a')
+ self.assertEqual(t2.tidy_document(DOC % 'b')[0], DOC % 'b')
+
+ def test_tidy_doesnt_persist_options(self):
+ tidy = Tidy()
+ # This option makes it a fragment
+ doc, err = tidy.tidy_document(DOC % 'a', {'show-body-only': 1})
+ self.assertEqual(doc, 'a\n')
+ doc, err = tidy.tidy_document(DOC % 'a')
+ self.assertEqual(doc, DOC % 'a')
+
+ def test_persistent_tidy_does_persist_options(self):
+ tidy = PersistentTidy()
+ # This option makes it a fragment
+ doc, err = tidy.tidy_document(DOC % 'a', {'show-body-only': 1})
+ self.assertEqual(doc, 'a\n')
+ doc, err = tidy.tidy_document(DOC % 'a')
+ self.assertEqual(doc, 'a\n')
+
def test_xmlns_large_document_xml_corner_case(self):
# Test for a super weird edge case in Tidy that can cause it to return
# the wrong required buffer size.
@@ -84,16 +106,6 @@ class TestDocs1(unittest.TestCase):
doc, err = tidy_document(html, {'output-xml': 1})
self.assertEqual(doc.strip()[-7:], "</html>")
- def test_keep_document(self):
- h = "hello"
- expected = DOC % h
- for i in range(4):
- doc, err = tidy_document(h, keep_doc=True)
- self.assertEqual(doc, expected)
- assert hasattr(thread_local_doc, 'doc')
- release_tidy_doc()
- assert not hasattr(thread_local_doc, 'doc')
-
if __name__ == '__main__':
unittest.main()
diff --git a/tests/threadsafety.py b/tests/threadsafety.py
index cc2a128..85f7e68 100644
--- a/tests/threadsafety.py
+++ b/tests/threadsafety.py
@@ -24,9 +24,8 @@ from tidylib import tidy_document
error_queue = Queue()
-DOC = '''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
+DOC = '''<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">
+<html>
<head>
<title></title>
</head>
@@ -63,5 +62,5 @@ def run_test():
if __name__ == '__main__':
run_test()
if not error_queue.empty():
- print "About %s errors out of %s" % (error_queue.qsize(), NUM_THREADS * NUM_TRIES)
- print error_queue.get()
+ print("About %s errors out of %s" % (error_queue.qsize(), NUM_THREADS * NUM_TRIES))
+ print(error_queue.get())
diff --git a/tidylib/__init__.py b/tidylib/__init__.py
index 5a3864c..db089cd 100644
--- a/tidylib/__init__.py
+++ b/tidylib/__init__.py
@@ -1,203 +1 @@
-# Copyright 2009-2014 Jason Stitt
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-import ctypes
-import threading
-import platform
-from tidylib.sink import create_sink, destroy_sink
-
-__all__ = ['tidy_document', 'tidy_fragment', 'release_tidy_doc']
-
-# -------------------------------------------------------------------------- #
-# Constants
-
-LIB_NAMES = ['libtidy', 'libtidy.so', 'libtidy-0.99.so.0', 'cygtidy-0-99-0',
- 'tidylib', 'libtidy.dylib', 'tidy']
-ENOMEM = -12
-BASE_OPTIONS = {
- "indent": 1, # Pretty; not too much of a performance hit
- "tidy-mark": 0, # No tidy meta tag in output
- "wrap": 0, # No wrapping
- "alt-text": "", # Help ensure validation
- "doctype": 'strict', # Little sense in transitional for tool-generated markup...
- "force-output": 1, # May not get what you expect but you will get something
-}
-
-# Note: These are meant as sensible defaults. If you don't like these being
-# applied by default, just set tidylib.BASE_OPTIONS = {} after importing.
-# You can of course override any of these options when you call the
-# tidy_document() or tidy_fragment() function
-
-# -------------------------------------------------------------------------- #
-# Globals
-
-tidy = None
-thread_local_doc = threading.local()
-
-# Fix for Windows b/c tidy uses stdcall on Windows
-if "Windows" == platform.system():
- load_library = ctypes.windll.LoadLibrary
-else:
- load_library = ctypes.cdll.LoadLibrary
-
-for name in LIB_NAMES:
- try:
- tidy = load_library(name)
- break
- except OSError:
- pass
-
-if tidy is None:
- raise OSError("Could not load libtidy using any of these names: %s" % (",".join(LIB_NAMES)))
-
-tidy.tidyCreate.restype = ctypes.POINTER(ctypes.c_void_p) # Fix for 64-bit systems
-
-# -------------------------------------------------------------------------- #
-# 3.x/2.x cross-compatibility
-
-try:
- unicode # 2.x
-
- def is_unicode(obj):
- return isinstance(obj, unicode)
-
- def encode_key_value(k, v):
- return unicode(k).encode('utf-8'), unicode(v).encode('utf-8')
-except NameError:
- # 3.x
- def is_unicode(obj):
- return isinstance(obj, str)
-
- def encode_key_value(k, v):
- return str(k).encode('utf-8'), str(v).encode('utf-8')
-
-# -------------------------------------------------------------------------- #
-# Functions
-
-
-def tidy_document(text, options=None, keep_doc=False):
- """ Run a string with markup through HTML Tidy; return the corrected one.
-
- text: The markup, which may be anything from an empty string to a complete
- (X)HTML document. If you pass in a unicode type (py3 str, py2 unicode) you
- get one back out, and tidy will have some options set that may affect
- behavior (e.g. named entities converted to plain unicode characters). If
- you pass in a bytes type (py3 bytes, py2 str) you will get one of those
- back.
-
- options (dict): Options passed directly to HTML Tidy; see the HTML Tidy docs
- (http://tidy.sourceforge.net/docs/quickref.html) or run tidy -help-config
- from the command line.
-
- keep_doc (boolean): If True, store 1 document object per thread and re-use
- it, for a slight performance boost especially when tidying very large numbers
- of very short documents.
-
- returns (str, str): The tidied markup and unparsed warning/error messages.
- Warnings and errors are returned just as tidylib returns them.
- """
- global tidy, option_names
-
- # Unicode approach is to encode as string, then decode libtidy output
- use_unicode = False
- if is_unicode(text):
- use_unicode = True
- text = text.encode('utf-8')
-
- # Manage thread-local storage of persistent document object
- if keep_doc:
- if not hasattr(thread_local_doc, 'doc'):
- thread_local_doc.doc = tidy.tidyCreate()
- doc = thread_local_doc.doc
- else:
- doc = tidy.tidyCreate()
-
- # This is where error messages are sent by libtidy
- sink = create_sink()
- tidy.tidySetErrorSink(doc, sink)
-
- try:
- # Set options on the document
- # If keep_doc=True, options will persist between calls, but they can
- # be overridden, and the BASE_OPTIONS will be set each time
- tidy_options = dict(BASE_OPTIONS)
- if options:
- tidy_options.update(options)
- if use_unicode:
- tidy_options['input-encoding'] = 'utf8'
- tidy_options['output-encoding'] = 'utf8'
- for key in tidy_options:
- value = tidy_options[key]
- key = key.replace('_', '-')
- if value is None:
- value = ''
- key, value = encode_key_value(key, value)
- tidy.tidyOptParseValue(doc, key, value)
- error = str(sink)
- if error:
- raise ValueError("(tidylib) " + error)
-
- # The point of the whole thing
- tidy.tidyParseString(doc, text)
- tidy.tidyCleanAndRepair(doc)
-
- # Guess at buffer size; tidy returns ENOMEM if the buffer is too
- # small and puts the required size into out_length
- out_length = ctypes.c_int(8192)
- out = ctypes.c_buffer(out_length.value)
- while ENOMEM == tidy.tidySaveString(doc, out, ctypes.byref(out_length)):
- out = ctypes.c_buffer(out_length.value)
-
- document = out.value
- if use_unicode:
- document = document.decode('utf-8')
- errors = str(sink)
- finally:
- destroy_sink(sink)
- if not keep_doc:
- tidy.tidyRelease(doc)
-
- return (document, errors)
-
-
-def tidy_fragment(text, options=None, keep_doc=False):
- """ Tidy a string with markup and return only the <body> contents.
-
- HTML Tidy normally returns a full (X)HTML document; this function returns only
- the contents of the <body> element and is meant to be used for snippets.
- Calling tidy_fragment on elements that don't go in the <body>, like <title>,
- will produce incorrect behavior.
-
- Arguments and return value are the same as tidy_document. Note that HTML
- Tidy will always complain about the lack of a doctype and <title> element
- in fragments, and these errors are not stripped out for you. """
- options = dict(options) if options else dict()
- options["show-body-only"] = 1
- document, errors = tidy_document(text, options, keep_doc)
- document = document.strip()
- return document, errors
-
-
-def release_tidy_doc():
- """ Release the stored document object in the current thread. Only useful
- if you have called tidy_document or tidy_fragment with keep_doc=True. """
- if hasattr(thread_local_doc, 'doc'):
- tidy.tidyRelease(thread_local_doc.doc)
- del thread_local_doc.doc
+from .tidy import Tidy, PersistentTidy, tidy_document, tidy_fragment, release_tidy_doc
diff --git a/tidylib/tidy.py b/tidylib/tidy.py
new file mode 100644
index 0000000..a71ae9f
--- /dev/null
+++ b/tidylib/tidy.py
@@ -0,0 +1,234 @@
+# Copyright 2009-2015 Jason Stitt
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+import ctypes
+import threading
+import platform
+import warnings
+from contextlib import contextmanager
+from .sink import create_sink, destroy_sink
+
+__all__ = ['Tidy', 'PersistentTidy']
+
+# Default search order for library names if nothing is passed in
+LIB_NAMES = ['libtidy', 'libtidy.so', 'libtidy-0.99.so.0', 'cygtidy-0-99-0',
+ 'tidylib', 'libtidy.dylib', 'tidy']
+
+# Error code from library
+ENOMEM = -12
+
+# Default options; can be overridden per call via the options argument
+BASE_OPTIONS = {
+ "indent": 1, # Pretty; not too much of a performance hit
+ "tidy-mark": 0, # No tidy meta tag in output
+ "wrap": 0, # No wrapping
+ "alt-text": "", # Help ensure validation
+ "doctype": 'strict', # Little sense in transitional for tool-generated markup...
+ "force-output": 1, # May not get what you expect but you will get something
+}
+
+KEEP_DOC_WARNING = "keep_doc and release_tidy_doc are no longer used. Create a PersistentTidy object instead."
+
+# Fix for Windows b/c tidy uses stdcall on Windows
+if "Windows" == platform.system():
+ load_library = ctypes.windll.LoadLibrary
+else:
+ load_library = ctypes.cdll.LoadLibrary
+
+# -------------------------------------------------------------------------- #
+# 3.x/2.x cross-compatibility
+
+try:
+ unicode # 2.x
+
+ def is_unicode(obj):
+ return isinstance(obj, unicode)
+
+ def encode_key_value(k, v):
+ return unicode(k).encode('utf-8'), unicode(v).encode('utf-8')
+except NameError:
+ # 3.x
+ def is_unicode(obj):
+ return isinstance(obj, str)
+
+ def encode_key_value(k, v):
+ return str(k).encode('utf-8'), str(v).encode('utf-8')
+
+# -------------------------------------------------------------------------- #
+# The main python interface
+
+
+class Tidy(object):
+
+ """ Wrapper around the HTML Tidy library for cleaning up possibly invalid
+ HTML and XHTML. """
+
+ def __init__(self, lib_names=LIB_NAMES):
+ lib_names = lib_names if isinstance(lib_names, list) else [lib_names]
+ for name in lib_names:
+ try:
+ self._tidy = load_library(name)
+ break
+ except OSError:
+ continue
+ if self._tidy is None:
+ raise OSError(
+ "Could not load libtidy using any of these names: "
+ + ",".join(lib_names))
+ self._tidy.tidyCreate.restype = ctypes.POINTER(ctypes.c_void_p) # Fix for 64-bit systems
+
+ @contextmanager
+ def _doc_and_sink(self):
+ " Create and clean up a Tidy document and error sink "
+ doc = self._tidy.tidyCreate()
+ sink = create_sink()
+ self._tidy.tidySetErrorSink(doc, sink)
+ yield (doc, sink)
+ destroy_sink(sink)
+ self._tidy.tidyRelease(doc)
+
+ def tidy_document(self, text, options=None):
+ """ Run a string with markup through HTML Tidy; return the corrected one
+ and any error output.
+
+ text: The markup, which may be anything from an empty string to a complete
+ (X)HTML document. If you pass in a unicode type (py3 str, py2 unicode) you
+ get one back out, and tidy will have some options set that may affect
+ behavior (e.g. named entities converted to plain unicode characters). If
+ you pass in a bytes type (py3 bytes, py2 str) you will get one of those
+ back.
+
+ options (dict): Options passed directly to HTML Tidy; see the HTML Tidy docs
+ (http://tidy.sourceforge.net/docs/quickref.html) or run tidy -help-config
+ from the command line.
+
+ returns (str, str): The tidied markup and unparsed warning/error messages.
+ Warnings and errors are returned just as tidylib returns them.
+ """
+
+ # Unicode approach is to encode as string, then decode libtidy output
+ use_unicode = False
+ if is_unicode(text):
+ use_unicode = True
+ text = text.encode('utf-8')
+
+ with self._doc_and_sink() as (doc, sink):
+ tidy_options = dict(BASE_OPTIONS)
+ if options:
+ tidy_options.update(options)
+ if use_unicode:
+ tidy_options['input-encoding'] = 'utf8'
+ tidy_options['output-encoding'] = 'utf8'
+ for key in tidy_options:
+ value = tidy_options[key]
+ key = key.replace('_', '-')
+ if value is None:
+ value = ''
+ key, value = encode_key_value(key, value)
+ self._tidy.tidyOptParseValue(doc, key, value)
+ error = str(sink)
+ if error:
+ raise ValueError("(tidylib) " + error)
+
+ self._tidy.tidyParseString(doc, text)
+ self._tidy.tidyCleanAndRepair(doc)
+
+ # Guess at buffer size; tidy returns ENOMEM if the buffer is too
+ # small and puts the required size into out_length
+ out_length = ctypes.c_int(8192)
+ out = ctypes.c_buffer(out_length.value)
+ while ENOMEM == self._tidy.tidySaveString(doc, out, ctypes.byref(out_length)):
+ out = ctypes.c_buffer(out_length.value)
+
+ document = out.value
+ if use_unicode:
+ document = document.decode('utf-8')
+ errors = str(sink)
+
+ return (document, errors)
+
+ def tidy_fragment(self, text, options=None):
+ """ Tidy a string with markup and return only the <body> contents.
+
+ HTML Tidy normally returns a full (X)HTML document; this function returns only
+ the contents of the <body> element and is meant to be used for snippets.
+ Calling tidy_fragment on elements that don't go in the <body>, like <title>,
+ will produce incorrect behavior.
+
+ Arguments and return value are the same as tidy_document. Note that HTML
+ Tidy will always complain about the lack of a doctype and <title> element
+ in fragments, and these errors are not stripped out for you. """
+ options = dict(options) if options else dict()
+ options["show-body-only"] = 1
+ document, errors = self.tidy_document(text, options)
+ document = document.strip()
+ return document, errors
+
+
+class PersistentTidy(Tidy):
+
+ """ Functions the same as the Tidy class but keeps a persistent reference
+ to one Tidy document object. This increases performance slightly when
+ tidying many documents in a row. It also persists all options (not just
+ the base options) between runs, which could lead to unexpected behavior.
+ If you plan to use different options on each run with PersistentTidy, set
+ all options that could change on every call. Note that passing in unicode
+ text will result in the input-encoding and output-encoding options being
+ automatically set. Thread-local storage is used for the document object
+ (one document per thread). """
+
+ def __init__(self, lib_names=LIB_NAMES):
+ Tidy.__init__(self, lib_names)
+ self._local = threading.local()
+ self._local.doc = self._tidy.tidyCreate()
+
+ def __del__(self):
+ self._tidy.tidyRelease(self._local.doc)
+
+ @contextmanager
+ def _doc_and_sink(self):
+ " Create and clean up an error sink but use the persistent doc object "
+ sink = create_sink()
+ self._tidy.tidySetErrorSink(self._local.doc, sink)
+ yield (self._local.doc, sink)
+ destroy_sink(sink)
+
+
+def tidy_document(text, options=None, keep_doc=False):
+ if keep_doc:
+ warnings.warn(KEEP_DOC_WARNING, DeprecationWarning, stacklevel=2)
+ return get_module_tidy().tidy_document(text, options)
+
+
+def tidy_fragment(text, options=None, keep_doc=False):
+ if keep_doc:
+ warnings.warn(KEEP_DOC_WARNING, DeprecationWarning, stacklevel=2)
+ return get_module_tidy().tidy_fragment(text, options)
+
+
+def get_module_tidy():
+ global _tidy
+ if '_tidy' not in globals():
+ _tidy = Tidy()
+ return _tidy
+
+
+def release_tidy_doc():
+ warnings.warn(KEEP_DOC_WARNING, DeprecationWarning, stacklevel=2)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-tidylib.git
More information about the Python-modules-commits
mailing list