[Python-modules-commits] [htmlmin] 01/08: New upstream version 0.1.10
Adrian Vondendriesch
discostu-guest at moszumanska.debian.org
Wed May 10 13:15:29 UTC 2017
This is an automated email from the git hooks/post-receive script.
discostu-guest pushed a commit to branch master
in repository htmlmin.
commit f50c8e14c59e544c90947f1fd52627af116d9ae3
Author: Adrian Vondendriesch <adrian.vondendriesch at credativ.de>
Date: Tue Apr 4 11:15:26 2017 +0200
New upstream version 0.1.10
---
LICENSE | 24 +++
MANIFEST.in | 3 +
PKG-INFO | 25 +++
README.rst | 6 +
htmlmin.egg-info/PKG-INFO | 25 +++
htmlmin.egg-info/SOURCES.txt | 17 ++
htmlmin.egg-info/dependency_links.txt | 1 +
htmlmin.egg-info/entry_points.txt | 3 +
htmlmin.egg-info/top_level.txt | 1 +
htmlmin.egg-info/zip-safe | 1 +
htmlmin/__init__.py | 30 +++
htmlmin/command.py | 169 ++++++++++++++++
htmlmin/decorator.py | 64 ++++++
htmlmin/escape.py | 190 ++++++++++++++++++
htmlmin/main.py | 184 ++++++++++++++++++
htmlmin/middleware.py | 92 +++++++++
htmlmin/parser.py | 355 ++++++++++++++++++++++++++++++++++
setup.cfg | 5 +
setup.py | 42 ++++
19 files changed, 1237 insertions(+)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..0da90f0
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,24 @@
+Copyright (c) 2013, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Dave Mankoff nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..5568704
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include README.rst
+include LICENSE
+prune htmlmin/tests
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..6417365
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,25 @@
+Metadata-Version: 1.1
+Name: htmlmin
+Version: 0.1.10
+Summary: An HTML Minifier
+Home-page: https://htmlmin.readthedocs.org/en/latest/
+Author: Dave Mankoff
+Author-email: mankyd at gmail.com
+License: BSD
+Download-URL: https://github.com/mankyd/htmlmin
+Description: A configurable HTML Minifier with safety features.
+
+ .. image:: https://travis-ci.org/mankyd/htmlmin.png?branch=master
+ :target: http://travis-ci.org/mankyd/htmlmin
+
+ Documentation: https://htmlmin.readthedocs.org/en/latest/
+
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3.2
+Classifier: Topic :: Text Processing :: Markup :: HTML
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..dcb9240
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,6 @@
+A configurable HTML Minifier with safety features.
+
+.. image:: https://travis-ci.org/mankyd/htmlmin.png?branch=master
+ :target: http://travis-ci.org/mankyd/htmlmin
+
+Documentation: https://htmlmin.readthedocs.org/en/latest/
diff --git a/htmlmin.egg-info/PKG-INFO b/htmlmin.egg-info/PKG-INFO
new file mode 100644
index 0000000..6417365
--- /dev/null
+++ b/htmlmin.egg-info/PKG-INFO
@@ -0,0 +1,25 @@
+Metadata-Version: 1.1
+Name: htmlmin
+Version: 0.1.10
+Summary: An HTML Minifier
+Home-page: https://htmlmin.readthedocs.org/en/latest/
+Author: Dave Mankoff
+Author-email: mankyd at gmail.com
+License: BSD
+Download-URL: https://github.com/mankyd/htmlmin
+Description: A configurable HTML Minifier with safety features.
+
+ .. image:: https://travis-ci.org/mankyd/htmlmin.png?branch=master
+ :target: http://travis-ci.org/mankyd/htmlmin
+
+ Documentation: https://htmlmin.readthedocs.org/en/latest/
+
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3.2
+Classifier: Topic :: Text Processing :: Markup :: HTML
diff --git a/htmlmin.egg-info/SOURCES.txt b/htmlmin.egg-info/SOURCES.txt
new file mode 100644
index 0000000..45fbc7e
--- /dev/null
+++ b/htmlmin.egg-info/SOURCES.txt
@@ -0,0 +1,17 @@
+LICENSE
+MANIFEST.in
+README.rst
+setup.py
+htmlmin/__init__.py
+htmlmin/command.py
+htmlmin/decorator.py
+htmlmin/escape.py
+htmlmin/main.py
+htmlmin/middleware.py
+htmlmin/parser.py
+htmlmin.egg-info/PKG-INFO
+htmlmin.egg-info/SOURCES.txt
+htmlmin.egg-info/dependency_links.txt
+htmlmin.egg-info/entry_points.txt
+htmlmin.egg-info/top_level.txt
+htmlmin.egg-info/zip-safe
\ No newline at end of file
diff --git a/htmlmin.egg-info/dependency_links.txt b/htmlmin.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/htmlmin.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/htmlmin.egg-info/entry_points.txt b/htmlmin.egg-info/entry_points.txt
new file mode 100644
index 0000000..e3b716a
--- /dev/null
+++ b/htmlmin.egg-info/entry_points.txt
@@ -0,0 +1,3 @@
+[console_scripts]
+htmlmin = htmlmin.command:main
+
diff --git a/htmlmin.egg-info/top_level.txt b/htmlmin.egg-info/top_level.txt
new file mode 100644
index 0000000..a42ee74
--- /dev/null
+++ b/htmlmin.egg-info/top_level.txt
@@ -0,0 +1 @@
+htmlmin
diff --git a/htmlmin.egg-info/zip-safe b/htmlmin.egg-info/zip-safe
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/htmlmin.egg-info/zip-safe
@@ -0,0 +1 @@
+
diff --git a/htmlmin/__init__.py b/htmlmin/__init__.py
new file mode 100644
index 0000000..cdd3d64
--- /dev/null
+++ b/htmlmin/__init__.py
@@ -0,0 +1,30 @@
+"""
+Copyright (c) 2013, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Dave Mankoff nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+from .main import minify, Minifier
+
+__version__ = '0.1.10'
diff --git a/htmlmin/command.py b/htmlmin/command.py
new file mode 100755
index 0000000..83ac2c3
--- /dev/null
+++ b/htmlmin/command.py
@@ -0,0 +1,169 @@
+"""
+Copyright (c) 2013, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Dave Mankoff nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+#!/usr/bin/env python
+
+import argparse
+import codecs
+import locale
+import sys
+
+#import htmlmin
+from . import Minifier
+
+parser = argparse.ArgumentParser(
+ description='Minify HTML',
+ formatter_class=argparse.RawTextHelpFormatter
+ )
+
+parser.add_argument('input_file',
+ nargs='?',
+ metavar='INPUT',
+ help='File path to html file to minify. Defaults to stdin.',
+ )
+
+parser.add_argument('output_file',
+ nargs='?',
+ metavar='OUTPUT',
+ help="File path to output to. Defaults to stdout.",
+ )
+
+parser.add_argument('-c', '--remove-comments',
+ help=(
+'''When set, comments will be removed. They can be kept on an individual basis
+by starting them with a '!': <!--! comment -->. The '!' will be removed from
+the final output. If you want a '!' as the leading character of your comment,
+put two of them: <!--!! comment -->.
+
+'''),
+ action='store_true')
+
+parser.add_argument('-s', '--remove-empty-space',
+ help=(
+'''When set, this removes empty space betwen tags in certain cases.
+Specifically, it will remove empty space if and only if there a newline
+character occurs within the space. Thus, code like
+'<span>x</span> <span>y</span>' will be left alone, but code such as
+' ...
+ </head>
+ <body>
+ ...'
+will become '...</head><body>...'. Note that this CAN break your
+html if you spread two inline tags over two lines. Use with caution.
+
+'''),
+ action='store_true')
+
+parser.add_argument('--remove-all-empty-space',
+ help=(
+'''When set, this removes ALL empty space betwen tags. WARNING: this can and
+likely will cause unintended consequences. For instance, '<i>X</i> <i>Y</i>'
+will become '<i>X</i><i>Y</i>'. Putting whitespace along with other text will
+avoid this problem. Only use if you are confident in the result. Whitespace is
+not removed from inside of tags, thus '<span> </span>' will be left alone.
+
+'''),
+ action='store_true')
+
+parser.add_argument('--keep-optional-attribute-quotes',
+ help=(
+'''When set, this keeps all attribute quotes, even if they are optional.
+
+'''),
+ action='store_true')
+
+parser.add_argument('-H', '--in-head',
+ help=(
+'''If you are parsing only a fragment of HTML, and the fragment occurs in the
+head of the document, setting this will remove some extra whitespace.
+
+'''),
+ action='store_true')
+
+parser.add_argument('-k', '--keep-pre-attr',
+ help=(
+'''HTMLMin supports the propietary attribute 'pre' that can be added to elements
+to prevent minification. This attribute is removed by default. Set this flag to
+keep the 'pre' attributes in place.
+
+'''),
+ action='store_true')
+
+parser.add_argument('-a', '--pre-attr',
+ help=(
+'''The attribute htmlmin looks for to find blocks of HTML that it should not
+minify. This attribute will be removed from the HTML unless '-k' is
+specified. Defaults to 'pre'.
+
+'''),
+ default='pre')
+
+
+parser.add_argument('-p', '--pre-tags',
+ metavar='TAG',
+ help=(
+'''By default, the contents of 'pre', and 'textarea' tags are left unminified.
+You can specify different tags using the --pre-tags option. 'script' and 'style'
+tags are always left unmininfied.
+
+'''),
+ nargs='*',
+ default=['pre', 'textarea'])
+parser.add_argument('-e', '--encoding',
+
+ help=("Encoding to read and write with. Default 'utf-8'.\n\n"),
+ default='utf-8',
+ )
+
+def main():
+ args = parser.parse_args()
+ minifier = Minifier(
+ remove_comments=args.remove_comments,
+ remove_empty_space=args.remove_empty_space,
+ remove_optional_attribute_quotes=not args.keep_optional_attribute_quotes,
+ pre_tags=args.pre_tags,
+ keep_pre=args.keep_pre_attr,
+ pre_attr=args.pre_attr,
+ )
+ if args.input_file:
+ inp = codecs.open(args.input_file, encoding=args.encoding)
+ else:
+ inp = codecs.getreader(
+ sys.stdin.encoding or locale.getpreferredencoding())(sys.stdin)
+
+ for line in inp.readlines():
+ minifier.input(line)
+
+ if args.output_file:
+ codecs.open(
+ args.output_file, 'w', encoding=args.encoding).write(minifier.output)
+ else:
+ print(minifier.output)
+
+if __name__ == '__main__':
+ main()
+
diff --git a/htmlmin/decorator.py b/htmlmin/decorator.py
new file mode 100644
index 0000000..6e26597
--- /dev/null
+++ b/htmlmin/decorator.py
@@ -0,0 +1,64 @@
+"""
+Copyright (c) 2013, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Dave Mankoff nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+from .main import Minifier
+
+def htmlmin(*args, **kwargs):
+ """Minifies HTML that is returned by a function.
+
+ A simple decorator that minifies the HTML output of any function that it
+ decorates. It supports all the same options that :class:`htmlmin.minify` has.
+ With no options, it uses ``minify``'s default settings::
+
+ @htmlmin
+ def foobar():
+ return ' minify me! '
+
+ or::
+
+ @htmlmin(remove_comments=True)
+ def foobar():
+ return ' minify me! <!-- and remove me! -->'
+ """
+ def _decorator(fn):
+ minify = Minifier(**kwargs).minify
+ def wrapper(*a, **kw):
+ return minify(fn(*a, **kw))
+ return wrapper
+
+ if len(args) == 1:
+ if callable(args[0]) and not kwargs:
+ return _decorator(args[0])
+ else:
+ raise RuntimeError(
+ 'htmlmin decorator does accept positional arguments')
+ elif len(args) > 1:
+ raise RuntimeError(
+ 'htmlmin decorator does accept positional arguments')
+ else:
+ return _decorator
+
diff --git a/htmlmin/escape.py b/htmlmin/escape.py
new file mode 100644
index 0000000..bf1c0cf
--- /dev/null
+++ b/htmlmin/escape.py
@@ -0,0 +1,190 @@
+"""
+Copyright (c) 2015, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Dave Mankoff nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+try:
+ from html import escape
+except ImportError:
+ from cgi import escape
+
+NO_QUOTES = 0
+SINGLE_QUOTE = 1
+DOUBLE_QUOTE = 2
+
+UPPER_A = ord('A')
+UPPER_F = ord('F')
+UPPER_Z = ord('Z')
+LOWER_A = ord('a')
+LOWER_F = ord('f')
+LOWER_Z = ord('z')
+ZERO = ord('0')
+NINE = ord('9')
+
+def escape_tag(val):
+ return escape(val)
+
+def escape_attr_name(val):
+ return escape(val)
+
+def escape_attr_value(val, double_quote=False):
+ val = escape_ambiguous_ampersand(val)
+ if double_quote:
+ return (val.replace('"', '&#34;'), DOUBLE_QUOTE)
+ if '"' in val:
+ if "'" in val:
+ return (val.replace('"', '&#34;'), DOUBLE_QUOTE)
+ else:
+ return (val, SINGLE_QUOTE)
+ elif "'" in val:
+ return (val, DOUBLE_QUOTE)
+
+ if not val or any((c.isspace() for c in val)):
+ return (val, DOUBLE_QUOTE)
+ return (val, NO_QUOTES)
+
+def escape_ambiguous_ampersand(val):
+ # TODO: this function could probably me made a lot faster.
+ if not '&' in val: # short circuit for speed
+ return val
+
+ state = 0
+ result = []
+ amp_buff = []
+ for c in val:
+ if state == 0: # beginning
+ if c == '&':
+ state = 1
+ else:
+ result.append(c)
+ elif state == 1: # ampersand
+ ord_c = ord(c)
+ if (UPPER_A <= ord_c <= UPPER_Z or
+ LOWER_A <= ord_c <= LOWER_Z or
+ ZERO <= ord_c <= NINE):
+ amp_buff.append(c) # TODO: use "name character references" section
+ # https://html.spec.whatwg.org/multipage/syntax.html#named-character-references
+ elif c == '#':
+ state = 2
+ elif c == ';':
+ if amp_buff:
+ result.append('&')
+ result.extend(amp_buff)
+ result.append(';')
+ else:
+ result.append('&;')
+ state = 0
+ amp_buff = []
+ elif c == '&':
+ if amp_buff:
+ result.append('&')
+ result.extend(amp_buff)
+ else:
+ result.append('&')
+ amp_buff = []
+ else:
+ result.append('&')
+ result.extend(amp_buff)
+ result.append(c)
+ state = 0
+ amp_buff = []
+ elif state == 2: # numeric character reference
+ ord_c = ord(c)
+ if c == 'x' or c == 'X':
+ state = 3
+ elif ZERO <= ord_c <= NINE:
+ amp_buff.append(c)
+ elif c == ';':
+ if amp_buff:
+ result.append('&#')
+ result.extend(amp_buff)
+ result.append(';')
+ else:
+ result.append('&#;')
+ state = 0
+ amp_buff = []
+ elif c == '&':
+ if amp_buff:
+ result.append('&#')
+ result.extend(amp_buff)
+ else:
+ result.append('&#')
+ state = 1
+ amp_buff = []
+ else:
+ if amp_buff:
+ result.append('&#')
+ result.extend(amp_buff)
+ result.append(c)
+ else:
+ result.append('&#')
+ result.append(c)
+ state = 0
+ amp_buff = []
+ elif state == 3: # hex character reference
+ ord_c = ord(c)
+ if (UPPER_A <= ord_c <= UPPER_F or
+ LOWER_A <= ord_c <= LOWER_F or
+ ZERO <= ord_c <= NINE):
+ amp_buff.append(c)
+ elif c == ';':
+ if amp_buff:
+ result.append('&#x')
+ result.extend(amp_buff)
+ result.append(';')
+ else:
+ result.append('&#x;')
+ state = 0
+ amp_buff = []
+ elif c == '&':
+ if amp_buff:
+ result.append('&#x')
+ result.extend(amp_buff)
+ else:
+ result.append('&#x')
+ state = 1
+ amp_buff = []
+ else:
+ if amp_buff:
+ result.append('&#x')
+ result.extend(amp_buff)
+ result.append(c)
+ else:
+ result.append('&#x')
+ result.append(c)
+ state = 0
+ amp_buff = []
+
+ if state == 1:
+ result.append('&')
+ result.extend(amp_buff)
+ elif state == 2:
+ result.append('&#')
+ result.extend(amp_buff)
+ elif state == 3:
+ result.append('&#x')
+ result.extend(amp_buff)
+
+ return ''.join(result)
diff --git a/htmlmin/main.py b/htmlmin/main.py
new file mode 100644
index 0000000..cc83b99
--- /dev/null
+++ b/htmlmin/main.py
@@ -0,0 +1,184 @@
+"""
+Copyright (c) 2013, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Dave Mankoff nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+import cgi
+import re
+
+from . import parser
+
+def minify(input,
+ remove_comments=False,
+ remove_empty_space=False,
+ remove_all_empty_space=False,
+ reduce_empty_attributes=True,
+ reduce_boolean_attributes=False,
+ remove_optional_attribute_quotes=True,
+ keep_pre=False,
+ pre_tags=parser.PRE_TAGS,
+ pre_attr='pre'):
+ """Minifies HTML in one shot.
+
+ :param input: A string containing the HTML to be minified.
+ :param remove_comments: Remove comments found in HTML. Individual comments can
+ be maintained by putting a ``!`` as the first character inside the comment.
+ Thus::
+
+ <!-- FOO --> <!--! BAR -->
+
+ Will become simply::
+
+ <!-- BAR -->
+
+ The added exclamation is removed.
+ :param remove_empty_space: Remove empty space found in HTML between an opening
+ and a closing tag and when it contains a newline or carriage return. If
+ whitespace is found that is only spaces and/or tabs, it will be turned into
+ a single space. Be careful, this can have unintended consequences.
+ :param remove_all_empty_space: A more extreme version of
+ ``remove_empty_space``, this removes all empty whitespace found between
+ tags. This is almost guaranteed to break your HTML unless you are very
+ careful.
+ :param reduce_boolean_attributes: Where allowed by the HTML5 specification,
+ attributes such as 'disabled' and 'readonly' will have their value removed,
+ so 'disabled="true"' will simply become 'disabled'. This is generally a
+ good option to turn on except when JavaScript relies on the values.
+ :param remove_optional_attribute_quotes: When True, optional quotes around
+ attributes are removed. When False, all attribute quotes are left intact.
+ Defaults to True.
+ :param keep_pre: By default, htmlmin uses the special attribute ``pre`` to
+ allow you to demarcate areas of HTML that should not be minified. It removes
+ this attribute as it finds it. Setting this value to ``True`` tells htmlmin
+ to leave the attribute in the output.
+ :param pre_tags: A list of tag names that should never be minified. You are
+ free to change this list as you see fit, but you will probably want to
+ include ``pre`` and ``textarea`` if you make any changes to the list. Note
+ that ``<script>`` and ``<style>`` tags are never minimized.
+ :param pre_attr: Specifies the attribute that, when found in an HTML tag,
+ indicates that the content of the tag should not be minified. Defaults to
+ ``pre``.
+ :return: A string containing the minified HTML.
+
+ If you are going to be minifying multiple HTML documents, each with the same
+ settings, consider using :class:`.Minifier`.
+ """
+ minifier = parser.HTMLMinParser(
+ remove_comments=remove_comments,
+ remove_empty_space=remove_empty_space,
+ remove_all_empty_space=remove_all_empty_space,
+ reduce_empty_attributes=reduce_empty_attributes,
+ reduce_boolean_attributes=reduce_boolean_attributes,
+ remove_optional_attribute_quotes=remove_optional_attribute_quotes,
+ keep_pre=keep_pre,
+ pre_tags=pre_tags,
+ pre_attr=pre_attr)
+ minifier.feed(input)
+ minifier.close()
+ return minifier.result
+
+class Minifier(object):
+ """An object that supports HTML Minification.
+
+ Options are passed into this class at initialization time and are then
+ persisted across each use of the instance. If you are going to be minifying
+ multiple peices of HTML, this will be more efficient than using
+ :class:`htmlmin.minify`.
+
+ See :class:`htmlmin.minify` for an explanation of options.
+ """
+
+ def __init__(self,
+ remove_comments=False,
+ remove_empty_space=False,
+ remove_all_empty_space=False,
+ reduce_empty_attributes=True,
+ reduce_boolean_attributes=False,
+ remove_optional_attribute_quotes=True,
+ keep_pre=False,
+ pre_tags=parser.PRE_TAGS,
+ pre_attr='pre'):
+ """Initialize the Minifier.
+
+ See :class:`htmlmin.minify` for an explanation of options.
+ """
+ self._parser = parser.HTMLMinParser(
+ remove_comments=remove_comments,
+ remove_empty_space=remove_empty_space,
+ remove_all_empty_space=remove_all_empty_space,
+ reduce_empty_attributes=reduce_empty_attributes,
+ reduce_boolean_attributes=reduce_boolean_attributes,
+ remove_optional_attribute_quotes=remove_optional_attribute_quotes,
+ keep_pre=keep_pre,
+ pre_tags=pre_tags,
+ pre_attr=pre_attr)
+
+ def minify(self, *input):
+ """Runs HTML through the minifier in one pass.
+
+ :param input: HTML to be fed into the minimizer. Multiple chunks of HTML
+ can be provided, and they are fed in sequentially as if they were
+ concatenated.
+ :returns: A string containing the minified HTML.
+
+ This is the simplest way to use an existing ``Minifier`` instance. This
+ method takes in HTML and minfies it, returning the result. Note that this
+ method resets the internal state of the parser before it does any work. If
+ there is pending HTML in the buffers, it will be lost.
+ """
+ self._parser.reset()
+ self.input(*input)
+ return self.finalize()
+
+ def input(self, *input):
+ """Feed more HTML into the input stream
+
+ :param input: HTML to be fed into the minimizer. Multiple chunks of HTML
+ can be provided, and they are fed in sequentially as if they were
+ concatenated. You can also call this method multiple times to achieve
+ the same effect.
+ """
+ for i in input:
+ self._parser.feed(i)
+
+ @property
+ def output(self):
+ """Retrieve the minified output generated thus far.
+ """
+ return self._parser.result
+
+ def finalize(self):
+ """Finishes current input HTML and returns mininified result.
+
+ This method flushes any remaining input HTML and returns the minified
+ result. It resets the state of the internal parser in the process so that
+ new HTML can be minified. Be sure to call this method before you reuse
+ the ``Minifier`` instance on a new HTML document.
+ """
+ self._parser.close()
+ result = self._parser.result
+ self._parser.reset()
+ return result
+
diff --git a/htmlmin/middleware.py b/htmlmin/middleware.py
new file mode 100644
index 0000000..66f77be
--- /dev/null
+++ b/htmlmin/middleware.py
@@ -0,0 +1,92 @@
+"""
+Copyright (c) 2013, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Dave Mankoff nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+from .main import Minifier
+
+class HTMLMinMiddleware(object):
+ """WSGI Middleware that minifies html on the way out.
+
+ :param by_default: Specifies if minification should be turned on or off by
+ default. Defaults to ``True``.
+ :param keep_header: The middleware recognizes one custom HTTP header that
+ can be used to turn minification on or off on a per-request basis:
+ ``X-HTML-Min-Enable``. Setting the header to ``true`` will turn minfication
+ on; anything else will turn minification off. If ``by_default`` is set to
+ ``False``, this header is how you would turn minification back on. The
+ middleware, by default, removes the header from the output. Setting this
+ to ``True`` leaves the header in tact.
+ :param debug: A quick setting to turn all minification off. The middleware
+ is effectively bypassed.
+
+ This simple middleware minifies any HTML content that passes through it. Any
+ additional keyword arguments beyond the three settings the middleware has are
+ passed on to the internal minifier. The documentation for the options can
+ be found under :class:`htmlmin.minify`.
+ """
+ def __init__(self, app, by_default=True, keep_header=False,
+ debug=False, **kwargs):
+ self.app = app
+ self.by_default = by_default
+ self.debug = debug
+ self.keep_header = keep_header
+ self.minifier = Minifier(**kwargs)
+
+ def __call__(self, environ, start_response):
+ if self.debug:
+ return self.app(environ, start_response)
+
+ should_minify = [] # need to use a mutable object so we can change it
+ # in a different scope.
+ def minified_start_response(status, headers, exc_info=None):
+ should_minify.append(self.should_minify(headers))
+ if not self.keep_header:
+ headers = [(header, value) for header, value in
+ headers if header != 'X-HTML-Min-Enable']
+ start_response(status, headers, exc_info)
+
+ html = [i for i in self.app(environ, minified_start_response)]
+ if should_minify[0]:
+ return [self.minifier.minify(*html)]
+ return html
+
+ def should_minify(self, headers):
+ is_html = False
+ flag_header = None
+ for header, value in headers:
+ if not is_html and header == 'Content-Type' and value == 'text/html':
+ is_html = True
+ if flag_header is not None:
+ break
+
+ if flag_header is None and header == 'X-HTML-Min-Enable':
+ flag_header = (value.lower() == 'true')
+ if is_html:
+ break
+
+ return is_html and (
+ (self.by_default and flag_header != False) or
+ (not self.by_default and flag_header))
diff --git a/htmlmin/parser.py b/htmlmin/parser.py
new file mode 100644
index 0000000..06af6bc
--- /dev/null
+++ b/htmlmin/parser.py
@@ -0,0 +1,355 @@
+"""
+Copyright (c) 2013, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Dave Mankoff nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+from __future__ import unicode_literals
+import sys
+
+from io import StringIO
+
+import re
+try:
+ from html.parser import HTMLParser
... 379 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/htmlmin.git
More information about the Python-modules-commits
mailing list