[Python-modules-commits] [htmlmin] 01/08: New upstream version 0.1.10

Adrian Vondendriesch discostu-guest at moszumanska.debian.org
Wed May 10 13:15:29 UTC 2017


This is an automated email from the git hooks/post-receive script.

discostu-guest pushed a commit to branch master
in repository htmlmin.

commit f50c8e14c59e544c90947f1fd52627af116d9ae3
Author: Adrian Vondendriesch <adrian.vondendriesch at credativ.de>
Date:   Tue Apr 4 11:15:26 2017 +0200

    New upstream version 0.1.10
---
 LICENSE                               |  24 +++
 MANIFEST.in                           |   3 +
 PKG-INFO                              |  25 +++
 README.rst                            |   6 +
 htmlmin.egg-info/PKG-INFO             |  25 +++
 htmlmin.egg-info/SOURCES.txt          |  17 ++
 htmlmin.egg-info/dependency_links.txt |   1 +
 htmlmin.egg-info/entry_points.txt     |   3 +
 htmlmin.egg-info/top_level.txt        |   1 +
 htmlmin.egg-info/zip-safe             |   1 +
 htmlmin/__init__.py                   |  30 +++
 htmlmin/command.py                    | 169 ++++++++++++++++
 htmlmin/decorator.py                  |  64 ++++++
 htmlmin/escape.py                     | 190 ++++++++++++++++++
 htmlmin/main.py                       | 184 ++++++++++++++++++
 htmlmin/middleware.py                 |  92 +++++++++
 htmlmin/parser.py                     | 355 ++++++++++++++++++++++++++++++++++
 setup.cfg                             |   5 +
 setup.py                              |  42 ++++
 19 files changed, 1237 insertions(+)

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..0da90f0
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,24 @@
+Copyright (c) 2013, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Dave Mankoff nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..5568704
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include README.rst
+include LICENSE
+prune htmlmin/tests
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..6417365
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,25 @@
+Metadata-Version: 1.1
+Name: htmlmin
+Version: 0.1.10
+Summary: An HTML Minifier
+Home-page: https://htmlmin.readthedocs.org/en/latest/
+Author: Dave Mankoff
+Author-email: mankyd at gmail.com
+License: BSD
+Download-URL: https://github.com/mankyd/htmlmin
+Description: A configurable HTML Minifier with safety features.
+        
+        .. image:: https://travis-ci.org/mankyd/htmlmin.png?branch=master
+           :target: http://travis-ci.org/mankyd/htmlmin
+        
+        Documentation: https://htmlmin.readthedocs.org/en/latest/
+        
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3.2
+Classifier: Topic :: Text Processing :: Markup :: HTML
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..dcb9240
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,6 @@
+A configurable HTML Minifier with safety features.
+
+.. image:: https://travis-ci.org/mankyd/htmlmin.png?branch=master
+   :target: http://travis-ci.org/mankyd/htmlmin
+
+Documentation: https://htmlmin.readthedocs.org/en/latest/
diff --git a/htmlmin.egg-info/PKG-INFO b/htmlmin.egg-info/PKG-INFO
new file mode 100644
index 0000000..6417365
--- /dev/null
+++ b/htmlmin.egg-info/PKG-INFO
@@ -0,0 +1,25 @@
+Metadata-Version: 1.1
+Name: htmlmin
+Version: 0.1.10
+Summary: An HTML Minifier
+Home-page: https://htmlmin.readthedocs.org/en/latest/
+Author: Dave Mankoff
+Author-email: mankyd at gmail.com
+License: BSD
+Download-URL: https://github.com/mankyd/htmlmin
+Description: A configurable HTML Minifier with safety features.
+        
+        .. image:: https://travis-ci.org/mankyd/htmlmin.png?branch=master
+           :target: http://travis-ci.org/mankyd/htmlmin
+        
+        Documentation: https://htmlmin.readthedocs.org/en/latest/
+        
+Platform: UNKNOWN
+Classifier: Development Status :: 4 - Beta
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3.2
+Classifier: Topic :: Text Processing :: Markup :: HTML
diff --git a/htmlmin.egg-info/SOURCES.txt b/htmlmin.egg-info/SOURCES.txt
new file mode 100644
index 0000000..45fbc7e
--- /dev/null
+++ b/htmlmin.egg-info/SOURCES.txt
@@ -0,0 +1,17 @@
+LICENSE
+MANIFEST.in
+README.rst
+setup.py
+htmlmin/__init__.py
+htmlmin/command.py
+htmlmin/decorator.py
+htmlmin/escape.py
+htmlmin/main.py
+htmlmin/middleware.py
+htmlmin/parser.py
+htmlmin.egg-info/PKG-INFO
+htmlmin.egg-info/SOURCES.txt
+htmlmin.egg-info/dependency_links.txt
+htmlmin.egg-info/entry_points.txt
+htmlmin.egg-info/top_level.txt
+htmlmin.egg-info/zip-safe
\ No newline at end of file
diff --git a/htmlmin.egg-info/dependency_links.txt b/htmlmin.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/htmlmin.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/htmlmin.egg-info/entry_points.txt b/htmlmin.egg-info/entry_points.txt
new file mode 100644
index 0000000..e3b716a
--- /dev/null
+++ b/htmlmin.egg-info/entry_points.txt
@@ -0,0 +1,3 @@
+[console_scripts]
+htmlmin = htmlmin.command:main
+
diff --git a/htmlmin.egg-info/top_level.txt b/htmlmin.egg-info/top_level.txt
new file mode 100644
index 0000000..a42ee74
--- /dev/null
+++ b/htmlmin.egg-info/top_level.txt
@@ -0,0 +1 @@
+htmlmin
diff --git a/htmlmin.egg-info/zip-safe b/htmlmin.egg-info/zip-safe
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/htmlmin.egg-info/zip-safe
@@ -0,0 +1 @@
+
diff --git a/htmlmin/__init__.py b/htmlmin/__init__.py
new file mode 100644
index 0000000..cdd3d64
--- /dev/null
+++ b/htmlmin/__init__.py
@@ -0,0 +1,30 @@
+"""
+Copyright (c) 2013, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Dave Mankoff nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+from .main import minify, Minifier
+
+__version__ = '0.1.10'
diff --git a/htmlmin/command.py b/htmlmin/command.py
new file mode 100755
index 0000000..83ac2c3
--- /dev/null
+++ b/htmlmin/command.py
@@ -0,0 +1,169 @@
+"""
+Copyright (c) 2013, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Dave Mankoff nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+#!/usr/bin/env python
+
+import argparse
+import codecs
+import locale
+import sys
+
+#import htmlmin
+from . import Minifier
+
+parser = argparse.ArgumentParser(
+  description='Minify HTML',
+  formatter_class=argparse.RawTextHelpFormatter
+  )
+
+parser.add_argument('input_file',
+  nargs='?',
+  metavar='INPUT',
+  help='File path to html file to minify. Defaults to stdin.',
+  )
+
+parser.add_argument('output_file',
+  nargs='?',
+  metavar='OUTPUT',
+  help="File path to output to. Defaults to stdout.",
+  )
+
+parser.add_argument('-c', '--remove-comments',
+  help=(
+'''When set, comments will be removed. They can be kept on an individual basis
+by starting them with a '!': <!--! comment -->. The '!' will be removed from
+the final output. If you want a '!' as the leading character of your comment,
+put two of them: <!--!! comment -->.
+
+'''),
+  action='store_true')
+
+parser.add_argument('-s', '--remove-empty-space',
+  help=(
+'''When set, this removes empty space between tags in certain cases.
+Specifically, it will remove empty space if and only if a newline
+character occurs within the space. Thus, code like
+'<span>x</span> <span>y</span>' will be left alone, but code such as
+'   ...
+  </head>
+  <body>
+    ...'
+will become '...</head><body>...'. Note that this CAN break your
+html if you spread two inline tags over two lines. Use with caution.
+
+'''),
+  action='store_true')
+
+parser.add_argument('--remove-all-empty-space',
+  help=(
+'''When set, this removes ALL empty space between tags. WARNING: this can and
+likely will cause unintended consequences. For instance, '<i>X</i> <i>Y</i>'
+will become '<i>X</i><i>Y</i>'. Putting whitespace along with other text will
+avoid this problem. Only use if you are confident in the result. Whitespace is
+not removed from inside of tags, thus '<span> </span>' will be left alone.
+
+'''),
+  action='store_true')
+
+parser.add_argument('--keep-optional-attribute-quotes',
+  help=(
+'''When set, this keeps all attribute quotes, even if they are optional.
+
+'''),
+  action='store_true')
+
+parser.add_argument('-H', '--in-head',
+  help=(
+'''If you are parsing only a fragment of HTML, and the fragment occurs in the
+head of the document, setting this will remove some extra whitespace.
+
+'''),
+  action='store_true')
+
+parser.add_argument('-k', '--keep-pre-attr',
+  help=(
+'''HTMLMin supports the proprietary attribute 'pre' that can be added to elements
+to prevent minification. This attribute is removed by default. Set this flag to
+keep the 'pre' attributes in place.
+
+'''),
+  action='store_true')
+
+parser.add_argument('-a', '--pre-attr',
+  help=(
+'''The attribute htmlmin looks for to find blocks of HTML that it should not
+minify. This attribute will be removed from the HTML unless '-k' is
+specified. Defaults to 'pre'.
+
+'''),
+  default='pre')
+
+
+parser.add_argument('-p', '--pre-tags',
+  metavar='TAG',
+  help=(
+'''By default, the contents of 'pre', and 'textarea' tags are left unminified.
+You can specify different tags using the --pre-tags option. 'script' and 'style'
+tags are always left unminified.
+
+'''),
+  nargs='*',
+  default=['pre', 'textarea'])
+parser.add_argument('-e', '--encoding',
+
+  help=("Encoding to read and write with. Default 'utf-8'.\n\n"),
+  default='utf-8',
+  )
+
+def main():
+  args = parser.parse_args()
+  minifier = Minifier(
+    remove_comments=args.remove_comments,
+    remove_empty_space=args.remove_empty_space,
+    remove_optional_attribute_quotes=not args.keep_optional_attribute_quotes,
+    pre_tags=args.pre_tags,
+    keep_pre=args.keep_pre_attr,
+    pre_attr=args.pre_attr,
+    )
+  if args.input_file:
+    inp = codecs.open(args.input_file, encoding=args.encoding)
+  else:
+    inp = codecs.getreader(
+      sys.stdin.encoding or locale.getpreferredencoding())(sys.stdin)
+
+  for line in inp.readlines():
+    minifier.input(line)
+
+  if args.output_file:
+    codecs.open(
+      args.output_file, 'w', encoding=args.encoding).write(minifier.output)
+  else:
+    print(minifier.output)
+
+if __name__ == '__main__':
+  main()
+
diff --git a/htmlmin/decorator.py b/htmlmin/decorator.py
new file mode 100644
index 0000000..6e26597
--- /dev/null
+++ b/htmlmin/decorator.py
@@ -0,0 +1,64 @@
+"""
+Copyright (c) 2013, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Dave Mankoff nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+from .main import Minifier
+
+def htmlmin(*args, **kwargs):
+  """Minifies HTML that is returned by a function.
+
+  A simple decorator that minifies the HTML output of any function that it
+  decorates. It supports all the same options that :class:`htmlmin.minify` has.
+  With no options, it uses ``minify``'s default settings::
+
+      @htmlmin
+      def foobar():
+         return '   minify me!   '
+
+  or::
+
+      @htmlmin(remove_comments=True)
+      def foobar():
+         return '   minify me!  <!-- and remove me! -->'
+  """
+  def _decorator(fn):
+    minify = Minifier(**kwargs).minify
+    def wrapper(*a, **kw):
+      return minify(fn(*a, **kw))
+    return wrapper
+
+  if len(args) == 1:
+    if callable(args[0]) and not kwargs:
+      return _decorator(args[0])
+    else:
+      raise RuntimeError(
+          'htmlmin decorator does not accept positional arguments')
+  elif len(args) > 1:
+    raise RuntimeError(
+      'htmlmin decorator does not accept positional arguments')
+  else:
+    return _decorator
+        
diff --git a/htmlmin/escape.py b/htmlmin/escape.py
new file mode 100644
index 0000000..bf1c0cf
--- /dev/null
+++ b/htmlmin/escape.py
@@ -0,0 +1,190 @@
+"""
+Copyright (c) 2015, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Dave Mankoff nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+try:
+  from html import escape
+except ImportError:
+  from cgi import escape
+
+NO_QUOTES = 0
+SINGLE_QUOTE = 1
+DOUBLE_QUOTE = 2
+
+UPPER_A = ord('A')
+UPPER_F = ord('F')
+UPPER_Z = ord('Z')
+LOWER_A = ord('a')
+LOWER_F = ord('f')
+LOWER_Z = ord('z')
+ZERO = ord('0')
+NINE = ord('9')
+
+def escape_tag(val):
+  return escape(val)
+
+def escape_attr_name(val):
+  return escape(val)
+
+def escape_attr_value(val, double_quote=False):
+  val = escape_ambiguous_ampersand(val)
+  if double_quote:
+    return (val.replace('"', '&#34;'), DOUBLE_QUOTE)
+  if '"' in val:
+    if "'" in val:
+      return (val.replace('"', '&#34;'), DOUBLE_QUOTE)
+    else:
+      return (val, SINGLE_QUOTE)
+  elif "'" in val:
+    return (val, DOUBLE_QUOTE)
+
+  if not val or any((c.isspace() for c in val)):
+    return (val, DOUBLE_QUOTE)
+  return (val, NO_QUOTES)
+
+def escape_ambiguous_ampersand(val):
+  # TODO: this function could probably be made a lot faster.
+  if not '&' in val:  # short circuit for speed
+    return val
+
+  state = 0
+  result = []
+  amp_buff = []
+  for c in val:
+    if state == 0:  # beginning
+      if c == '&':
+        state = 1
+      else:
+        result.append(c)
+    elif state == 1:  # ampersand
+      ord_c = ord(c)
+      if (UPPER_A <= ord_c <= UPPER_Z or
+            LOWER_A <= ord_c <= LOWER_Z or
+            ZERO <= ord_c <= NINE):
+        amp_buff.append(c)  # TODO: use "name character references" section
+        # https://html.spec.whatwg.org/multipage/syntax.html#named-character-references
+      elif c == '#':
+        state = 2
+      elif c == ';':
+        if amp_buff:
+          result.append('&')
+          result.extend(amp_buff)
+          result.append(';')
+        else:
+          result.append('&;')
+        state = 0
+        amp_buff = []
+      elif c == '&':
+        if amp_buff:
+          result.append('&')
+          result.extend(amp_buff)
+        else:
+          result.append('&')
+        amp_buff = []
+      else:
+        result.append('&')
+        result.extend(amp_buff)
+        result.append(c)
+        state = 0
+        amp_buff = []
+    elif state == 2:  # numeric character reference
+      ord_c = ord(c)
+      if c == 'x' or c == 'X':
+        state = 3
+      elif ZERO <= ord_c <= NINE:
+        amp_buff.append(c)
+      elif c == ';':
+        if amp_buff:
+          result.append('&#')
+          result.extend(amp_buff)
+          result.append(';')
+        else:
+          result.append('&#;')
+        state = 0
+        amp_buff = []
+      elif c == '&':
+        if amp_buff:
+          result.append('&#')
+          result.extend(amp_buff)
+        else:
+          result.append('&#')
+        state = 1
+        amp_buff = []
+      else:
+        if amp_buff:
+          result.append('&#')
+          result.extend(amp_buff)
+          result.append(c)
+        else:
+          result.append('&#')
+          result.append(c)
+        state = 0
+        amp_buff = []
+    elif state == 3:  # hex character reference
+      ord_c = ord(c)
+      if (UPPER_A <= ord_c <= UPPER_F or
+          LOWER_A <= ord_c <= LOWER_F or
+          ZERO <= ord_c <= NINE):
+        amp_buff.append(c)
+      elif c == ';':
+        if amp_buff:
+          result.append('&#x')
+          result.extend(amp_buff)
+          result.append(';')
+        else:
+          result.append('&#x;')
+        state = 0
+        amp_buff = []
+      elif c == '&':
+        if amp_buff:
+          result.append('&#x')
+          result.extend(amp_buff)
+        else:
+          result.append('&#x')
+        state = 1
+        amp_buff = []
+      else:
+        if amp_buff:
+          result.append('&#x')
+          result.extend(amp_buff)
+          result.append(c)
+        else:
+          result.append('&#x')
+          result.append(c)
+        state = 0
+        amp_buff = []
+
+  if state == 1:
+    result.append('&')
+    result.extend(amp_buff)
+  elif state == 2:
+    result.append('&#')
+    result.extend(amp_buff)
+  elif state == 3:
+    result.append('&#x')
+    result.extend(amp_buff)
+
+  return ''.join(result)
diff --git a/htmlmin/main.py b/htmlmin/main.py
new file mode 100644
index 0000000..cc83b99
--- /dev/null
+++ b/htmlmin/main.py
@@ -0,0 +1,184 @@
+"""
+Copyright (c) 2013, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Dave Mankoff nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+import cgi
+import re
+
+from . import parser
+
+def minify(input,
+           remove_comments=False,
+           remove_empty_space=False,
+           remove_all_empty_space=False,
+           reduce_empty_attributes=True,
+           reduce_boolean_attributes=False,
+           remove_optional_attribute_quotes=True,
+           keep_pre=False,
+           pre_tags=parser.PRE_TAGS,
+           pre_attr='pre'):
+  """Minifies HTML in one shot.
+
+  :param input: A string containing the HTML to be minified.
+  :param remove_comments: Remove comments found in HTML. Individual comments can
+    be maintained by putting a ``!`` as the first character inside the comment.
+    Thus::
+
+       <!-- FOO --> <!--! BAR -->
+
+    Will become simply::
+
+       <!-- BAR -->
+
+    The added exclamation is removed.
+  :param remove_empty_space: Remove empty space found in HTML between an opening
+    and a closing tag and when it contains a newline or carriage return. If
+    whitespace is found that is only spaces and/or tabs, it will be turned into
+    a single space. Be careful, this can have unintended consequences.
+  :param remove_all_empty_space: A more extreme version of
+    ``remove_empty_space``, this removes all empty whitespace found between
+    tags. This is almost guaranteed to break your HTML unless you are very
+    careful.
+  :param reduce_boolean_attributes: Where allowed by the HTML5 specification,
+    attributes such as 'disabled' and 'readonly' will have their value removed,
+    so 'disabled="true"' will simply become 'disabled'. This is generally a
+    good option to turn on except when JavaScript relies on the values.
+  :param remove_optional_attribute_quotes: When True, optional quotes around
+    attributes are removed. When False, all attribute quotes are left intact.
+    Defaults to True.
+  :param keep_pre: By default, htmlmin uses the special attribute ``pre`` to
+    allow you to demarcate areas of HTML that should not be minified. It removes
+    this attribute as it finds it. Setting this value to ``True`` tells htmlmin
+    to leave the attribute in the output.
+  :param pre_tags: A list of tag names that should never be minified. You are
+    free to change this list as you see fit, but you will probably want to
+    include ``pre`` and ``textarea`` if you make any changes to the list. Note
+    that ``<script>`` and ``<style>`` tags are never minimized.
+  :param pre_attr: Specifies the attribute that, when found in an HTML tag,
+    indicates that the content of the tag should not be minified. Defaults to
+    ``pre``.
+  :return: A string containing the minified HTML.
+
+  If you are going to be minifying multiple HTML documents, each with the same
+  settings, consider using :class:`.Minifier`.
+  """
+  minifier = parser.HTMLMinParser(
+      remove_comments=remove_comments,
+      remove_empty_space=remove_empty_space,
+      remove_all_empty_space=remove_all_empty_space,
+      reduce_empty_attributes=reduce_empty_attributes,
+      reduce_boolean_attributes=reduce_boolean_attributes,
+      remove_optional_attribute_quotes=remove_optional_attribute_quotes,
+      keep_pre=keep_pre,
+      pre_tags=pre_tags,
+      pre_attr=pre_attr)
+  minifier.feed(input)
+  minifier.close()
+  return minifier.result
+
+class Minifier(object):
+  """An object that supports HTML Minification.
+
+  Options are passed into this class at initialization time and are then
+  persisted across each use of the instance. If you are going to be minifying
+  multiple pieces of HTML, this will be more efficient than using
+  :class:`htmlmin.minify`.
+
+  See :class:`htmlmin.minify` for an explanation of options.
+  """
+
+  def __init__(self,
+               remove_comments=False,
+               remove_empty_space=False,
+               remove_all_empty_space=False,
+               reduce_empty_attributes=True,
+               reduce_boolean_attributes=False,
+               remove_optional_attribute_quotes=True,
+               keep_pre=False,
+               pre_tags=parser.PRE_TAGS,
+               pre_attr='pre'):
+    """Initialize the Minifier.
+
+    See :class:`htmlmin.minify` for an explanation of options.
+    """
+    self._parser = parser.HTMLMinParser(
+      remove_comments=remove_comments,
+      remove_empty_space=remove_empty_space,
+      remove_all_empty_space=remove_all_empty_space,
+      reduce_empty_attributes=reduce_empty_attributes,
+      reduce_boolean_attributes=reduce_boolean_attributes,
+      remove_optional_attribute_quotes=remove_optional_attribute_quotes,
+      keep_pre=keep_pre,
+      pre_tags=pre_tags,
+      pre_attr=pre_attr)
+
+  def minify(self, *input):
+    """Runs HTML through the minifier in one pass.
+
+    :param input: HTML to be fed into the minimizer. Multiple chunks of HTML
+      can be provided, and they are fed in sequentially as if they were
+      concatenated.
+    :returns: A string containing the minified HTML.
+
+    This is the simplest way to use an existing ``Minifier`` instance. This
+    method takes in HTML and minifies it, returning the result. Note that this
+    method resets the internal state of the parser before it does any work. If
+    there is pending HTML in the buffers, it will be lost.
+    """
+    self._parser.reset()
+    self.input(*input)
+    return self.finalize()
+
+  def input(self, *input):
+    """Feed more HTML into the input stream
+
+    :param input: HTML to be fed into the minimizer. Multiple chunks of HTML
+      can be provided, and they are fed in sequentially as if they were
+      concatenated. You can also call this method multiple times to achieve
+      the same effect.
+    """
+    for i in input:
+      self._parser.feed(i)
+
+  @property
+  def output(self):
+    """Retrieve the minified output generated thus far.
+    """
+    return self._parser.result
+
+  def finalize(self):
+    """Finishes current input HTML and returns minified result.
+
+    This method flushes any remaining input HTML and returns the minified
+    result. It resets the state of the internal parser in the process so that
+    new HTML can be minified. Be sure to call this method before you reuse
+    the ``Minifier`` instance on a new HTML document.
+    """
+    self._parser.close()
+    result = self._parser.result
+    self._parser.reset()
+    return result
+
diff --git a/htmlmin/middleware.py b/htmlmin/middleware.py
new file mode 100644
index 0000000..66f77be
--- /dev/null
+++ b/htmlmin/middleware.py
@@ -0,0 +1,92 @@
+"""
+Copyright (c) 2013, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Dave Mankoff nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+from .main import Minifier
+
+class HTMLMinMiddleware(object):
+  """WSGI middleware that minifies HTML responses on the way out.
+
+  :param app: The WSGI application to wrap.
+  :param by_default: Specifies if minification should be turned on or off by
+    default. Defaults to ``True``.
+  :param keep_header: The middleware recognizes one custom HTTP header, set
+    by the wrapped application on its response, that can be used to turn
+    minification on or off for that response: ``X-HTML-Min-Enable``. Setting
+    the header to ``true`` turns minification on; anything else turns it
+    off. If ``by_default`` is set to ``False``, this header is how you turn
+    minification back on. The middleware, by default, removes the header
+    from the output. Setting this to ``True`` leaves the header intact.
+  :param debug: A quick setting to turn all minification off. The middleware
+    is effectively bypassed.
+
+  Only responses whose ``Content-Type`` media type is ``text/html`` are
+  minified. Header names are matched case-insensitively, and parameters
+  such as ``charset`` are ignored when the media type is checked. Because
+  minification changes the size of the body, any ``Content-Length`` header
+  is dropped from responses that are going to be minified.
+
+  Any additional keyword arguments beyond the three settings the middleware
+  has are passed on to the internal minifier. The documentation for the
+  options can be found under :class:`htmlmin.minify`.
+  """
+  def __init__(self, app, by_default=True, keep_header=False,
+               debug=False, **kwargs):
+    self.app = app
+    self.by_default = by_default
+    self.debug = debug
+    self.keep_header = keep_header
+    self.minifier = Minifier(**kwargs)
+
+  def __call__(self, environ, start_response):
+    """Runs the wrapped application and minifies its body when appropriate.
+
+    :param environ: The WSGI environment, passed through untouched.
+    :param start_response: The server's ``start_response`` callable.
+    :returns: A list of body chunks. The application's iterable is always
+      consumed in full before anything is returned.
+    """
+    if self.debug:
+      return self.app(environ, start_response)
+
+    # Mutable holder so the nested callback can report back what it saw.
+    # If the application never calls start_response, nothing is minified.
+    state = {'minify': False, 'charset': 'utf-8'}
+
+    def minified_start_response(status, headers, exc_info=None):
+      state['minify'] = self.should_minify(headers)
+      state['charset'] = self._charset(headers)
+      dropped = set()
+      if not self.keep_header:
+        dropped.add('x-html-min-enable')
+      if state['minify']:
+        # The announced length no longer matches once the body is minified.
+        dropped.add('content-length')
+      headers = [(name, value) for name, value in headers
+                 if name.lower() not in dropped]
+      # WSGI requires start_response to hand back the write() callable.
+      return start_response(status, headers, exc_info)
+
+    app_iter = self.app(environ, minified_start_response)
+    try:
+      chunks = list(app_iter)
+    finally:
+      # The app's iterable is replaced by a list, so closing it is on us.
+      close = getattr(app_iter, 'close', None)
+      if close is not None:
+        close()
+
+    if state['minify'] and chunks:
+      return [self._minify_body(chunks, state['charset'])]
+    return chunks
+
+  def should_minify(self, headers):
+    """Decides from the response headers whether the body gets minified.
+
+    :param headers: The list of ``(name, value)`` response header tuples.
+    :returns: ``True`` if the response is ``text/html`` and minification is
+      enabled for it, ``False`` otherwise. The first ``X-HTML-Min-Enable``
+      header, if any, overrides ``by_default``.
+    """
+    is_html = False
+    flag_header = None
+    for name, value in headers:
+      name = name.lower()
+      if name == 'content-type':
+        if not is_html:
+          is_html = self._parse_content_type(value)[0] == 'text/html'
+      elif name == 'x-html-min-enable' and flag_header is None:
+        flag_header = (value.strip().lower() == 'true')
+
+    if not is_html:
+      return False
+    if flag_header is None:
+      return bool(self.by_default)
+    return flag_header
+
+  @staticmethod
+  def _parse_content_type(value):
+    """Splits a Content-Type value into ``(media_type, charset)``.
+
+    The media type is lower-cased; ``charset`` is ``None`` when the value
+    carries no ``charset`` parameter.
+    """
+    parts = value.split(';')
+    charset = None
+    for param in parts[1:]:
+      key, _, val = param.partition('=')
+      if key.strip().lower() == 'charset':
+        charset = val.strip().strip('"\'') or None
+        break
+    return parts[0].strip().lower(), charset
+
+  def _charset(self, headers):
+    """Returns the charset of the first Content-Type header, else UTF-8."""
+    for name, value in headers:
+      if name.lower() == 'content-type':
+        return self._parse_content_type(value)[1] or 'utf-8'
+    return 'utf-8'
+
+  def _minify_body(self, chunks, charset):
+    """Minifies a non-empty list of body chunks into a single chunk.
+
+    Byte chunks are joined before decoding, so multi-byte characters split
+    across chunk boundaries survive. They are decoded with ``charset``,
+    minified as text and encoded again. If the body cannot be decoded it is
+    returned unminified rather than failing the response.
+    """
+    if not isinstance(chunks[0], bytes):
+      # Already text; hand it to the minifier unchanged.
+      return self.minifier.minify(*chunks)
+    raw = b''.join(chunks)
+    try:
+      text = raw.decode(charset)
+    except (LookupError, UnicodeDecodeError):
+      return raw
+    # Characters the charset cannot represent go out as numeric character
+    # references, which is still valid HTML.
+    return self.minifier.minify(text).encode(charset, 'xmlcharrefreplace')
diff --git a/htmlmin/parser.py b/htmlmin/parser.py
new file mode 100644
index 0000000..06af6bc
--- /dev/null
+++ b/htmlmin/parser.py
@@ -0,0 +1,355 @@
+"""
+Copyright (c) 2013, Dave Mankoff
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of Dave Mankoff nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL DAVE MANKOFF BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+from __future__ import unicode_literals
+import sys
+
+from io import StringIO
+
+import re
+try:
+  from html.parser import HTMLParser
... 379 lines suppressed ...

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/htmlmin.git



More information about the Python-modules-commits mailing list