[Python-modules-commits] [python-w3lib] 02/10: New upstream version 1.19.0
Michael Fladischer
fladi at moszumanska.debian.org
Sat Jan 27 12:46:43 UTC 2018
This is an automated email from the git hooks/post-receive script.
fladi pushed a commit to branch debian/master
in repository python-w3lib.
commit b9e5b0ee37bb2b4da2ed1c15672401806623ee34
Author: Michael Fladischer <FladischerMichael at fladi.at>
Date: Sat Jan 27 13:37:48 2018 +0100
New upstream version 1.19.0
---
PKG-INFO | 3 ++-
docs/conf.py | 2 +-
setup.py | 2 +-
tests/test_encoding.py | 16 ++++++++++++++--
tests/test_http.py | 9 +++++++++
tox.ini | 2 +-
w3lib.egg-info/PKG-INFO | 3 ++-
w3lib/__init__.py | 2 +-
w3lib/encoding.py | 5 +++--
w3lib/http.py | 4 ++--
10 files changed, 36 insertions(+), 12 deletions(-)
diff --git a/PKG-INFO b/PKG-INFO
index 34cbb36..ab30c09 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,11 +1,12 @@
Metadata-Version: 1.1
Name: w3lib
-Version: 1.18.0
+Version: 1.19.0
Summary: Library of web-related functions
Home-page: https://github.com/scrapy/w3lib
Author: Scrapy project
Author-email: info at scrapy.org
License: BSD
+Description-Content-Type: UNKNOWN
Description: UNKNOWN
Platform: Any
Classifier: Development Status :: 5 - Production/Stable
diff --git a/docs/conf.py b/docs/conf.py
index e06a9b4..d4d6732 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -53,7 +53,7 @@ copyright = u'2014, w3lib developers'
# built documents.
#
# The full version, including alpha/beta/rc tags.
-release = '1.18.0'
+release = '1.19.0'
# The short X.Y version.
version = '.'.join(release.split('.')[:2])
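Only the release string needs bumping here: the short X.Y version is computed
from it, so it comes out as '1.19' after this change. A quick illustration:

    >>> release = '1.19.0'
    >>> '.'.join(release.split('.')[:2])
    '1.19'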
diff --git a/setup.py b/setup.py
index d152b4c..63c3b6e 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
setup(
name='w3lib',
- version='1.18.0',
+ version='1.19.0',
license='BSD',
description='Library of web-related functions',
author='Scrapy project',
diff --git a/tests/test_encoding.py b/tests/test_encoding.py
index df2e5ce..649c189 100644
--- a/tests/test_encoding.py
+++ b/tests/test_encoding.py
@@ -144,9 +144,9 @@ class HtmlConversionTests(unittest.TestCase):
def test_invalid_utf8_encoded_body_with_valid_utf8_BOM(self):
# unlike scrapy, the BOM is stripped
self._assert_encoding('utf-8', b"\xef\xbb\xbfWORD\xe3\xabWORD2",
- 'utf-8', u'WORD\ufffd\ufffdWORD2')
+ 'utf-8', u'WORD\ufffdWORD2')
self._assert_encoding(None, b"\xef\xbb\xbfWORD\xe3\xabWORD2",
- 'utf-8', u'WORD\ufffd\ufffdWORD2')
+ 'utf-8', u'WORD\ufffdWORD2')
def test_utf8_unexpected_end_of_data_with_valid_utf8_BOM(self):
# Python implementations handle unexpected end of UTF8 data
@@ -220,6 +220,18 @@ class HtmlConversionTests(unittest.TestCase):
self._assert_encoding('utf-16', u"hi".encode('utf-16-be'), 'utf-16-be', u"hi")
self._assert_encoding('utf-32', u"hi".encode('utf-32-be'), 'utf-32-be', u"hi")
+    def test_python_crash(self):
+        import random
+        from io import BytesIO
+        random.seed(42)
+        buf = BytesIO()
+        for i in range(150000):
+            buf.write(bytes([random.randint(0, 255)]))
+        to_unicode(buf.getvalue(), 'utf-16-le')
+        to_unicode(buf.getvalue(), 'utf-16-be')
+        to_unicode(buf.getvalue(), 'utf-32-le')
+        to_unicode(buf.getvalue(), 'utf-32-be')
+
def test_html_encoding(self):
# extracting the encoding from raw html is tested elsewhere
body = b"""blah blah < meta http-equiv="Content-Type"
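Both test changes here track the decoder fix in w3lib/encoding.py further
down. The updated expectations reflect that a truncated multi-byte sequence
such as \xe3\xab now maps to a single replacement character instead of one per
byte; on Python 3.3+, where the standard 'replace' handler is used, this is
easy to verify:

    >>> b"WORD\xe3\xabWORD2".decode('utf-8', 'replace')
    'WORD\ufffdWORD2'

The new test_python_crash case is a regression test: it runs 150,000
pseudo-random bytes through to_unicode with each of the UTF-16/UTF-32 codecs,
the kind of input that, going by the test's name, could crash the interpreter
under the previous custom error handler.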
diff --git a/tests/test_http.py b/tests/test_http.py
index 453624f..01f903e 100644
--- a/tests/test_http.py
+++ b/tests/test_http.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
import unittest
from collections import OrderedDict
from w3lib.http import (basic_auth_header,
@@ -14,6 +16,13 @@ class HttpTests(unittest.TestCase):
self.assertEqual(b'Basic c29tZXVzZXI6QDx5dTk-Jm8_UQ==',
basic_auth_header('someuser', '@<yu9>&o?Q'))
+    def test_basic_auth_header_encoding(self):
+        self.assertEqual(b'Basic c29tw6Z1c8Oocjpzw7htZXDDpHNz',
+                         basic_auth_header(u'somæusèr', u'sømepäss', encoding='utf8'))
+        # default encoding (ISO-8859-1)
+        self.assertEqual(b'Basic c29t5nVz6HI6c_htZXDkc3M=',
+                         basic_auth_header(u'somæusèr', u'sømepäss'))
+
def test_headers_raw_dict_none(self):
self.assertIsNone(headers_raw_to_dict(None))
self.assertIsNone(headers_dict_to_raw(None))
diff --git a/tox.ini b/tox.ini
index d36b9e8..d616681 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@
# and then run "tox" from this directory.
[tox]
-envlist = py27, pypy, py33, py34, py35, py36
+envlist = py27, pypy, py33, py34, py35, py36, pypy3
[testenv]
deps =
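The only tox change is the new pypy3 environment; assuming a pypy3
interpreter is available locally, it can be exercised on its own with:

    tox -e pypy3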
diff --git a/w3lib.egg-info/PKG-INFO b/w3lib.egg-info/PKG-INFO
index 34cbb36..ab30c09 100644
--- a/w3lib.egg-info/PKG-INFO
+++ b/w3lib.egg-info/PKG-INFO
@@ -1,11 +1,12 @@
Metadata-Version: 1.1
Name: w3lib
-Version: 1.18.0
+Version: 1.19.0
Summary: Library of web-related functions
Home-page: https://github.com/scrapy/w3lib
Author: Scrapy project
Author-email: info at scrapy.org
License: BSD
+Description-Content-Type: UNKNOWN
Description: UNKNOWN
Platform: Any
Classifier: Development Status :: 5 - Production/Stable
diff --git a/w3lib/__init__.py b/w3lib/__init__.py
index 41515b9..20d2d78 100644
--- a/w3lib/__init__.py
+++ b/w3lib/__init__.py
@@ -1,3 +1,3 @@
-__version__ = "1.18.0"
+__version__ = "1.19.0"
version_info = tuple(int(v) if v.isdigit() else v
for v in __version__.split('.'))
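The version tuple is derived from the string, so after this bump:

    >>> import w3lib
    >>> w3lib.__version__
    '1.19.0'
    >>> w3lib.version_info
    (1, 19, 0)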
diff --git a/w3lib/encoding.py b/w3lib/encoding.py
index ac44efe..aef8c46 100644
--- a/w3lib/encoding.py
+++ b/w3lib/encoding.py
@@ -3,6 +3,7 @@
Functions for handling encoding of web pages
"""
import re, codecs, encodings
+from sys import version_info
_HEADER_ENCODING_RE = re.compile(r'charset=([\w-]+)', re.I)
@@ -173,7 +174,7 @@ def read_bom(data):
# Python decoder doesn't follow unicode standard when handling
# bad utf-8 encoded strings. see http://bugs.python.org/issue8271
-codecs.register_error('w3lib_replace', lambda exc: (u'\ufffd', exc.start+1))
+codecs.register_error('w3lib_replace', lambda exc: (u'\ufffd', exc.end))
def to_unicode(data_str, encoding):
"""Convert a str object to unicode using the encoding given
@@ -181,7 +182,7 @@ def to_unicode(data_str, encoding):
Characters that cannot be converted will be converted to ``\\ufffd`` (the
unicode replacement character).
"""
- return data_str.decode(encoding, 'w3lib_replace')
+ return data_str.decode(encoding, 'replace' if version_info[0:2] >= (3, 3) else 'w3lib_replace')
def html_to_unicode(content_type_header, html_body_str,
default_encoding='utf8', auto_detect_fun=None):
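The one-character change to the error handler is the heart of this release:
returning exc.start + 1 resumes decoding one byte past the start of an
invalid sequence, so a truncated multi-byte sequence produces one U+FFFD per
byte, while exc.end skips the whole invalid subpart in one step, matching
what Python 3.3+'s built-in 'replace' handler already does (hence the version
check that bypasses the custom handler there). A minimal side-by-side sketch
of the two behaviours on Python 3; the handler names old_replace and
new_replace are illustrative, not part of w3lib:

    import codecs

    # old: advance one byte past the start of the invalid sequence
    codecs.register_error('old_replace', lambda exc: (u'\ufffd', exc.start + 1))
    # new: skip the entire invalid subpart at once
    codecs.register_error('new_replace', lambda exc: (u'\ufffd', exc.end))

    data = b'WORD\xe3\xabWORD2'  # \xe3\xab is a truncated 3-byte UTF-8 sequence
    print(data.decode('utf-8', 'old_replace'))  # 'WORD\ufffd\ufffdWORD2' (two replacements)
    print(data.decode('utf-8', 'new_replace'))  # 'WORD\ufffdWORD2' (one replacement)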
diff --git a/w3lib/http.py b/w3lib/http.py
index accfb5d..c7b94a2 100644
--- a/w3lib/http.py
+++ b/w3lib/http.py
@@ -78,7 +78,7 @@ def headers_dict_to_raw(headers_dict):
return b'\r\n'.join(raw_lines)
-def basic_auth_header(username, password):
+def basic_auth_header(username, password, encoding='ISO-8859-1'):
"""
Return an `Authorization` header field value for `HTTP Basic Access Authentication (RFC 2617)`_
@@ -95,5 +95,5 @@ def basic_auth_header(username, password):
# XXX: RFC 2617 doesn't define encoding, but ISO-8859-1
# seems to be the most widely used encoding here. See also:
# http://greenbytes.de/tech/webdav/draft-ietf-httpauth-basicauth-enc-latest.html
- auth = auth.encode('ISO-8859-1')
+ auth = auth.encode(encoding)
return b'Basic ' + urlsafe_b64encode(auth)
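With the new keyword argument, callers can opt into a different credential
encoding while ISO-8859-1 stays the default, keeping existing behaviour
intact. A short usage sketch mirroring the test added above:

    from w3lib.http import basic_auth_header

    basic_auth_header(u'somæusèr', u'sømepäss')                   # default: ISO-8859-1
    basic_auth_header(u'somæusèr', u'sømepäss', encoding='utf8')  # opt-in UTF-8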
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-w3lib.git