[Python-modules-commits] [python-w3lib] 02/10: New upstream version 1.19.0
Michael Fladischer
fladi at moszumanska.debian.org
Sat Jan 27 12:46:43 UTC 2018
This is an automated email from the git hooks/post-receive script.
fladi pushed a commit to branch debian/master
in repository python-w3lib.
commit b9e5b0ee37bb2b4da2ed1c15672401806623ee34
Author: Michael Fladischer <FladischerMichael at fladi.at>
Date: Sat Jan 27 13:37:48 2018 +0100
New upstream version 1.19.0
---
PKG-INFO | 3 ++-
docs/conf.py | 2 +-
setup.py | 2 +-
tests/test_encoding.py | 16 ++++++++++++++--
tests/test_http.py | 9 +++++++++
tox.ini | 2 +-
w3lib.egg-info/PKG-INFO | 3 ++-
w3lib/__init__.py | 2 +-
w3lib/encoding.py | 5 +++--
w3lib/http.py | 4 ++--
10 files changed, 36 insertions(+), 12 deletions(-)
diff --git a/PKG-INFO b/PKG-INFO
index 34cbb36..ab30c09 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,11 +1,12 @@
Metadata-Version: 1.1
Name: w3lib
-Version: 1.18.0
+Version: 1.19.0
Summary: Library of web-related functions
Home-page: https://github.com/scrapy/w3lib
Author: Scrapy project
Author-email: info at scrapy.org
License: BSD
+Description-Content-Type: UNKNOWN
Description: UNKNOWN
Platform: Any
Classifier: Development Status :: 5 - Production/Stable
diff --git a/docs/conf.py b/docs/conf.py
index e06a9b4..d4d6732 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -53,7 +53,7 @@ copyright = u'2014, w3lib developers'
# built documents.
#
# The full version, including alpha/beta/rc tags.
-release = '1.18.0'
+release = '1.19.0'
# The short X.Y version.
version = '.'.join(release.split('.')[:2])
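Only the release string needs bumping here: the short X.Y version is computed
from it, so it comes out as '1.19' after this change. A quick illustration:

    >>> release = '1.19.0'
    >>> '.'.join(release.split('.')[:2])
    '1.19'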
diff --git a/setup.py b/setup.py
index d152b4c..63c3b6e 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
setup(
name='w3lib',
- version='1.18.0',
+ version='1.19.0',
license='BSD',
description='Library of web-related functions',
author='Scrapy project',
diff --git a/tests/test_encoding.py b/tests/test_encoding.py
index df2e5ce..649c189 100644
--- a/tests/test_encoding.py
+++ b/tests/test_encoding.py
@@ -144,9 +144,9 @@ class HtmlConversionTests(unittest.TestCase):
def test_invalid_utf8_encoded_body_with_valid_utf8_BOM(self):
# unlike scrapy, the BOM is stripped
self._assert_encoding('utf-8', b"\xef\xbb\xbfWORD\xe3\xabWORD2",
- 'utf-8', u'WORD\ufffd\ufffdWORD2')
+ 'utf-8', u'WORD\ufffdWORD2')
self._assert_encoding(None, b"\xef\xbb\xbfWORD\xe3\xabWORD2",
- 'utf-8', u'WORD\ufffd\ufffdWORD2')
+ 'utf-8', u'WORD\ufffdWORD2')
def test_utf8_unexpected_end_of_data_with_valid_utf8_BOM(self):
# Python implementations handle unexpected end of UTF8 data
@@ -220,6 +220,18 @@ class HtmlConversionTests(unittest.TestCase):
self._assert_encoding('utf-16', u"hi".encode('utf-16-be'), 'utf-16-be', u"hi")
self._assert_encoding('utf-32', u"hi".encode('utf-32-be'), 'utf-32-be', u"hi")
+    def test_python_crash(self):
+        import random
+        from io import BytesIO
+        random.seed(42)
+        buf = BytesIO()
+        for i in range(150000):
+            buf.write(bytes([random.randint(0, 255)]))
+        to_unicode(buf.getvalue(), 'utf-16-le')
+        to_unicode(buf.getvalue(), 'utf-16-be')
+        to_unicode(buf.getvalue(), 'utf-32-le')
+        to_unicode(buf.getvalue(), 'utf-32-be')
+
def test_html_encoding(self):
# extracting the encoding from raw html is tested elsewhere
body = b"""blah blah < meta http-equiv="Content-Type"
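Both test changes here track the decoder fix in w3lib/encoding.py further
down. The updated expectations reflect that a truncated multi-byte sequence
such as \xe3\xab now maps to a single replacement character instead of one per
byte; on Python 3.3+, where the standard 'replace' handler is used, this is
easy to verify:

    >>> b"WORD\xe3\xabWORD2".decode('utf-8', 'replace')
    'WORD\ufffdWORD2'

The new test_python_crash case is a regression test: it runs 150,000
pseudo-random bytes through to_unicode with each of the UTF-16/UTF-32 codecs,
the kind of input that, going by the test's name, could crash the interpreter
under the previous custom error handler.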
diff --git a/tests/test_http.py b/tests/test_http.py
index 453624f..01f903e 100644
--- a/tests/test_http.py
+++ b/tests/test_http.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
import unittest
from collections import OrderedDict
from w3lib.http import (basic_auth_header,
@@ -14,6 +16,13 @@ class HttpTests(unittest.TestCase):
self.assertEqual(b'Basic c29tZXVzZXI6QDx5dTk-Jm8_UQ==',
basic_auth_header('someuser', '@<yu9>&o?Q'))
+    def test_basic_auth_header_encoding(self):
+        self.assertEqual(b'Basic c29tw6Z1c8Oocjpzw7htZXDDpHNz',
+                         basic_auth_header(u'somæusèr', u'sømepäss', encoding='utf8'))
+        # default encoding (ISO-8859-1)
+        self.assertEqual(b'Basic c29t5nVz6HI6c_htZXDkc3M=',
+                         basic_auth_header(u'somæusèr', u'sømepäss'))
+
def test_headers_raw_dict_none(self):
self.assertIsNone(headers_raw_to_dict(None))
self.assertIsNone(headers_dict_to_raw(None))
diff --git a/tox.ini b/tox.ini
index d36b9e8..d616681 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,7 +4,7 @@
# and then run "tox" from this directory.
[tox]
-envlist = py27, pypy, py33, py34, py35, py36
+envlist = py27, pypy, py33, py34, py35, py36, pypy3
[testenv]
deps =
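The only tox change is the new pypy3 environment; assuming a pypy3
interpreter is available locally, it can be exercised on its own with:

    tox -e pypy3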
diff --git a/w3lib.egg-info/PKG-INFO b/w3lib.egg-info/PKG-INFO
index 34cbb36..ab30c09 100644
--- a/w3lib.egg-info/PKG-INFO
+++ b/w3lib.egg-info/PKG-INFO
@@ -1,11 +1,12 @@
Metadata-Version: 1.1
Name: w3lib
-Version: 1.18.0
+Version: 1.19.0
Summary: Library of web-related functions
Home-page: https://github.com/scrapy/w3lib
Author: Scrapy project
Author-email: info at scrapy.org
License: BSD
+Description-Content-Type: UNKNOWN
Description: UNKNOWN
Platform: Any
Classifier: Development Status :: 5 - Production/Stable
diff --git a/w3lib/__init__.py b/w3lib/__init__.py
index 41515b9..20d2d78 100644
--- a/w3lib/__init__.py
+++ b/w3lib/__init__.py
@@ -1,3 +1,3 @@
-__version__ = "1.18.0"
+__version__ = "1.19.0"
version_info = tuple(int(v) if v.isdigit() else v
for v in __version__.split('.'))
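The version tuple is derived from the string, so after this bump:

    >>> import w3lib
    >>> w3lib.__version__
    '1.19.0'
    >>> w3lib.version_info
    (1, 19, 0)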
diff --git a/w3lib/encoding.py b/w3lib/encoding.py
index ac44efe..aef8c46 100644
--- a/w3lib/encoding.py
+++ b/w3lib/encoding.py
@@ -3,6 +3,7 @@
Functions for handling encoding of web pages
"""
import re, codecs, encodings
+from sys import version_info
_HEADER_ENCODING_RE = re.compile(r'charset=([\w-]+)', re.I)
@@ -173,7 +174,7 @@ def read_bom(data):
# Python decoder doesn't follow unicode standard when handling
# bad utf-8 encoded strings. see http://bugs.python.org/issue8271
-codecs.register_error('w3lib_replace', lambda exc: (u'\ufffd', exc.start+1))
+codecs.register_error('w3lib_replace', lambda exc: (u'\ufffd', exc.end))
def to_unicode(data_str, encoding):
"""Convert a str object to unicode using the encoding given
@@ -181,7 +182,7 @@ def to_unicode(data_str, encoding):
Characters that cannot be converted will be converted to ``\\ufffd`` (the
unicode replacement character).
"""
- return data_str.decode(encoding, 'w3lib_replace')
+ return data_str.decode(encoding, 'replace' if version_info[0:2] >= (3, 3) else 'w3lib_replace')
def html_to_unicode(content_type_header, html_body_str,
default_encoding='utf8', auto_detect_fun=None):
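The one-character change to the error handler is the heart of this release:
returning exc.start + 1 resumes decoding one byte past the start of an
invalid sequence, so a truncated multi-byte sequence produces one U+FFFD per
byte, while exc.end skips the whole invalid subpart in one step, matching
what Python 3.3+'s built-in 'replace' handler already does (hence the version
check that bypasses the custom handler there). A minimal side-by-side sketch
of the two behaviours on Python 3; the handler names old_replace and
new_replace are illustrative, not part of w3lib:

    import codecs

    # old: advance one byte past the start of the invalid sequence
    codecs.register_error('old_replace', lambda exc: (u'\ufffd', exc.start + 1))
    # new: skip the entire invalid subpart at once
    codecs.register_error('new_replace', lambda exc: (u'\ufffd', exc.end))

    data = b'WORD\xe3\xabWORD2'  # \xe3\xab is a truncated 3-byte UTF-8 sequence
    print(data.decode('utf-8', 'old_replace'))  # 'WORD\ufffd\ufffdWORD2' (two replacements)
    print(data.decode('utf-8', 'new_replace'))  # 'WORD\ufffdWORD2' (one replacement)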
diff --git a/w3lib/http.py b/w3lib/http.py
index accfb5d..c7b94a2 100644
--- a/w3lib/http.py
+++ b/w3lib/http.py
@@ -78,7 +78,7 @@ def headers_dict_to_raw(headers_dict):
return b'\r\n'.join(raw_lines)
-def basic_auth_header(username, password):
+def basic_auth_header(username, password, encoding='ISO-8859-1'):
"""
Return an `Authorization` header field value for `HTTP Basic Access Authentication (RFC 2617)`_
@@ -95,5 +95,5 @@ def basic_auth_header(username, password):
# XXX: RFC 2617 doesn't define encoding, but ISO-8859-1
# seems to be the most widely used encoding here. See also:
# http://greenbytes.de/tech/webdav/draft-ietf-httpauth-basicauth-enc-latest.html
- auth = auth.encode('ISO-8859-1')
+ auth = auth.encode(encoding)
return b'Basic ' + urlsafe_b64encode(auth)
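With the new keyword argument, callers can opt into a different credential
encoding while ISO-8859-1 stays the default, keeping existing behaviour
intact. A short usage sketch mirroring the test added above:

    from w3lib.http import basic_auth_header

    basic_auth_header(u'somæusèr', u'sømepäss')                   # default: ISO-8859-1
    basic_auth_header(u'somæusèr', u'sømepäss', encoding='utf8')  # opt-in UTF-8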
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-w3lib.git