[Python-modules-team] Bug#949029: python-bleach is failing the autopkg tests with Python 3.8

Matthias Klose doko at debian.org
Thu Jan 16 08:32:39 GMT 2020


Package: src:python-bleach
Version: 3.1.0-2
Severity: serious
Tags: sid bullseye

python-bleach fails its autopkgtest run with Python 3.8:

[...]
autopkgtest [07:36:46]: test py3: [-----------------------
============================= test session starts ==============================
platform linux -- Python 3.8.1, pytest-4.6.9, py-1.8.0, pluggy-0.13.0
rootdir: /tmp/autopkgtest-lxc.l4h7ii7j/downtmp/build.VsR/src
collected 311 items

tests/test_callbacks.py ..........                                       [  3%]
tests/test_clean.py .................................................... [ 19%]
...............................................F.F.F.................... [ 43%]
......................                                                   [ 50%]
tests/test_css.py ........................                               [ 57%]
tests/test_html5lib_shim.py ...................                          [ 63%]
tests/test_linkify.py .................................................. [ 80%]
..................................................                       [ 96%]
tests/test_unicode.py .........                                          [ 99%]
tests/test_utils.py ...                                                  [100%]

=================================== FAILURES ===================================
_ test_uri_value_allowed_protocols[<a
href="example.com:8000">valid</a>-kwargs6-<a href="example.com:8000">valid</a>] _

data = '<a href="example.com:8000">valid</a>', kwargs = {'protocols': ['http']}
expected = '<a href="example.com:8000">valid</a>'

    @pytest.mark.parametrize('data, kwargs, expected', [
        # javascript: is not allowed by default
        (
            '<a href="javascript:alert(\'XSS\')">xss</a>',
            {},
            '<a>xss</a>'
        ),

        # File protocol is not allowed by default
        (
            '<a href="file:///tmp/foo">foo</a>',
            {},
            '<a>foo</a>'
        ),

        # Specified protocols are allowed
        (
            '<a href="myprotocol://more_text">allowed href</a>',
            {'protocols': ['myprotocol']},
            '<a href="myprotocol://more_text">allowed href</a>'
        ),

        # Unspecified protocols are not allowed
        (
            '<a href="http://example.com">invalid href</a>',
            {'protocols': ['myprotocol']},
            '<a>invalid href</a>'
        ),

        # Anchors are ok
        (
            '<a href="#example.com">foo</a>',
            {'protocols': []},
            '<a href="#example.com">foo</a>'
        ),

        # Allow implicit http if allowed
        (
            '<a href="example.com">valid</a>',
            {'protocols': ['http']},
            '<a href="example.com">valid</a>'
        ),
        (
            '<a href="example.com:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="example.com:8000">valid</a>'
        ),
        (
            '<a href="localhost">valid</a>',
            {'protocols': ['http']},
            '<a href="localhost">valid</a>'
        ),
        (
            '<a href="localhost:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="localhost:8000">valid</a>'
        ),
        (
            '<a href="192.168.100.100">valid</a>',
            {'protocols': ['http']},
            '<a href="192.168.100.100">valid</a>'
        ),
        (
            '<a href="192.168.100.100:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="192.168.100.100:8000">valid</a>'
        ),

        # Disallow implicit http if disallowed
        (
            '<a href="example.com">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="example.com:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="localhost">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="localhost:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="192.168.100.100">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="192.168.100.100:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),

        # Disallowed protocols with sneaky character entities
        (
            '<a href="javas&#x09;cript:alert(1)">alert</a>',
            {},
            '<a>alert</a>'
        ),
        (
            '<a href="javascript:alert(1)">alert</a>',
            {},
            '<a>alert</a>'
        ),

        # Checking the uri should change it at all
        (
            '<a href="http://example.com/?foo bar">foo</a>',
            {},
            '<a href="http://example.com/?foo bar">foo</a>'
        ),
    ])
    def test_uri_value_allowed_protocols(data, kwargs, expected):
>       assert clean(data, **kwargs) == expected
E       assert '<a>valid</a>' == '<a href="example.com:8000">valid</a>'
E         - <a>valid</a>
E         + <a href="example.com:8000">valid</a>

tests/test_clean.py:676: AssertionError
_ test_uri_value_allowed_protocols[<a href="localhost:8000">valid</a>-kwargs8-<a
href="localhost:8000">valid</a>] _

data = '<a href="localhost:8000">valid</a>', kwargs = {'protocols': ['http']}
expected = '<a href="localhost:8000">valid</a>'

    @pytest.mark.parametrize('data, kwargs, expected', [
        # javascript: is not allowed by default
        (
            '<a href="javascript:alert(\'XSS\')">xss</a>',
            {},
            '<a>xss</a>'
        ),

        # File protocol is not allowed by default
        (
            '<a href="file:///tmp/foo">foo</a>',
            {},
            '<a>foo</a>'
        ),

        # Specified protocols are allowed
        (
            '<a href="myprotocol://more_text">allowed href</a>',
            {'protocols': ['myprotocol']},
            '<a href="myprotocol://more_text">allowed href</a>'
        ),

        # Unspecified protocols are not allowed
        (
            '<a href="http://example.com">invalid href</a>',
            {'protocols': ['myprotocol']},
            '<a>invalid href</a>'
        ),

        # Anchors are ok
        (
            '<a href="#example.com">foo</a>',
            {'protocols': []},
            '<a href="#example.com">foo</a>'
        ),

        # Allow implicit http if allowed
        (
            '<a href="example.com">valid</a>',
            {'protocols': ['http']},
            '<a href="example.com">valid</a>'
        ),
        (
            '<a href="example.com:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="example.com:8000">valid</a>'
        ),
        (
            '<a href="localhost">valid</a>',
            {'protocols': ['http']},
            '<a href="localhost">valid</a>'
        ),
        (
            '<a href="localhost:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="localhost:8000">valid</a>'
        ),
        (
            '<a href="192.168.100.100">valid</a>',
            {'protocols': ['http']},
            '<a href="192.168.100.100">valid</a>'
        ),
        (
            '<a href="192.168.100.100:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="192.168.100.100:8000">valid</a>'
        ),

        # Disallow implicit http if disallowed
        (
            '<a href="example.com">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="example.com:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="localhost">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="localhost:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="192.168.100.100">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="192.168.100.100:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),

        # Disallowed protocols with sneaky character entities
        (
            '<a href="javas&#x09;cript:alert(1)">alert</a>',
            {},
            '<a>alert</a>'
        ),
        (
            '<a href="javascript:alert(1)">alert</a>',
            {},
            '<a>alert</a>'
        ),

        # Checking the uri should change it at all
        (
            '<a href="http://example.com/?foo bar">foo</a>',
            {},
            '<a href="http://example.com/?foo bar">foo</a>'
        ),
    ])
    def test_uri_value_allowed_protocols(data, kwargs, expected):
>       assert clean(data, **kwargs) == expected
E       assert '<a>valid</a>' == '<a href="localhost:8000">valid</a>'
E         - <a>valid</a>
E         + <a href="localhost:8000">valid</a>

tests/test_clean.py:676: AssertionError
_ test_uri_value_allowed_protocols[<a
href="192.168.100.100:8000">valid</a>-kwargs10-<a
href="192.168.100.100:8000">valid</a>] _

data = '<a href="192.168.100.100:8000">valid</a>'
kwargs = {'protocols': ['http']}
expected = '<a href="192.168.100.100:8000">valid</a>'

    @pytest.mark.parametrize('data, kwargs, expected', [
        # javascript: is not allowed by default
        (
            '<a href="javascript:alert(\'XSS\')">xss</a>',
            {},
            '<a>xss</a>'
        ),

        # File protocol is not allowed by default
        (
            '<a href="file:///tmp/foo">foo</a>',
            {},
            '<a>foo</a>'
        ),

        # Specified protocols are allowed
        (
            '<a href="myprotocol://more_text">allowed href</a>',
            {'protocols': ['myprotocol']},
            '<a href="myprotocol://more_text">allowed href</a>'
        ),

        # Unspecified protocols are not allowed
        (
            '<a href="http://example.com">invalid href</a>',
            {'protocols': ['myprotocol']},
            '<a>invalid href</a>'
        ),

        # Anchors are ok
        (
            '<a href="#example.com">foo</a>',
            {'protocols': []},
            '<a href="#example.com">foo</a>'
        ),

        # Allow implicit http if allowed
        (
            '<a href="example.com">valid</a>',
            {'protocols': ['http']},
            '<a href="example.com">valid</a>'
        ),
        (
            '<a href="example.com:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="example.com:8000">valid</a>'
        ),
        (
            '<a href="localhost">valid</a>',
            {'protocols': ['http']},
            '<a href="localhost">valid</a>'
        ),
        (
            '<a href="localhost:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="localhost:8000">valid</a>'
        ),
        (
            '<a href="192.168.100.100">valid</a>',
            {'protocols': ['http']},
            '<a href="192.168.100.100">valid</a>'
        ),
        (
            '<a href="192.168.100.100:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="192.168.100.100:8000">valid</a>'
        ),

        # Disallow implicit http if disallowed
        (
            '<a href="example.com">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="example.com:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="localhost">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="localhost:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="192.168.100.100">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="192.168.100.100:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),

        # Disallowed protocols with sneaky character entities
        (
            '<a href="javas&#x09;cript:alert(1)">alert</a>',
            {},
            '<a>alert</a>'
        ),
        (
            '<a href="javascript:alert(1)">alert</a>',
            {},
            '<a>alert</a>'
        ),

        # Checking the uri should change it at all
        (
            '<a href="http://example.com/?foo bar">foo</a>',
            {},
            '<a href="http://example.com/?foo bar">foo</a>'
        ),
    ])
    def test_uri_value_allowed_protocols(data, kwargs, expected):
>       assert clean(data, **kwargs) == expected
E       assert '<a>valid</a>' == '<a href="192.168.100.100:8000">valid</a>'
E         - <a>valid</a>
E         + <a href="192.168.100.100:8000">valid</a>

tests/test_clean.py:676: AssertionError
=============================== warnings summary ===============================
/usr/lib/python3/dist-packages/html5lib/_trie/_base.py:3
  /usr/lib/python3/dist-packages/html5lib/_trie/_base.py:3: DeprecationWarning:
Using or importing the ABCs from 'collections' instead of from 'collections.abc'
is deprecated since Python 3.3, and in 3.9 it will stop working
    from collections import Mapping

-- Docs: https://docs.pytest.org/en/latest/warnings.html
=============== 3 failed, 308 passed, 1 warnings in 1.03 seconds ===============
============================= test session starts ==============================
platform linux -- Python 3.7.6, pytest-4.6.9, py-1.8.0, pluggy-0.13.0
rootdir: /tmp/autopkgtest-lxc.l4h7ii7j/downtmp/build.VsR/src
collected 311 items

tests/test_callbacks.py ..........                                       [  3%]
tests/test_clean.py .................................................... [ 19%]
...............................................F.F.F.................... [ 43%]
......................                                                   [ 50%]
tests/test_css.py ........................                               [ 57%]
tests/test_html5lib_shim.py ...................                          [ 63%]
tests/test_linkify.py .................................................. [ 80%]
..................................................                       [ 96%]
tests/test_unicode.py .........                                          [ 99%]
tests/test_utils.py ...                                                  [100%]

=================================== FAILURES ===================================
_ test_uri_value_allowed_protocols[<a
href="example.com:8000">valid</a>-kwargs6-<a href="example.com:8000">valid</a>] _

data = '<a href="example.com:8000">valid</a>', kwargs = {'protocols': ['http']}
expected = '<a href="example.com:8000">valid</a>'

    @pytest.mark.parametrize('data, kwargs, expected', [
        # javascript: is not allowed by default
        (
            '<a href="javascript:alert(\'XSS\')">xss</a>',
            {},
            '<a>xss</a>'
        ),

        # File protocol is not allowed by default
        (
            '<a href="file:///tmp/foo">foo</a>',
            {},
            '<a>foo</a>'
        ),

        # Specified protocols are allowed
        (
            '<a href="myprotocol://more_text">allowed href</a>',
            {'protocols': ['myprotocol']},
            '<a href="myprotocol://more_text">allowed href</a>'
        ),

        # Unspecified protocols are not allowed
        (
            '<a href="http://example.com">invalid href</a>',
            {'protocols': ['myprotocol']},
            '<a>invalid href</a>'
        ),

        # Anchors are ok
        (
            '<a href="#example.com">foo</a>',
            {'protocols': []},
            '<a href="#example.com">foo</a>'
        ),

        # Allow implicit http if allowed
        (
            '<a href="example.com">valid</a>',
            {'protocols': ['http']},
            '<a href="example.com">valid</a>'
        ),
        (
            '<a href="example.com:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="example.com:8000">valid</a>'
        ),
        (
            '<a href="localhost">valid</a>',
            {'protocols': ['http']},
            '<a href="localhost">valid</a>'
        ),
        (
            '<a href="localhost:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="localhost:8000">valid</a>'
        ),
        (
            '<a href="192.168.100.100">valid</a>',
            {'protocols': ['http']},
            '<a href="192.168.100.100">valid</a>'
        ),
        (
            '<a href="192.168.100.100:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="192.168.100.100:8000">valid</a>'
        ),

        # Disallow implicit http if disallowed
        (
            '<a href="example.com">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="example.com:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="localhost">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="localhost:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="192.168.100.100">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="192.168.100.100:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),

        # Disallowed protocols with sneaky character entities
        (
            '<a href="javas&#x09;cript:alert(1)">alert</a>',
            {},
            '<a>alert</a>'
        ),
        (
            '<a href="javascript:alert(1)">alert</a>',
            {},
            '<a>alert</a>'
        ),

        # Checking the uri should change it at all
        (
            '<a href="http://example.com/?foo bar">foo</a>',
            {},
            '<a href="http://example.com/?foo bar">foo</a>'
        ),
    ])
    def test_uri_value_allowed_protocols(data, kwargs, expected):
>       assert clean(data, **kwargs) == expected
E       assert '<a>valid</a>' == '<a href="example.com:8000">valid</a>'
E         - <a>valid</a>
E         + <a href="example.com:8000">valid</a>

tests/test_clean.py:676: AssertionError
_ test_uri_value_allowed_protocols[<a href="localhost:8000">valid</a>-kwargs8-<a
href="localhost:8000">valid</a>] _

data = '<a href="localhost:8000">valid</a>', kwargs = {'protocols': ['http']}
expected = '<a href="localhost:8000">valid</a>'

    @pytest.mark.parametrize('data, kwargs, expected', [
        # javascript: is not allowed by default
        (
            '<a href="javascript:alert(\'XSS\')">xss</a>',
            {},
            '<a>xss</a>'
        ),

        # File protocol is not allowed by default
        (
            '<a href="file:///tmp/foo">foo</a>',
            {},
            '<a>foo</a>'
        ),

        # Specified protocols are allowed
        (
            '<a href="myprotocol://more_text">allowed href</a>',
            {'protocols': ['myprotocol']},
            '<a href="myprotocol://more_text">allowed href</a>'
        ),

        # Unspecified protocols are not allowed
        (
            '<a href="http://example.com">invalid href</a>',
            {'protocols': ['myprotocol']},
            '<a>invalid href</a>'
        ),

        # Anchors are ok
        (
            '<a href="#example.com">foo</a>',
            {'protocols': []},
            '<a href="#example.com">foo</a>'
        ),

        # Allow implicit http if allowed
        (
            '<a href="example.com">valid</a>',
            {'protocols': ['http']},
            '<a href="example.com">valid</a>'
        ),
        (
            '<a href="example.com:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="example.com:8000">valid</a>'
        ),
        (
            '<a href="localhost">valid</a>',
            {'protocols': ['http']},
            '<a href="localhost">valid</a>'
        ),
        (
            '<a href="localhost:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="localhost:8000">valid</a>'
        ),
        (
            '<a href="192.168.100.100">valid</a>',
            {'protocols': ['http']},
            '<a href="192.168.100.100">valid</a>'
        ),
        (
            '<a href="192.168.100.100:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="192.168.100.100:8000">valid</a>'
        ),

        # Disallow implicit http if disallowed
        (
            '<a href="example.com">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="example.com:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="localhost">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="localhost:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="192.168.100.100">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="192.168.100.100:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),

        # Disallowed protocols with sneaky character entities
        (
            '<a href="javas&#x09;cript:alert(1)">alert</a>',
            {},
            '<a>alert</a>'
        ),
        (
            '<a href="javascript:alert(1)">alert</a>',
            {},
            '<a>alert</a>'
        ),

        # Checking the uri should change it at all
        (
            '<a href="http://example.com/?foo bar">foo</a>',
            {},
            '<a href="http://example.com/?foo bar">foo</a>'
        ),
    ])
    def test_uri_value_allowed_protocols(data, kwargs, expected):
>       assert clean(data, **kwargs) == expected
E       assert '<a>valid</a>' == '<a href="localhost:8000">valid</a>'
E         - <a>valid</a>
E         + <a href="localhost:8000">valid</a>

tests/test_clean.py:676: AssertionError
_ test_uri_value_allowed_protocols[<a
href="192.168.100.100:8000">valid</a>-kwargs10-<a
href="192.168.100.100:8000">valid</a>] _

data = '<a href="192.168.100.100:8000">valid</a>'
kwargs = {'protocols': ['http']}
expected = '<a href="192.168.100.100:8000">valid</a>'

    @pytest.mark.parametrize('data, kwargs, expected', [
        # javascript: is not allowed by default
        (
            '<a href="javascript:alert(\'XSS\')">xss</a>',
            {},
            '<a>xss</a>'
        ),

        # File protocol is not allowed by default
        (
            '<a href="file:///tmp/foo">foo</a>',
            {},
            '<a>foo</a>'
        ),

        # Specified protocols are allowed
        (
            '<a href="myprotocol://more_text">allowed href</a>',
            {'protocols': ['myprotocol']},
            '<a href="myprotocol://more_text">allowed href</a>'
        ),

        # Unspecified protocols are not allowed
        (
            '<a href="http://example.com">invalid href</a>',
            {'protocols': ['myprotocol']},
            '<a>invalid href</a>'
        ),

        # Anchors are ok
        (
            '<a href="#example.com">foo</a>',
            {'protocols': []},
            '<a href="#example.com">foo</a>'
        ),

        # Allow implicit http if allowed
        (
            '<a href="example.com">valid</a>',
            {'protocols': ['http']},
            '<a href="example.com">valid</a>'
        ),
        (
            '<a href="example.com:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="example.com:8000">valid</a>'
        ),
        (
            '<a href="localhost">valid</a>',
            {'protocols': ['http']},
            '<a href="localhost">valid</a>'
        ),
        (
            '<a href="localhost:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="localhost:8000">valid</a>'
        ),
        (
            '<a href="192.168.100.100">valid</a>',
            {'protocols': ['http']},
            '<a href="192.168.100.100">valid</a>'
        ),
        (
            '<a href="192.168.100.100:8000">valid</a>',
            {'protocols': ['http']},
            '<a href="192.168.100.100:8000">valid</a>'
        ),

        # Disallow implicit http if disallowed
        (
            '<a href="example.com">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="example.com:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="localhost">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="localhost:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="192.168.100.100">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),
        (
            '<a href="192.168.100.100:8000">foo</a>',
            {'protocols': []},
            '<a>foo</a>'
        ),

        # Disallowed protocols with sneaky character entities
        (
            '<a href="javas&#x09;cript:alert(1)">alert</a>',
            {},
            '<a>alert</a>'
        ),
        (
            '<a href="javascript:alert(1)">alert</a>',
            {},
            '<a>alert</a>'
        ),

        # Checking the uri should change it at all
        (
            '<a href="http://example.com/?foo bar">foo</a>',
            {},
            '<a href="http://example.com/?foo bar">foo</a>'
        ),
    ])
    def test_uri_value_allowed_protocols(data, kwargs, expected):
>       assert clean(data, **kwargs) == expected
E       assert '<a>valid</a>' == '<a href="192.168.100.100:8000">valid</a>'
E         - <a>valid</a>
E         + <a href="192.168.100.100:8000">valid</a>

tests/test_clean.py:676: AssertionError



More information about the Python-modules-team mailing list