[Python-modules-commits] [python-parsel] 01/01: New upstream version 1.3.1

Michael Fladischer fladi at moszumanska.debian.org
Tue Jan 9 07:52:18 UTC 2018


This is an automated email from the git hooks/post-receive script.

fladi pushed a commit to branch upstream
in repository python-parsel.

commit e8f599299b8c5c1c3db979c050355c2aea2f7e74
Author: Michael Fladischer <FladischerMichael at fladi.at>
Date:   Tue Jan 9 08:43:38 2018 +0100

    New upstream version 1.3.1
---
 .bumpversion.cfg         |  2 +-
 .travis.yml              | 31 +++++++++-------
 NEWS                     | 31 +++++++++++++---
 README.rst               |  4 +-
 docs/Makefile            |  6 +++
 docs/usage.rst           | 41 ++++++++++++++++++++
 parsel/__init__.py       |  5 ++-
 parsel/selector.py       |  2 +
 parsel/xpathfuncs.py     | 54 +++++++++++++++++++++++++++
 release.rst              |  3 +-
 setup.py                 |  3 +-
 tests/test_selector.py   |  2 +
 tests/test_xpathfuncs.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++++
 tox.ini                  |  2 +-
 14 files changed, 257 insertions(+), 26 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index f8747a9..6f8b2b1 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.2.0
+current_version = 1.3.1
 commit = True
 tag = True
 tag_name = v{new_version}
diff --git a/.travis.yml b/.travis.yml
index 4beb4ec..61cf27f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,32 +1,37 @@
 language: python
+branches:
+  only:
+    - master
+    - /^v\d\.\d+$/
+    - /^v\d\.\d+\.\d+(rc\d+|\.dev\d+)?$/
 matrix:
   include:
     - python: 2.7
       env: TOXENV=py27
     - python: 2.7
       env: TOXENV=pypy
-    - python: 3.3
-      env: TOXENV=py33
+    - python: 2.7
+      env: TOXENV=pypy3
     - python: 3.4
       env: TOXENV=py34
     - python: 3.5
       env: TOXENV=py35
     - python: 3.6
       env: TOXENV=py36
-
 install:
   - |
       if [ "$TOXENV" = "pypy" ]; then
-        export PYENV_ROOT="$HOME/.pyenv"
-        if [ -f "$PYENV_ROOT/bin/pyenv" ]; then
-          pushd "$PYENV_ROOT" && git pull && popd
-        else
-          rm -rf "$PYENV_ROOT" && git clone --depth 1 https://github.com/yyuu/pyenv.git "$PYENV_ROOT"
-        fi
-        # get latest (portable) PyPy from pyenv directly (thanks to natural version sort option -V)
-        export PYPY_VERSION=`"$PYENV_ROOT/bin/pyenv" install --list |grep -o -E 'pypy-portable-[0-9][\.0-9]*$' |sort -V |tail -1`
-        "$PYENV_ROOT/bin/pyenv" install --skip-existing "$PYPY_VERSION"
-        virtualenv --python="$PYENV_ROOT/versions/$PYPY_VERSION/bin/python" "$HOME/virtualenvs/$PYPY_VERSION"
+        export PYPY_VERSION="pypy-5.9-linux_x86_64-portable"
+        wget "https://bitbucket.org/squeaky/portable-pypy/downloads/${PYPY_VERSION}.tar.bz2"
+        tar -jxf ${PYPY_VERSION}.tar.bz2
+        virtualenv --python="$PYPY_VERSION/bin/pypy" "$HOME/virtualenvs/$PYPY_VERSION"
+        source "$HOME/virtualenvs/$PYPY_VERSION/bin/activate"
+      fi
+      if [ "$TOXENV" = "pypy3" ]; then
+        export PYPY_VERSION="pypy3.5-5.9-beta-linux_x86_64-portable"
+        wget "https://bitbucket.org/squeaky/portable-pypy/downloads/${PYPY_VERSION}.tar.bz2"
+        tar -jxf ${PYPY_VERSION}.tar.bz2
+        virtualenv --python="$PYPY_VERSION/bin/pypy3" "$HOME/virtualenvs/$PYPY_VERSION"
         source "$HOME/virtualenvs/$PYPY_VERSION/bin/activate"
       fi
   - pip install -U pip tox twine wheel codecov
diff --git a/NEWS b/NEWS
index 28e2b68..8c65d77 100644
--- a/NEWS
+++ b/NEWS
@@ -3,14 +3,33 @@
 History
 -------
 
-1.2.0 (2017-05-XX)
+1.3.1 (2017-12-28)
+~~~~~~~~~~~~~~~~~~
+
+* Fix artifact uploads to PyPI.
+
+1.3.0 (2017-12-28)
+~~~~~~~~~~~~~~~~~~
+
+* ``has-class`` XPath extension function;
+* ``parsel.xpathfuncs.set_xpathfunc`` is a simplified way to register
+  XPath extensions;
+* ``Selector.remove_namespaces`` now removes namespace declarations;
+* Python 3.3 support is dropped;
+* ``make htmlview`` command for easier Parsel docs development;
+* CI: PyPy installation is fixed; parsel now runs tests for PyPy3 as well.
+
+
+1.2.0 (2017-05-17)
 ~~~~~~~~~~~~~~~~~~
 
-* Add :meth:`~parsel.selector.SelectorList.get` and :meth:`~parsel.selector.SelectorList.getall`
-  methods as aliases for :meth:`~parsel.selector.SelectorList.extract_first`
-  and :meth:`~parsel.selector.SelectorList.extract` respectively
-* Add default value parameter to :meth:`~parsel.selector.SelectorList.re_first` method
-* Add :meth:`~parsel.selector.Selector.re_first` method to :class:`parsel.selector.Selector` class
+* Add ``SelectorList.get`` and ``SelectorList.getall``
+  methods as aliases for ``SelectorList.extract_first``
+  and ``SelectorList.extract`` respectively
+* Add default value parameter to ``SelectorList.re_first`` method
+* Add ``Selector.re_first`` method
+* Add ``replace_entities`` argument on ``.re()`` and ``.re_first()``
+  to turn off replacing of character entity references
 * Bug fix: detect ``None`` result from lxml parsing and fallback with an empty document
 * Rearrange XML/HTML examples in the selectors usage docs
 * Travis CI:
diff --git a/README.rst b/README.rst
index 8442bde..1ec30aa 100644
--- a/README.rst
+++ b/README.rst
@@ -2,11 +2,13 @@
 Parsel
 ===============================
 
-.. image:: https://img.shields.io/travis/scrapy/parsel.svg
+.. image:: https://img.shields.io/travis/scrapy/parsel/master.svg
    :target: https://travis-ci.org/scrapy/parsel
+   :alt: Build Status
 
 .. image:: https://img.shields.io/pypi/v/parsel.svg
    :target: https://pypi.python.org/pypi/parsel
+   :alt: PyPI Version
 
 .. image:: https://img.shields.io/codecov/c/github/scrapy/parsel/master.svg
    :target: http://codecov.io/github/scrapy/parsel?branch=master
diff --git a/docs/Makefile b/docs/Makefile
index 2caf2c2..9af9f45 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -2,6 +2,7 @@
 #
 
 # You can set these variables from the command line.
+PYTHON        = python
 SPHINXOPTS    =
 SPHINXBUILD   = sphinx-build
 PAPER         =
@@ -45,6 +46,7 @@ help:
 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 	@echo "  linkcheck  to check all external links for integrity"
 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
+	@echo "  htmlview   to view the compiled HTML files in browser"
 
 clean:
 	rm -rf $(BUILDDIR)/*
@@ -175,3 +177,7 @@ pseudoxml:
 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
 	@echo
 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
+
+htmlview: html
+	 $(PYTHON) -c "import webbrowser, os; webbrowser.open('file://' + \
+	 os.path.realpath('_build/html/index.html'))"
diff --git a/docs/usage.rst b/docs/usage.rst
index fa05f58..9108c97 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -450,6 +450,47 @@ inside another ``itemscope``.
 .. _regular expressions: http://exslt.org/regexp/index.html
 .. _set manipulation: http://exslt.org/set/index.html
 
+Other XPath extensions
+----------------------
+
+Parsel also defines a sorely missed XPath extension function ``has-class`` that
+returns ``True`` for nodes that have all of the specified HTML classes::
+
+    >>> from parsel import Selector
+    >>> sel = Selector("""
+    ...         <p class="foo bar-baz">First</p>
+    ...         <p class="foo">Second</p>
+    ...         <p class="bar">Third</p>
+    ...         <p>Fourth</p>
+    ... """)
+    ...
+    >>> sel = Selector(u"""
+    ...         <p class="foo bar-baz">First</p>
+    ...         <p class="foo">Second</p>
+    ...         <p class="bar">Third</p>
+    ...         <p>Fourth</p>
+    ... """)
+    ...
+    >>> sel.xpath('//p[has-class("foo")]')
+    [<Selector xpath='//p[has-class("foo")]' data=u'<p class="foo bar-baz">First</p>'>,
+     <Selector xpath='//p[has-class("foo")]' data=u'<p class="foo">Second</p>'>]
+    >>> sel.xpath('//p[has-class("foo", "bar-baz")]')
+    [<Selector xpath='//p[has-class("foo", "bar-baz")]' data=u'<p class="foo bar-baz">First</p>'>]
+    >>> sel.xpath('//p[has-class("foo", "bar")]')
+    []
+
+So XPath ``//p[has-class("foo", "bar-baz")]`` is roughly equivalent to CSS
+``p.foo.bar-baz``.  Please note that it is slower in most cases, because
+it's a pure-Python function that's invoked for every node in question,
+whereas the CSS lookup is translated into XPath and thus runs more efficiently.
+Performance-wise, its use is therefore best limited to situations that are
+not easily described with CSS selectors.
+
+Parsel also simplifies adding your own XPath extensions.
+
+.. autofunction:: parsel.xpathfuncs.set_xpathfunc
+
+
 
 Some XPath tips
 ---------------
diff --git a/parsel/__init__.py b/parsel/__init__.py
index 735e62d..1b51f56 100644
--- a/parsel/__init__.py
+++ b/parsel/__init__.py
@@ -5,7 +5,10 @@ or CSS selectors
 
 __author__ = 'Scrapy project'
 __email__ = 'info at scrapy.org'
-__version__ = '1.2.0'
+__version__ = '1.3.1'
 
 from parsel.selector import Selector, SelectorList  # NOQA
 from parsel.csstranslator import css2xpath  # NOQA
+from parsel import xpathfuncs # NOQA
+
+xpathfuncs.setup()
diff --git a/parsel/selector.py b/parsel/selector.py
index 33eaede..7b9bdc5 100644
--- a/parsel/selector.py
+++ b/parsel/selector.py
@@ -321,6 +321,8 @@ class Selector(object):
             for an in el.attrib.keys():
                 if an.startswith('{'):
                     el.attrib[an.split('}', 1)[1]] = el.attrib.pop(an)
+            # remove namespace declarations
+            etree.cleanup_namespaces(self.root)
 
     def __bool__(self):
         """
diff --git a/parsel/xpathfuncs.py b/parsel/xpathfuncs.py
new file mode 100644
index 0000000..777bd32
--- /dev/null
+++ b/parsel/xpathfuncs.py
@@ -0,0 +1,54 @@
+from lxml import etree
+
+from six import string_types
+
+
+def set_xpathfunc(fname, func):
+    """Register a custom extension function to use in XPath expressions.
+
+    The function ``func`` registered under ``fname`` identifier will be called
+    for every matching node, being passed a ``context`` parameter as well as
+    any parameters passed from the corresponding XPath expression.
+
+    If ``func`` is ``None``, the extension function will be removed.
+
+    See more `in lxml documentation`_.
+
+    .. _`in lxml documentation`: http://lxml.de/extensions.html#xpath-extension-functions
+
+    """
+    ns_fns = etree.FunctionNamespace(None)
+    if func is not None:
+        ns_fns[fname] = func
+    else:
+        del ns_fns[fname]
+
+
+def setup():
+    set_xpathfunc('has-class', has_class)
+
+
+def has_class(context, *classes):
+    """has-class function.
+
+    Return True if all ``classes`` are present in element's class attr.
+
+    """
+    if not context.eval_context.get('args_checked'):
+        if not classes:
+            raise ValueError(
+                'XPath error: has-class must have at least 1 argument')
+        for c in classes:
+            if not isinstance(c, string_types):
+                raise ValueError(
+                    'XPath error: has-class arguments must be strings')
+        context.eval_context['args_checked'] = True
+
+    node_cls = context.context_node.get('class')
+    if node_cls is None:
+        return False
+    node_cls = ' ' + node_cls + ' '
+    for cls in classes:
+        if ' ' + cls + ' ' not in node_cls:
+            return False
+    return True
diff --git a/release.rst b/release.rst
index 72b4341..1f827b7 100644
--- a/release.rst
+++ b/release.rst
@@ -1,7 +1,8 @@
 Release procedures
 ------------------
 
-* Update NEWS file with the release notes
+* Update NEWS file with the release notes.
+  Review changes using: ``restview --pypi-strict <(cat README.rst NEWS | grep -v ':changelog')``
 * Run bumpversion with the proper release type
 * Push code and tags to GitHub to trigger build
 * Copy release notes to https://github.com/scrapy/parsel/releases
diff --git a/setup.py b/setup.py
index 44b2954..a72a5a9 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@ test_requirements = [
 
 setup(
     name='parsel',
-    version='1.2.0',
+    version='1.3.1',
     description="Parsel is a library to extract data from HTML and XML using XPath and CSS selectors",
     long_description=readme + '\n\n' + history,
     author="Scrapy project",
@@ -48,7 +48,6 @@ setup(
         'Programming Language :: Python :: 2',
         'Programming Language :: Python :: 2.7',
         'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.3',
         'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
diff --git a/tests/test_selector.py b/tests/test_selector.py
index d36aa27..dcac22b 100644
--- a/tests/test_selector.py
+++ b/tests/test_selector.py
@@ -588,8 +588,10 @@ class SelectorTestCase(unittest.TestCase):
 """
         sel = self.sscls(text=xml, type='xml')
         self.assertEqual(len(sel.xpath("//link")), 0)
+        self.assertEqual(len(sel.xpath("./namespace::*")), 3)
         sel.remove_namespaces()
         self.assertEqual(len(sel.xpath("//link")), 2)
+        self.assertEqual(len(sel.xpath("./namespace::*")), 1)
 
     def test_remove_attributes_namespaces(self):
         xml = u"""<?xml version="1.0" encoding="UTF-8"?>
diff --git a/tests/test_xpathfuncs.py b/tests/test_xpathfuncs.py
new file mode 100644
index 0000000..29b1e5f
--- /dev/null
+++ b/tests/test_xpathfuncs.py
@@ -0,0 +1,97 @@
+# coding: utf-8
+
+from parsel import Selector
+from parsel.xpathfuncs import set_xpathfunc
+import unittest
+
+
+class XPathFuncsTestCase(unittest.TestCase):
+    def test_has_class_simple(self):
+        body = u"""
+        <p class="foo bar-baz">First</p>
+        <p class="foo">Second</p>
+        <p class="bar">Third</p>
+        <p>Fourth</p>
+        """
+        sel = Selector(text=body)
+        self.assertEqual(
+            [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')],
+            [u'First', u'Second'])
+        self.assertEqual(
+            [x.extract() for x in sel.xpath('//p[has-class("bar")]/text()')],
+            [u'Third'])
+        self.assertEqual(
+            [x.extract() for x in sel.xpath('//p[has-class("foo","bar")]/text()')],
+            [])
+        self.assertEqual(
+            [x.extract() for x in sel.xpath('//p[has-class("foo","bar-baz")]/text()')],
+            [u'First'])
+
+    def test_has_class_error_no_args(self):
+        body = u"""
+        <p CLASS="foo">First</p>
+        """
+        sel = Selector(text=body)
+        self.assertRaisesRegexp(
+            ValueError, 'has-class must have at least 1 argument',
+            sel.xpath, 'has-class()')
+
+    def test_has_class_error_invalid_arg_type(self):
+        body = u"""
+        <p CLASS="foo">First</p>
+        """
+        sel = Selector(text=body)
+        self.assertRaisesRegexp(
+            ValueError, 'has-class arguments must be strings',
+            sel.xpath, 'has-class(.)')
+
+    def test_has_class_error_invalid_unicode(self):
+        body = u"""
+        <p CLASS="foo">First</p>
+        """
+        sel = Selector(text=body)
+        self.assertRaisesRegexp(
+            ValueError, 'All strings must be XML compatible',
+            sel.xpath, u'has-class("héllö")'.encode('utf-8'))
+
+    def test_has_class_unicode(self):
+        body = u"""
+        <p CLASS="fóó">First</p>
+        """
+        sel = Selector(text=body)
+        self.assertEqual(
+            [x.extract() for x in sel.xpath(u'//p[has-class("fóó")]/text()')],
+            [u'First'])
+
+    def test_has_class_uppercase(self):
+        body = u"""
+        <p CLASS="foo">First</p>
+        """
+        sel = Selector(text=body)
+        self.assertEqual(
+            [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')],
+            [u'First'])
+
+    def test_set_xpathfunc(self):
+
+        def myfunc(ctx):
+            myfunc.call_count += 1
+
+        myfunc.call_count = 0
+
+        body = u"""
+        <p CLASS="foo">First</p>
+        """
+        sel = Selector(text=body)
+        self.assertRaisesRegexp(
+            ValueError, 'Unregistered function in myfunc',
+            sel.xpath, 'myfunc()')
+
+        set_xpathfunc('myfunc', myfunc)
+        sel.xpath('myfunc()')
+        self.assertEqual(myfunc.call_count, 1)
+
+        set_xpathfunc('myfunc', None)
+        self.assertRaisesRegexp(
+            ValueError, 'Unregistered function in myfunc',
+            sel.xpath, 'myfunc()')
diff --git a/tox.ini b/tox.ini
index ae9eee0..7886e16 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py27, py33, py34, py35, py36, pypy
+envlist = py27, py34, py35, py36, pypy, pypy3
 
 [testenv]
 deps =

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-parsel.git



More information about the Python-modules-commits mailing list