[Python-modules-commits] [python-parsel] 01/01: New upstream version 1.3.1
Michael Fladischer
fladi at moszumanska.debian.org
Tue Jan 9 07:52:18 UTC 2018
This is an automated email from the git hooks/post-receive script.
fladi pushed a commit to branch upstream
in repository python-parsel.
commit e8f599299b8c5c1c3db979c050355c2aea2f7e74
Author: Michael Fladischer <FladischerMichael at fladi.at>
Date: Tue Jan 9 08:43:38 2018 +0100
New upstream version 1.3.1
---
.bumpversion.cfg | 2 +-
.travis.yml | 31 +++++++++-------
NEWS | 31 +++++++++++++---
README.rst | 4 +-
docs/Makefile | 6 +++
docs/usage.rst | 41 ++++++++++++++++++++
parsel/__init__.py | 5 ++-
parsel/selector.py | 2 +
parsel/xpathfuncs.py | 54 +++++++++++++++++++++++++++
release.rst | 3 +-
setup.py | 3 +-
tests/test_selector.py | 2 +
tests/test_xpathfuncs.py | 97 ++++++++++++++++++++++++++++++++++++++++++++++++
tox.ini | 2 +-
14 files changed, 257 insertions(+), 26 deletions(-)
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index f8747a9..6f8b2b1 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 1.2.0
+current_version = 1.3.1
commit = True
tag = True
tag_name = v{new_version}
diff --git a/.travis.yml b/.travis.yml
index 4beb4ec..61cf27f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,32 +1,37 @@
language: python
+branches:
+ only:
+ - master
+ - /^v\d\.\d+$/
+ - /^v\d\.\d+\.\d+(rc\d+|\.dev\d+)?$/
matrix:
include:
- python: 2.7
env: TOXENV=py27
- python: 2.7
env: TOXENV=pypy
- - python: 3.3
- env: TOXENV=py33
+ - python: 2.7
+ env: TOXENV=pypy3
- python: 3.4
env: TOXENV=py34
- python: 3.5
env: TOXENV=py35
- python: 3.6
env: TOXENV=py36
-
install:
- |
if [ "$TOXENV" = "pypy" ]; then
- export PYENV_ROOT="$HOME/.pyenv"
- if [ -f "$PYENV_ROOT/bin/pyenv" ]; then
- pushd "$PYENV_ROOT" && git pull && popd
- else
- rm -rf "$PYENV_ROOT" && git clone --depth 1 https://github.com/yyuu/pyenv.git "$PYENV_ROOT"
- fi
- # get latest (portable) PyPy from pyenv directly (thanks to natural version sort option -V)
- export PYPY_VERSION=`"$PYENV_ROOT/bin/pyenv" install --list |grep -o -E 'pypy-portable-[0-9][\.0-9]*$' |sort -V |tail -1`
- "$PYENV_ROOT/bin/pyenv" install --skip-existing "$PYPY_VERSION"
- virtualenv --python="$PYENV_ROOT/versions/$PYPY_VERSION/bin/python" "$HOME/virtualenvs/$PYPY_VERSION"
+ export PYPY_VERSION="pypy-5.9-linux_x86_64-portable"
+ wget "https://bitbucket.org/squeaky/portable-pypy/downloads/${PYPY_VERSION}.tar.bz2"
+ tar -jxf ${PYPY_VERSION}.tar.bz2
+ virtualenv --python="$PYPY_VERSION/bin/pypy" "$HOME/virtualenvs/$PYPY_VERSION"
+ source "$HOME/virtualenvs/$PYPY_VERSION/bin/activate"
+ fi
+ if [ "$TOXENV" = "pypy3" ]; then
+ export PYPY_VERSION="pypy3.5-5.9-beta-linux_x86_64-portable"
+ wget "https://bitbucket.org/squeaky/portable-pypy/downloads/${PYPY_VERSION}.tar.bz2"
+ tar -jxf ${PYPY_VERSION}.tar.bz2
+ virtualenv --python="$PYPY_VERSION/bin/pypy3" "$HOME/virtualenvs/$PYPY_VERSION"
source "$HOME/virtualenvs/$PYPY_VERSION/bin/activate"
fi
- pip install -U pip tox twine wheel codecov
diff --git a/NEWS b/NEWS
index 28e2b68..8c65d77 100644
--- a/NEWS
+++ b/NEWS
@@ -3,14 +3,33 @@
History
-------
-1.2.0 (2017-05-XX)
+1.3.1 (2017-12-28)
+~~~~~~~~~~~~~~~~~~
+
+* Fix artifact uploads to pypi.
+
+1.3.0 (2017-12-28)
+~~~~~~~~~~~~~~~~~~
+
+* ``has-class`` XPath extension function;
+* ``parsel.xpathfuncs.set_xpathfunc`` is a simplified way to register
+ XPath extensions;
+* ``Selector.remove_namespaces`` now removes namespace declarations;
+* Python 3.3 support is dropped;
+* ``make htmlview`` command for easier Parsel docs development.
+* CI: PyPy installation is fixed; parsel now runs tests for PyPy3 as well.
+
+
+1.2.0 (2017-05-17)
~~~~~~~~~~~~~~~~~~
-* Add :meth:`~parsel.selector.SelectorList.get` and :meth:`~parsel.selector.SelectorList.getall`
- methods as aliases for :meth:`~parsel.selector.SelectorList.extract_first`
- and :meth:`~parsel.selector.SelectorList.extract` respectively
-* Add default value parameter to :meth:`~parsel.selector.SelectorList.re_first` method
-* Add :meth:`~parsel.selector.Selector.re_first` method to :class:`parsel.selector.Selector` class
+* Add ``SelectorList.get`` and ``SelectorList.getall``
+ methods as aliases for ``SelectorList.extract_first``
+ and ``SelectorList.extract`` respectively
+* Add default value parameter to ``SelectorList.re_first`` method
+* Add ``Selector.re_first`` method
+* Add ``replace_entities`` argument on ``.re()`` and ``.re_first()``
+ to turn off replacing of character entity references
* Bug fix: detect ``None`` result from lxml parsing and fallback with an empty document
* Rearrange XML/HTML examples in the selectors usage docs
* Travis CI:
diff --git a/README.rst b/README.rst
index 8442bde..1ec30aa 100644
--- a/README.rst
+++ b/README.rst
@@ -2,11 +2,13 @@
Parsel
===============================
-.. image:: https://img.shields.io/travis/scrapy/parsel.svg
+.. image:: https://img.shields.io/travis/scrapy/parsel/master.svg
:target: https://travis-ci.org/scrapy/parsel
+ :alt: Build Status
.. image:: https://img.shields.io/pypi/v/parsel.svg
:target: https://pypi.python.org/pypi/parsel
+ :alt: PyPI Version
.. image:: https://img.shields.io/codecov/c/github/scrapy/parsel/master.svg
:target: http://codecov.io/github/scrapy/parsel?branch=master
diff --git a/docs/Makefile b/docs/Makefile
index 2caf2c2..9af9f45 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -2,6 +2,7 @@
#
# You can set these variables from the command line.
+PYTHON = python
SPHINXOPTS =
SPHINXBUILD = sphinx-build
PAPER =
@@ -45,6 +46,7 @@ help:
@echo " pseudoxml to make pseudoxml-XML files for display purposes"
@echo " linkcheck to check all external links for integrity"
@echo " doctest to run all doctests embedded in the documentation (if enabled)"
+ @echo " htmlview to view the compiled HTML files in browser"
clean:
rm -rf $(BUILDDIR)/*
@@ -175,3 +177,7 @@ pseudoxml:
$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
@echo
@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
+
+htmlview: html
+ $(PYTHON) -c "import webbrowser, os; webbrowser.open('file://' + \
+ os.path.realpath('_build/html/index.html'))"
diff --git a/docs/usage.rst b/docs/usage.rst
index fa05f58..9108c97 100644
--- a/docs/usage.rst
+++ b/docs/usage.rst
@@ -450,6 +450,47 @@ inside another ``itemscope``.
.. _regular expressions: http://exslt.org/regexp/index.html
.. _set manipulation: http://exslt.org/set/index.html
+Other XPath extensions
+----------------------
+
+Parsel also defines a sorely missed XPath extension function ``has-class`` that
+returns ``True`` for nodes that have all of the specified HTML classes::
+
+ >>> from parsel import Selector
+ >>> sel = Selector("""
+ ... <p class="foo bar-baz">First</p>
+ ... <p class="foo">Second</p>
+ ... <p class="bar">Third</p>
+ ... <p>Fourth</p>
+ ... """)
+ ...
+ >>> sel = Selector(u"""
+ ... <p class="foo bar-baz">First</p>
+ ... <p class="foo">Second</p>
+ ... <p class="bar">Third</p>
+ ... <p>Fourth</p>
+ ... """)
+ ...
+ >>> sel.xpath('//p[has-class("foo")]')
+ [<Selector xpath='//p[has-class("foo")]' data=u'<p class="foo bar-baz">First</p>'>,
+ <Selector xpath='//p[has-class("foo")]' data=u'<p class="foo">Second</p>'>]
+ >>> sel.xpath('//p[has-class("foo", "bar-baz")]')
+ [<Selector xpath='//p[has-class("foo", "bar-baz")]' data=u'<p class="foo bar-baz">First</p>'>]
+ >>> sel.xpath('//p[has-class("foo", "bar")]')
+ []
+
+So XPath ``//p[has-class("foo", "bar-baz")]`` is roughly equivalent to CSS
+``p.foo.bar-baz``. Please note that it is slower in most cases,
+because it's a pure-Python function that's invoked for every node in question,
+whereas the CSS lookup is translated into XPath and thus runs more efficiently,
+so performance-wise its uses are limited to situations that are not easily
+described with CSS selectors.
+
+Parsel also simplifies adding your own XPath extensions.
+
+.. autofunction:: parsel.xpathfuncs.set_xpathfunc
+
+
Some XPath tips
---------------
diff --git a/parsel/__init__.py b/parsel/__init__.py
index 735e62d..1b51f56 100644
--- a/parsel/__init__.py
+++ b/parsel/__init__.py
@@ -5,7 +5,10 @@ or CSS selectors
__author__ = 'Scrapy project'
__email__ = 'info at scrapy.org'
-__version__ = '1.2.0'
+__version__ = '1.3.1'
from parsel.selector import Selector, SelectorList # NOQA
from parsel.csstranslator import css2xpath # NOQA
+from parsel import xpathfuncs # NOQA
+
+xpathfuncs.setup()
diff --git a/parsel/selector.py b/parsel/selector.py
index 33eaede..7b9bdc5 100644
--- a/parsel/selector.py
+++ b/parsel/selector.py
@@ -321,6 +321,8 @@ class Selector(object):
for an in el.attrib.keys():
if an.startswith('{'):
el.attrib[an.split('}', 1)[1]] = el.attrib.pop(an)
+ # remove namespace declarations
+ etree.cleanup_namespaces(self.root)
def __bool__(self):
"""
diff --git a/parsel/xpathfuncs.py b/parsel/xpathfuncs.py
new file mode 100644
index 0000000..777bd32
--- /dev/null
+++ b/parsel/xpathfuncs.py
@@ -0,0 +1,54 @@
+from lxml import etree
+
+from six import string_types
+
+
+def set_xpathfunc(fname, func):
+ """Register a custom extension function to use in XPath expressions.
+
+ The function ``func`` registered under ``fname`` identifier will be called
+ for every matching node, being passed a ``context`` parameter as well as
+ any parameters passed from the corresponding XPath expression.
+
+ If ``func`` is ``None``, the extension function will be removed.
+
+ See more `in lxml documentation`_.
+
+ .. _`in lxml documentation`: http://lxml.de/extensions.html#xpath-extension-functions
+
+ """
+ ns_fns = etree.FunctionNamespace(None)
+ if func is not None:
+ ns_fns[fname] = func
+ else:
+ del ns_fns[fname]
+
+
+def setup():
+ set_xpathfunc('has-class', has_class)
+
+
+def has_class(context, *classes):
+ """has-class function.
+
+ Return True if all ``classes`` are present in element's class attr.
+
+ """
+ if not context.eval_context.get('args_checked'):
+ if not classes:
+ raise ValueError(
+ 'XPath error: has-class must have at least 1 argument')
+ for c in classes:
+ if not isinstance(c, string_types):
+ raise ValueError(
+ 'XPath error: has-class arguments must be strings')
+ context.eval_context['args_checked'] = True
+
+ node_cls = context.context_node.get('class')
+ if node_cls is None:
+ return False
+ node_cls = ' ' + node_cls + ' '
+ for cls in classes:
+ if ' ' + cls + ' ' not in node_cls:
+ return False
+ return True
diff --git a/release.rst b/release.rst
index 72b4341..1f827b7 100644
--- a/release.rst
+++ b/release.rst
@@ -1,7 +1,8 @@
Release procedures
------------------
-* Update NEWS file with the release notes
+* Update NEWS file with the release notes.
+ Review changes using: ``restview --pypi-strict <(cat README.rst NEWS | grep -v ':changelog')``
* Run bumpversion with the proper release type
* Push code and tags to GitHub to trigger build
* Copy release notes to https://github.com/scrapy/parsel/releases
diff --git a/setup.py b/setup.py
index 44b2954..a72a5a9 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@ test_requirements = [
setup(
name='parsel',
- version='1.2.0',
+ version='1.3.1',
description="Parsel is a library to extract data from HTML and XML using XPath and CSS selectors",
long_description=readme + '\n\n' + history,
author="Scrapy project",
@@ -48,7 +48,6 @@ setup(
'Programming Language :: Python :: 2',
'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.3',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
diff --git a/tests/test_selector.py b/tests/test_selector.py
index d36aa27..dcac22b 100644
--- a/tests/test_selector.py
+++ b/tests/test_selector.py
@@ -588,8 +588,10 @@ class SelectorTestCase(unittest.TestCase):
"""
sel = self.sscls(text=xml, type='xml')
self.assertEqual(len(sel.xpath("//link")), 0)
+ self.assertEqual(len(sel.xpath("./namespace::*")), 3)
sel.remove_namespaces()
self.assertEqual(len(sel.xpath("//link")), 2)
+ self.assertEqual(len(sel.xpath("./namespace::*")), 1)
def test_remove_attributes_namespaces(self):
xml = u"""<?xml version="1.0" encoding="UTF-8"?>
diff --git a/tests/test_xpathfuncs.py b/tests/test_xpathfuncs.py
new file mode 100644
index 0000000..29b1e5f
--- /dev/null
+++ b/tests/test_xpathfuncs.py
@@ -0,0 +1,97 @@
+# coding: utf-8
+
+from parsel import Selector
+from parsel.xpathfuncs import set_xpathfunc
+import unittest
+
+
+class XPathFuncsTestCase(unittest.TestCase):
+ def test_has_class_simple(self):
+ body = u"""
+ <p class="foo bar-baz">First</p>
+ <p class="foo">Second</p>
+ <p class="bar">Third</p>
+ <p>Fourth</p>
+ """
+ sel = Selector(text=body)
+ self.assertEqual(
+ [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')],
+ [u'First', u'Second'])
+ self.assertEqual(
+ [x.extract() for x in sel.xpath('//p[has-class("bar")]/text()')],
+ [u'Third'])
+ self.assertEqual(
+ [x.extract() for x in sel.xpath('//p[has-class("foo","bar")]/text()')],
+ [])
+ self.assertEqual(
+ [x.extract() for x in sel.xpath('//p[has-class("foo","bar-baz")]/text()')],
+ [u'First'])
+
+ def test_has_class_error_no_args(self):
+ body = u"""
+ <p CLASS="foo">First</p>
+ """
+ sel = Selector(text=body)
+ self.assertRaisesRegexp(
+ ValueError, 'has-class must have at least 1 argument',
+ sel.xpath, 'has-class()')
+
+ def test_has_class_error_invalid_arg_type(self):
+ body = u"""
+ <p CLASS="foo">First</p>
+ """
+ sel = Selector(text=body)
+ self.assertRaisesRegexp(
+ ValueError, 'has-class arguments must be strings',
+ sel.xpath, 'has-class(.)')
+
+ def test_has_class_error_invalid_unicode(self):
+ body = u"""
+ <p CLASS="foo">First</p>
+ """
+ sel = Selector(text=body)
+ self.assertRaisesRegexp(
+ ValueError, 'All strings must be XML compatible',
+ sel.xpath, u'has-class("héllö")'.encode('utf-8'))
+
+ def test_has_class_unicode(self):
+ body = u"""
+ <p CLASS="fóó">First</p>
+ """
+ sel = Selector(text=body)
+ self.assertEqual(
+ [x.extract() for x in sel.xpath(u'//p[has-class("fóó")]/text()')],
+ [u'First'])
+
+ def test_has_class_uppercase(self):
+ body = u"""
+ <p CLASS="foo">First</p>
+ """
+ sel = Selector(text=body)
+ self.assertEqual(
+ [x.extract() for x in sel.xpath('//p[has-class("foo")]/text()')],
+ [u'First'])
+
+ def test_set_xpathfunc(self):
+
+ def myfunc(ctx):
+ myfunc.call_count += 1
+
+ myfunc.call_count = 0
+
+ body = u"""
+ <p CLASS="foo">First</p>
+ """
+ sel = Selector(text=body)
+ self.assertRaisesRegexp(
+ ValueError, 'Unregistered function in myfunc',
+ sel.xpath, 'myfunc()')
+
+ set_xpathfunc('myfunc', myfunc)
+ sel.xpath('myfunc()')
+ self.assertEqual(myfunc.call_count, 1)
+
+ set_xpathfunc('myfunc', None)
+ self.assertRaisesRegexp(
+ ValueError, 'Unregistered function in myfunc',
+ sel.xpath, 'myfunc()')
diff --git a/tox.ini b/tox.ini
index ae9eee0..7886e16 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py27, py33, py34, py35, py36, pypy
+envlist = py27, py34, py35, py36, pypy, pypy3
[testenv]
deps =
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-parsel.git
More information about the Python-modules-commits
mailing list