[Python-modules-commits] [python-w3lib] 01/01: New upstream version 1.18.0
Michael Fladischer
fladi at moszumanska.debian.org
Tue Aug 8 11:04:01 UTC 2017
This is an automated email from the git hooks/post-receive script.
fladi pushed a commit to branch upstream
in repository python-w3lib.
commit 1d11e20ac1f2de8d17c5a6fc64e97ca967829eb4
Author: Michael Fladischer <FladischerMichael at fladi.at>
Date: Tue Aug 8 10:58:42 2017 +0200
New upstream version 1.18.0
---
LICENSE | 27 +++++
MANIFEST.in | 8 ++
PKG-INFO | 2 +-
docs/Makefile | 153 +++++++++++++++++++++++++++
docs/conf.py | 251 ++++++++++++++++++++++++++++++++++++++++++++
docs/index.rst | 77 ++++++++++++++
docs/make.bat | 190 +++++++++++++++++++++++++++++++++
docs/w3lib.rst | 28 +++++
pytest.ini | 2 +
setup.py | 2 +-
tests/test_url.py | 4 +
tox.ini | 17 +++
w3lib.egg-info/PKG-INFO | 2 +-
w3lib.egg-info/SOURCES.txt | 8 ++
w3lib.egg-info/requires.txt | 2 +-
w3lib/__init__.py | 2 +-
w3lib/url.py | 96 +++++++++--------
17 files changed, 821 insertions(+), 50 deletions(-)
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..b789a76
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) w3lib and Scrapy developers.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of Scrapy nor the names of its contributors may be used
+ to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/MANIFEST.in b/MANIFEST.in
index fb4a5a7..0fd999a 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,11 @@
# Include tests into distribution
recursive-include tests *.py *.txt
+# Include documentation source
+recursive-include docs Makefile make.bat conf.py *.rst
+
+# Miscellaneous assets
+include LICENSE
+include README.rst
+include pytest.ini
+include tox.ini
diff --git a/PKG-INFO b/PKG-INFO
index 98ac7ae..34cbb36 100644
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: w3lib
-Version: 1.17.0
+Version: 1.18.0
Summary: Library of web-related functions
Home-page: https://github.com/scrapy/w3lib
Author: Scrapy project
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..62925a8
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,153 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+PAPER =
+BUILDDIR = _build
+
+# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext
+
+help:
+ @echo "Please use \`make <target>' where <target> is one of"
+ @echo " html to make standalone HTML files"
+ @echo " dirhtml to make HTML files named index.html in directories"
+ @echo " singlehtml to make a single large HTML file"
+ @echo " pickle to make pickle files"
+ @echo " json to make JSON files"
+ @echo " htmlhelp to make HTML files and a HTML help project"
+ @echo " qthelp to make HTML files and a qthelp project"
+ @echo " devhelp to make HTML files and a Devhelp project"
+ @echo " epub to make an epub"
+ @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+ @echo " latexpdf to make LaTeX files and run them through pdflatex"
+ @echo " text to make text files"
+ @echo " man to make manual pages"
+ @echo " texinfo to make Texinfo files"
+ @echo " info to make Texinfo files and run them through makeinfo"
+ @echo " gettext to make PO message catalogs"
+ @echo " changes to make an overview of all changed/added/deprecated items"
+ @echo " linkcheck to check all external links for integrity"
+ @echo " doctest to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+ -rm -rf $(BUILDDIR)/*
+
+html:
+ $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+ $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+ $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+ @echo
+ @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+ $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+ @echo
+ @echo "Build finished; now you can process the pickle files."
+
+json:
+ $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+ @echo
+ @echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+ $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+ @echo
+ @echo "Build finished; now you can run HTML Help Workshop with the" \
+ ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+ $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+ @echo
+ @echo "Build finished; now you can run "qcollectiongenerator" with the" \
+ ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+ @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/w3lib.qhcp"
+ @echo "To view the help file:"
+ @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/w3lib.qhc"
+
+devhelp:
+ $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+ @echo
+ @echo "Build finished."
+ @echo "To view the help file:"
+ @echo "# mkdir -p $$HOME/.local/share/devhelp/w3lib"
+ @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/w3lib"
+ @echo "# devhelp"
+
+epub:
+ $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+ @echo
+ @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo
+ @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+ @echo "Run \`make' in that directory to run these through (pdf)latex" \
+ "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through pdflatex..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+ $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+ @echo
+ @echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+ $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+ @echo
+ @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo
+ @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+ @echo "Run \`make' in that directory to run these through makeinfo" \
+ "(use \`make info' here to do that automatically)."
+
+info:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo "Running Texinfo files through makeinfo..."
+ $(MAKE) -C $(BUILDDIR)/texinfo info
+ @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+ $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+ @echo
+ @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+ $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+ @echo
+ @echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+ $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+ @echo
+ @echo "Link check complete; look for any errors in the above output " \
+ "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+ $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+ @echo "Testing of doctests in the sources finished, look at the " \
+ "results in $(BUILDDIR)/doctest/output.txt."
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..e06a9b4
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,251 @@
+# -*- coding: utf-8 -*-
+#
+# w3lib documentation build configuration file, created by
+# sphinx-quickstart on Sun Jan 26 22:19:38 2014.
+#
+# This file is execfile()d with the current directory set to its containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys, os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+sys.path.insert(0, os.path.abspath('..'))
+
+# -- General configuration -----------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = [
+ 'sphinx.ext.autodoc',
+ 'sphinx.ext.doctest',
+ 'sphinx.ext.intersphinx',
+ 'sphinx.ext.viewcode',
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'w3lib'
+copyright = u'2014, w3lib developers'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The full version, including alpha/beta/rc tags.
+release = '1.18.0'
+# The short X.Y version.
+version = '.'.join(release.split('.')[:2])
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = ['_build']
+
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+
+# -- Options for HTML output ---------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. See the documentation for
+# a list of builtin themes.
+html_theme = 'default'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents. If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar. Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_domain_indices = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'w3libdoc'
+
+
+# -- Options for LaTeX output --------------------------------------------------
+
+latex_elements = {
+# The paper size ('letterpaper' or 'a4paper').
+#'papersize': 'letterpaper',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+ ('index', 'w3lib.tex', u'w3lib Documentation',
+ u'w3lib developers', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output --------------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ ('index', 'w3lib', u'w3lib Documentation',
+ [u'w3lib developers'], 1)
+]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output ------------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ ('index', 'w3lib', u'w3lib Documentation',
+ u'w3lib developers', 'w3lib', 'One line description of project.',
+ 'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
+
+
+# Example configuration for intersphinx: refer to the Python standard library.
+intersphinx_mapping = {'http://docs.python.org/': None}
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..406d494
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,77 @@
+Welcome to w3lib's documentation!
+=================================
+
+Overview
+========
+
+This is a Python library of web-related functions, such as:
+
+* remove comments, or tags from HTML snippets
+* extract base url from HTML snippets
+* translate entities on HTML strings
+* convert raw HTTP headers to dicts and vice-versa
+* construct HTTP auth header
+* converting HTML pages to unicode
+* sanitize urls (like browsers do)
+* extract arguments from urls
+
+The w3lib library is licensed under the BSD license.
+
+Modules
+=======
+
+.. toctree::
+ :maxdepth: 4
+
+ w3lib
+
+Requirements
+============
+
+Python 2.7 or Python 3.3+
+
+Install
+=======
+
+``pip install w3lib``
+
+
+Tests
+=====
+
+`nose`_ is the preferred way to run tests. Just run: ``nosetests`` from the
+root directory to execute tests using the default Python interpreter.
+
+`tox`_ could be used to run tests for all supported Python versions.
+Install it (using 'pip install tox') and then run ``tox`` from
+the root directory - tests will be executed for all available
+Python interpreters.
+
+.. _tox: http://tox.testrun.org
+.. _nose: http://readthedocs.org/docs/nose/en/latest/
+
+
+Changelog
+=========
+
+.. include:: ../NEWS
+ :start-line: 3
+
+History
+-------
+
+The code of w3lib was originally part of the `Scrapy framework`_ but was later
+stripped out of Scrapy, with the aim of making it more reusable and providing a
+useful library of web functions without depending on Scrapy.
+
+.. _Scrapy framework: http://scrapy.org
+.. _NEWS file: https://github.com/scrapy/w3lib/blob/master/NEWS
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 0000000..688daec
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,190 @@
+@ECHO OFF
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set BUILDDIR=_build
+set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
+set I18NSPHINXOPTS=%SPHINXOPTS% .
+if NOT "%PAPER%" == "" (
+ set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
+ set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
+)
+
+if "%1" == "" goto help
+
+if "%1" == "help" (
+ :help
+ echo.Please use `make ^<target^>` where ^<target^> is one of
+ echo. html to make standalone HTML files
+ echo. dirhtml to make HTML files named index.html in directories
+ echo. singlehtml to make a single large HTML file
+ echo. pickle to make pickle files
+ echo. json to make JSON files
+ echo. htmlhelp to make HTML files and a HTML help project
+ echo. qthelp to make HTML files and a qthelp project
+ echo. devhelp to make HTML files and a Devhelp project
+ echo. epub to make an epub
+ echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
+ echo. text to make text files
+ echo. man to make manual pages
+ echo. texinfo to make Texinfo files
+ echo. gettext to make PO message catalogs
+ echo. changes to make an overview over all changed/added/deprecated items
+ echo. linkcheck to check all external links for integrity
+ echo. doctest to run all doctests embedded in the documentation if enabled
+ goto end
+)
+
+if "%1" == "clean" (
+ for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
+ del /q /s %BUILDDIR%\*
+ goto end
+)
+
+if "%1" == "html" (
+ %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The HTML pages are in %BUILDDIR%/html.
+ goto end
+)
+
+if "%1" == "dirhtml" (
+ %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
+ goto end
+)
+
+if "%1" == "singlehtml" (
+ %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
+ goto end
+)
+
+if "%1" == "pickle" (
+ %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can process the pickle files.
+ goto end
+)
+
+if "%1" == "json" (
+ %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can process the JSON files.
+ goto end
+)
+
+if "%1" == "htmlhelp" (
+ %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can run HTML Help Workshop with the ^
+.hhp project file in %BUILDDIR%/htmlhelp.
+ goto end
+)
+
+if "%1" == "qthelp" (
+ %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; now you can run "qcollectiongenerator" with the ^
+.qhcp project file in %BUILDDIR%/qthelp, like this:
+ echo.^> qcollectiongenerator %BUILDDIR%\qthelp\w3lib.qhcp
+ echo.To view the help file:
+ echo.^> assistant -collectionFile %BUILDDIR%\qthelp\w3lib.qhc
+ goto end
+)
+
+if "%1" == "devhelp" (
+ %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished.
+ goto end
+)
+
+if "%1" == "epub" (
+ %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The epub file is in %BUILDDIR%/epub.
+ goto end
+)
+
+if "%1" == "latex" (
+ %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
+ goto end
+)
+
+if "%1" == "text" (
+ %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The text files are in %BUILDDIR%/text.
+ goto end
+)
+
+if "%1" == "man" (
+ %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The manual pages are in %BUILDDIR%/man.
+ goto end
+)
+
+if "%1" == "texinfo" (
+ %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
+ goto end
+)
+
+if "%1" == "gettext" (
+ %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
+ goto end
+)
+
+if "%1" == "changes" (
+ %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.The overview file is in %BUILDDIR%/changes.
+ goto end
+)
+
+if "%1" == "linkcheck" (
+ %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Link check complete; look for any errors in the above output ^
+or in %BUILDDIR%/linkcheck/output.txt.
+ goto end
+)
+
+if "%1" == "doctest" (
+ %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
+ if errorlevel 1 exit /b 1
+ echo.
+ echo.Testing of doctests in the sources finished, look at the ^
+results in %BUILDDIR%/doctest/output.txt.
+ goto end
+)
+
+:end
diff --git a/docs/w3lib.rst b/docs/w3lib.rst
new file mode 100644
index 0000000..a040adc
--- /dev/null
+++ b/docs/w3lib.rst
@@ -0,0 +1,28 @@
+w3lib Package
+=============
+
+:mod:`encoding` Module
+----------------------
+
+.. automodule:: w3lib.encoding
+ :members:
+
+
+:mod:`html` Module
+------------------
+
+.. automodule:: w3lib.html
+ :members:
+
+
+:mod:`http` Module
+------------------
+
+.. automodule:: w3lib.http
+ :members:
+
+:mod:`url` Module
+-----------------
+
+.. automodule:: w3lib.url
+ :members:
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..515b219
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+doctest_optionflags = ALLOW_UNICODE ALLOW_BYTES
diff --git a/setup.py b/setup.py
index b9937dd..d152b4c 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
setup(
name='w3lib',
- version='1.17.0',
+ version='1.18.0',
license='BSD',
description='Library of web-related functions',
author='Scrapy project',
diff --git a/tests/test_url.py b/tests/test_url.py
index 9bb1ea4..0df5bfd 100644
--- a/tests/test_url.py
+++ b/tests/test_url.py
@@ -59,6 +59,10 @@ class UrlTests(unittest.TestCase):
self.assertTrue(isinstance(safe_url_string(b'http://example.com/'), str))
+ def test_safe_url_string_unsafe_chars(self):
+ safeurl = safe_url_string(r"http://localhost:8001/unwise{,},|,\,^,[,],`?|=[]&[]=|")
+ self.assertEqual(safeurl, r"http://localhost:8001/unwise%7B,%7D,|,%5C,%5E,[,],%60?|=[]&[]=|")
+
def test_safe_url_string_with_query(self):
safeurl = safe_url_string(u"http://www.example.com/£?unit=µ")
self.assertTrue(isinstance(safeurl, str))
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..d36b9e8
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,17 @@
+# Tox (http://tox.testrun.org/) is a tool for running tests
+# in multiple virtualenvs. This configuration file will run the
+# test suite on all supported python versions. To use it, "pip install tox"
+# and then run "tox" from this directory.
+
+[tox]
+envlist = py27, pypy, py33, py34, py35, py36
+
+[testenv]
+deps =
+ pytest !=3.1.1, !=3.1.2
+ pytest-cov
+commands =
+ py.test \
+ --doctest-modules \
+ --cov=w3lib --cov-report=term \
+ {posargs:w3lib tests}
diff --git a/w3lib.egg-info/PKG-INFO b/w3lib.egg-info/PKG-INFO
index 98ac7ae..34cbb36 100644
--- a/w3lib.egg-info/PKG-INFO
+++ b/w3lib.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
Metadata-Version: 1.1
Name: w3lib
-Version: 1.17.0
+Version: 1.18.0
Summary: Library of web-related functions
Home-page: https://github.com/scrapy/w3lib
Author: Scrapy project
diff --git a/w3lib.egg-info/SOURCES.txt b/w3lib.egg-info/SOURCES.txt
index afc648b..3781c13 100644
--- a/w3lib.egg-info/SOURCES.txt
+++ b/w3lib.egg-info/SOURCES.txt
@@ -1,7 +1,15 @@
+LICENSE
MANIFEST.in
README.rst
+pytest.ini
setup.cfg
setup.py
+tox.ini
+docs/Makefile
+docs/conf.py
+docs/index.rst
+docs/make.bat
+docs/w3lib.rst
tests/__init__.py
tests/py3-ignores.txt
tests/test_encoding.py
diff --git a/w3lib.egg-info/requires.txt b/w3lib.egg-info/requires.txt
index 1517eeb..58b77b6 100644
--- a/w3lib.egg-info/requires.txt
+++ b/w3lib.egg-info/requires.txt
@@ -1 +1 @@
-six >= 1.4.1
+six>=1.4.1
diff --git a/w3lib/__init__.py b/w3lib/__init__.py
index 0a4c374..41515b9 100644
--- a/w3lib/__init__.py
+++ b/w3lib/__init__.py
@@ -1,3 +1,3 @@
-__version__ = "1.17.0"
+__version__ = "1.18.0"
version_info = tuple(int(v) if v.isdigit() else v
for v in __version__.split('.'))
diff --git a/w3lib/url.py b/w3lib/url.py
index ef3189d..4be74f7 100644
--- a/w3lib/url.py
+++ b/w3lib/url.py
@@ -8,8 +8,9 @@ import os
import re
import posixpath
import warnings
-import six
+import string
from collections import namedtuple
+import six
from six.moves.urllib.parse import (urljoin, urlsplit, urlunsplit,
urldefrag, urlencode, urlparse,
quote, parse_qs, parse_qsl,
@@ -24,51 +25,14 @@ def _quote_byte(error):
codecs.register_error('percentencode', _quote_byte)
+# constants from RFC 3986, Section 2.2 and 2.3
+RFC3986_GEN_DELIMS = b':/?#[]@'
+RFC3986_SUB_DELIMS = b"!$&'()*+,;="
+RFC3986_RESERVED = RFC3986_GEN_DELIMS + RFC3986_SUB_DELIMS
+RFC3986_UNRESERVED = (string.ascii_letters + string.digits + "-._~").encode('ascii')
+EXTRA_SAFE_CHARS = b'|' # see https://github.com/scrapy/w3lib/pull/25
-# Python 2.x urllib.always_safe become private in Python 3.x;
-# its content is copied here
-_ALWAYS_SAFE_BYTES = (b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
- b'abcdefghijklmnopqrstuvwxyz'
- b'0123456789' b'_.-')
-
-
-def urljoin_rfc(base, ref, encoding='utf-8'):
- r"""
- .. warning::
-
- This function is deprecated and will be removed in future.
- It is not supported with Python 3.
- Please use ``urlparse.urljoin`` instead.
-
- Same as urlparse.urljoin but supports unicode values in base and ref
- parameters (in which case they will be converted to str using the given
- encoding).
-
- Always returns a str.
-
- >>> import w3lib.url
- >>> w3lib.url.urljoin_rfc('http://www.example.com/path/index.html', u'/otherpath/index2.html')
- 'http://www.example.com/otherpath/index2.html'
- >>>
-
- >>> # Note: the following does not work in Python 3
- >>> w3lib.url.urljoin_rfc(b'http://www.example.com/path/index.html', u'fran\u00e7ais/d\u00e9part.htm') # doctest: +SKIP
- 'http://www.example.com/path/fran\xc3\xa7ais/d\xc3\xa9part.htm'
- >>>
-
-
- """
-
- warnings.warn("w3lib.url.urljoin_rfc is deprecated, use urlparse.urljoin instead",
- DeprecationWarning)
-
- str_base = to_bytes(base, encoding)
- str_ref = to_bytes(ref, encoding)
- return urljoin(str_base, str_ref)
-
-_reserved = b';/?:@&=+$|,#' # RFC 3986 (Generic Syntax)
-_unreserved_marks = b"-_.!~*'()" # RFC 3986 sec 2.3
-_safe_chars = _ALWAYS_SAFE_BYTES + b'%' + _reserved + _unreserved_marks
+_safe_chars = RFC3986_RESERVED + RFC3986_UNRESERVED + EXTRA_SAFE_CHARS + b'%'
def safe_url_string(url, encoding='utf8', path_encoding='utf8'):
"""Convert the given URL into a legal URL by escaping unsafe characters
@@ -117,6 +81,7 @@ def safe_url_string(url, encoding='utf8', path_encoding='utf8'):
quote(to_bytes(parts.fragment, encoding), _safe_chars),
))
... 87 lines suppressed ...
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/python-modules/packages/python-w3lib.git
More information about the Python-modules-commits
mailing list