[Git][debian-gis-team/pooch][upstream] New upstream version 1.9.0

Antonio Valentino (@antonio.valentino) gitlab at salsa.debian.org
Sun Feb 1 21:00:23 GMT 2026



Antonio Valentino pushed to branch upstream at Debian GIS Project / pooch


Commits:
1fe3ff63 by Antonio Valentino at 2026-02-01T20:49:48+00:00
New upstream version 1.9.0
- - - - -


29 changed files:

- .github/workflows/docs.yml
- .github/workflows/pypi.yml
- .github/workflows/style.yml
- .github/workflows/test.yml
- .pylintrc
- AUTHORS.md
- Makefile
- README.md
- doc/api/index.rst
- doc/changes.rst
- doc/compatibility.rst
- doc/conf.py
- doc/install.rst
- doc/registry-files.rst
- doc/versions.rst
- env/requirements-style.txt
- + env/requirements-types.txt
- environment.yml
- pooch/__init__.py
- pooch/core.py
- pooch/downloaders.py
- pooch/hashes.py
- pooch/processors.py
- pooch/tests/test_core.py
- pooch/tests/test_downloaders.py
- pooch/tests/test_hashes.py
- + pooch/typing/__init__.py
- pooch/utils.py
- pyproject.toml


Changes:

=====================================
.github/workflows/docs.yml
=====================================
@@ -36,13 +36,13 @@ jobs:
       # We pin the commit hash corresponding to v0.5.0, and not pinning the tag
       # because we are giving full access through the github.token.
       - name: Cancel Previous Runs
-        uses: styfle/cancel-workflow-action at 0.12.1
+        uses: styfle/cancel-workflow-action at 0.13.0
         with:
           access_token: ${{ github.token }}
 
       # Checks-out your repository under $GITHUB_WORKSPACE
       - name: Checkout
-        uses: actions/checkout at v4
+        uses: actions/checkout at v6
         with:
           # Need to fetch more than the last commit so that setuptools-scm can
           # create the correct version string. If the number of commits since
@@ -58,7 +58,7 @@ jobs:
         run: git fetch origin 'refs/tags/*:refs/tags/*'
 
       - name: Setup Python
-        uses: actions/setup-python at v5
+        uses: actions/setup-python at v6
         with:
           python-version: "3.x"
 
@@ -83,10 +83,11 @@ jobs:
       - name: Get the pip cache folder
         id: pip-cache
         run: |
-          echo "::set-output name=dir::$(pip cache dir)"
+          echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
+
 
       - name: Setup caching for pip packages
-        uses: actions/cache at v4
+        uses: actions/cache at v5
         with:
           path: ${{ steps.pip-cache.outputs.dir }}
           key: ${{ runner.os }}-pip-${{ hashFiles('requirements-full.txt') }}
@@ -113,7 +114,7 @@ jobs:
 
       # Store the docs as a build artifact so we can deploy it later
       - name: Upload HTML documentation as an artifact
-        uses: actions/upload-artifact at v4
+        uses: actions/upload-artifact at v6
         with:
           name: docs-${{ github.sha }}
           path: doc/_build/html
@@ -127,17 +128,17 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout at v4
+        uses: actions/checkout at v6
 
       # Fetch the built docs from the "build" job
       - name: Download HTML documentation artifact
-        uses: actions/download-artifact at v4
+        uses: actions/download-artifact at v7
         with:
           name: docs-${{ github.sha }}
           path: doc/_build/html
 
       - name: Checkout the gh-pages branch in a separate folder
-        uses: actions/checkout at v4
+        uses: actions/checkout at v6
         with:
           ref: gh-pages
           # Checkout to this folder instead of the current one


=====================================
.github/workflows/pypi.yml
=====================================
@@ -29,7 +29,7 @@ jobs:
     steps:
       # Checks-out your repository under $GITHUB_WORKSPACE
       - name: Checkout
-        uses: actions/checkout at v4
+        uses: actions/checkout at v6
         with:
           # Need to fetch more than the last commit so that setuptools_scm can
           # create the correct version string. If the number of commits since
@@ -45,7 +45,7 @@ jobs:
         run: git fetch origin 'refs/tags/*:refs/tags/*'
 
       - name: Setup Python
-        uses: actions/setup-python at v5
+        uses: actions/setup-python at v6
         with:
           python-version: "3.x"
 
@@ -79,7 +79,7 @@ jobs:
       - name: Upload archives as artifacts
         # Only if not a pull request
         if: success() && github.event_name != 'pull_request'
-        uses: actions/upload-artifact at v4
+        uses: actions/upload-artifact at v6
         with:
           name: pypi-${{ github.sha }}
           path: dist
@@ -98,7 +98,7 @@ jobs:
 
     steps:
       - name: Checkout
-        uses: actions/checkout at v4
+        uses: actions/checkout at v6
         with:
           # The GitHub token is preserved by default but this job doesn't need
           # to be able to push to GitHub.
@@ -106,7 +106,7 @@ jobs:
 
       # Fetch the built archives from the "build" job
       - name: Download built archives artifact
-        uses: actions/download-artifact at v4
+        uses: actions/download-artifact at v7
         with:
           name: pypi-${{ github.sha }}
           path: dist
@@ -114,7 +114,7 @@ jobs:
       - name: Publish to Test PyPI
         # Only publish to TestPyPI when a PR is merged (pushed to main)
         if: success() && github.event_name == 'push'
-        uses: pypa/gh-action-pypi-publish at v1.8.14
+        uses: pypa/gh-action-pypi-publish at v1.13.0
         with:
           repository_url: https://test.pypi.org/legacy/
           # Allow existing releases on test PyPI without errors.
@@ -124,4 +124,4 @@ jobs:
       - name: Publish to PyPI
         # Only publish to PyPI when a release triggers the build
         if: success() && github.event_name == 'release'
-        uses: pypa/gh-action-pypi-publish at v1.8.14
+        uses: pypa/gh-action-pypi-publish at v1.13.0


=====================================
.github/workflows/style.yml
=====================================
@@ -20,12 +20,12 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout at v4
+        uses: actions/checkout at v6
         with:
           persist-credentials: false
 
       - name: Setup Python
-        uses: actions/setup-python at v5
+        uses: actions/setup-python at v6
         with:
           python-version: "3.10"
 
@@ -42,12 +42,12 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout
-        uses: actions/checkout at v4
+        uses: actions/checkout at v6
         with:
           persist-credentials: false
 
       - name: Setup Python
-        uses: actions/setup-python at v5
+        uses: actions/setup-python at v6
         with:
           python-version: "3.10"
 
@@ -59,3 +59,25 @@ jobs:
 
       - name: Check code style
         run: make check-style lint
+
+  types:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout at v6
+        with:
+          persist-credentials: false
+
+      - name: Setup Python
+        uses: actions/setup-python at v6
+        with:
+          python-version: "3.10"
+
+      - name: Install requirements
+        run: python -m pip install -r env/requirements-types.txt
+
+      - name: List installed packages
+        run: python -m pip freeze
+
+      - name: Check code style
+        run: make check-types


=====================================
.github/workflows/test.yml
=====================================
@@ -17,6 +17,7 @@ on:
   release:
     types:
       - published
+  workflow_dispatch:
 
 # Use bash by default in all jobs
 defaults:
@@ -25,9 +26,9 @@ defaults:
 
 jobs:
   #############################################################################
-  # Run tests and upload to codecov
+  # Run tests
   test:
-    name: ${{ matrix.os }} python=${{ matrix.python }} dependencies=${{ matrix.dependencies }}
+    name: ${{ matrix.os }} python=${{ matrix.python }} dependencies=${{ matrix.dependencies }} network=${{ matrix.network }}
     if: ${{ github.repository_owner == 'fatiando' || github.event_name != 'schedule' }}
     runs-on: ${{ matrix.os }}
     strategy:
@@ -45,19 +46,18 @@ jobs:
           - optional
         include:
           - dependencies: oldest
-            python: "3.7"
+            python: "3.9"
+            network: false
           - dependencies: latest
-            python: "3.11"
+            python: "3.14"
+            network: false
           - dependencies: optional
-            python: "3.11"
-          # test on macos-13 (x86) using oldest dependencies and python 3.7
-          - os: macos-13
-            dependencies: oldest
-            python: "3.7"
-        exclude:
-          # don't test on macos-latest (arm64) with oldest dependencies
-          - os: macos-latest
-            dependencies: oldest
+            python: "3.14"
+            network: false
+          - dependencies: optional
+            python: "3.14"
+            network: true
+            os: ubuntu-latest
     env:
       REQUIREMENTS: env/requirements-build.txt env/requirements-test.txt
       # Used to tag codecov submissions
@@ -66,17 +66,10 @@ jobs:
       DEPENDENCIES: ${{ matrix.dependencies }}
 
     steps:
-      # Cancel any previous run of the test job
-      # We pin the commit hash corresponding to v0.5.0, and not pinning the tag
-      # because we are giving full access through the github.token.
-      - name: Cancel Previous Runs
-        uses: styfle/cancel-workflow-action at 0.12.1
-        with:
-          access_token: ${{ github.token }}
 
       # Checks-out your repository under $GITHUB_WORKSPACE
       - name: Checkout
-        uses: actions/checkout at v4
+        uses: actions/checkout at v6
         with:
           # Need to fetch more than the last commit so that setuptools-scm can
           # create the correct version string. If the number of commits since
@@ -92,7 +85,7 @@ jobs:
         run: git fetch origin 'refs/tags/*:refs/tags/*'
 
       - name: Setup Python
-        uses: actions/setup-python at v5
+        uses: actions/setup-python at v6
         with:
           python-version: ${{ matrix.python }}
 
@@ -124,10 +117,10 @@ jobs:
       - name: Get the pip cache folder
         id: pip-cache
         run: |
-          echo "::set-output name=dir::$(pip cache dir)"
+          echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
 
       - name: Setup caching for pip packages
-        uses: actions/cache at v4
+        uses: actions/cache at v5
         with:
           path: ${{ steps.pip-cache.outputs.dir }}
           key: ${{ runner.os }}-pip-${{ hashFiles('requirements-full.txt') }}
@@ -150,19 +143,62 @@ jobs:
         run: python -m pip freeze
 
       - name: Run the tests
-        run: make test
+        run: |
+          if [ ${{ matrix.network }} == "true" ]; then
+            make test
+          else
+            make PYTEST_ARGS_EXTRA="-m 'not network'" test
+          fi
 
       - name: Convert coverage report to XML for codecov
         run: coverage xml
 
-      - name: Upload coverage to Codecov
-        uses: codecov/codecov-action at v4
+      - name: Upload coverage report as an artifact
+        uses: actions/upload-artifact at v6
+        with:
+          name: coverage_${{ matrix.os }}_${{ matrix.dependencies }}
+          path: ./coverage.xml
+
+
+  #############################################################################
+  # Upload coverage report to codecov
+  codecov-upload:
+    runs-on: ubuntu-latest
+    needs: test
+
+    steps:
+
+      - name: Checkout
+        uses: actions/checkout at v6
+        with:
+          # Need to fetch more than the last commit so that setuptools-scm can
+          # create the correct version string. If the number of commits since
+          # the last release is greater than this, the version will still be wrong.
+          # Increase if necessary.
+          fetch-depth: 100
+          # The GitHub token is preserved by default but this job doesn't need
+          # to be able to push to GitHub.
+          persist-credentials: false
+
+      - name: Download coverage report artifacts
+        # Download coverage reports from every runner.
+        # Maximum coverage is achieved by combining reports from every runner.
+        # Each coverage file will live in its own folder with the same name as
+        # the artifact.
+        uses: actions/download-artifact at v7
+        with:
+          pattern: coverage_*
+
+      - name: List all downloaded artifacts
+        run: ls -l -R .
+
+      - name: Upload coverage reports to Codecov
+        uses: codecov/codecov-action at v5
         with:
-          files: ./coverage.xml
-          env_vars: OS,PYTHON,DEPENDENCIES
-          # Don't mark the job as failed if the upload fails for some reason.
-          # It does sometimes but shouldn't be the reason for running
-          # everything again unless something else is broken.
-          fail_ci_if_error: false
+          # Upload all coverage report files
+          files: ./coverage_*/coverage.xml
+          # Fail the job so we know coverage isn't being updated. Otherwise it
+          # can silently drop and we won't know.
+          fail_ci_if_error: true
         env:
           CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}


=====================================
.pylintrc
=====================================
@@ -332,6 +332,9 @@ max-statements=50
 # Minimum number of public methods for a class (see R0903).
 min-public-methods=2
 
+# Increase maximum positional arguments
+max-positional-arguments=9
+
 
 [IMPORTS]
 


=====================================
AUTHORS.md
=====================================
@@ -19,7 +19,7 @@ order by last name) and are considered "The Pooch Developers":
 * [Rémi Rampin](https://github.com/remram44) - New York University, USA (ORCID: [0000-0002-0524-2282](https://www.orcid.org/0000-0002-0524-2282))
 * [Clément Robert](https://github.com/neutrinoceros) - Institut de Planétologie et d'Astrophysique de Grenoble, France (ORCID: [0000-0001-8629-7068](https://orcid.org/0000-0001-8629-7068))
 * [Daniel Shapero](https://github.com/danshapero) - Polar Science Center, University of Washington Applied Physics Lab, USA (ORCID: [0000-0002-3651-0649](https://www.orcid.org/0000-0002-3651-0649))
-* [Santiago Soler](https://github.com/santisoler) - CONICET, Argentina; Instituto Geofísico Sismológico Volponi, Universidad Nacional de San Juan, Argentina (ORCID: [0000-0001-9202-5317](https://www.orcid.org/0000-0001-9202-5317))
+* [Santiago Soler](https://github.com/santisoler) - Department of Earth, Ocean and Atmospheric Sciences, University of British Columbia (ORCID: 0000-0001-9202-5317)
 * [Matthew Turk](https://github.com/matthewturk) - University of Illinois at Urbana-Champaign, USA (ORCID: [0000-0002-5294-0198](https://www.orcid.org/0000-0002-5294-0198))
 * [Leonardo Uieda](https://github.com/leouieda) - Universidade de São Paulo, Brazil (ORCID: [0000-0001-6123-9515](https://www.orcid.org/0000-0001-6123-9515))
 * [Antonio Valentino](https://github.com/avalentino)


=====================================
Makefile
=====================================
@@ -1,7 +1,7 @@
 # Build, package, test, and clean
 PROJECT=pooch
 TESTDIR=tmp-test-dir-with-unique-name
-PYTEST_ARGS=--cov-config=../.coveragerc --cov-report=term-missing --cov=$(PROJECT) --doctest-modules -v --pyargs
+PYTEST_ARGS=--cov-config=../.coveragerc --cov-report=term-missing --cov=$(PROJECT) --doctest-modules -v --pyargs $(PYTEST_ARGS_EXTRA)
 LINT_FILES=$(PROJECT)
 CHECK_STYLE=$(PROJECT) doc
 
@@ -34,7 +34,7 @@ format:
 	black $(CHECK_STYLE)
 	burocrata --extension=py $(CHECK_STYLE)
 
-check: check-format check-style
+check: check-format check-style check-types
 
 check-format:
 	black --check $(CHECK_STYLE)
@@ -43,6 +43,9 @@ check-format:
 check-style:
 	flake8 $(CHECK_STYLE)
 
+check-types:
+	mypy $(CHECK_STYLE)
+
 lint:
 	pylint --jobs=0 $(LINT_FILES)
 


=====================================
README.md
=====================================
@@ -4,7 +4,8 @@
 <a href="https://www.fatiando.org/pooch"><strong>Documentation</strong> (latest)</a> •
 <a href="https://www.fatiando.org/pooch/dev"><strong>Documentation</strong> (main branch)</a> •
 <a href="https://github.com/fatiando/pooch/blob/main/CONTRIBUTING.md"><strong>Contributing</strong></a> •
-<a href="https://www.fatiando.org/contact/"><strong>Contact</strong></a>
+<a href="https://www.fatiando.org/contact/"><strong>Contact</strong></a> •
+<a href="https://github.com/orgs/fatiando/discussions"><strong>Ask a question</strong></a>
 </p>
 
 <p align="center">
@@ -42,7 +43,7 @@ Are you a **scientist** or researcher? Pooch can help you too!
 
 ## Projects using Pooch
 
-[SciPy](https://github.com/scipy/scipy), 
+[SciPy](https://github.com/scipy/scipy),
 [scikit-image](https://github.com/scikit-image/scikit-image),
 [xarray](https://github.com/pydata/xarray),
 [Ensaio](https://github.com/fatiando/ensaio),
@@ -77,7 +78,11 @@ Are you a **scientist** or researcher? Pooch can help you too!
 [Py-ART](https://github.com/ARM-DOE/pyart),
 [HyperSpy](https://github.com/hyperspy/hyperspy),
 [RosettaSciIO](https://github.com/hyperspy/rosettasciio),
-[eXSpy](https://github.com/hyperspy/exspy)
+[eXSpy](https://github.com/hyperspy/exspy),
+[SPLASH](https://github.com/Adam-Boesky/astro_SPLASH),
+[xclim](https://github.com/Ouranosinc/xclim),
+[CLISOPS](https://github.com/roocs/clisops),
+[scXpand](https://github.com/yizhak-lab-ccg/scXpand)
 
 
 > If you're using Pooch, **send us a pull request** adding your project to the list.
@@ -116,7 +121,7 @@ For **package developers** including sample data in their projects:
 """
 Module mypackage/datasets.py
 """
-import pkg_resources
+from importlib import resources
 import pandas
 import pooch
 
@@ -151,7 +156,7 @@ GOODBOY = pooch.create(
 # manage large numbers of data files. The registry file should be packaged
 # and distributed with your software.
 GOODBOY.load_registry(
-    pkg_resources.resource_stream("mypackage", "registry.txt")
+    resources.open_text("mypackage", "registry.txt")
 )
 
 # Define functions that your users can call to get back the data in memory


=====================================
doc/api/index.rst
=====================================
@@ -64,3 +64,20 @@ Miscellaneous
     :toctree: generated/
 
     pooch.test
+
+Typing
+------
+
+Custom classes for type annotations.
+This module provides additional `PEP 484 <https://peps.python.org/pep-0484/>`_
+type aliases used in ``pooch``'s codebase.
+
+.. autosummary::
+   :toctree: generated/
+
+    pooch.typing.Action
+    pooch.typing.Downloader
+    pooch.typing.PathType
+    pooch.typing.PathInputType
+    pooch.typing.ParsedURL
+    pooch.typing.Processor


=====================================
doc/changes.rst
=====================================
@@ -3,6 +3,67 @@
 Changelog
 =========
 
+Version 1.9.0
+-------------
+
+Released on: 2026/01/30
+
+DOI: https://doi.org/10.5281/zenodo.18379610
+
+Breaking changes:
+
+* Drop support for Python 3.7 and 3.8 (`#450 <https://github.com/fatiando/pooch/pull/450>`__).
+
+Bug fixes:
+
+* Explicitly pass ``filter`` to ``TarFile.extractall`` on Python >=3.12 (`#458 <https://github.com/fatiando/pooch/pull/458>`__). Pass a ``filter="data"`` argument to ``TarFile.extractall`` to prevent dangerous security issues. The ``filter`` argument was added in Python 3.12, so only pass it on versions greater or equal than that. This change matches the default behaviour that will take place since Python 3.14.
+* Fix TQDM usage (`#465 <https://github.com/fatiando/pooch/pull/465>`__). Newer versions of tqdm behave differently at a terminal vs in a jupyter notebook. Import from ``tqdm.auto`` instead so that the downloader looks right in either a notebook or the terminal.
+* Fix bug in file hashing on FIPS enabled system (`#511 <https://github.com/fatiando/pooch/pull/511>`__). Set ``usedforsecurity=False`` on ``hashlib`` hashing algorithms to make FIPS enabled systems happy.
+
+New features:
+
+* Set User-Agent in requests headers for DOI downloaders (`#507 <https://github.com/fatiando/pooch/pull/507>`__). Pass a custom User-Agent when making requests through DOI downloaders in order to bypass limit rates imposed by services like Zenodo to block abusive requests. They can now filter requests coming from Pooch from the rest. Add a global ``REQUESTS_HEADERS`` variable that is used by the ``doi_to_url`` function (which requires making a request to doi.org to figure out the service provider). Add a new ``headers`` argument to the ``DOIDownloader`` to specifically pass requests headers. By default it’ll use Pooch’s default user agent.
+* Extend support for Python 3.13 (`#451 <https://github.com/fatiando/pooch/pull/451>`__) and Python 3.14 (`#505 <https://github.com/fatiando/pooch/pull/505>`__).
+* Provide more descriptive errors when DOI request fails (`#477 <https://github.com/fatiando/pooch/pull/477>`__). Raise the ``requests`` response to provide more informative errors when the status code is between 400 and 600.
+
+Maintenance:
+
+* Add testing data to the package distributions (`#421 <https://github.com/fatiando/pooch/pull/421>`__). The test code ``pooch/tests`` is installed but the data in ``pooch/tests/data`` are not. This makes it impossible to run tests on the installed package. Add the appropriate setuptools configuration to make it happen.
+* Move push to codecov to its own job in Actions (`#424 <https://github.com/fatiando/pooch/pull/424>`__). Remove the push to codecov step from the ``test`` job into a new job that depends on the test job. Upload the coverage reports as artifacts after testing, and reuse the artifacts in the new job. Upload all coverage reports in a single push to Codecov to minimize the number of hits.
+* Increase the max positional args allowed by pylint (`#438 <https://github.com/fatiando/pooch/pull/438>`__). Configure ``pylint`` to increase the maximum number of positional arguments allowed in any function or method.
+* Replace usage of ``pkg_resources`` for ``importlib.resources`` (`#449 <https://github.com/fatiando/pooch/pull/449>`__).
+* Add mypy to CI job and type hints for one class. (`#404 <https://github.com/fatiando/pooch/pull/404>`__). Add type hints to ``pooch/core.py`` and create a new ``typing`` submodule for custom type classes, and add it to the API Reference. Run ``mypy`` on CI to perform type checks, and create new targets in the ``Makefile``. Extend the list of dependencies required to run the type checks.
+* Add pytest ``figshare`` mark to tests (`#481 <https://github.com/fatiando/pooch/pull/481>`__). Add a pytest ``figshare`` mark to tests that make requests to Figshare. Such a mark allows us to filter tests: use ``pytest -v -m figshare`` to only run tests with that mark, or use ``pytest -v -m "not figshare"`` to run all tests but the marked ones.
+* Skip Figshare related tests on Actions under MacOS (`#482 <https://github.com/fatiando/pooch/pull/482>`__). Skip tests marked with ``figshare`` on Actions that use MacOS as runner. Those tests in CI were constantly failing, probably due to too many requests coming from GitHub. Add an optional ``PYTEST_ARGS_EXTRA`` variable to ``Makefile`` that can be used to pass extra arguments to ``pytest``. Skip doctests that download files from Figshare.
+* List requirements to run type checks in new file (`#492 <https://github.com/fatiando/pooch/pull/492>`__). Create a new ``env/requirements-types.txt`` file with the list of required packages to run types checks. This file is used by the GitHub Action workflow that automatically runs the type checks. List new requirements for type checks in ``environment.yml``. Stop ignoring missing imports of ``xxhash`` in ``pyproject.toml``. Ignore type assignment for ``xxhash`` in test file.
+* Fix uploads of coverage reports to codecov (`#496 <https://github.com/fatiando/pooch/pull/496>`__). Checkout the repository in the ``codecov-upload`` job before uploading the coverage reports to codecov.
+* Pin black to v25 (`#506 <https://github.com/fatiando/pooch/pull/506>`__). Pin black version used in the ``environment.yml`` and to run style checks on CI to ``25.*.*`` and ``<26.0.0``, respectively. Since we plan to replace black with Ruff for autoformatting, it’s better to pin for now than reformat it with latest version.
+* Only run tests with network access on some CI jobs (`#484 <https://github.com/fatiando/pooch/pull/484>`__). Our CI is continuously hitting some external network providers which is causing some of them (mostly figshare for now) to block our traffic. This means that our CI fails randomly and it’s annoying. Only run network tests on jobs with the latest Python and optional dependencies installed to try to mitigate this.
+* Use a SPDX expression for license in ``pyproject.toml`` (`#476 <https://github.com/fatiando/pooch/pull/476>`__). Use a SPDX expression for the license in ``pyproject.toml`` and remove the unneeded license classifier. This removes the warnings we were getting after running ``make build``.
+* Add ``Typing :: Typed`` trove classifier (`#472 <https://github.com/fatiando/pooch/pull/472>`__). Allow PyPI users know that Pooch supports type hints.
+* Allow to manually trigger test job in Actions (`#475 <https://github.com/fatiando/pooch/pull/475>`__). Add ``workflow_dispatch`` as an event trigger for the ``test.yml`` workflow.
+* Standardize requests made by ``DOIDownloaders`` (`#514 <https://github.com/fatiando/pooch/pull/514>`__). Respect user’s decisions when defining the ``DOIDownloader`` with respect to arguments passed to ``requests.get`` whenever we call that function. This way, all calls made by ``DOIDownloaders`` and the repository classes make use of the same arguments, including ``timeout``, ``headers``, etc.
+
+Documentation:
+
+* Add a link to the Fatiando Forum in the README (`#461 <https://github.com/fatiando/pooch/pull/461>`__).
+* Add ``scXpand`` (`#488 <https://github.com/fatiando/pooch/pull/488>`__), ``xclim`` (`#445 <https://github.com/fatiando/pooch/pull/445>`__), ``CLISOPS`` (`#445 <https://github.com/fatiando/pooch/pull/445>`__), and ``SPLASH`` (`#432 <https://github.com/fatiando/pooch/pull/432>`__) to list of projects using Pooch.
+
+This release contains contributions from:
+
+* Adam Boesky
+* Antonio Valentino
+* Daniel McCloy
+* Daniel Shapero
+* Eliot Robson
+* Joren Hammudoglu
+* Leonardo Uieda
+* Mridul Seth
+* ofirshorer
+* Santiago Soler
+* Trevor James Smith
+
+
 Version 1.8.2
 -------------
 


=====================================
doc/compatibility.rst
=====================================
@@ -68,4 +68,8 @@ following releases to ensure compatibility:
       - 1.2.0
     * - 3.6
       - 1.6.0
+    * - 3.7
+      - 1.8.2
+    * - 3.8
+      - 1.8.2
 


=====================================
doc/conf.py
=====================================
@@ -44,7 +44,7 @@ intersphinx_mapping = {
 }
 
 # Autosummary pages will be generated by sphinx-autogen instead of sphinx-build
-autosummary_generate = []
+autosummary_generate: list = []
 
 # Otherwise, the Return parameter list looks different from the Parameters list
 napoleon_use_rtype = False
@@ -77,7 +77,7 @@ html_copy_source = True
 html_static_path = ["_static"]
 # CSS files are relative to the static path
 html_css_files = ["style.css"]
-html_extra_path = []
+html_extra_path: list = []
 html_show_sourcelink = False
 html_show_sphinx = True
 html_show_copyright = True


=====================================
doc/install.rst
=====================================
@@ -47,7 +47,7 @@ There are different ways to install Pooch:
 Which Python?
 -------------
 
-You'll need **Python >= 3.7**. See :ref:`python-versions` if you
+You'll need **Python >= 3.9**. See :ref:`python-versions` if you
 require support for older versions.
 
 .. _dependencies:


=====================================
doc/registry-files.rst
=====================================
@@ -13,7 +13,7 @@ hashes in a file and use :meth:`pooch.Pooch.load_registry` to read them.
 .. code:: python
 
     import os
-    import pkg_resources
+    from importlib import resources
 
     POOCH = pooch.create(
         path=pooch.os_cache("plumbus"),
@@ -24,14 +24,15 @@ hashes in a file and use :meth:`pooch.Pooch.load_registry` to read them.
         registry=None,
     )
     # Get registry file from package_data
-    registry_file = pkg_resources.resource_stream("plumbus", "registry.txt")
+    registry_file = resources.open_text("plumbus", "registry.txt")
     # Load this registry file
     POOCH.load_registry(registry_file)
 
 In this case, the ``registry.txt`` file is in the ``plumbus/`` package
 directory and should be shipped with the package (see below for instructions).
-We use `pkg_resources <https://setuptools.readthedocs.io/en/latest/pkg_resources.html#basic-resource-access>`__
-to access the ``registry.txt``, giving it the name of our Python package.
+We use `importlib.resources
+<https://docs.python.org/3/library/importlib.resources.html>`__ to access the
+``registry.txt``, giving it the name of our Python package.
 
 Registry file format
 --------------------


=====================================
doc/versions.rst
=====================================
@@ -7,6 +7,7 @@ Use the links below to access documentation for specific versions
 * `Latest release <https://www.fatiando.org/pooch/latest>`__
 * `Development <https://www.fatiando.org/pooch/dev>`__
   (reflects the current development branch on GitHub)
+* `v1.9.0 <https://www.fatiando.org/pooch/v1.9.0>`__
 * `v1.8.2 <https://www.fatiando.org/pooch/v1.8.2>`__
 * `v1.8.1 <https://www.fatiando.org/pooch/v1.8.1>`__
 * `v1.8.0 <https://www.fatiando.org/pooch/v1.8.0>`__


=====================================
env/requirements-style.txt
=====================================
@@ -1,5 +1,5 @@
-# Style checks
-black
+# Style and type checks
+black<26.0.0
 flake8
 pylint>=2.4
 pathspec


=====================================
env/requirements-types.txt
=====================================
@@ -0,0 +1,9 @@
+# Requirements to run type checks.
+mypy
+types-requests
+types-tqdm
+types-paramiko
+types-xxhash
+pytest                 # Install pytest to provide type stubs
+platformdirs >= 2.5.0  # install platformdirs to provide type stubs
+packaging >= 20.0      # install packaging to provide type stubs


=====================================
environment.yml
=====================================
@@ -3,7 +3,7 @@ channels:
     - conda-forge
     - defaults
 dependencies:
-    - python==3.11
+    - python==3.14
     - pip
     # Run
     - requests
@@ -12,9 +12,10 @@ dependencies:
     # Optional dependencies
     - tqdm>=4.41.0,<5.0.0
     - paramiko>=2.7.0
-    - xxhash>=1.4.3
+    - python-xxhash>=1.4.3  # in conda-forge python-xxhash is the python pkg
+    - xxhash # this is the xxHash library in conda-forge
     # Build
-    - build
+    - python-build
     # Test
     - pytest
     - pytest-cov
@@ -27,8 +28,14 @@ dependencies:
     - sphinx-design==0.5.*
     # Style
     - pathspec
-    - black>=20.8b1
+    - black==25.*.*
     - flake8
     - pylint>=2.4
+    # Types
+    - mypy
+    - types-requests
+    - types-tqdm
+    - types-paramiko
+    - types-xxhash
     - pip:
       - burocrata


=====================================
pooch/__init__.py
=====================================
@@ -19,7 +19,7 @@ from .downloaders import (
 from .processors import Unzip, Untar, Decompress
 
 # This file is generated automatically by setuptools_scm
-from . import _version
+from . import _version  # type: ignore
 
 
 # Add a "v" to the version number


=====================================
pooch/core.py
=====================================
@@ -7,12 +7,14 @@
 """
 The main Pooch class and a factory function for it.
 """
+
 import os
 import time
 import contextlib
 from pathlib import Path
 import shlex
 import shutil
+from typing import Union, Optional, Any
 
 
 from .hashes import hash_matches, file_hash
@@ -26,17 +28,18 @@ from .utils import (
     unique_file_name,
 )
 from .downloaders import DOIDownloader, choose_downloader, doi_to_repository
+from .typing import PathType, PathInputType, Processor, Downloader, Action
 
 
 def retrieve(
-    url,
-    known_hash,
-    fname=None,
-    path=None,
-    processor=None,
-    downloader=None,
-    progressbar=False,
-):
+    url: str,
+    known_hash: Optional[str] = None,
+    fname: Optional[str] = None,
+    path: Optional[PathType] = None,
+    processor: Optional[Processor] = None,
+    downloader: Optional[Downloader] = None,
+    progressbar: bool = False,
+) -> str:
     """
     Download and cache a single file locally.
 
@@ -254,15 +257,15 @@ def retrieve(
 
 
 def create(
-    path,
-    base_url,
-    version=None,
-    version_dev="master",
-    env=None,
-    registry=None,
-    urls=None,
-    retry_if_failed=0,
-    allow_updates=True,
+    path: PathInputType,
+    base_url: str,
+    version: Optional[str] = None,
+    version_dev: str = "master",
+    env: Optional[str] = None,
+    registry: Optional[dict] = None,
+    urls: Optional[dict] = None,
+    retry_if_failed: int = 0,
+    allow_updates: Union[bool, str] = True,
 ):
     """
     Create a :class:`~pooch.Pooch` with sensible defaults to fetch data files.
@@ -479,13 +482,13 @@ class Pooch:
 
     def __init__(
         self,
-        path,
-        base_url,
-        registry=None,
-        urls=None,
-        retry_if_failed=0,
-        allow_updates=True,
-    ):
+        path: PathType,
+        base_url: str,
+        registry: Optional[dict[str, str]] = None,
+        urls: Optional[dict[str, str]] = None,
+        retry_if_failed: int = 0,
+        allow_updates: bool = True,
+    ) -> None:
         self.path = path
         self.base_url = base_url
         if registry is None:
@@ -498,16 +501,22 @@ class Pooch:
         self.allow_updates = allow_updates
 
     @property
-    def abspath(self):
+    def abspath(self) -> Path:
         "Absolute path to the local storage"
         return Path(os.path.abspath(os.path.expanduser(str(self.path))))
 
     @property
-    def registry_files(self):
+    def registry_files(self) -> list[str]:
         "List of file names on the registry"
         return list(self.registry)
 
-    def fetch(self, fname, processor=None, downloader=None, progressbar=False):
+    def fetch(
+        self,
+        fname: str,
+        processor: Optional[Processor] = None,
+        downloader: Optional[Downloader] = None,
+        progressbar: bool = False,
+    ) -> str:
         """
         Get the absolute path to a file in the local storage.
 
@@ -600,7 +609,7 @@ class Pooch:
 
         return str(full_path)
 
-    def _assert_file_in_registry(self, fname):
+    def _assert_file_in_registry(self, fname: str) -> None:
         """
         Check if a file is in the registry and raise :class:`ValueError` if
         it's not.
@@ -608,7 +617,7 @@ class Pooch:
         if fname not in self.registry:
             raise ValueError(f"File '{fname}' is not in the registry.")
 
-    def get_url(self, fname):
+    def get_url(self, fname: str) -> str:
         """
         Get the full URL to download a file in the registry.
 
@@ -622,7 +631,7 @@ class Pooch:
         self._assert_file_in_registry(fname)
         return self.urls.get(fname, "".join([self.base_url, fname]))
 
-    def load_registry(self, fname):
+    def load_registry(self, fname: PathType) -> None:
         """
         Load entries from a file and add them to the registry.
 
@@ -644,7 +653,7 @@ class Pooch:
         with contextlib.ExitStack() as stack:
             if hasattr(fname, "read"):
                 # It's a file object
-                fin = fname
+                fin: Any = fname
             else:
                 # It's a file path
                 fin = stack.enter_context(open(fname, encoding="utf-8"))
@@ -673,7 +682,7 @@ class Pooch:
                         self.urls[file_name] = file_url
                     self.registry[file_name] = file_checksum.lower()
 
-    def load_registry_from_doi(self):
+    def load_registry_from_doi(self) -> None:
         """
         Populate the registry using the data repository API
 
@@ -698,12 +707,17 @@ class Pooch:
 
         # Create a repository instance
         doi = self.base_url.replace("doi:", "")
-        repository = doi_to_repository(doi)
+        repository = doi_to_repository(
+            doi,
+            headers=downloader.headers,
+            timeout=downloader.timeout,
+            **downloader.kwargs,
+        )
 
         # Call registry population for this repository
         return repository.populate_registry(self)
 
-    def is_available(self, fname, downloader=None):
+    def is_available(self, fname: str, downloader: Optional[Downloader] = None):
         """
         Check availability of a remote file without downloading it.
 
@@ -740,7 +754,7 @@ class Pooch:
         return available
 
 
-def download_action(path, known_hash):
+def download_action(path: Path, known_hash: Optional[str]) -> tuple[Action, str]:
     """
     Determine the action that is needed to get the file on disk.
 
@@ -767,18 +781,20 @@ def download_action(path, known_hash):
 
     """
     if not path.exists():
-        action = "download"
-        verb = "Downloading"
-    elif not hash_matches(str(path), known_hash):
-        action = "update"
-        verb = "Updating"
-    else:
-        action = "fetch"
-        verb = "Fetching"
-    return action, verb
-
-
-def stream_download(url, fname, known_hash, downloader, pooch=None, retry_if_failed=0):
+        return "download", "Downloading"
+    if not hash_matches(str(path), known_hash):
+        return "update", "Updating"
+    return "fetch", "Fetching"
+
+
+def stream_download(
+    url: str,
+    fname: Path,
+    known_hash: Optional[str],
+    downloader: Downloader,
+    pooch: Optional[Pooch] = None,
+    retry_if_failed: int = 0,
+) -> None:
     """
     Stream the file and check that its hash matches the known one.
 


=====================================
pooch/downloaders.py
=====================================
@@ -14,16 +14,19 @@ import ftplib
 import warnings
 
 from .utils import parse_url
+from ._version import __version__  # type: ignore[import-not-found]
 
+# Mypy doesn't like assigning None like this.
+# Can just use a guard variable
 try:
-    from tqdm import tqdm
+    from tqdm.auto import tqdm
 except ImportError:
-    tqdm = None
+    tqdm = None  # type: ignore
 
 try:
     import paramiko
 except ImportError:
-    paramiko = None
+    paramiko = None  # type: ignore
 
 
 # Set the default timeout in seconds so it can be configured in a pinch for the
@@ -31,6 +34,13 @@ except ImportError:
 # See https://github.com/fatiando/pooch/issues/409
 DEFAULT_TIMEOUT = 30
 
+# Define headers that will be used by DOI downloaders when making requests.
+# Setting the user agent can bypass limit rates imposed by some services
+# like Zenodo (see #502).
+REQUESTS_HEADERS = {
+    "User-Agent": f"pooch{__version__} (https://www.fatiando.org/pooch)",
+}
+
 
 def choose_downloader(url, progressbar=False):
     """
@@ -545,9 +555,13 @@ class DOIDownloader:  # pylint: disable=too-few-public-methods
         (stderr). Requires `tqdm <https://github.com/tqdm/tqdm>`__ to be
         installed. Alternatively, an arbitrary progress bar object can be
         passed. See :ref:`custom-progressbar` for details.
-    chunk_size : int
+    chunk_size : int, optional
         Files are streamed *chunk_size* bytes at a time instead of loading
         everything into memory at one. Usually doesn't need to be changed.
+    headers : dict or None, optional
+        Headers that will be passed to :func:`requests.get`.
+        If None, default headers containing Pooch's user agent will be used.
+        If no headers should be used, pass an empty dictionary.
     **kwargs
         All keyword arguments given when creating an instance of this class
         will be passed to :func:`requests.get`.
@@ -562,31 +576,44 @@ class DOIDownloader:  # pylint: disable=too-few-public-methods
     >>> downloader = DOIDownloader()
     >>> url = "doi:10.6084/m9.figshare.14763051.v1/tiny-data.txt"
     >>> # Not using with Pooch.fetch so no need to pass an instance of Pooch
-    >>> downloader(url=url, output_file="tiny-data.txt", pooch=None)
-    >>> os.path.exists("tiny-data.txt")
+    >>> downloader(
+    ...     url=url, output_file="tiny-data.txt", pooch=None
+    ... ) # doctest: +SKIP
+    >>> os.path.exists("tiny-data.txt") # doctest: +SKIP
     True
-    >>> with open("tiny-data.txt") as f:
+    >>> with open("tiny-data.txt") as f: # doctest: +SKIP
     ...     print(f.read().strip())
     # A tiny data file for test purposes only
     1  2  3  4  5  6
-    >>> os.remove("tiny-data.txt")
+    >>> os.remove("tiny-data.txt") # doctest: +SKIP
 
     Same thing but for our Zenodo archive:
 
     >>> url = "doi:10.5281/zenodo.4924875/tiny-data.txt"
-    >>> downloader(url=url, output_file="tiny-data.txt", pooch=None)
-    >>> os.path.exists("tiny-data.txt")
+    >>> downloader(
+    ...     url=url, output_file="tiny-data.txt", pooch=None
+    ... ) # doctest: +SKIP
+    >>> os.path.exists("tiny-data.txt") # doctest: +SKIP
     True
-    >>> with open("tiny-data.txt") as f:
+    >>> with open("tiny-data.txt") as f: # doctest: +SKIP
     ...     print(f.read().strip())
     # A tiny data file for test purposes only
     1  2  3  4  5  6
-    >>> os.remove("tiny-data.txt")
+    >>> os.remove("tiny-data.txt") # doctest: +SKIP
 
     """
 
-    def __init__(self, progressbar=False, chunk_size=1024, **kwargs):
+    def __init__(
+        self,
+        progressbar=False,
+        chunk_size=1024,
+        headers=None,
+        timeout=DEFAULT_TIMEOUT,
+        **kwargs,
+    ):
         self.kwargs = kwargs
+        self.headers = headers if headers is not None else REQUESTS_HEADERS
+        self.timeout = timeout
         self.progressbar = progressbar
         self.chunk_size = chunk_size
 
@@ -611,7 +638,13 @@ class DOIDownloader:  # pylint: disable=too-few-public-methods
         """
 
         parsed_url = parse_url(url)
-        data_repository = doi_to_repository(parsed_url["netloc"])
+
+        data_repository = doi_to_repository(
+            parsed_url["netloc"],
+            headers=self.headers,
+            timeout=self.timeout,
+            **self.kwargs,
+        )
 
         # Resolve the URL
         file_name = parsed_url["path"]
@@ -622,12 +655,16 @@ class DOIDownloader:  # pylint: disable=too-few-public-methods
 
         # Instantiate the downloader object
         downloader = HTTPDownloader(
-            progressbar=self.progressbar, chunk_size=self.chunk_size, **self.kwargs
+            progressbar=self.progressbar,
+            chunk_size=self.chunk_size,
+            headers=self.headers,
+            timeout=self.timeout,
+            **self.kwargs,
         )
         downloader(download_url, output_file, pooch)
 
 
-def doi_to_url(doi):
+def doi_to_url(doi, **kwargs):
     """
     Follow a DOI link to resolve the URL of the archive.
 
@@ -635,6 +672,8 @@ def doi_to_url(doi):
     ----------
     doi : str
         The DOI of the archive.
+    **kwargs
+        All keyword arguments will be passed to :func:`requests.get`.
 
     Returns
     -------
@@ -646,16 +685,17 @@ def doi_to_url(doi):
     import requests  # pylint: disable=C0415
 
     # Use doi.org to resolve the DOI to the repository website.
-    response = requests.get(f"https://doi.org/{doi}", timeout=DEFAULT_TIMEOUT)
+    response = requests.get(
+        f"https://doi.org/{doi}",
+        **kwargs,
+    )
     url = response.url
     if 400 <= response.status_code < 600:
-        raise ValueError(
-            f"Archive with doi:{doi} not found (see {url}). Is the DOI correct?"
-        )
+        response.raise_for_status()
     return url
 
 
-def doi_to_repository(doi):
+def doi_to_repository(doi, **kwargs):
     """
     Instantiate a data repository instance from a given DOI.
 
@@ -666,6 +706,10 @@ def doi_to_repository(doi):
     ----------
     doi : str
         The DOI of the archive.
+    **kwargs
+        All keyword arguments will be passed also as ``**kwargs`` to the
+        :meth:`DataRepository.initialize` method, that will ultimately get
+        passed to :func:`requests.get`.
 
     Returns
     -------
@@ -686,7 +730,7 @@ def doi_to_repository(doi):
     ]
 
     # Extract the DOI and the repository information
-    archive_url = doi_to_url(doi)
+    archive_url = doi_to_url(doi, **kwargs)
 
     # Try the converters one by one until one of them returned a URL
     data_repository = None
@@ -695,6 +739,7 @@ def doi_to_repository(doi):
             data_repository = repo.initialize(
                 archive_url=archive_url,
                 doi=doi,
+                **kwargs,
             )
 
     if data_repository is None:
@@ -710,7 +755,7 @@ def doi_to_repository(doi):
 
 class DataRepository:  # pylint: disable=too-few-public-methods, missing-class-docstring
     @classmethod
-    def initialize(cls, doi, archive_url):  # pylint: disable=unused-argument
+    def initialize(cls, doi, archive_url, **kwargs):  # pylint: disable=unused-argument
         """
         Initialize the data repository if the given URL points to a
         corresponding repository.
@@ -764,14 +809,15 @@ class DataRepository:  # pylint: disable=too-few-public-methods, missing-class-d
 class ZenodoRepository(DataRepository):  # pylint: disable=missing-class-docstring
     base_api_url = "https://zenodo.org/api/records"
 
-    def __init__(self, doi, archive_url):
+    def __init__(self, doi, archive_url, **kwargs):
         self.archive_url = archive_url
         self.doi = doi
         self._api_response = None
         self._api_version = None
+        self.kwargs = kwargs
 
     @classmethod
-    def initialize(cls, doi, archive_url):
+    def initialize(cls, doi, archive_url, **kwargs):
         """
         Initialize the data repository if the given URL points to a
         corresponding repository.
@@ -787,6 +833,9 @@ class ZenodoRepository(DataRepository):  # pylint: disable=missing-class-docstri
             The DOI that identifies the repository
         archive_url : str
             The resolved URL for the DOI
+        **kwargs
+            All keyword arguments given when creating an instance of this class
+            will be passed to :func:`requests.get`.
         """
 
         # Check whether this is a Zenodo URL
@@ -794,7 +843,7 @@ class ZenodoRepository(DataRepository):  # pylint: disable=missing-class-docstri
         if parsed_archive_url["netloc"] != "zenodo.org":
             return None
 
-        return cls(doi, archive_url)
+        return cls(doi, archive_url, **kwargs)
 
     @property
     def api_response(self):
@@ -805,8 +854,7 @@ class ZenodoRepository(DataRepository):  # pylint: disable=missing-class-docstri
 
             article_id = self.archive_url.split("/")[-1]
             self._api_response = requests.get(
-                f"{self.base_api_url}/{article_id}",
-                timeout=DEFAULT_TIMEOUT,
+                f"{self.base_api_url}/{article_id}", **self.kwargs
             ).json()
 
         return self._api_response
@@ -915,13 +963,14 @@ class ZenodoRepository(DataRepository):  # pylint: disable=missing-class-docstri
 
 
 class FigshareRepository(DataRepository):  # pylint: disable=missing-class-docstring
-    def __init__(self, doi, archive_url):
+    def __init__(self, doi, archive_url, **kwargs):
         self.archive_url = archive_url
         self.doi = doi
         self._api_response = None
+        self.kwargs = kwargs
 
     @classmethod
-    def initialize(cls, doi, archive_url):
+    def initialize(cls, doi, archive_url, **kwargs):
         """
         Initialize the data repository if the given URL points to a
         corresponding repository.
@@ -937,6 +986,9 @@ class FigshareRepository(DataRepository):  # pylint: disable=missing-class-docst
             The DOI that identifies the repository
         archive_url : str
             The resolved URL for the DOI
+        **kwargs
+            All keyword arguments given when creating an instance of this class
+            will be passed to :func:`requests.get`.
         """
 
         # Check whether this is a Figshare URL
@@ -944,7 +996,7 @@ class FigshareRepository(DataRepository):  # pylint: disable=missing-class-docst
         if parsed_archive_url["netloc"] != "figshare.com":
             return None
 
-        return cls(doi, archive_url)
+        return cls(doi, archive_url, **kwargs)
 
     def _parse_version_from_doi(self):
         """
@@ -972,7 +1024,7 @@ class FigshareRepository(DataRepository):  # pylint: disable=missing-class-docst
             # Use the figshare API to find the article ID from the DOI
             article = requests.get(
                 f"https://api.figshare.com/v2/articles?doi={self.doi}",
-                timeout=DEFAULT_TIMEOUT,
+                **self.kwargs,
             ).json()[0]
             article_id = article["id"]
             # Parse desired version from the doi
@@ -999,7 +1051,7 @@ class FigshareRepository(DataRepository):  # pylint: disable=missing-class-docst
                     f"{article_id}/versions/{version}"
                 )
             # Make the request and return the files in the figshare repository
-            response = requests.get(api_url, timeout=DEFAULT_TIMEOUT)
+            response = requests.get(api_url, **self.kwargs)
             response.raise_for_status()
             self._api_response = response.json()["files"]
 
@@ -1043,13 +1095,14 @@ class FigshareRepository(DataRepository):  # pylint: disable=missing-class-docst
 
 
 class DataverseRepository(DataRepository):  # pylint: disable=missing-class-docstring
-    def __init__(self, doi, archive_url):
+    def __init__(self, doi, archive_url, **kwargs):
         self.archive_url = archive_url
         self.doi = doi
         self._api_response = None
+        self.kwargs = kwargs
 
     @classmethod
-    def initialize(cls, doi, archive_url):
+    def initialize(cls, doi, archive_url, **kwargs):
         """
         Initialize the data repository if the given URL points to a
         corresponding repository.
@@ -1065,21 +1118,24 @@ class DataverseRepository(DataRepository):  # pylint: disable=missing-class-docs
             The DOI that identifies the repository
         archive_url : str
             The resolved URL for the DOI
+        **kwargs
+            All keyword arguments given when creating an instance of this class
+            will be passed to :func:`requests.get`.
         """
         # Access the DOI as if this was a DataVerse instance
-        response = cls._get_api_response(doi, archive_url)
+        response = cls._get_api_response(doi, archive_url, **kwargs)
 
         # If we failed, this is probably not a DataVerse instance
         if 400 <= response.status_code < 600:
             return None
 
         # Initialize the repository and overwrite the api response
-        repository = cls(doi, archive_url)
+        repository = cls(doi, archive_url, **kwargs)
         repository.api_response = response
         return repository
 
     @classmethod
-    def _get_api_response(cls, doi, archive_url):
+    def _get_api_response(cls, doi, archive_url, **kwargs):
         """
         Perform the actual API request
 
@@ -1093,7 +1149,7 @@ class DataverseRepository(DataRepository):  # pylint: disable=missing-class-docs
         response = requests.get(
             f"{parsed['protocol']}://{parsed['netloc']}/api/datasets/"
             f":persistentId?persistentId=doi:{doi}",
-            timeout=DEFAULT_TIMEOUT,
+            **kwargs,
         )
         return response
 
@@ -1103,7 +1159,7 @@ class DataverseRepository(DataRepository):  # pylint: disable=missing-class-docs
 
         if self._api_response is None:
             self._api_response = self._get_api_response(
-                self.doi, self.archive_url
+                self.doi, self.archive_url, **self.kwargs
             )  # pragma: no cover
 
         return self._api_response


=====================================
pooch/hashes.py
=====================================
@@ -78,7 +78,13 @@ def file_hash(fname, alg="sha256"):
         )
     # Calculate the hash in chunks to avoid overloading the memory
     chunksize = 65536
-    hasher = ALGORITHMS_AVAILABLE[alg]()
+    # For hashlib algorithms, use usedforsecurity=False to support FIPS-enabled
+    # systems. xxhash algorithms don't support this parameter.
+    hasher = (
+        ALGORITHMS_AVAILABLE[alg](usedforsecurity=False)
+        if alg in hashlib.algorithms_available
+        else ALGORITHMS_AVAILABLE[alg]()
+    )
     with open(fname, "rb") as fin:
         buff = fin.read(chunksize)
         while buff:


=====================================
pooch/processors.py
=====================================
@@ -8,12 +8,14 @@
 """
 Post-processing hooks
 """
+
 import abc
 import os
 import bz2
 import gzip
 import lzma
 import shutil
+import sys
 from zipfile import ZipFile
 from tarfile import TarFile
 
@@ -253,13 +255,14 @@ class Untar(ExtractorProcessor):  # pylint: disable=too-few-public-methods
         This method receives an argument for the archive to extract and the
         destination path.
         """
+        filter_kwarg = {} if sys.version_info < (3, 12) else {"filter": "data"}
         with TarFile.open(fname, "r") as tar_file:
             if self.members is None:
                 get_logger().info(
                     "Untarring contents of '%s' to '%s'", fname, extract_dir
                 )
                 # Unpack all files from the archive into our new folder
-                tar_file.extractall(path=extract_dir)
+                tar_file.extractall(path=extract_dir, **filter_kwarg)
             else:
                 for member in self.members:
                     get_logger().info(
@@ -281,7 +284,9 @@ class Untar(ExtractorProcessor):  # pylint: disable=too-few-public-methods
                         )
                     ]
                     # Extract the data file from within the archive
-                    tar_file.extractall(members=subdir_members, path=extract_dir)
+                    tar_file.extractall(
+                        members=subdir_members, path=extract_dir, **filter_kwarg
+                    )
 
 
 class Decompress:  # pylint: disable=too-few-public-methods


=====================================
pooch/tests/test_core.py
=====================================
@@ -140,7 +140,12 @@ def test_pooch_local(data_dir_mirror):
 @pytest.mark.network
 @pytest.mark.parametrize(
     "url",
-    [BASEURL, FIGSHAREURL, ZENODOURL, DATAVERSEURL],
+    [
+        BASEURL,
+        pytest.param(FIGSHAREURL, marks=pytest.mark.figshare),
+        ZENODOURL,
+        DATAVERSEURL,
+    ],
     ids=["https", "figshare", "zenodo", "dataverse"],
 )
 def test_pooch_custom_url(url):
@@ -166,7 +171,12 @@ def test_pooch_custom_url(url):
 @pytest.mark.network
 @pytest.mark.parametrize(
     "url",
-    [BASEURL, FIGSHAREURL, ZENODOURL, DATAVERSEURL],
+    [
+        BASEURL,
+        pytest.param(FIGSHAREURL, marks=pytest.mark.figshare),
+        ZENODOURL,
+        DATAVERSEURL,
+    ],
     ids=["https", "figshare", "zenodo", "dataverse"],
 )
 def test_pooch_download(url):
@@ -627,7 +637,7 @@ def test_stream_download(fname):
 @pytest.mark.network
 @pytest.mark.parametrize(
     "url",
-    [FIGSHAREURL, ZENODOURL, DATAVERSEURL],
+    [pytest.param(FIGSHAREURL, marks=pytest.mark.figshare), ZENODOURL, DATAVERSEURL],
     ids=["figshare", "zenodo", "dataverse"],
 )
 def test_load_registry_from_doi(url):


=====================================
pooch/tests/test_downloaders.py
=====================================
@@ -12,16 +12,20 @@ import sys
 from tempfile import TemporaryDirectory
 
 import pytest
+from requests import HTTPError
+
+# Mypy doesn't like assigning None like this.
+# Can just use a guard variable
 
 try:
     import tqdm
 except ImportError:
-    tqdm = None
+    tqdm = None  # type: ignore
 
 try:
     import paramiko
 except ImportError:
-    paramiko = None
+    paramiko = None  # type: ignore
 
 from .. import Pooch
 from ..downloaders import (
@@ -34,6 +38,7 @@ from ..downloaders import (
     ZenodoRepository,
     DataverseRepository,
     doi_to_url,
+    REQUESTS_HEADERS,
 )
 from ..processors import Unzip
 from .utils import (
@@ -60,7 +65,7 @@ DATAVERSEURL = pooch_test_dataverse_url()
     "url",
     [
         BASEURL + "tiny-data.txt",  # HTTPDownloader
-        FIGSHAREURL,  # DOIDownloader
+        ZENODOURL,  # DOIDownloader
     ],
 )
 def test_progressbar_kwarg_passed(url):
@@ -100,16 +105,19 @@ def test_invalid_doi_repository():
 @pytest.mark.network
 def test_doi_url_not_found():
     "Should fail if the DOI is not found"
-    with pytest.raises(ValueError) as exc:
+    with pytest.raises(HTTPError):
         doi_to_url(doi="NOTAREALDOI")
-    assert "Is the DOI correct?" in str(exc.value)
 
 
 @pytest.mark.network
 @pytest.mark.parametrize(
     "repository,doi",
     [
-        (FigshareRepository, "10.6084/m9.figshare.14763051.v1"),
+        pytest.param(
+            FigshareRepository,
+            "10.6084/m9.figshare.14763051.v1",
+            marks=pytest.mark.figshare,
+        ),
         (ZenodoRepository, "10.5281/zenodo.4924875"),
         (DataverseRepository, "10.11588/data/TKCFEF"),
     ],
@@ -127,7 +135,7 @@ def test_figshare_url_file_not_found(repository, doi):
 @pytest.mark.network
 @pytest.mark.parametrize(
     "url",
-    [FIGSHAREURL, ZENODOURL, DATAVERSEURL],
+    [pytest.param(FIGSHAREURL, marks=pytest.mark.figshare), ZENODOURL, DATAVERSEURL],
     ids=["figshare", "zenodo", "dataverse"],
 )
 def test_doi_downloader(url):
@@ -161,6 +169,7 @@ def test_zenodo_downloader_with_slash_in_fname():
 
 
 @pytest.mark.network
+@pytest.mark.figshare
 def test_figshare_unspecified_version():
     """
     Test if passing a Figshare url without a version warns about it, but still
@@ -180,6 +189,7 @@ def test_figshare_unspecified_version():
 
 
 @pytest.mark.network
+@pytest.mark.figshare
 @pytest.mark.parametrize(
     "version, missing, present",
     [
@@ -266,7 +276,10 @@ def test_downloader_progressbar_fails(downloader):
 @pytest.mark.skipif(tqdm is None, reason="requires tqdm")
 @pytest.mark.parametrize(
     "url,downloader",
-    [(BASEURL, HTTPDownloader), (FIGSHAREURL, DOIDownloader)],
+    [
+        (BASEURL, HTTPDownloader),
+        pytest.param(FIGSHAREURL, DOIDownloader, marks=pytest.mark.figshare),
+    ],
     ids=["http", "figshare"],
 )
 def test_downloader_progressbar(url, downloader, capsys):
@@ -543,3 +556,27 @@ class TestZenodoAPISupport:
         # Populate registry
         downloader.populate_registry(puppy)
         assert puppy.registry == {self.file_name: f"md5:{self.file_checksum}"}
+
+
+class TestDOIDownloaderHeaders:
+    """Test the headers argument in DOIDownloader."""
+
+    def test_default_headers(self):
+        """Test the default value for headers."""
+        downloader = DOIDownloader()
+        assert downloader.headers == REQUESTS_HEADERS
+        downloader = DOIDownloader(headers=None)
+        assert downloader.headers == REQUESTS_HEADERS
+
+    def test_overwrite_headers(self):
+        """Test overwriting for headers."""
+        downloader = DOIDownloader(headers={"custom": "field"})
+        expected_headers = {
+            "custom": "field",
+        }
+        assert downloader.headers == expected_headers
+
+    def test_headers_empty_dict(self):
+        """Test passing an empty dict to headers."""
+        downloader = DOIDownloader(headers={})
+        assert downloader.headers == {}


=====================================
pooch/tests/test_hashes.py
=====================================
@@ -19,7 +19,7 @@ try:
 
     XXHASH_MAJOR_VERSION = int(xxhash.VERSION.split(".", maxsplit=1)[0])
 except ImportError:
-    xxhash = None
+    xxhash = None  # type: ignore[assignment]
     XXHASH_MAJOR_VERSION = 0
 
 from ..core import Pooch


=====================================
pooch/typing/__init__.py
=====================================
@@ -0,0 +1,72 @@
+# Copyright (c) 2018 The Pooch Developers.
+# Distributed under the terms of the BSD 3-Clause License.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
+#
+"""
+Custom classes for type annotations
+
+This module provides additional `PEP 484 <https://peps.python.org/pep-0484/>`_
+type aliases used in ``pooch``'s codebase.
+"""
+
+import os
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Literal,
+    Optional,
+    Protocol,
+    TypedDict,
+    Union,
+)
+
+# Import Pooch only if TYPE_CHECKING is true to avoid circular loops at runtime
+if TYPE_CHECKING:
+    from .. import Pooch
+
+
+__all__ = [
+    "Action",
+    "Downloader",
+    "PathType",
+    "PathInputType",
+    "ParsedURL",
+    "Processor",
+]
+
+
+Action = Literal["download", "fetch", "update"]
+PathType = Union[str, os.PathLike]
+PathInputType = Union[PathType, list[PathType], tuple[PathType]]
+Processor = Callable[[str, Action, Optional["Pooch"]], Any]
+
+
+class Downloader(Protocol):
+    """
+    Class used to define the type definition for the downloader function.
+    """
+
+    # pylint: disable=too-few-public-methods
+    def __call__(  # noqa: E704
+        self,
+        fname: str,
+        action: Optional[PathType],
+        pooch: Optional["Pooch"],
+        *,
+        check_only: Optional[bool] = None,
+    ) -> Any: ...
+
+
+class ParsedURL(TypedDict):
+    """
+    Type for a dictionary generated after parsing a URL.
+
+    The dictionary contains three keys: protocol, netloc and path.
+    """
+
+    protocol: str
+    netloc: str
+    path: str


=====================================
pooch/utils.py
=====================================
@@ -7,6 +7,7 @@
 """
 Misc utilities
 """
+
 import logging
 import os
 import tempfile
@@ -15,16 +16,19 @@ from pathlib import Path
 from urllib.parse import urlsplit
 from contextlib import contextmanager
 import warnings
+from typing import Optional, Any, Generator
 
 import platformdirs
 from packaging.version import Version
 
+from .typing import ParsedURL, PathType, PathInputType
+
 
 LOGGER = logging.Logger("pooch")
 LOGGER.addHandler(logging.StreamHandler())
 
 
-def file_hash(*args, **kwargs):
+def file_hash(*args, **kwargs) -> Any:
     """
     WARNING: Importing this function from pooch.utils is DEPRECATED.
     Please import from the top-level namespace (`from pooch import file_hash`)
@@ -54,7 +58,7 @@ def file_hash(*args, **kwargs):
     return new_file_hash(*args, **kwargs)
 
 
-def get_logger():
+def get_logger() -> logging.Logger:
     r"""
     Get the default event logger.
 
@@ -70,7 +74,7 @@ def get_logger():
     return LOGGER
 
 
-def os_cache(project):
+def os_cache(project: str) -> Path:
     r"""
     Default cache location based on the operating system.
 
@@ -99,7 +103,7 @@ def os_cache(project):
     return Path(platformdirs.user_cache_dir(project))
 
 
-def check_version(version, fallback="master"):
+def check_version(version: str, fallback: str = "master") -> str:
     """
     Check if a version is PEP440 compliant and there are no unreleased changes.
 
@@ -145,7 +149,7 @@ def check_version(version, fallback="master"):
     return version
 
 
-def parse_url(url):
+def parse_url(url: str) -> ParsedURL:
     """
     Parse a URL into 3 components:
 
@@ -198,7 +202,9 @@ def parse_url(url):
     return {"protocol": protocol, "netloc": netloc, "path": path}
 
 
-def cache_location(path, env=None, version=None):
+def cache_location(
+    path: PathInputType, env: Optional[str] = None, version: Optional[str] = None
+) -> Path:
     """
     Location of the cache given a base path and optional configuration.
 
@@ -235,7 +241,7 @@ def cache_location(path, env=None, version=None):
     return Path(path)
 
 
-def make_local_storage(path, env=None):
+def make_local_storage(path: PathType, env: Optional[str] = None) -> None:
     """
     Create the local cache directory and make sure it's writable.
 
@@ -277,7 +283,7 @@ def make_local_storage(path, env=None):
 
 
 @contextmanager
-def temporary_file(path=None):
+def temporary_file(path: Optional[PathType] = None) -> Generator[str, None, None]:
     """
     Create a closed and named temporary file and make sure it's cleaned up.
 
@@ -297,7 +303,7 @@ def temporary_file(path=None):
         The path to the temporary file.
 
     """
-    tmp = tempfile.NamedTemporaryFile(delete=False, dir=path)
+    tmp = tempfile.NamedTemporaryFile(delete=False, dir=path)  # type: ignore
     # Close the temp file so that it can be opened elsewhere
     tmp.close()
     try:
@@ -307,7 +313,7 @@ def temporary_file(path=None):
             os.remove(tmp.name)
 
 
-def unique_file_name(url):
+def unique_file_name(url: str) -> str:
     """
     Create a unique file name based on the given URL.
 
@@ -341,7 +347,7 @@ def unique_file_name(url):
     181a9d52e908219c2076f55145d6a344-data.txt.gz
 
     """
-    md5 = hashlib.md5(url.encode()).hexdigest()
+    md5 = hashlib.md5(url.encode(), usedforsecurity=False).hexdigest()
     fname = parse_url(url)["path"].split("/")[-1]
     # Crop the start of the file name to fit 255 characters including the hash
     # and the :


=====================================
pyproject.toml
=====================================
@@ -9,25 +9,26 @@ maintainers = [
   {name = "Leonardo Uieda", email = "leo@uieda.com"}
 ]
 readme = "README.md"
-license = {text = "BSD-3-Clause"}
+license = "BSD-3-Clause"
 keywords = ["data", "download", "caching", "http"]
 classifiers = [
     "Development Status :: 5 - Production/Stable",
     "Intended Audience :: Science/Research",
     "Intended Audience :: Developers",
     "Intended Audience :: Education",
-    "License :: OSI Approved :: BSD License",
     "Operating System :: OS Independent",
     "Topic :: Scientific/Engineering",
     "Topic :: Software Development :: Libraries",
+    "Typing :: Typed",
     "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.7",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
 ]
-requires-python = ">=3.7"
+requires-python = ">=3.9"
 dependencies = [
     "platformdirs >= 2.5.0",
     "packaging >= 20.0",
@@ -38,6 +39,7 @@ dependencies = [
 progress = ["tqdm>=4.41.0,<5.0.0"]
 sftp = ["paramiko>=2.7.0"]
 xxhash = ["xxhash>=1.4.3"]
+test = ["pytest-httpserver", "pytest-localftpserver"]
 
 [project.urls]
 "Documentation" = "https://www.fatiando.org/pooch"
@@ -48,6 +50,11 @@ xxhash = ["xxhash>=1.4.3"]
 [tool.setuptools.packages]
 find = {}  # Scanning implicit namespaces is active by default
 
+[tool.setuptools.package-data]
+"pooch.tests.data" = ["*.txt", "*.zip", "*.gz", "*.xz", "*.bz2"]
+"pooch.tests.data.store" = ["*.txt"]
+"pooch.tests.data.store.subdir" = ["*.txt"]
+
 [build-system]
 requires = ["setuptools>=45", "wheel", "setuptools_scm[toml]>=6.2"]
 build-backend = "setuptools.build_meta"
@@ -60,6 +67,7 @@ write_to =  "pooch/_version.py"
 [tool.pytest.ini_options]
 markers = [
     "network: test requires network access",
+    "figshare: test make request to Figshare",
 ]
 
 [tool.burocrata]



View it on GitLab: https://salsa.debian.org/debian-gis-team/pooch/-/commit/1fe3ff6357f6cbee3deb1562365437a6ef691ed2

-- 
View it on GitLab: https://salsa.debian.org/debian-gis-team/pooch/-/commit/1fe3ff6357f6cbee3deb1562365437a6ef691ed2
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-grass-devel/attachments/20260201/f90f0e55/attachment-0001.htm>


More information about the Pkg-grass-devel mailing list