[Git][debian-gis-team/python-geopandas][upstream] New upstream version 0.14.1

Sat Nov 11 13:30:34 GMT 2023


Bas Couwenberg pushed to branch upstream at Debian GIS Project / python-geopandas


Commits:
847b7159 by Bas Couwenberg at 2023-11-11T14:20:23+01:00
New upstream version 0.14.1
- - - - -


18 changed files:

- .github/workflows/tests.yaml
- CHANGELOG.md
- + ci/envs/312-latest-conda-forge.yaml
- ci/envs/39-latest-conda-forge_no_fiona.yaml
- doc/source/docs/user_guide/io.rst
- doc/source/gallery/plotting_basemap_background.ipynb
- doc/source/getting_started/install.rst
- geopandas/_compat.py
- geopandas/_version.py
- geopandas/explore.py
- + geopandas/io/_pyarrow_hotfix.py
- geopandas/io/arrow.py
- geopandas/io/file.py
- + geopandas/io/tests/data/arrow/test_data_v1.0.0.feather
- + geopandas/io/tests/data/arrow/test_data_v1.0.0.parquet
- geopandas/io/tests/test_file.py
- geopandas/tests/test_pandas_methods.py
- geopandas/tools/tests/test_sjoin.py


Changes:

=====================================
.github/workflows/tests.yaml
=====================================
@@ -44,6 +44,7 @@ jobs:
           - ci/envs/310-pd20-conda-forge.yaml
           - ci/envs/310-latest-conda-forge.yaml
           - ci/envs/311-latest-conda-forge.yaml
+          - ci/envs/312-latest-conda-forge.yaml
         include:
           - env: ci/envs/39-latest-conda-forge_no_fiona.yaml
             os: macos-latest


=====================================
CHANGELOG.md
=====================================
@@ -1,5 +1,11 @@
 # Changelog
 
+## Version 0.14.1 (Nov 11, 2023)
+
+- The Parquet and Feather IO functions now support the latest 1.0.0 version
+  of the GeoParquet specification (geoparquet.org) (#2663).
+- Fix `read_parquet` and `read_feather` for [CVE-2023-47248](https://www.cve.org/CVERecord?id=CVE-2023-47248) (#3070).
+
 ## Version 0.14 (Sep 15, 2023)
 
 GeoPandas will use Shapely 2.0 by default instead of PyGEOS when both Shapely >= 2.0 and


=====================================
ci/envs/312-latest-conda-forge.yaml
=====================================
@@ -0,0 +1,35 @@
+name: test
+channels:
+  - conda-forge
+dependencies:
+  - python=3.12
+  # required
+  - pandas
+  - shapely
+  - fiona
+  - pyproj
+  - packaging
+  # testing
+  - pytest
+  - pytest-cov
+  - pytest-xdist
+  - fsspec
+  # optional
+  - pyogrio
+  # - matplotlib
+  - mapclassify
+  - folium
+  - xyzservices
+  - scipy
+  - geopy
+  # - pointpats
+  - geodatasets
+  # - SQLalchemy>=2
+  # - psycopg2
+  # - libspatialite
+  # - geoalchemy2
+  - pyarrow
+  - pip
+  - pip:
+    - matplotlib
+    - pointpats


=====================================
ci/envs/39-latest-conda-forge_no_fiona.yaml
=====================================
@@ -18,7 +18,7 @@ dependencies:
   # - fsspec  # to have one non-minimal build without fsspec
   # optional
   - rtree
-  - matplotlib
+  - matplotlib>=3.6
   - mapclassify
   - folium
   - xyzservices


=====================================
doc/source/docs/user_guide/io.rst
=====================================
@@ -276,7 +276,7 @@ the spatial information.
 
 .. note::
 
-    This is tracking version 1.0.0-beta.1 of the GeoParquet specification at:
+    This is tracking version 1.0.0 of the GeoParquet specification at:
     https://github.com/opengeospatial/geoparquet.
 
     Previous versions are still supported as well. By default, the latest


=====================================
doc/source/gallery/plotting_basemap_background.ipynb
=====================================
@@ -180,7 +180,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "By default, contextily uses the Stamen Terrain style. We can specify a\n",
+    "By default, contextily uses the OpenStreetMap HOT style. We can specify a\n",
     "different style using ``cx.providers``:\n",
     "\n"
    ]
@@ -192,7 +192,7 @@
    "outputs": [],
    "source": [
     "ax = df_wm.plot(figsize=(10, 10), alpha=0.5, edgecolor=\"k\")\n",
-    "cx.add_basemap(ax, source=cx.providers.Stamen.TonerLite)\n",
+    "cx.add_basemap(ax, source=cx.providers.CartoDB.Positron)\n",
     "ax.set_axis_off()"
    ]
   },
@@ -220,8 +220,8 @@
    "outputs": [],
    "source": [
     "ax = df_wm.plot(figsize=(10, 10), alpha=0.5, edgecolor=\"k\")\n",
-    "cx.add_basemap(ax, source=cx.providers.Stamen.TonerLite)\n",
-    "cx.add_basemap(ax, source=cx.providers.Stamen.TonerLabels)"
+    "cx.add_basemap(ax, source=cx.providers.CartoDB.PositronNoLabels)\n",
+    "cx.add_basemap(ax, source=cx.providers.CartoDB.PositronOnlyLabels)"
    ]
   },
   {
@@ -239,8 +239,8 @@
    "outputs": [],
    "source": [
     "ax = df_wm.plot(figsize=(10, 10), alpha=0.5, edgecolor=\"k\")\n",
-    "cx.add_basemap(ax, source=cx.providers.Stamen.Watercolor, zoom=12)\n",
-    "cx.add_basemap(ax, source=cx.providers.Stamen.TonerLabels, zoom=10)"
+    "cx.add_basemap(ax, source=cx.providers.CartoDB.PositronNoLabels, zoom=12)\n",
+    "cx.add_basemap(ax, source=cx.providers.CartoDB.PositronOnlyLabels, zoom=10)"
    ]
   }
  ],


=====================================
doc/source/getting_started/install.rst
=====================================
@@ -92,16 +92,14 @@ as well::
     When using pip to install GeoPandas, you need to make sure that all dependencies are
     installed correctly.
 
-    - `fiona`_ provides binary wheels with the dependencies included for Mac and Linux,
-      but not for Windows. Alternatively, you can install `pyogrio`_ which does
-      have wheels for Windows.
-    - `pyproj`_, `rtree`_, and `shapely`_ provide binary wheels with dependencies included
-      for Mac, Linux, and Windows.
-
-    Depending on your platform, you might need to compile and install their
-    C dependencies manually. We refer to the individual packages for more
-    details on installing those.
-    Using conda (see above) avoids the need to compile the dependencies yourself.
+    Our main dependencies (`shapely`_, `pyproj`_, `fiona`_, `pyogrio`_, `rtree`_) provide binary
+    wheels with dependencies included for Mac, Linux, and Windows.
+
+    However, depending on your platform or Python version, there might be no
+    pre-compiled wheels available, and then you need to compile and install their C
+    dependencies manually. We refer to the individual packages for more details on
+    installing those. Using conda (see above) avoids the need to compile the
+    dependencies yourself.
 
 Installing from source
 ----------------------


=====================================
geopandas/_compat.py
=====================================
@@ -18,6 +18,7 @@ PANDAS_GE_14 = Version(pd.__version__) >= Version("1.4.0rc0")
 PANDAS_GE_15 = Version(pd.__version__) >= Version("1.5.0")
 PANDAS_GE_20 = Version(pd.__version__) >= Version("2.0.0")
 PANDAS_GE_21 = Version(pd.__version__) >= Version("2.1.0")
+PANDAS_GE_22 = Version(pd.__version__) >= Version("2.2.0.dev0")
 
 
 # -----------------------------------------------------------------------------


=====================================
geopandas/_version.py
=====================================
@@ -25,9 +25,9 @@ def get_keywords() -> Dict[str, str]:
     # setup.py/versioneer.py will grep for the variable names, so they must
     # each be defined on a line of their own. _version.py will just call
     # get_keywords().
-    git_refnames = " (HEAD -> main, tag: v0.14.0)"
-    git_full = "0eb2a5ecdc3b7b595e406c9f7bf1e6435ad39828"
-    git_date = "2023-09-15 10:24:28 +0200"
+    git_refnames = " (tag: v0.14.1, 0.14.x)"
+    git_full = "9a9f0974db087ce303b94bfbeabc8ea136be0914"
+    git_date = "2023-11-11 10:29:16 +0100"
     keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
     return keywords
 


=====================================
geopandas/explore.py
=====================================
@@ -93,8 +93,7 @@ def _explore(
         pass :class:`xyzservices.TileProvider` object or pass custom XYZ URL.
         The current list of built-in providers (when ``xyzservices`` is not available):
 
-        ``["OpenStreetMap", "Stamen Terrain", “Stamen Toner", “Stamen Watercolor"
-        "CartoDB positron", “CartoDB dark_matter"]``
+        ``["OpenStreetMap", "CartoDB positron", "CartoDB dark_matter"]``
 
         You can pass a custom tileset to Folium by passing a Leaflet-style URL
         to the tiles parameter: ``http://{s}.yourtiles.com/{z}/{x}/{y}.png``.
@@ -908,8 +907,7 @@ def _explore_geoseries(
         pass :class:`xyzservices.TileProvider` object or pass custom XYZ URL.
         The current list of built-in providers (when ``xyzservices`` is not available):
 
-        ``["OpenStreetMap", "Stamen Terrain", “Stamen Toner", “Stamen Watercolor"
-        "CartoDB positron", “CartoDB dark_matter"]``
+        ``["OpenStreetMap", "CartoDB positron", "CartoDB dark_matter"]``
 
         You can pass a custom tileset to Folium by passing a Leaflet-style URL
         to the tiles parameter: ``http://{s}.yourtiles.com/{z}/{x}/{y}.png``.


=====================================
geopandas/io/_pyarrow_hotfix.py
=====================================
@@ -0,0 +1,73 @@
+from packaging.version import Version
+
+import pyarrow
+
+
+_ERROR_MSG = """\
+Disallowed deserialization of 'arrow.py_extension_type':
+storage_type = {storage_type}
+serialized = {serialized}
+pickle disassembly:\n{pickle_disassembly}
+
+Reading of untrusted Parquet or Feather files with a PyExtensionType column
+allows arbitrary code execution.
+If you trust this file, you can enable reading the extension type by one of:
+
+- upgrading to pyarrow >= 14.0.1, and call `pa.PyExtensionType.set_auto_load(True)`
+- install pyarrow-hotfix (`pip install pyarrow-hotfix`) and disable it by running
+  `import pyarrow_hotfix; pyarrow_hotfix.uninstall()`
+
+We strongly recommend updating your Parquet/Feather files to use extension types
+derived from `pyarrow.ExtensionType` instead, and register this type explicitly.
+See https://arrow.apache.org/docs/dev/python/extending_types.html#defining-extension-types-user-defined-types
+for more details.
+"""
+
+
+def patch_pyarrow():
+    # starting from pyarrow 14.0.1, it has its own mechanism
+    if Version(pyarrow.__version__) >= Version("14.0.1"):
+        return
+
+    # if the user has pyarrow_hotfix (https://github.com/pitrou/pyarrow-hotfix)
+    # installed, use this instead (which also ensures it works if they had
+    # called `pyarrow_hotfix.uninstall()`)
+    try:
+        import pyarrow_hotfix  # noqa: F401
+    except ImportError:
+        pass
+    else:
+        return
+
+    # if the hotfix is already installed and enabled
+    if getattr(pyarrow, "_hotfix_installed", False):
+        return
+
+    class ForbiddenExtensionType(pyarrow.ExtensionType):
+        def __arrow_ext_serialize__(self):
+            return b""
+
+        @classmethod
+        def __arrow_ext_deserialize__(cls, storage_type, serialized):
+            import io
+            import pickletools
+
+            out = io.StringIO()
+            pickletools.dis(serialized, out)
+            raise RuntimeError(
+                _ERROR_MSG.format(
+                    storage_type=storage_type,
+                    serialized=serialized,
+                    pickle_disassembly=out.getvalue(),
+                )
+            )
+
+    pyarrow.unregister_extension_type("arrow.py_extension_type")
+    pyarrow.register_extension_type(
+        ForbiddenExtensionType(pyarrow.null(), "arrow.py_extension_type")
+    )
+
+    pyarrow._hotfix_installed = True
+
+
+patch_pyarrow()


=====================================
geopandas/io/arrow.py
=====================================
@@ -12,8 +12,8 @@ from geopandas import GeoDataFrame
 import geopandas
 from .file import _expand_user
 
-METADATA_VERSION = "1.0.0-beta.1"
-SUPPORTED_VERSIONS = ["0.1.0", "0.4.0", "1.0.0-beta.1"]
+METADATA_VERSION = "1.0.0"
+SUPPORTED_VERSIONS = ["0.1.0", "0.4.0", "1.0.0-beta.1", "1.0.0"]
 # reference: https://github.com/opengeospatial/geoparquet
 
 # Metadata structure:
@@ -74,7 +74,7 @@ def _create_metadata(df, schema_version=None):
     Parameters
     ----------
     df : GeoDataFrame
-    schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', None}
+    schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', '1.0.0', None}
         GeoParquet specification version; if not provided will default to
         latest supported version.
 
@@ -295,7 +295,7 @@ def _to_parquet(
 
     Requires 'pyarrow'.
 
-    This is tracking version 1.0.0-beta.1 of the GeoParquet specification at:
+    This is tracking version 1.0.0 of the GeoParquet specification at:
     https://github.com/opengeospatial/geoparquet. Writing older versions is
     supported using the `schema_version` keyword.
 
@@ -312,7 +312,7 @@ def _to_parquet(
         output except `RangeIndex` which is stored as metadata only.
     compression : {'snappy', 'gzip', 'brotli', None}, default 'snappy'
         Name of the compression to use. Use ``None`` for no compression.
-    schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', None}
+    schema_version : {'0.1.0', '0.4.0', '1.0.0', None}
         GeoParquet specification version; if not provided will default to
         latest supported version.
     **kwargs
@@ -346,7 +346,7 @@ def _to_feather(df, path, index=None, compression=None, schema_version=None, **k
 
     Requires 'pyarrow' >= 0.17.
 
-    This is tracking version 1.0.0-beta.1 of the GeoParquet specification for
+    This is tracking version 1.0.0 of the GeoParquet specification for
     the metadata at: https://github.com/opengeospatial/geoparquet. Writing
     older versions is supported using the `schema_version` keyword.
 
@@ -364,7 +364,7 @@ def _to_feather(df, path, index=None, compression=None, schema_version=None, **k
     compression : {'zstd', 'lz4', 'uncompressed'}, optional
         Name of the compression to use. Use ``"uncompressed"`` for no
         compression. By default uses LZ4 if available, otherwise uncompressed.
-    schema_version : {'0.1.0', '0.4.0', '1.0.0-beta.1', None}
+    schema_version : {'0.1.0', '0.4.0', '1.0.0', None}
         GeoParquet specification version for the metadata; if not provided
         will default to latest supported version.
     kwargs
@@ -535,7 +535,7 @@ def _read_parquet(path, columns=None, storage_options=None, **kwargs):
       columns, the first available geometry column will be set as the geometry
       column of the returned GeoDataFrame.
 
-    Supports versions 0.1.0, 0.4.0 and 1.0.0-beta.1 of the GeoParquet
+    Supports versions 0.1.0, 0.4.0 and 1.0.0 of the GeoParquet
     specification at: https://github.com/opengeospatial/geoparquet
 
     If 'crs' key is not present in the GeoParquet metadata associated with the
@@ -587,6 +587,8 @@ def _read_parquet(path, columns=None, storage_options=None, **kwargs):
     parquet = import_optional_dependency(
         "pyarrow.parquet", extra="pyarrow is required for Parquet support."
     )
+    import geopandas.io._pyarrow_hotfix  # noqa: F401
+
     # TODO(https://github.com/pandas-dev/pandas/pull/41194): see if pandas
     # adds filesystem as a keyword and match that.
     filesystem = kwargs.pop("filesystem", None)
@@ -632,7 +634,7 @@ def _read_feather(path, columns=None, **kwargs):
       columns, the first available geometry column will be set as the geometry
       column of the returned GeoDataFrame.
 
-    Supports versions 0.1.0, 0.4.0 and 1.0.0-beta.1 of the GeoParquet
+    Supports versions 0.1.0, 0.4.0 and 1.0.0 of the GeoParquet
     specification at: https://github.com/opengeospatial/geoparquet
 
     If 'crs' key is not present in the Feather metadata associated with the
@@ -675,6 +677,7 @@ def _read_feather(path, columns=None, **kwargs):
     )
     # TODO move this into `import_optional_dependency`
     import pyarrow
+    import geopandas.io._pyarrow_hotfix  # noqa: F401
 
     if Version(pyarrow.__version__) < Version("0.17.0"):
         raise ImportError("pyarrow >= 0.17 required for Feather support")


=====================================
geopandas/io/file.py
=====================================
@@ -417,6 +417,10 @@ def _read_file_pyogrio(path_or_bytes, bbox=None, mask=None, rows=None, **kwargs)
             kwargs["max_features"] = rows
         elif isinstance(rows, slice):
             if rows.start is not None:
+                if rows.start < 0:
+                    raise ValueError(
+                        "Negative slice start not supported with the 'pyogrio' engine."
+                    )
                 kwargs["skip_features"] = rows.start
             if rows.stop is not None:
                 kwargs["max_features"] = rows.stop - (rows.start or 0)


=====================================
geopandas/io/tests/data/arrow/test_data_v1.0.0.feather
=====================================
Binary files /dev/null and b/geopandas/io/tests/data/arrow/test_data_v1.0.0.feather differ


=====================================
geopandas/io/tests/data/arrow/test_data_v1.0.0.parquet
=====================================
Binary files /dev/null and b/geopandas/io/tests/data/arrow/test_data_v1.0.0.parquet differ


=====================================
geopandas/io/tests/test_file.py
=====================================
@@ -23,8 +23,11 @@ from geopandas.tests.util import PACKAGE_DIR, validate_boro_df
 
 try:
     import pyogrio
+
+    PYOGRIO_GE_07 = Version(pyogrio.__version__) > Version("0.6.0")
 except ImportError:
     pyogrio = False
+    PYOGRIO_GE_07 = False
 
 
 try:
@@ -733,7 +736,7 @@ def test_read_file_filtered__rows_bbox(df_nybb, engine):
         1047224.3104931959,
         244317.30894023244,
     )
-    if engine == "pyogrio":
+    if engine == "pyogrio" and not PYOGRIO_GE_07:
         with pytest.raises(ValueError, match="'skip_features' must be between 0 and 1"):
             # combination bbox and rows (rows slice applied after bbox filtering!)
             filtered_df = read_file(
@@ -748,7 +751,10 @@ def test_read_file_filtered__rows_bbox(df_nybb, engine):
 
     if engine == "pyogrio":
         # TODO: support negative rows in pyogrio
-        with pytest.raises(ValueError, match="'skip_features' must be between 0 and 1"):
+        with pytest.raises(
+            ValueError,
+            match="'skip_features' must be between 0 and 1|Negative slice start",
+        ):
             filtered_df = read_file(
                 nybb_filename, bbox=bbox, rows=slice(-1, None), engine=engine
             )


=====================================
geopandas/tests/test_pandas_methods.py
=====================================
@@ -251,7 +251,10 @@ def test_astype(s, df):
     assert s.astype(str)[0] == "POINT (0 0)"
 
     res = s.astype(object)
-    if not Version(pd.__version__) == Version("2.1.0"):
+    if not (
+        (Version(pd.__version__) == Version("2.1.0"))
+        or (Version(pd.__version__) == Version("2.1.1"))
+    ):
         # https://github.com/geopandas/geopandas/issues/2948 - bug in pandas 2.1.0
         assert isinstance(res, pd.Series) and not isinstance(res, GeoSeries)
         assert res.dtype == object
@@ -682,12 +685,20 @@ def test_groupby_metadata(crs):
         lambda x: geopandas.sjoin(x, x[["geometry", "value1"]], how="inner")
     )
 
+    if compat.PANDAS_GE_22:
+        # merge sort behaviour changed in pandas #54611
+        take_indices = [0, 0, 2, 2, 1]
+        value_right = [0, 2, 0, 2, 1]
+    else:
+        take_indices = [0, 2, 0, 2, 1]
+        value_right = [0, 0, 2, 2, 1]
+
     expected = (
-        df.take([0, 2, 0, 2, 1])
+        df.take(take_indices)
         .set_index("value2", drop=False, append=True)
         .swaplevel()
         .rename(columns={"value1": "value1_left"})
-        .assign(value1_right=[0, 0, 2, 2, 1])
+        .assign(value1_right=value_right)
     )
     assert_geodataframe_equal(res.drop(columns=["index_right"]), expected)
 


=====================================
geopandas/tools/tests/test_sjoin.py
=====================================
@@ -927,6 +927,10 @@ class TestNearest:
         result5["index_right"] = result5["index_right"].astype("int64")
         assert_geodataframe_equal(result5, result4, check_like=True)
 
+    expected_index_uncapped = (
+        [1, 3, 3, 1, 2] if compat.PANDAS_GE_22 else [1, 1, 3, 3, 2]
+    )
+
     @pytest.mark.skipif(
         not (compat.USE_SHAPELY_20),
         reason=(
@@ -935,7 +939,7 @@ class TestNearest:
         ),
     )
     @pytest.mark.parametrize(
-        "max_distance,expected", [(None, [1, 1, 3, 3, 2]), (1.1, [3, 3, 1, 2])]
+        "max_distance,expected", [(None, expected_index_uncapped), (1.1, [3, 3, 1, 2])]
     )
     def test_sjoin_nearest_exclusive(self, max_distance, expected):
         geoms = shapely.points(np.arange(3), np.arange(3))



View it on GitLab: https://salsa.debian.org/debian-gis-team/python-geopandas/-/commit/847b7159455577eae96803806277b207c8ee4a25

-- 
View it on GitLab: https://salsa.debian.org/debian-gis-team/python-geopandas/-/commit/847b7159455577eae96803806277b207c8ee4a25
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-grass-devel/attachments/20231111/5e6a5178/attachment-0001.htm>