[Git][debian-gis-team/flox][upstream] New upstream version 0.10.1
Antonio Valentino (@antonio.valentino)
gitlab@salsa.debian.org
Tue Mar 25 06:39:45 GMT 2025
Antonio Valentino pushed to branch upstream at Debian GIS Project / flox
Commits:
788f62ec by Antonio Valentino at 2025-03-25T06:21:55+00:00
New upstream version 0.10.1
- - - - -
15 changed files:
- .github/workflows/ci-additional.yaml
- .github/workflows/ci.yaml
- .github/workflows/testpypi-release.yaml
- .github/workflows/upstream-dev-ci.yaml
- .readthedocs.yml
- ci/upstream-dev-env.yml
- flox/aggregate_numbagg.py
- flox/aggregations.py
- flox/core.py
- flox/dask_array_ops.py
- flox/xarray.py
- pyproject.toml
- − readthedocs.yml
- tests/test_core.py
- tests/test_xarray.py
Changes:
=====================================
.github/workflows/ci-additional.yaml
=====================================
@@ -41,7 +41,7 @@ jobs:
env:
CONDA_ENV_FILE: ci/environment.yml
- PYTHON_VERSION: "3.12"
+ PYTHON_VERSION: "3.13"
steps:
- uses: actions/checkout@v4
@@ -77,7 +77,7 @@ jobs:
--ignore flox/tests \
--cov=./ --cov-report=xml
- name: Upload code coverage to Codecov
- uses: codecov/codecov-action@v5.1.2
+ uses: codecov/codecov-action@v5.4.0
with:
file: ./coverage.xml
flags: unittests
@@ -95,7 +95,7 @@ jobs:
shell: bash -l {0}
env:
CONDA_ENV_FILE: ci/environment.yml
- PYTHON_VERSION: "3.12"
+ PYTHON_VERSION: "3.13"
steps:
- uses: actions/checkout@v4
@@ -132,7 +132,7 @@ jobs:
python -m mypy --install-types --non-interactive --cache-dir=.mypy_cache/ --cobertura-xml-report mypy_report
- name: Upload mypy coverage to Codecov
- uses: codecov/codecov-action@v5.1.2
+ uses: codecov/codecov-action@v5.4.0
with:
file: mypy_report/cobertura.xml
flags: mypy
=====================================
.github/workflows/ci.yaml
=====================================
@@ -26,14 +26,14 @@ jobs:
matrix:
os: ["ubuntu-latest"]
env: ["environment"]
- python-version: ["3.10", "3.12"]
+ python-version: ["3.10", "3.13"]
include:
- os: "windows-latest"
env: "environment"
- python-version: "3.12"
+ python-version: "3.13"
- os: "ubuntu-latest"
env: "no-dask" # "no-xarray", "no-numba"
- python-version: "3.12"
+ python-version: "3.13"
- os: "ubuntu-latest"
env: "minimal-requirements"
python-version: "3.10"
@@ -76,7 +76,7 @@ jobs:
python -c "import xarray; xarray.show_versions()"
pytest --durations=20 --durations-min=0.5 -n auto --cov=./ --cov-report=xml --hypothesis-profile ci
- name: Upload code coverage to Codecov
- uses: codecov/codecov-action@v5.1.2
+ uses: codecov/codecov-action@v5.4.0
with:
file: ./coverage.xml
flags: unittests
=====================================
.github/workflows/testpypi-release.yaml
=====================================
@@ -23,8 +23,6 @@ jobs:
- uses: actions/setup-python@v5
name: Install Python
- with:
- python-version: "3.12"
- name: Install dependencies
run: |
@@ -64,8 +62,6 @@ jobs:
steps:
- uses: actions/setup-python@v5
name: Install Python
- with:
- python-version: "3.12"
- uses: actions/download-artifact@v4
with:
name: releases
=====================================
.github/workflows/upstream-dev-ci.yaml
=====================================
@@ -34,7 +34,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: ["3.12"]
+ python-version: ["3.13"]
steps:
- uses: actions/checkout@v4
with:
@@ -93,7 +93,8 @@ jobs:
id: status
run: |
pytest -rf -n auto --cov=./ --cov-report=xml \
- --report-log output-${{ matrix.python-version }}-log.jsonl
+ --report-log output-${{ matrix.python-version }}-log.jsonl \
+ --hypothesis-profile ci
- name: Generate and publish the report
if: |
failure()
=====================================
.readthedocs.yml
=====================================
@@ -1,18 +1,15 @@
-# Read the Docs configuration file
-# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
-
version: 2
+sphinx:
+ # Path to your Sphinx configuration file.
+ configuration: docs/source/conf.py
+
build:
- os: ubuntu-22.04
+ os: "ubuntu-lts-latest"
tools:
- python: "3.11"
-sphinx:
- configuration: docs/conf.py
+ python: "mambaforge-latest"
+
+conda:
+ environment: ci/docs.yml
-python:
- install:
- - method: pip
- path: .
- extra_requirements:
- - docs
+formats: []
=====================================
ci/upstream-dev-env.yml
=====================================
@@ -2,9 +2,11 @@ name: flox-tests
channels:
- conda-forge
dependencies:
+ - asv_runner # for test_asv
- cachey
- codecov
- pooch
+ - hypothesis
- toolz
# - numpy
# - pandas
=====================================
flox/aggregate_numbagg.py
=====================================
@@ -32,6 +32,7 @@ CAST_TO = {
"nanstd": {np.int_: np.float64},
"nanfirst": {np.datetime64: np.int64, np.timedelta64: np.int64},
"nanlast": {np.datetime64: np.int64, np.timedelta64: np.int64},
+ "nancount": {np.datetime64: np.int64, np.timedelta64: np.int64},
}
=====================================
flox/aggregations.py
=====================================
@@ -590,6 +590,7 @@ class Scan:
identity: Any
# dtype of result
dtype: Any = None
+ preserves_dtype: bool = False
# "Mode" of applying binary op.
# for np.add we apply the op directly to the `state` array and the `current` array.
# for ffill, bfill we concat `state` to `current` and then run the scan again.
@@ -719,8 +720,9 @@ ffill = Scan(
reduction="nanlast",
scan="ffill",
# Important: this must be NaN otherwise, ffill does not work.
- identity=np.nan,
+ identity=dtypes.NA,
mode="concat_then_scan",
+ preserves_dtype=True,
)
bfill = Scan(
"bfill",
@@ -728,7 +730,8 @@ bfill = Scan(
reduction="nanlast",
scan="ffill",
# Important: this must be NaN otherwise, bfill does not work.
- identity=np.nan,
+ identity=dtypes.NA,
+ preserves_dtype=True,
mode="concat_then_scan",
preprocess=reverse,
finalize=reverse,
@@ -831,12 +834,11 @@ def _initialize_aggregation(
)
agg.fill_value[func] = dtypes._get_fill_value(agg.dtype["final"], agg.fill_value[func])
- fv = fill_value if fill_value is not None else agg.fill_value[agg.name]
if _is_arg_reduction(agg):
# this allows us to unravel_index easily. we have to do that nearly every time.
agg.fill_value["numpy"] = (0,)
else:
- agg.fill_value["numpy"] = (fv,)
+ agg.fill_value["numpy"] = (agg.fill_value[func],)
if finalize_kwargs is not None:
assert isinstance(finalize_kwargs, dict)
=====================================
flox/core.py
=====================================
@@ -1,6 +1,7 @@
from __future__ import annotations
import copy
+import datetime
import itertools
import logging
import math
@@ -1465,7 +1466,7 @@ def _reduce_blockwise(
return result
-def _normalize_indexes(array: DaskArray, flatblocks, blkshape) -> tuple:
+def _normalize_indexes(ndim: int, flatblocks: Sequence[int], blkshape: tuple[int, ...]) -> tuple:
"""
.blocks accessor can only accept one iterable at a time,
but can handle multiple slices.
@@ -1483,20 +1484,23 @@ def _normalize_indexes(array: DaskArray, flatblocks, blkshape) -> tuple:
if i.ndim == 0:
normalized.append(i.item())
else:
- if np.array_equal(i, np.arange(blkshape[ax])):
+ if len(i) == blkshape[ax] and np.array_equal(i, np.arange(blkshape[ax])):
normalized.append(slice(None))
- elif np.array_equal(i, np.arange(i[0], i[-1] + 1)):
- normalized.append(slice(i[0], i[-1] + 1))
+ elif _issorted(i) and np.array_equal(i, np.arange(i[0], i[-1] + 1)):
+ start = None if i[0] == 0 else i[0]
+ stop = i[-1] + 1
+ stop = None if stop == blkshape[ax] else stop
+ normalized.append(slice(start, stop))
else:
normalized.append(list(i))
- full_normalized = (slice(None),) * (array.ndim - len(normalized)) + tuple(normalized)
+ full_normalized = (slice(None),) * (ndim - len(normalized)) + tuple(normalized)
# has no iterables
noiter = list(i if not hasattr(i, "__len__") else slice(None) for i in full_normalized)
# has all iterables
alliter = {ax: i for ax, i in enumerate(full_normalized) if hasattr(i, "__len__")}
- mesh = dict(zip(alliter.keys(), np.ix_(*alliter.values())))
+ mesh = dict(zip(alliter.keys(), np.ix_(*alliter.values()))) # type: ignore[arg-type, var-annotated]
full_tuple = tuple(i if ax not in mesh else mesh[ax] for ax, i in enumerate(noiter))
@@ -1523,7 +1527,6 @@ def subset_to_blocks(
-------
dask.array
"""
- from dask.array.slicing import normalize_index
from dask.base import tokenize
if blkshape is None:
@@ -1532,10 +1535,9 @@ def subset_to_blocks(
if chunks_as_array is None:
chunks_as_array = tuple(np.array(c) for c in array.chunks)
- index = _normalize_indexes(array, flatblocks, blkshape)
+ index = _normalize_indexes(array.ndim, flatblocks, blkshape)
# These rest is copied from dask.array.core.py with slight modifications
- index = normalize_index(index, array.numblocks)
index = tuple(slice(k, k + 1) if isinstance(k, Integral) else k for k in index)
name = "groupby-cohort-" + tokenize(array, index)
@@ -2508,6 +2510,7 @@ def groupby_reduce(
(func not in ["count", "any", "all"] and not is_first_last)
# Flox's count works with non-numeric and its faster than converting.
or (func == "count" and engine != "flox")
+ # TODO: needed for npg, move to aggregate_npg
or (is_first_last and is_cftime)
)
if requires_numeric:
@@ -2709,7 +2712,11 @@ def groupby_reduce(
if is_npdatetime:
result = result.astype(datetime_dtype)
elif is_cftime:
- result = _to_pytimedelta(result, unit="us") + offset
+ asdelta = _to_pytimedelta(result, unit="us")
+ nanmask = np.isnan(result)
+ asdelta[nanmask] = datetime.timedelta(microseconds=0)
+ result = asdelta + offset
+ result[nanmask] = np.timedelta64("NaT")
return (result, *groups)
@@ -2822,9 +2829,6 @@ def groupby_scan(
# nothing to do, no NaNs!
return array
- is_bool_array = np.issubdtype(array.dtype, bool)
- array = array.astype(np.int_) if is_bool_array else array
-
if expected_groups is not None:
raise NotImplementedError("Setting `expected_groups` and binning is not supported yet.")
expected_groups = _validate_expected_groups(nby, expected_groups)
@@ -2854,6 +2858,11 @@ def groupby_scan(
if array.dtype.kind in "Mm":
cast_to = array.dtype
array = array.view(np.int64)
+ elif array.dtype.kind == "b":
+ array = array.view(np.int8)
+ cast_to = None
+ if agg.preserves_dtype:
+ cast_to = bool
else:
cast_to = None
@@ -2868,6 +2877,7 @@ def groupby_scan(
agg.dtype = np.result_type(array.dtype, np.uint)
else:
agg.dtype = array.dtype if dtype is None else dtype
+ agg.identity = xrdtypes._get_fill_value(agg.dtype, agg.identity)
(single_axis,) = axis_ # type: ignore[misc]
# avoid some roundoff error when we can.
@@ -2886,7 +2896,7 @@ def groupby_scan(
if not has_dask:
final_state = chunk_scan(inp, axis=single_axis, agg=agg, dtype=agg.dtype)
- result = _finalize_scan(final_state)
+ result = _finalize_scan(final_state, dtype=agg.dtype)
else:
result = dask_groupby_scan(inp.array, inp.group_idx, axes=axis_, agg=agg)
@@ -2939,9 +2949,9 @@ def _zip(group_idx: np.ndarray, array: np.ndarray) -> AlignedArrays:
return AlignedArrays(group_idx=group_idx, array=array)
-def _finalize_scan(block: ScanState) -> np.ndarray:
+def _finalize_scan(block: ScanState, dtype) -> np.ndarray:
assert block.result is not None
- return block.result.array
+ return block.result.array.astype(dtype, copy=False)
def dask_groupby_scan(array, by, axes: T_Axes, agg: Scan) -> DaskArray:
@@ -2984,7 +2994,7 @@ def dask_groupby_scan(array, by, axes: T_Axes, agg: Scan) -> DaskArray:
)
# 3. Unzip and extract the final result array, discard groups
- result = map_blocks(_finalize_scan, accumulated, dtype=agg.dtype)
+ result = map_blocks(partial(_finalize_scan, dtype=agg.dtype), accumulated, dtype=agg.dtype)
assert result.chunks == array.chunks
=====================================
flox/dask_array_ops.py
=====================================
@@ -1,6 +1,6 @@
import builtins
import math
-from functools import partial
+from functools import lru_cache, partial
from itertools import product
from numbers import Integral
@@ -84,14 +84,8 @@ def partial_reduce(
axis: tuple[int, ...],
block_index: int | None = None,
):
- numblocks = tuple(len(c) for c in chunks)
- ndim = len(numblocks)
- parts = [list(partition_all(split_every.get(i, 1), range(n))) for (i, n) in enumerate(numblocks)]
- keys = product(*map(range, map(len, parts)))
- out_chunks = [
- tuple(1 for p in partition_all(split_every[i], c)) if i in split_every else c
- for (i, c) in enumerate(chunks)
- ]
+ ndim = len(chunks)
+ keys, parts, out_chunks = get_parts(tuple(split_every.items()), chunks)
for k, p in zip(keys, product(*parts)):
free = {i: j[0] for (i, j) in enumerate(p) if len(j) == 1 and i not in split_every}
dummy = dict(i for i in enumerate(p) if i[0] in split_every)
@@ -101,3 +95,17 @@ def partial_reduce(
k = (*k[:-1], block_index)
dsk[(name,) + k] = (func, g)
return dsk, out_chunks
+
+
+@lru_cache
+def get_parts(split_every_items, chunks):
+ numblocks = tuple(len(c) for c in chunks)
+ split_every = dict(split_every_items)
+
+ parts = [list(partition_all(split_every.get(i, 1), range(n))) for (i, n) in enumerate(numblocks)]
+ keys = tuple(product(*map(range, map(len, parts))))
+ out_chunks = tuple(
+ tuple(1 for p in partition_all(split_every[i], c)) if i in split_every else c
+ for (i, c) in enumerate(chunks)
+ )
+ return keys, parts, out_chunks
=====================================
flox/xarray.py
=====================================
@@ -468,9 +468,9 @@ def xarray_reduce(
):
levelnames = ds_broad.indexes[name].names
if isinstance(expect3, np.ndarray):
- # TODO: workaoround for IntervalIndex issue.
+ # TODO: workaround for IntervalIndex issue.
raise NotImplementedError
- expect3 = pd.MultiIndex.from_tuples(expect3.values, names=levelnames)
+ expect3 = pd.MultiIndex.from_tuples(expect3.values.tolist(), names=levelnames)
actual[name] = expect3
if Version(xr.__version__) > Version("2022.03.0"):
actual = actual.set_coords(levelnames)
=====================================
pyproject.toml
=====================================
@@ -14,6 +14,7 @@ classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
]
dependencies = [
"pandas>=1.5",
=====================================
readthedocs.yml deleted
=====================================
@@ -1,15 +0,0 @@
-version: 2
-
-sphinx:
- # Path to your Sphinx configuration file.
- configuration: docs/source/conf.py
-
-build:
- os: "ubuntu-lts-latest"
- tools:
- python: "mambaforge-latest"
-
-conda:
- environment: ci/docs.yml
-
-formats: []
=====================================
tests/test_core.py
=====================================
@@ -5,7 +5,7 @@ import logging
import warnings
from collections.abc import Callable
from functools import partial, reduce
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
from unittest.mock import MagicMock, patch
import numpy as np
@@ -1538,7 +1538,7 @@ def test_normalize_block_indexing_1d(flatblocks, expected):
nblocks = 5
array = dask.array.ones((nblocks,), chunks=(1,))
expected = tuple(np.array(i) if isinstance(i, list) else i for i in expected)
- actual = _normalize_indexes(array, flatblocks, array.blocks.shape)
+ actual = _normalize_indexes(array.ndim, flatblocks, array.blocks.shape)
assert_equal_tuple(expected, actual)
@@ -1550,17 +1550,17 @@ def test_normalize_block_indexing_1d(flatblocks, expected):
((1, 2, 3), (0, slice(1, 4))),
((1, 3), (0, [1, 3])),
((0, 1, 3), (0, [0, 1, 3])),
- (tuple(range(10)), (slice(0, 2), slice(None))),
- ((0, 1, 3, 5, 6, 8), (slice(0, 2), [0, 1, 3])),
+ (tuple(range(10)), (slice(None, 2), slice(None))),
+ ((0, 1, 3, 5, 6, 8), (slice(None, 2), [0, 1, 3])),
((0, 3, 4, 5, 6, 8, 24), np.ix_([0, 1, 4], [0, 1, 3, 4])),
),
)
-def test_normalize_block_indexing_2d(flatblocks, expected):
+def test_normalize_block_indexing_2d(flatblocks: tuple[int, ...], expected: tuple[Any, ...]) -> None:
nblocks = 5
ndim = 2
array = dask.array.ones((nblocks,) * ndim, chunks=(1,) * ndim)
expected = tuple(np.array(i) if isinstance(i, list) else i for i in expected)
- actual = _normalize_indexes(array, flatblocks, array.blocks.shape)
+ actual = _normalize_indexes(array.ndim, flatblocks, array.blocks.shape)
assert_equal_tuple(expected, actual)
@@ -1749,15 +1749,15 @@ def test_validate_reindex() -> None:
@requires_dask
-def test_1d_blockwise_sort_optimization():
+def test_1d_blockwise_sort_optimization() -> None:
# Make sure for resampling problems sorting isn't done.
time = pd.Series(pd.date_range("2020-09-01", "2020-12-31 23:59", freq="3h"))
array = dask.array.ones((len(time),), chunks=(224,))
- actual, _ = groupby_reduce(array, time.dt.dayofyear.values, method="blockwise", func="count")
+ actual, *_ = groupby_reduce(array, time.dt.dayofyear.values, method="blockwise", func="count")
assert all("getitem" not in k for k in actual.dask)
- actual, _ = groupby_reduce(
+ actual, *_ = groupby_reduce(
array,
time.dt.dayofyear.values[::-1],
sort=True,
@@ -1766,7 +1766,7 @@ def test_1d_blockwise_sort_optimization():
)
assert any("getitem" in k for k in actual.dask.layers)
- actual, _ = groupby_reduce(
+ actual, *_ = groupby_reduce(
array,
time.dt.dayofyear.values[::-1],
sort=False,
@@ -1777,7 +1777,7 @@ def test_1d_blockwise_sort_optimization():
@requires_dask
-def test_negative_index_factorize_race_condition():
+def test_negative_index_factorize_race_condition() -> None:
# shape = (10, 2000)
# chunks = ((shape[0]-1,1), 10)
shape = (101, 174000)
@@ -1804,17 +1804,17 @@ def test_negative_index_factorize_race_condition():
@pytest.mark.parametrize("sort", [True, False])
-def test_expected_index_conversion_passthrough_range_index(sort):
+def test_expected_index_conversion_passthrough_range_index(sort) -> None:
index = pd.RangeIndex(100)
- actual = _convert_expected_groups_to_index(expected_groups=(index,), isbin=(False,), sort=(sort,))
+ actual = _convert_expected_groups_to_index(expected_groups=(index,), isbin=(False,), sort=(sort,)) # type: ignore[call-overload]
assert actual[0] is index
-def test_method_check_numpy():
+def test_method_check_numpy() -> None:
bins = [-2, -1, 0, 1, 2]
field = np.ones((5, 3))
by = np.array([[-1.5, -1.5, 0.5, 1.5, 1.5] * 3]).reshape(5, 3)
- actual, _ = groupby_reduce(
+ actual, *_ = groupby_reduce(
field,
by,
expected_groups=pd.IntervalIndex.from_breaks(bins),
@@ -1825,7 +1825,7 @@ def test_method_check_numpy():
expected = np.array([6, np.nan, 3, 6])
assert_equal(actual, expected)
- actual, _ = groupby_reduce(
+ actual, *_ = groupby_reduce(
field,
by,
expected_groups=pd.IntervalIndex.from_breaks(bins),
@@ -1845,7 +1845,7 @@ def test_method_check_numpy():
@pytest.mark.parametrize("dtype", [None, np.float64])
-def test_choose_engine(dtype):
+def test_choose_engine(dtype) -> None:
numbagg_possible = HAS_NUMBAGG and dtype is None
default = "numbagg" if numbagg_possible else "numpy"
mean = _initialize_aggregation(
@@ -1887,10 +1887,10 @@ def test_choose_engine(dtype):
assert _choose_engine(np.array([1, 1, 2, 2]), agg=argmax) == "numpy"
-def test_xarray_fill_value_behaviour():
+def test_xarray_fill_value_behaviour() -> None:
bar = np.array([1, 2, 3, np.nan, np.nan, np.nan, 4, 5, np.nan, np.nan])
times = np.arange(0, 20, 2)
- actual, _ = groupby_reduce(bar, times, func="nansum", expected_groups=(np.arange(19),))
+ actual, *_ = groupby_reduce(bar, times, func="nansum", expected_groups=(np.arange(19),))
nan = np.nan
# fmt: off
expected = np.array(
@@ -1905,7 +1905,7 @@ def test_xarray_fill_value_behaviour():
@pytest.mark.parametrize("func", ["nanquantile", "quantile"])
@pytest.mark.parametrize("chunk", [pytest.param(True, marks=requires_dask), False])
@pytest.mark.parametrize("by_ndim", [1, 2])
-def test_multiple_quantiles(q, chunk, func, by_ndim):
+def test_multiple_quantiles(q, chunk, func, by_ndim) -> None:
array = np.array([[1, -1, np.nan, 3, 4, 10, 5], [1, np.nan, np.nan, 3, 4, np.nan, np.nan]])
labels = np.array([0, 0, 0, 1, 0, 1, 1])
if by_ndim == 2:
@@ -1916,11 +1916,11 @@ def test_multiple_quantiles(q, chunk, func, by_ndim):
if chunk:
array = dask.array.from_array(array, chunks=(1,) + (-1,) * by_ndim)
- actual, _ = groupby_reduce(array, labels, func=func, finalize_kwargs=dict(q=q), axis=axis)
+ actual, *_ = groupby_reduce(array, labels, func=func, finalize_kwargs=dict(q=q), axis=axis)
sorted_array = array[..., [0, 1, 2, 4, 3, 5, 6]]
f = partial(getattr(np, func), q=q, axis=axis, keepdims=True)
if chunk:
- sorted_array = sorted_array.compute()
+ sorted_array = sorted_array.compute() # type: ignore[attr-defined]
expected = np.concatenate((f(sorted_array[..., :4]), f(sorted_array[..., 4:])), axis=-1)
if by_ndim == 2:
expected = expected.squeeze(axis=-2)
@@ -1928,7 +1928,7 @@ def test_multiple_quantiles(q, chunk, func, by_ndim):
@pytest.mark.parametrize("dtype", ["U3", "S3"])
-def test_nanlen_string(dtype, engine):
+def test_nanlen_string(dtype, engine) -> None:
array = np.array(["ABC", "DEF", "GHI", "JKL", "MNO", "PQR"], dtype=dtype)
by = np.array([0, 0, 1, 2, 1, 0])
expected = np.array([3, 2, 1], dtype=np.intp)
@@ -1936,18 +1936,17 @@ def test_nanlen_string(dtype, engine):
assert_equal(expected, actual)
-def test_cumusm():
+def test_cumusm() -> None:
array = np.array([1, 1, 1], dtype=np.uint64)
by = np.array([0] * array.shape[-1])
- kwargs = {"func": "nancumsum", "axis": -1}
expected = np.nancumsum(array, axis=-1)
- actual = groupby_scan(array, by, **kwargs)
+ actual = groupby_scan(array, by, func="nancumsum", axis=-1)
assert_equal(expected, actual)
if has_dask:
da = dask.array.from_array(array, chunks=2)
- actual = groupby_scan(da, by, **kwargs)
+ actual = groupby_scan(da, by, func="nancumsum", axis=-1)
assert_equal(expected, actual)
@@ -1962,7 +1961,7 @@ def test_cumusm():
@pytest.mark.parametrize("size", ((1, 12), (12,), (12, 9)))
@pytest.mark.parametrize("add_nan_by", [True, False])
@pytest.mark.parametrize("func", ["ffill", "bfill"])
-def test_ffill_bfill(chunks, size, add_nan_by, func):
+def test_ffill_bfill(chunks, size, add_nan_by, func) -> None:
array, by = gen_array_by(size, func)
if chunks:
array = dask.array.from_array(array, chunks=chunks)
@@ -1976,11 +1975,11 @@ def test_ffill_bfill(chunks, size, add_nan_by, func):
@requires_dask
-def test_blockwise_nans():
+def test_blockwise_nans() -> None:
array = dask.array.ones((1, 10), chunks=2)
by = np.array([-1, 0, -1, 1, -1, 2, -1, 3, 4, 4])
- actual, actual_groups = flox.groupby_reduce(array, by, func="sum", expected_groups=pd.RangeIndex(0, 5))
- expected, expected_groups = flox.groupby_reduce(
+ actual, *actual_groups = flox.groupby_reduce(array, by, func="sum", expected_groups=pd.RangeIndex(0, 5))
+ expected, *expected_groups = flox.groupby_reduce(
array.compute(), by, func="sum", expected_groups=pd.RangeIndex(0, 5)
)
assert_equal(expected_groups, actual_groups)
@@ -1989,11 +1988,11 @@ def test_blockwise_nans():
@pytest.mark.parametrize("func", ["sum", "prod", "count", "nansum"])
@pytest.mark.parametrize("engine", ["flox", "numpy"])
-def test_agg_dtypes(func, engine):
+def test_agg_dtypes(func, engine) -> None:
# regression test for GH388
counts = np.array([0, 2, 1, 0, 1])
group = np.array([1, 1, 1, 2, 2])
- actual, _ = groupby_reduce(
+ actual, *_ = groupby_reduce(
counts, group, expected_groups=(np.array([1, 2]),), func=func, dtype="uint8", engine=engine
)
expected = _get_array_func(func)(counts, dtype="uint8")
@@ -2001,38 +2000,56 @@ def test_agg_dtypes(func, engine):
@requires_dask
-def test_blockwise_avoid_rechunk():
+def test_blockwise_avoid_rechunk() -> None:
array = dask.array.zeros((6,), chunks=(2, 4), dtype=np.int64)
by = np.array(["1", "1", "0", "", "0", ""], dtype="<U1")
- actual, groups = groupby_reduce(array, by, func="first")
- assert_equal(groups, ["", "0", "1"])
+ actual, *groups = groupby_reduce(array, by, func="first")
+ assert_equal(groups, [["", "0", "1"]])
assert_equal(actual, np.array([0, 0, 0], dtype=np.int64))
-def test_datetime_minmax(engine):
+def test_datetime_minmax(engine) -> None:
# GH403
array = np.array([np.datetime64("2000-01-01"), np.datetime64("2000-01-02"), np.datetime64("2000-01-03")])
by = np.array([0, 0, 1])
- actual, _ = flox.groupby_reduce(array, by, func="nanmin", engine=engine)
+ actual, *_ = flox.groupby_reduce(array, by, func="nanmin", engine=engine)
expected = array[[0, 2]]
assert_equal(expected, actual)
expected = array[[1, 2]]
- actual, _ = flox.groupby_reduce(array, by, func="nanmax", engine=engine)
+ actual, *_ = flox.groupby_reduce(array, by, func="nanmax", engine=engine)
assert_equal(expected, actual)
@pytest.mark.parametrize("func", ["first", "last", "nanfirst", "nanlast"])
-def test_datetime_timedelta_first_last(engine, func):
+def test_datetime_timedelta_first_last(engine, func) -> None:
import flox
idx = 0 if "first" in func else -1
+ idx1 = 2 if "first" in func else -1
+ ## datetime
dt = pd.date_range("2001-01-01", freq="d", periods=5).values
by = np.ones(dt.shape, dtype=int)
- actual, _ = flox.groupby_reduce(dt, by, func=func, engine=engine)
+ actual, *_ = flox.groupby_reduce(dt, by, func=func, engine=engine)
assert_equal(actual, dt[[idx]])
+ # missing group
+ by = np.array([0, 2, 3, 3, 3])
+ actual, *_ = flox.groupby_reduce(
+ dt, by, expected_groups=([0, 1, 2, 3],), func=func, engine=engine, fill_value=dtypes.NA
+ )
+ assert_equal(actual, [dt[0], np.datetime64("NaT"), dt[1], dt[idx1]])
+
+ ## timedelta
dt = dt - dt[0]
- actual, _ = flox.groupby_reduce(dt, by, func=func, engine=engine)
+ by = np.ones(dt.shape, dtype=int)
+ actual, *_ = flox.groupby_reduce(dt, by, func=func, engine=engine)
assert_equal(actual, dt[[idx]])
+
+ # missing group
+ by = np.array([0, 2, 3, 3, 3])
+ actual, *_ = flox.groupby_reduce(
+ dt, by, expected_groups=([0, 1, 2, 3],), func=func, engine=engine, fill_value=dtypes.NA
+ )
+ assert_equal(actual, [dt[0], np.timedelta64("NaT"), dt[1], dt[idx1]])
=====================================
tests/test_xarray.py
=====================================
@@ -6,6 +6,7 @@ import pytest
xr = pytest.importorskip("xarray")
# isort: on
+from flox import xrdtypes as dtypes
from flox.xarray import rechunk_for_blockwise, xarray_reduce
from . import (
@@ -193,13 +194,25 @@ def test_validate_expected_groups(expected_groups):
@requires_cftime
+@pytest.mark.parametrize("indexer", [slice(None), pytest.param(slice(12), id="missing-group")])
+@pytest.mark.parametrize("expected_groups", [None, [0, 1, 2, 3]])
@pytest.mark.parametrize("func", ["first", "last", "min", "max", "count"])
-def test_xarray_reduce_cftime_var(engine, func):
+def test_xarray_reduce_cftime_var(engine, indexer, expected_groups, func):
times = xr.date_range("1980-09-01 00:00", "1982-09-18 00:00", freq="ME", calendar="noleap")
ds = xr.Dataset({"var": ("time", times)}, coords={"time": np.repeat(np.arange(4), 6)})
+ ds = ds.isel(time=indexer)
- actual = xarray_reduce(ds, ds.time, func=func)
+ actual = xarray_reduce(
+ ds,
+ ds.time,
+ func=func,
+ fill_value=dtypes.NA if func in ["first", "last"] else np.nan,
+ engine=engine,
+ expected_groups=expected_groups,
+ )
expected = getattr(ds.groupby("time"), func)()
+ if expected_groups is not None:
+ expected = expected.reindex(time=expected_groups)
xr.testing.assert_identical(actual, expected)
View it on GitLab: https://salsa.debian.org/debian-gis-team/flox/-/commit/788f62ec4113eb26e0f60f468ff1a8f82b01eaa9