[Git][debian-gis-team/flox][upstream] New upstream version 0.10.1
Antonio Valentino (@antonio.valentino)
gitlab@salsa.debian.org
Tue Mar 25 06:39:45 GMT 2025
Antonio Valentino pushed to branch upstream at Debian GIS Project / flox
Commits:
788f62ec by Antonio Valentino at 2025-03-25T06:21:55+00:00
New upstream version 0.10.1
- - - - -
15 changed files:
- .github/workflows/ci-additional.yaml
- .github/workflows/ci.yaml
- .github/workflows/testpypi-release.yaml
- .github/workflows/upstream-dev-ci.yaml
- .readthedocs.yml
- ci/upstream-dev-env.yml
- flox/aggregate_numbagg.py
- flox/aggregations.py
- flox/core.py
- flox/dask_array_ops.py
- flox/xarray.py
- pyproject.toml
- − readthedocs.yml
- tests/test_core.py
- tests/test_xarray.py
Changes:
=====================================
.github/workflows/ci-additional.yaml
=====================================
@@ -41,7 +41,7 @@ jobs:
env:
CONDA_ENV_FILE: ci/environment.yml
- PYTHON_VERSION: "3.12"
+ PYTHON_VERSION: "3.13"
steps:
- uses: actions/checkout@v4
@@ -77,7 +77,7 @@ jobs:
--ignore flox/tests \
--cov=./ --cov-report=xml
- name: Upload code coverage to Codecov
- uses: codecov/codecov-action@v5.1.2
+ uses: codecov/codecov-action@v5.4.0
with:
file: ./coverage.xml
flags: unittests
@@ -95,7 +95,7 @@ jobs:
shell: bash -l {0}
env:
CONDA_ENV_FILE: ci/environment.yml
- PYTHON_VERSION: "3.12"
+ PYTHON_VERSION: "3.13"
steps:
- uses: actions/checkout@v4
@@ -132,7 +132,7 @@ jobs:
python -m mypy --install-types --non-interactive --cache-dir=.mypy_cache/ --cobertura-xml-report mypy_report
- name: Upload mypy coverage to Codecov
- uses: codecov/codecov-action@v5.1.2
+ uses: codecov/codecov-action@v5.4.0
with:
file: mypy_report/cobertura.xml
flags: mypy
=====================================
.github/workflows/ci.yaml
=====================================
@@ -26,14 +26,14 @@ jobs:
matrix:
os: ["ubuntu-latest"]
env: ["environment"]
- python-version: ["3.10", "3.12"]
+ python-version: ["3.10", "3.13"]
include:
- os: "windows-latest"
env: "environment"
- python-version: "3.12"
+ python-version: "3.13"
- os: "ubuntu-latest"
env: "no-dask" # "no-xarray", "no-numba"
- python-version: "3.12"
+ python-version: "3.13"
- os: "ubuntu-latest"
env: "minimal-requirements"
python-version: "3.10"
@@ -76,7 +76,7 @@ jobs:
python -c "import xarray; xarray.show_versions()"
pytest --durations=20 --durations-min=0.5 -n auto --cov=./ --cov-report=xml --hypothesis-profile ci
- name: Upload code coverage to Codecov
- uses: codecov/codecov-action@v5.1.2
+ uses: codecov/codecov-action@v5.4.0
with:
file: ./coverage.xml
flags: unittests
=====================================
.github/workflows/testpypi-release.yaml
=====================================
@@ -23,8 +23,6 @@ jobs:
- uses: actions/setup-python@v5
name: Install Python
- with:
- python-version: "3.12"
- name: Install dependencies
run: |
@@ -64,8 +62,6 @@ jobs:
steps:
- uses: actions/setup-python@v5
name: Install Python
- with:
- python-version: "3.12"
- uses: actions/download-artifact@v4
with:
name: releases
=====================================
.github/workflows/upstream-dev-ci.yaml
=====================================
@@ -34,7 +34,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- python-version: ["3.12"]
+ python-version: ["3.13"]
steps:
- uses: actions/checkout@v4
with:
@@ -93,7 +93,8 @@ jobs:
id: status
run: |
pytest -rf -n auto --cov=./ --cov-report=xml \
- --report-log output-${{ matrix.python-version }}-log.jsonl
+ --report-log output-${{ matrix.python-version }}-log.jsonl \
+ --hypothesis-profile ci
- name: Generate and publish the report
if: |
failure()
=====================================
.readthedocs.yml
=====================================
@@ -1,18 +1,15 @@
-# Read the Docs configuration file
-# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
-
version: 2
+sphinx:
+ # Path to your Sphinx configuration file.
+ configuration: docs/source/conf.py
+
build:
- os: ubuntu-22.04
+ os: "ubuntu-lts-latest"
tools:
- python: "3.11"
-sphinx:
- configuration: docs/conf.py
+ python: "mambaforge-latest"
+
+conda:
+ environment: ci/docs.yml
-python:
- install:
- - method: pip
- path: .
- extra_requirements:
- - docs
+formats: []
=====================================
ci/upstream-dev-env.yml
=====================================
@@ -2,9 +2,11 @@ name: flox-tests
channels:
- conda-forge
dependencies:
+ - asv_runner # for test_asv
- cachey
- codecov
- pooch
+ - hypothesis
- toolz
# - numpy
# - pandas
=====================================
flox/aggregate_numbagg.py
=====================================
@@ -32,6 +32,7 @@ CAST_TO = {
"nanstd": {np.int_: np.float64},
"nanfirst": {np.datetime64: np.int64, np.timedelta64: np.int64},
"nanlast": {np.datetime64: np.int64, np.timedelta64: np.int64},
+ "nancount": {np.datetime64: np.int64, np.timedelta64: np.int64},
}
=====================================
flox/aggregations.py
=====================================
@@ -590,6 +590,7 @@ class Scan:
identity: Any
# dtype of result
dtype: Any = None
+ preserves_dtype: bool = False
# "Mode" of applying binary op.
# for np.add we apply the op directly to the `state` array and the `current` array.
# for ffill, bfill we concat `state` to `current` and then run the scan again.
@@ -719,8 +720,9 @@ ffill = Scan(
reduction="nanlast",
scan="ffill",
# Important: this must be NaN otherwise, ffill does not work.
- identity=np.nan,
+ identity=dtypes.NA,
mode="concat_then_scan",
+ preserves_dtype=True,
)
bfill = Scan(
"bfill",
@@ -728,7 +730,8 @@ bfill = Scan(
reduction="nanlast",
scan="ffill",
# Important: this must be NaN otherwise, bfill does not work.
- identity=np.nan,
+ identity=dtypes.NA,
+ preserves_dtype=True,
mode="concat_then_scan",
preprocess=reverse,
finalize=reverse,
@@ -831,12 +834,11 @@ def _initialize_aggregation(
)
agg.fill_value[func] = dtypes._get_fill_value(agg.dtype["final"], agg.fill_value[func])
- fv = fill_value if fill_value is not None else agg.fill_value[agg.name]
if _is_arg_reduction(agg):
# this allows us to unravel_index easily. we have to do that nearly every time.
agg.fill_value["numpy"] = (0,)
else:
- agg.fill_value["numpy"] = (fv,)
+ agg.fill_value["numpy"] = (agg.fill_value[func],)
if finalize_kwargs is not None:
assert isinstance(finalize_kwargs, dict)
=====================================
flox/core.py
=====================================
@@ -1,6 +1,7 @@
from __future__ import annotations
import copy
+import datetime
import itertools
import logging
import math
@@ -1465,7 +1466,7 @@ def _reduce_blockwise(
return result
-def _normalize_indexes(array: DaskArray, flatblocks, blkshape) -> tuple:
+def _normalize_indexes(ndim: int, flatblocks: Sequence[int], blkshape: tuple[int, ...]) -> tuple:
"""
.blocks accessor can only accept one iterable at a time,
but can handle multiple slices.
@@ -1483,20 +1484,23 @@ def _normalize_indexes(array: DaskArray, flatblocks, blkshape) -> tuple:
if i.ndim == 0:
normalized.append(i.item())
else:
- if np.array_equal(i, np.arange(blkshape[ax])):
+ if len(i) == blkshape[ax] and np.array_equal(i, np.arange(blkshape[ax])):
normalized.append(slice(None))
- elif np.array_equal(i, np.arange(i[0], i[-1] + 1)):
- normalized.append(slice(i[0], i[-1] + 1))
+ elif _issorted(i) and np.array_equal(i, np.arange(i[0], i[-1] + 1)):
+ start = None if i[0] == 0 else i[0]
+ stop = i[-1] + 1
+ stop = None if stop == blkshape[ax] else stop
+ normalized.append(slice(start, stop))
else:
normalized.append(list(i))
- full_normalized = (slice(None),) * (array.ndim - len(normalized)) + tuple(normalized)
+ full_normalized = (slice(None),) * (ndim - len(normalized)) + tuple(normalized)
# has no iterables
noiter = list(i if not hasattr(i, "__len__") else slice(None) for i in full_normalized)
# has all iterables
alliter = {ax: i for ax, i in enumerate(full_normalized) if hasattr(i, "__len__")}
- mesh = dict(zip(alliter.keys(), np.ix_(*alliter.values())))
+ mesh = dict(zip(alliter.keys(), np.ix_(*alliter.values()))) # type: ignore[arg-type, var-annotated]
full_tuple = tuple(i if ax not in mesh else mesh[ax] for ax, i in enumerate(noiter))
@@ -1523,7 +1527,6 @@ def subset_to_blocks(
-------
dask.array
"""
- from dask.array.slicing import normalize_index
from dask.base import tokenize
if blkshape is None:
@@ -1532,10 +1535,9 @@ def subset_to_blocks(
if chunks_as_array is None:
chunks_as_array = tuple(np.array(c) for c in array.chunks)
- index = _normalize_indexes(array, flatblocks, blkshape)
+ index = _normalize_indexes(array.ndim, flatblocks, blkshape)
# These rest is copied from dask.array.core.py with slight modifications
- index = normalize_index(index, array.numblocks)
index = tuple(slice(k, k + 1) if isinstance(k, Integral) else k for k in index)
name = "groupby-cohort-" + tokenize(array, index)
@@ -2508,6 +2510,7 @@ def groupby_reduce(
(func not in ["count", "any", "all"] and not is_first_last)
# Flox's count works with non-numeric and its faster than converting.
or (func == "count" and engine != "flox")
+ # TODO: needed for npg, move to aggregate_npg
or (is_first_last and is_cftime)
)
if requires_numeric:
@@ -2709,7 +2712,11 @@ def groupby_reduce(
if is_npdatetime:
result = result.astype(datetime_dtype)
elif is_cftime:
- result = _to_pytimedelta(result, unit="us") + offset
+ asdelta = _to_pytimedelta(result, unit="us")
+ nanmask = np.isnan(result)
+ asdelta[nanmask] = datetime.timedelta(microseconds=0)
+ result = asdelta + offset
+ result[nanmask] = np.timedelta64("NaT")
return (result, *groups)
@@ -2822,9 +2829,6 @@ def groupby_scan(
# nothing to do, no NaNs!
return array
- is_bool_array = np.issubdtype(array.dtype, bool)
- array = array.astype(np.int_) if is_bool_array else array
-
if expected_groups is not None:
raise NotImplementedError("Setting `expected_groups` and binning is not supported yet.")
expected_groups = _validate_expected_groups(nby, expected_groups)
@@ -2854,6 +2858,11 @@ def groupby_scan(
if array.dtype.kind in "Mm":
cast_to = array.dtype
array = array.view(np.int64)
+ elif array.dtype.kind == "b":
+ array = array.view(np.int8)
+ cast_to = None
+ if agg.preserves_dtype:
+ cast_to = bool
else:
cast_to = None
@@ -2868,6 +2877,7 @@ def groupby_scan(
agg.dtype = np.result_type(array.dtype, np.uint)
else:
agg.dtype = array.dtype if dtype is None else dtype
+ agg.identity = xrdtypes._get_fill_value(agg.dtype, agg.identity)
(single_axis,) = axis_ # type: ignore[misc]
# avoid some roundoff error when we can.
@@ -2886,7 +2896,7 @@ def groupby_scan(
if not has_dask:
final_state = chunk_scan(inp, axis=single_axis, agg=agg, dtype=agg.dtype)
- result = _finalize_scan(final_state)
+ result = _finalize_scan(final_state, dtype=agg.dtype)
else:
result = dask_groupby_scan(inp.array, inp.group_idx, axes=axis_, agg=agg)
@@ -2939,9 +2949,9 @@ def _zip(group_idx: np.ndarray, array: np.ndarray) -> AlignedArrays:
return AlignedArrays(group_idx=group_idx, array=array)
-def _finalize_scan(block: ScanState) -> np.ndarray:
+def _finalize_scan(block: ScanState, dtype) -> np.ndarray:
assert block.result is not None
- return block.result.array
+ return block.result.array.astype(dtype, copy=False)
def dask_groupby_scan(array, by, axes: T_Axes, agg: Scan) -> DaskArray:
@@ -2984,7 +2994,7 @@ def dask_groupby_scan(array, by, axes: T_Axes, agg: Scan) -> DaskArray:
)
# 3. Unzip and extract the final result array, discard groups
- result = map_blocks(_finalize_scan, accumulated, dtype=agg.dtype)
+ result = map_blocks(partial(_finalize_scan, dtype=agg.dtype), accumulated, dtype=agg.dtype)
assert result.chunks == array.chunks
=====================================
flox/dask_array_ops.py
=====================================
@@ -1,6 +1,6 @@
import builtins
import math
-from functools import partial
+from functools import lru_cache, partial
from itertools import product
from numbers import Integral
@@ -84,14 +84,8 @@ def partial_reduce(
axis: tuple[int, ...],
block_index: int | None = None,
):
- numblocks = tuple(len(c) for c in chunks)
- ndim = len(numblocks)
- parts = [list(partition_all(split_every.get(i, 1), range(n))) for (i, n) in enumerate(numblocks)]
- keys = product(*map(range, map(len, parts)))
- out_chunks = [
- tuple(1 for p in partition_all(split_every[i], c)) if i in split_every else c
- for (i, c) in enumerate(chunks)
- ]
+ ndim = len(chunks)
+ keys, parts, out_chunks = get_parts(tuple(split_every.items()), chunks)
for k, p in zip(keys, product(*parts)):
free = {i: j[0] for (i, j) in enumerate(p) if len(j) == 1 and i not in split_every}
dummy = dict(i for i in enumerate(p) if i[0] in split_every)
@@ -101,3 +95,17 @@ def partial_reduce(
k = (*k[:-1], block_index)
dsk[(name,) + k] = (func, g)
return dsk, out_chunks
+
+
+@lru_cache
+def get_parts(split_every_items, chunks):
+ numblocks = tuple(len(c) for c in chunks)
+ split_every = dict(split_every_items)
+
+ parts = [list(partition_all(split_every.get(i, 1), range(n))) for (i, n) in enumerate(numblocks)]
+ keys = tuple(product(*map(range, map(len, parts))))
+ out_chunks = tuple(
+ tuple(1 for p in partition_all(split_every[i], c)) if i in split_every else c
+ for (i, c) in enumerate(chunks)
+ )
+ return keys, parts, out_chunks
=====================================
flox/xarray.py
=====================================
@@ -468,9 +468,9 @@ def xarray_reduce(
):
levelnames = ds_broad.indexes[name].names
if isinstance(expect3, np.ndarray):
- # TODO: workaoround for IntervalIndex issue.
+ # TODO: workaround for IntervalIndex issue.
raise NotImplementedError
- expect3 = pd.MultiIndex.from_tuples(expect3.values, names=levelnames)
+ expect3 = pd.MultiIndex.from_tuples(expect3.values.tolist(), names=levelnames)
actual[name] = expect3
if Version(xr.__version__) > Version("2022.03.0"):
actual = actual.set_coords(levelnames)
=====================================
pyproject.toml
=====================================
@@ -14,6 +14,7 @@ classifiers = [
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
]
dependencies = [
"pandas>=1.5",
=====================================
readthedocs.yml deleted
=====================================
@@ -1,15 +0,0 @@
-version: 2
-
-sphinx:
- # Path to your Sphinx configuration file.
- configuration: docs/source/conf.py
-
-build:
- os: "ubuntu-lts-latest"
- tools:
- python: "mambaforge-latest"
-
-conda:
- environment: ci/docs.yml
-
-formats: []
=====================================
tests/test_core.py
=====================================
@@ -5,7 +5,7 @@ import logging
import warnings
from collections.abc import Callable
from functools import partial, reduce
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
from unittest.mock import MagicMock, patch
import numpy as np
@@ -1538,7 +1538,7 @@ def test_normalize_block_indexing_1d(flatblocks, expected):
nblocks = 5
array = dask.array.ones((nblocks,), chunks=(1,))
expected = tuple(np.array(i) if isinstance(i, list) else i for i in expected)
- actual = _normalize_indexes(array, flatblocks, array.blocks.shape)
+ actual = _normalize_indexes(array.ndim, flatblocks, array.blocks.shape)
assert_equal_tuple(expected, actual)
@@ -1550,17 +1550,17 @@ def test_normalize_block_indexing_1d(flatblocks, expected):
((1, 2, 3), (0, slice(1, 4))),
((1, 3), (0, [1, 3])),
((0, 1, 3), (0, [0, 1, 3])),
- (tuple(range(10)), (slice(0, 2), slice(None))),
- ((0, 1, 3, 5, 6, 8), (slice(0, 2), [0, 1, 3])),
+ (tuple(range(10)), (slice(None, 2), slice(None))),
+ ((0, 1, 3, 5, 6, 8), (slice(None, 2), [0, 1, 3])),
((0, 3, 4, 5, 6, 8, 24), np.ix_([0, 1, 4], [0, 1, 3, 4])),
),
)
-def test_normalize_block_indexing_2d(flatblocks, expected):
+def test_normalize_block_indexing_2d(flatblocks: tuple[int, ...], expected: tuple[Any, ...]) -> None:
nblocks = 5
ndim = 2
array = dask.array.ones((nblocks,) * ndim, chunks=(1,) * ndim)
expected = tuple(np.array(i) if isinstance(i, list) else i for i in expected)
- actual = _normalize_indexes(array, flatblocks, array.blocks.shape)
+ actual = _normalize_indexes(array.ndim, flatblocks, array.blocks.shape)
assert_equal_tuple(expected, actual)
@@ -1749,15 +1749,15 @@ def test_validate_reindex() -> None:
@requires_dask
-def test_1d_blockwise_sort_optimization():
+def test_1d_blockwise_sort_optimization() -> None:
# Make sure for resampling problems sorting isn't done.
time = pd.Series(pd.date_range("2020-09-01", "2020-12-31 23:59", freq="3h"))
array = dask.array.ones((len(time),), chunks=(224,))
- actual, _ = groupby_reduce(array, time.dt.dayofyear.values, method="blockwise", func="count")
+ actual, *_ = groupby_reduce(array, time.dt.dayofyear.values, method="blockwise", func="count")
assert all("getitem" not in k for k in actual.dask)
- actual, _ = groupby_reduce(
+ actual, *_ = groupby_reduce(
array,
time.dt.dayofyear.values[::-1],
sort=True,
@@ -1766,7 +1766,7 @@ def test_1d_blockwise_sort_optimization():
)
assert any("getitem" in k for k in actual.dask.layers)
- actual, _ = groupby_reduce(
+ actual, *_ = groupby_reduce(
array,
time.dt.dayofyear.values[::-1],
sort=False,
@@ -1777,7 +1777,7 @@ def test_1d_blockwise_sort_optimization():
@requires_dask
-def test_negative_index_factorize_race_condition():
+def test_negative_index_factorize_race_condition() -> None:
# shape = (10, 2000)
# chunks = ((shape[0]-1,1), 10)
shape = (101, 174000)
@@ -1804,17 +1804,17 @@ def test_negative_index_factorize_race_condition():
@pytest.mark.parametrize("sort", [True, False])
-def test_expected_index_conversion_passthrough_range_index(sort):
+def test_expected_index_conversion_passthrough_range_index(sort) -> None:
index = pd.RangeIndex(100)
- actual = _convert_expected_groups_to_index(expected_groups=(index,), isbin=(False,), sort=(sort,))
+ actual = _convert_expected_groups_to_index(expected_groups=(index,), isbin=(False,), sort=(sort,)) # type: ignore[call-overload]
assert actual[0] is index
-def test_method_check_numpy():
+def test_method_check_numpy() -> None:
bins = [-2, -1, 0, 1, 2]
field = np.ones((5, 3))
by = np.array([[-1.5, -1.5, 0.5, 1.5, 1.5] * 3]).reshape(5, 3)
- actual, _ = groupby_reduce(
+ actual, *_ = groupby_reduce(
field,
by,
expected_groups=pd.IntervalIndex.from_breaks(bins),
@@ -1825,7 +1825,7 @@ def test_method_check_numpy():
expected = np.array([6, np.nan, 3, 6])
assert_equal(actual, expected)
- actual, _ = groupby_reduce(
+ actual, *_ = groupby_reduce(
field,
by,
expected_groups=pd.IntervalIndex.from_breaks(bins),
@@ -1845,7 +1845,7 @@ def test_method_check_numpy():
@pytest.mark.parametrize("dtype", [None, np.float64])
-def test_choose_engine(dtype):
+def test_choose_engine(dtype) -> None:
numbagg_possible = HAS_NUMBAGG and dtype is None
default = "numbagg" if numbagg_possible else "numpy"
mean = _initialize_aggregation(
@@ -1887,10 +1887,10 @@ def test_choose_engine(dtype):
assert _choose_engine(np.array([1, 1, 2, 2]), agg=argmax) == "numpy"
-def test_xarray_fill_value_behaviour():
+def test_xarray_fill_value_behaviour() -> None:
bar = np.array([1, 2, 3, np.nan, np.nan, np.nan, 4, 5, np.nan, np.nan])
times = np.arange(0, 20, 2)
- actual, _ = groupby_reduce(bar, times, func="nansum", expected_groups=(np.arange(19),))
+ actual, *_ = groupby_reduce(bar, times, func="nansum", expected_groups=(np.arange(19),))
nan = np.nan
# fmt: off
expected = np.array(
@@ -1905,7 +1905,7 @@ def test_xarray_fill_value_behaviour():
@pytest.mark.parametrize("func", ["nanquantile", "quantile"])
@pytest.mark.parametrize("chunk", [pytest.param(True, marks=requires_dask), False])
@pytest.mark.parametrize("by_ndim", [1, 2])
-def test_multiple_quantiles(q, chunk, func, by_ndim):
+def test_multiple_quantiles(q, chunk, func, by_ndim) -> None:
array = np.array([[1, -1, np.nan, 3, 4, 10, 5], [1, np.nan, np.nan, 3, 4, np.nan, np.nan]])
labels = np.array([0, 0, 0, 1, 0, 1, 1])
if by_ndim == 2:
@@ -1916,11 +1916,11 @@ def test_multiple_quantiles(q, chunk, func, by_ndim):
if chunk:
array = dask.array.from_array(array, chunks=(1,) + (-1,) * by_ndim)
- actual, _ = groupby_reduce(array, labels, func=func, finalize_kwargs=dict(q=q), axis=axis)
+ actual, *_ = groupby_reduce(array, labels, func=func, finalize_kwargs=dict(q=q), axis=axis)
sorted_array = array[..., [0, 1, 2, 4, 3, 5, 6]]
f = partial(getattr(np, func), q=q, axis=axis, keepdims=True)
if chunk:
- sorted_array = sorted_array.compute()
+ sorted_array = sorted_array.compute() # type: ignore[attr-defined]
expected = np.concatenate((f(sorted_array[..., :4]), f(sorted_array[..., 4:])), axis=-1)
if by_ndim == 2:
expected = expected.squeeze(axis=-2)
@@ -1928,7 +1928,7 @@ def test_multiple_quantiles(q, chunk, func, by_ndim):
@pytest.mark.parametrize("dtype", ["U3", "S3"])
-def test_nanlen_string(dtype, engine):
+def test_nanlen_string(dtype, engine) -> None:
array = np.array(["ABC", "DEF", "GHI", "JKL", "MNO", "PQR"], dtype=dtype)
by = np.array([0, 0, 1, 2, 1, 0])
expected = np.array([3, 2, 1], dtype=np.intp)
@@ -1936,18 +1936,17 @@ def test_nanlen_string(dtype, engine):
assert_equal(expected, actual)
-def test_cumusm():
+def test_cumusm() -> None:
array = np.array([1, 1, 1], dtype=np.uint64)
by = np.array([0] * array.shape[-1])
- kwargs = {"func": "nancumsum", "axis": -1}
expected = np.nancumsum(array, axis=-1)
- actual = groupby_scan(array, by, **kwargs)
+ actual = groupby_scan(array, by, func="nancumsum", axis=-1)
assert_equal(expected, actual)
if has_dask:
da = dask.array.from_array(array, chunks=2)
- actual = groupby_scan(da, by, **kwargs)
+ actual = groupby_scan(da, by, func="nancumsum", axis=-1)
assert_equal(expected, actual)
@@ -1962,7 +1961,7 @@ def test_cumusm():
@pytest.mark.parametrize("size", ((1, 12), (12,), (12, 9)))
@pytest.mark.parametrize("add_nan_by", [True, False])
@pytest.mark.parametrize("func", ["ffill", "bfill"])
-def test_ffill_bfill(chunks, size, add_nan_by, func):
+def test_ffill_bfill(chunks, size, add_nan_by, func) -> None:
array, by = gen_array_by(size, func)
if chunks:
array = dask.array.from_array(array, chunks=chunks)
@@ -1976,11 +1975,11 @@ def test_ffill_bfill(chunks, size, add_nan_by, func):
@requires_dask
-def test_blockwise_nans():
+def test_blockwise_nans() -> None:
array = dask.array.ones((1, 10), chunks=2)
by = np.array([-1, 0, -1, 1, -1, 2, -1, 3, 4, 4])
- actual, actual_groups = flox.groupby_reduce(array, by, func="sum", expected_groups=pd.RangeIndex(0, 5))
- expected, expected_groups = flox.groupby_reduce(
+ actual, *actual_groups = flox.groupby_reduce(array, by, func="sum", expected_groups=pd.RangeIndex(0, 5))
+ expected, *expected_groups = flox.groupby_reduce(
array.compute(), by, func="sum", expected_groups=pd.RangeIndex(0, 5)
)
assert_equal(expected_groups, actual_groups)
@@ -1989,11 +1988,11 @@ def test_blockwise_nans():
@pytest.mark.parametrize("func", ["sum", "prod", "count", "nansum"])
@pytest.mark.parametrize("engine", ["flox", "numpy"])
-def test_agg_dtypes(func, engine):
+def test_agg_dtypes(func, engine) -> None:
# regression test for GH388
counts = np.array([0, 2, 1, 0, 1])
group = np.array([1, 1, 1, 2, 2])
- actual, _ = groupby_reduce(
+ actual, *_ = groupby_reduce(
counts, group, expected_groups=(np.array([1, 2]),), func=func, dtype="uint8", engine=engine
)
expected = _get_array_func(func)(counts, dtype="uint8")
@@ -2001,38 +2000,56 @@ def test_agg_dtypes(func, engine):
@requires_dask
-def test_blockwise_avoid_rechunk():
+def test_blockwise_avoid_rechunk() -> None:
array = dask.array.zeros((6,), chunks=(2, 4), dtype=np.int64)
by = np.array(["1", "1", "0", "", "0", ""], dtype="<U1")
- actual, groups = groupby_reduce(array, by, func="first")
- assert_equal(groups, ["", "0", "1"])
+ actual, *groups = groupby_reduce(array, by, func="first")
+ assert_equal(groups, [["", "0", "1"]])
assert_equal(actual, np.array([0, 0, 0], dtype=np.int64))
-def test_datetime_minmax(engine):
+def test_datetime_minmax(engine) -> None:
# GH403
array = np.array([np.datetime64("2000-01-01"), np.datetime64("2000-01-02"), np.datetime64("2000-01-03")])
by = np.array([0, 0, 1])
- actual, _ = flox.groupby_reduce(array, by, func="nanmin", engine=engine)
+ actual, *_ = flox.groupby_reduce(array, by, func="nanmin", engine=engine)
expected = array[[0, 2]]
assert_equal(expected, actual)
expected = array[[1, 2]]
- actual, _ = flox.groupby_reduce(array, by, func="nanmax", engine=engine)
+ actual, *_ = flox.groupby_reduce(array, by, func="nanmax", engine=engine)
assert_equal(expected, actual)
@pytest.mark.parametrize("func", ["first", "last", "nanfirst", "nanlast"])
-def test_datetime_timedelta_first_last(engine, func):
+def test_datetime_timedelta_first_last(engine, func) -> None:
import flox
idx = 0 if "first" in func else -1
+ idx1 = 2 if "first" in func else -1
+ ## datetime
dt = pd.date_range("2001-01-01", freq="d", periods=5).values
by = np.ones(dt.shape, dtype=int)
- actual, _ = flox.groupby_reduce(dt, by, func=func, engine=engine)
+ actual, *_ = flox.groupby_reduce(dt, by, func=func, engine=engine)
assert_equal(actual, dt[[idx]])
+ # missing group
+ by = np.array([0, 2, 3, 3, 3])
+ actual, *_ = flox.groupby_reduce(
+ dt, by, expected_groups=([0, 1, 2, 3],), func=func, engine=engine, fill_value=dtypes.NA
+ )
+ assert_equal(actual, [dt[0], np.datetime64("NaT"), dt[1], dt[idx1]])
+
+ ## timedelta
dt = dt - dt[0]
- actual, _ = flox.groupby_reduce(dt, by, func=func, engine=engine)
+ by = np.ones(dt.shape, dtype=int)
+ actual, *_ = flox.groupby_reduce(dt, by, func=func, engine=engine)
assert_equal(actual, dt[[idx]])
+
+ # missing group
+ by = np.array([0, 2, 3, 3, 3])
+ actual, *_ = flox.groupby_reduce(
+ dt, by, expected_groups=([0, 1, 2, 3],), func=func, engine=engine, fill_value=dtypes.NA
+ )
+ assert_equal(actual, [dt[0], np.timedelta64("NaT"), dt[1], dt[idx1]])
=====================================
tests/test_xarray.py
=====================================
@@ -6,6 +6,7 @@ import pytest
xr = pytest.importorskip("xarray")
# isort: on
+from flox import xrdtypes as dtypes
from flox.xarray import rechunk_for_blockwise, xarray_reduce
from . import (
@@ -193,13 +194,25 @@ def test_validate_expected_groups(expected_groups):
@requires_cftime
+@pytest.mark.parametrize("indexer", [slice(None), pytest.param(slice(12), id="missing-group")])
+@pytest.mark.parametrize("expected_groups", [None, [0, 1, 2, 3]])
@pytest.mark.parametrize("func", ["first", "last", "min", "max", "count"])
-def test_xarray_reduce_cftime_var(engine, func):
+def test_xarray_reduce_cftime_var(engine, indexer, expected_groups, func):
times = xr.date_range("1980-09-01 00:00", "1982-09-18 00:00", freq="ME", calendar="noleap")
ds = xr.Dataset({"var": ("time", times)}, coords={"time": np.repeat(np.arange(4), 6)})
+ ds = ds.isel(time=indexer)
- actual = xarray_reduce(ds, ds.time, func=func)
+ actual = xarray_reduce(
+ ds,
+ ds.time,
+ func=func,
+ fill_value=dtypes.NA if func in ["first", "last"] else np.nan,
+ engine=engine,
+ expected_groups=expected_groups,
+ )
expected = getattr(ds.groupby("time"), func)()
+ if expected_groups is not None:
+ expected = expected.reindex(time=expected_groups)
xr.testing.assert_identical(actual, expected)
View it on GitLab: https://salsa.debian.org/debian-gis-team/flox/-/commit/788f62ec4113eb26e0f60f468ff1a8f82b01eaa9