[Git][debian-gis-team/flox][upstream] New upstream version 0.8.7
Antonio Valentino (@antonio.valentino)
gitlab at salsa.debian.org
Sat Jan 13 15:20:31 GMT 2024
Antonio Valentino pushed to branch upstream at Debian GIS Project / flox
Commits:
ec9de155 by Antonio Valentino at 2024-01-12T07:44:59+00:00
New upstream version 0.8.7
- - - - -
4 changed files:
- flox/aggregations.py
- flox/core.py
- flox/xrutils.py
- tests/test_core.py
Changes:
=====================================
flox/aggregations.py
=====================================
@@ -133,9 +133,10 @@ def _get_fill_value(dtype, fill_value):
return fill_value
-def _atleast_1d(inp):
+def _atleast_1d(inp, min_length: int = 1):
if xrutils.is_scalar(inp):
- inp = (inp,)
+ inp = (inp,) * min_length
+ assert len(inp) >= min_length
return inp
=====================================
flox/core.py
=====================================
@@ -340,9 +340,10 @@ def find_group_cohorts(labels, chunks, expected_groups: None | pd.RangeIndex = N
# TODO: we can optimize this to loop over chunk_cohorts instead
# by zeroing out rows that are already in a cohort
for rowidx in order:
- cohort_ = containment.indices[
+ cohidx = containment.indices[
slice(containment.indptr[rowidx], containment.indptr[rowidx + 1])
]
+ cohort_ = present_labels[cohidx]
cohort = [elem for elem in cohort_ if elem not in merged_keys]
if not cohort:
continue
@@ -803,29 +804,11 @@ def chunk_reduce(
dict
"""
- if not (isinstance(func, str) or callable(func)):
- funcs = func
- else:
- funcs = (func,)
+ funcs = _atleast_1d(func)
nfuncs = len(funcs)
-
- if isinstance(dtype, Sequence):
- dtypes = dtype
- else:
- dtypes = (dtype,) * nfuncs
- assert len(dtypes) >= nfuncs
-
- if isinstance(fill_value, Sequence):
- fill_values = fill_value
- else:
- fill_values = (fill_value,) * nfuncs
- assert len(fill_values) >= nfuncs
-
- if isinstance(kwargs, Sequence):
- kwargss = kwargs
- else:
- kwargss = ({},) * nfuncs
- assert len(kwargss) >= nfuncs
+ dtypes = _atleast_1d(dtype, nfuncs)
+ fill_values = _atleast_1d(fill_value, nfuncs)
+ kwargss = _atleast_1d({}, nfuncs) if kwargs is None else kwargs
if isinstance(axis, Sequence):
axes: T_Axes = axis
@@ -862,7 +845,8 @@ def chunk_reduce(
# do this *before* possible broadcasting below.
# factorize_ has already taken care of offsetting
- seen_groups = _unique(group_idx)
+ if engine == "numbagg":
+ seen_groups = _unique(group_idx)
order = "C"
if nax > 1:
@@ -1551,12 +1535,9 @@ def dask_groupby_agg(
groups = _extract_unknown_groups(reduced, dtype=by.dtype)
group_chunks = ((np.nan,),)
else:
- if expected_groups is None:
- expected_groups_ = _get_expected_groups(by_input, sort=sort)
- else:
- expected_groups_ = expected_groups
- groups = (expected_groups_.to_numpy(),)
- group_chunks = ((len(expected_groups_),),)
+ assert expected_groups is not None
+ groups = (expected_groups.to_numpy(),)
+ group_chunks = ((len(expected_groups),),)
elif method == "cohorts":
chunks_cohorts = find_group_cohorts(
@@ -2063,10 +2044,7 @@ def groupby_reduce(
is_bool_array = np.issubdtype(array.dtype, bool)
array = array.astype(int) if is_bool_array else array
- if isinstance(isbin, Sequence):
- isbins = isbin
- else:
- isbins = (isbin,) * nby
+ isbins = _atleast_1d(isbin, nby)
_assert_by_is_aligned(array.shape, bys)
=====================================
flox/xrutils.py
=====================================
@@ -84,7 +84,7 @@ class ReprObject:
def is_scalar(value: Any, include_0d: bool = True) -> bool:
"""Whether to treat a value as a scalar.
- Any non-iterable, string, or 0-D array
+ Any non-iterable, string, dict, or 0-D array
"""
NON_NUMPY_SUPPORTED_ARRAY_TYPES = (dask_array_type, pd.Index)
@@ -92,7 +92,7 @@ def is_scalar(value: Any, include_0d: bool = True) -> bool:
include_0d = getattr(value, "ndim", None) == 0
return (
include_0d
- or isinstance(value, (str, bytes))
+ or isinstance(value, (str, bytes, dict))
or not (
isinstance(value, (Iterable,) + NON_NUMPY_SUPPORTED_ARRAY_TYPES)
or hasattr(value, "__array_function__")
=====================================
tests/test_core.py
=====================================
@@ -857,6 +857,16 @@ def test_find_group_cohorts(expected, labels, chunks: tuple[int]) -> None:
assert actual == expected, (actual, expected)
+@requires_dask
+def test_find_cohorts_missing_groups():
+ by = np.array([np.nan, np.nan, np.nan, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, np.nan, np.nan])
+ kwargs = {"func": "sum", "expected_groups": [0, 1, 2], "fill_value": 123}
+ array = dask.array.ones_like(by, chunks=(3,))
+ actual, _ = groupby_reduce(array, by, method="cohorts", **kwargs)
+ expected, _ = groupby_reduce(array.compute(), by, **kwargs)
+ assert_equal(expected, actual)
+
+
@pytest.mark.parametrize("chunksize", [12, 13, 14, 24, 36, 48, 72, 71])
def test_verify_complex_cohorts(chunksize: int) -> None:
time = pd.Series(pd.date_range("2016-01-01", "2018-12-31 23:59", freq="H"))
View it on GitLab: https://salsa.debian.org/debian-gis-team/flox/-/commit/ec9de1554032f46abe31c0982256a72ba79869aa
--
View it on GitLab: https://salsa.debian.org/debian-gis-team/flox/-/commit/ec9de1554032f46abe31c0982256a72ba79869aa
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-grass-devel/attachments/20240113/9594ca84/attachment-0001.htm>
More information about the Pkg-grass-devel mailing list