[Git][debian-gis-team/flox][master] 4 commits: New upstream version 0.9.13
Antonio Valentino (@antonio.valentino)
gitlab at salsa.debian.org
Sat Sep 21 12:07:08 BST 2024
Antonio Valentino pushed to branch master at Debian GIS Project / flox
Commits:
508e943e by Antonio Valentino at 2024-09-21T10:56:41+00:00
New upstream version 0.9.13
- - - - -
485362ab by Antonio Valentino at 2024-09-21T10:56:44+00:00
Update upstream source from tag 'upstream/0.9.13'
Update to upstream version '0.9.13'
with Debian dir 2a16667ace3056c70faead5389174a8460603cd6
- - - - -
9931d458 by Antonio Valentino at 2024-09-21T10:57:25+00:00
New upstream release
- - - - -
1f968487 by Antonio Valentino at 2024-09-21T10:57:48+00:00
Set distribution to unstable
- - - - -
7 changed files:
- .github/workflows/ci.yaml
- + ci/env-numpy1.yml
- debian/changelog
- flox/core.py
- flox/xrdtypes.py
- tests/conftest.py
- tests/test_core.py
Changes:
=====================================
.github/workflows/ci.yaml
=====================================
@@ -37,6 +37,9 @@ jobs:
- os: "ubuntu-latest"
env: "minimal-requirements"
python-version: "3.10"
+ - os: "windows-latest"
+ env: "env-numpy1"
+ python-version: "3.10"
steps:
- uses: actions/checkout@v4
with:
=====================================
ci/env-numpy1.yml
=====================================
@@ -0,0 +1,30 @@
+name: flox-tests
+channels:
+ - conda-forge
+dependencies:
+ - asv
+ - cachey
+ - cftime
+ - codecov
+ - cubed>=0.14.3
+ - dask-core
+ - pandas
+ - numpy<2
+ - scipy
+ - lxml # for mypy coverage report
+ - matplotlib
+ - pip
+ - pytest
+ - pytest-cov
+ - pytest-pretty
+ - pytest-xdist
+ - syrupy
+ - pre-commit
+ - numpy_groupies>=0.9.19
+ - pooch
+ - toolz
+ - numba
+ - numbagg>=0.3
+ - hypothesis
+ - pip:
+ - git+https://github.com/dcherian/xarray.git@flox-preserve-dtype
=====================================
debian/changelog
=====================================
@@ -1,3 +1,9 @@
+flox (0.9.13-1) unstable; urgency=medium
+
+ * New upstream release.
+
+ -- Antonio Valentino <antonio.valentino at tiscali.it> Sat, 21 Sep 2024 10:57:30 +0000
+
flox (0.9.11-1) unstable; urgency=medium
* New upstream release.
=====================================
flox/core.py
=====================================
@@ -642,6 +642,7 @@ def rechunk_for_blockwise(array: DaskArray, axis: T_Axis, labels: np.ndarray) ->
DaskArray
Rechunked array
"""
+ # TODO: this should be unnecessary?
labels = factorize_((labels,), axes=())[0]
chunks = array.chunks[axis]
newchunks = _get_optimal_chunks_for_groups(chunks, labels)
@@ -1493,8 +1494,9 @@ def _normalize_indexes(array: DaskArray, flatblocks, blkshape) -> tuple:
def subset_to_blocks(
array: DaskArray,
flatblocks: Sequence[int],
- blkshape: tuple[int] | None = None,
+ blkshape: tuple[int, ...] | None = None,
reindexer=identity,
+ chunks_as_array: tuple[np.ndarray, ...] | None = None,
) -> DaskArray:
"""
Advanced indexing of .blocks such that we always get a regular array back.
@@ -1517,6 +1519,9 @@ def subset_to_blocks(
if blkshape is None:
blkshape = array.blocks.shape
+ if chunks_as_array is None:
+ chunks_as_array = tuple(np.array(c) for c in array.chunks)
+
index = _normalize_indexes(array, flatblocks, blkshape)
if all(not isinstance(i, np.ndarray) and i == slice(None) for i in index):
@@ -1530,7 +1535,7 @@ def subset_to_blocks(
new_keys = array._key_array[index]
squeezed = tuple(np.squeeze(i) if isinstance(i, np.ndarray) else i for i in index)
- chunks = tuple(tuple(np.array(c)[i].tolist()) for c, i in zip(array.chunks, squeezed))
+ chunks = tuple(tuple(c[i].tolist()) for c, i in zip(chunks_as_array, squeezed))
keys = itertools.product(*(range(len(c)) for c in chunks))
layer: Graph = {(name,) + key: (reindexer, tuple(new_keys[key].tolist())) for key in keys}
@@ -1725,6 +1730,7 @@ def dask_groupby_agg(
reduced_ = []
groups_ = []
+ chunks_as_array = tuple(np.array(c) for c in array.chunks)
for blks, cohort in chunks_cohorts.items():
cohort_index = pd.Index(cohort)
reindexer = (
@@ -1732,7 +1738,7 @@ def dask_groupby_agg(
if do_simple_combine
else identity
)
- reindexed = subset_to_blocks(intermediate, blks, block_shape, reindexer)
+ reindexed = subset_to_blocks(intermediate, blks, block_shape, reindexer, chunks_as_array)
# now that we have reindexed, we can set reindex=True explicitly
reduced_.append(
tree_reduce(
@@ -2418,7 +2424,7 @@ def groupby_reduce(
)
is_bool_array = np.issubdtype(array.dtype, bool)
- array = array.astype(np.intp) if is_bool_array else array
+ array = array.astype(np.int_) if is_bool_array else array
isbins = _atleast_1d(isbin, nby)
@@ -2623,7 +2629,8 @@ def groupby_reduce(
partial_agg = partial(dask_groupby_agg, **kwargs)
- if method == "blockwise" and by_.ndim == 1:
+ # if preferred method is already blockwise, no need to rechunk
+ if preferred_method != "blockwise" and method == "blockwise" and by_.ndim == 1:
array = rechunk_for_blockwise(array, axis=-1, labels=by_)
result, groups = partial_agg(
@@ -2776,7 +2783,7 @@ def groupby_scan(
return array
is_bool_array = np.issubdtype(array.dtype, bool)
- array = array.astype(np.intp) if is_bool_array else array
+ array = array.astype(np.int_) if is_bool_array else array
if expected_groups is not None:
raise NotImplementedError("Setting `expected_groups` and binning is not supported yet.")
@@ -2810,9 +2817,9 @@ def groupby_scan(
# it defaults to the dtype of a, unless a
# has an integer dtype with a precision less than that of the default platform integer.
if array.dtype.kind == "i":
- agg.dtype = np.result_type(array.dtype, np.intp)
+ agg.dtype = np.result_type(array.dtype, np.int_)
elif array.dtype.kind == "u":
- agg.dtype = np.result_type(array.dtype, np.uintp)
+ agg.dtype = np.result_type(array.dtype, np.uint)
else:
agg.dtype = array.dtype if dtype is None else dtype
=====================================
flox/xrdtypes.py
=====================================
@@ -179,9 +179,9 @@ def _maybe_promote_int(dtype) -> np.dtype:
if not isinstance(dtype, np.dtype):
dtype = np.dtype(dtype)
if dtype.kind == "i":
- dtype = np.result_type(dtype, np.intp)
+ dtype = np.result_type(dtype, np.int_)
elif dtype.kind == "u":
- dtype = np.result_type(dtype, np.uintp)
+ dtype = np.result_type(dtype, np.uint)
return dtype
=====================================
tests/conftest.py
=====================================
@@ -12,6 +12,7 @@ settings.register_profile(
settings.register_profile(
"default",
max_examples=300,
+ deadline=500,
suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow],
verbosity=Verbosity.verbose,
)
=====================================
tests/test_core.py
=====================================
@@ -1997,3 +1997,12 @@ def test_agg_dtypes(func, engine):
)
expected = _get_array_func(func)(counts, dtype="uint8")
assert actual.dtype == np.uint8 == expected.dtype
+
+
+@requires_dask
+def test_blockwise_avoid_rechunk():
+ array = dask.array.zeros((6,), chunks=(2, 4), dtype=np.int64)
+ by = np.array(["1", "1", "0", "", "0", ""], dtype="<U1")
+ actual, groups = groupby_reduce(array, by, func="first")
+ assert_equal(groups, ["", "0", "1"])
+ assert_equal(actual, np.array([0, 0, 0], dtype=np.int64))
View it on GitLab: https://salsa.debian.org/debian-gis-team/flox/-/compare/8f7d5f21175aac8e012cb080872b6b3e0c9cb799...1f9684876534c0728e8aec5dbac7409442dca858
--
View it on GitLab: https://salsa.debian.org/debian-gis-team/flox/-/compare/8f7d5f21175aac8e012cb080872b6b3e0c9cb799...1f9684876534c0728e8aec5dbac7409442dca858
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-grass-devel/attachments/20240921/515269cc/attachment-0001.htm>
More information about the Pkg-grass-devel
mailing list