[Git][debian-gis-team/flox][upstream] New upstream version 0.9.14
Antonio Valentino (@antonio.valentino)
gitlab at salsa.debian.org
Sat Nov 9 16:49:43 GMT 2024
Antonio Valentino pushed to branch upstream at Debian GIS Project / flox
Commits:
7325aff5 by Antonio Valentino at 2024-11-09T16:42:47+00:00
New upstream version 0.9.14
- - - - -
15 changed files:
- .github/workflows/benchmarks.yml
- .github/workflows/ci-additional.yaml
- .github/workflows/ci.yaml
- .github/workflows/upstream-dev-ci.yaml
- .pre-commit-config.yaml
- ci/env-numpy1.yml
- ci/environment.yml
- ci/no-dask.yml
- ci/no-numba.yml
- flox/aggregate_numbagg.py
- flox/aggregations.py
- flox/core.py
- flox/xarray.py
- flox/xrutils.py
- tests/test_core.py
Changes:
=====================================
.github/workflows/benchmarks.yml
=====================================
@@ -22,7 +22,7 @@ jobs:
fetch-depth: 0
- name: Set up conda environment
- uses: mamba-org/setup-micromamba@v1
+ uses: mamba-org/setup-micromamba@v2
with:
environment-name: flox-bench
create-args: >-
=====================================
.github/workflows/ci-additional.yaml
=====================================
@@ -53,7 +53,7 @@ jobs:
echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- name: Setup micromamba
- uses: mamba-org/setup-micromamba@v1
+ uses: mamba-org/setup-micromamba@v2
with:
environment-file: ${{env.CONDA_ENV_FILE}}
environment-name: flox-tests
@@ -77,7 +77,7 @@ jobs:
--ignore flox/tests \
--cov=./ --cov-report=xml
- name: Upload code coverage to Codecov
- uses: codecov/codecov-action@v4.5.0
+ uses: codecov/codecov-action@v4.6.0
with:
file: ./coverage.xml
flags: unittests
@@ -106,7 +106,7 @@ jobs:
run: |
echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV
- name: Setup micromamba
- uses: mamba-org/setup-micromamba@v1
+ uses: mamba-org/setup-micromamba@v2
with:
environment-file: ${{env.CONDA_ENV_FILE}}
environment-name: flox-tests
@@ -132,7 +132,7 @@ jobs:
python -m mypy --install-types --non-interactive --cache-dir=.mypy_cache/ --cobertura-xml-report mypy_report
- name: Upload mypy coverage to Codecov
- uses: codecov/codecov-action@v4.5.0
+ uses: codecov/codecov-action@v4.6.0
with:
file: mypy_report/cobertura.xml
flags: mypy
=====================================
.github/workflows/ci.yaml
=====================================
@@ -48,7 +48,7 @@ jobs:
run: |
echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV
- name: Set up conda environment
- uses: mamba-org/setup-micromamba@v1
+ uses: mamba-org/setup-micromamba@v2
with:
environment-file: ci/${{ matrix.env }}.yml
environment-name: flox-tests
@@ -76,7 +76,7 @@ jobs:
python -c "import xarray; xarray.show_versions()"
pytest --durations=20 --durations-min=0.5 -n auto --cov=./ --cov-report=xml --hypothesis-profile ci
- name: Upload code coverage to Codecov
- uses: codecov/codecov-action@v4.5.0
+ uses: codecov/codecov-action@v4.6.0
with:
file: ./coverage.xml
flags: unittests
@@ -102,10 +102,10 @@ jobs:
steps:
- uses: actions/checkout@v4
with:
- repository: "dcherian/xarray"
+ repository: "pydata/xarray"
fetch-depth: 0 # Fetch all history for all branches and tags.
- name: Set up conda environment
- uses: mamba-org/setup-micromamba@v1
+ uses: mamba-org/setup-micromamba@v2
with:
environment-file: ci/requirements/environment.yml
environment-name: xarray-tests
@@ -116,7 +116,6 @@ jobs:
pint>=0.22
- name: Install xarray
run: |
- git checkout flox-preserve-dtype
python -m pip install --no-deps .
- name: Install upstream flox
run: |
=====================================
.github/workflows/upstream-dev-ci.yaml
=====================================
@@ -43,7 +43,7 @@ jobs:
run: |
echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV
- name: Set up conda environment
- uses: mamba-org/setup-micromamba@v1
+ uses: mamba-org/setup-micromamba@v2
with:
environment-name: flox-tests
init-shell: bash
=====================================
.pre-commit-config.yaml
=====================================
@@ -4,7 +4,7 @@ ci:
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
- rev: "v0.6.4"
+ rev: "v0.6.9"
hooks:
- id: ruff
args: ["--fix", "--show-fixes"]
@@ -16,7 +16,7 @@ repos:
- id: prettier
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v4.6.0
+ rev: v5.0.0
hooks:
- id: check-yaml
- id: trailing-whitespace
@@ -45,12 +45,12 @@ repos:
- tomli
- repo: https://github.com/abravalheri/validate-pyproject
- rev: v0.19
+ rev: v0.20.2
hooks:
- id: validate-pyproject
- repo: https://github.com/rhysd/actionlint
- rev: v1.7.1
+ rev: v1.7.3
hooks:
- id: actionlint
files: ".github/workflows/"
=====================================
ci/env-numpy1.yml
=====================================
@@ -27,4 +27,4 @@ dependencies:
- numbagg>=0.3
- hypothesis
- pip:
- - git+https://github.com/dcherian/xarray.git@flox-preserve-dtype
+ - git+https://github.com/dcherian/xarray.git
=====================================
ci/environment.yml
=====================================
@@ -27,4 +27,4 @@ dependencies:
- numbagg>=0.3
- hypothesis
- pip:
- - git+https://github.com/dcherian/xarray.git@flox-preserve-dtype
+ - git+https://github.com/dcherian/xarray.git
=====================================
ci/no-dask.yml
=====================================
@@ -22,4 +22,4 @@ dependencies:
- numba
- numbagg>=0.3
- pip:
- - git+https://github.com/dcherian/xarray.git@flox-preserve-dtype
+ - git+https://github.com/dcherian/xarray.git
=====================================
ci/no-numba.yml
=====================================
@@ -19,8 +19,9 @@ dependencies:
- pytest-pretty
- pytest-xdist
- syrupy
- - xarray
- pre-commit
- numpy_groupies>=0.9.19
- pooch
- toolz
+ - pip:
+ - git+https://github.com/dcherian/xarray.git
=====================================
flox/aggregate_numbagg.py
=====================================
@@ -30,6 +30,8 @@ CAST_TO = {
"nanmean": {np.int_: np.float64},
"nanvar": {np.int_: np.float64},
"nanstd": {np.int_: np.float64},
+ "nanfirst": {np.datetime64: np.int64, np.timedelta64: np.int64},
+ "nanlast": {np.datetime64: np.int64, np.timedelta64: np.int64},
}
@@ -51,7 +53,7 @@ def _numbagg_wrapper(
if cast_to:
for from_, to_ in cast_to.items():
if np.issubdtype(array.dtype, from_):
- array = array.astype(to_)
+ array = array.astype(to_, copy=False)
func_ = getattr(numbagg.grouped, f"group_{func}")
=====================================
flox/aggregations.py
=====================================
@@ -149,9 +149,9 @@ class Dim:
class Aggregation:
def __init__(
self,
- name,
+ name: str,
*,
- numpy: str | FuncTuple | None = None,
+ numpy: str | None = None,
chunk: str | FuncTuple | None,
combine: str | FuncTuple | None,
preprocess: Callable | None = None,
@@ -217,7 +217,7 @@ class Aggregation:
self.preprocess = preprocess
# Use "chunk_reduce" or "chunk_argreduce"
self.reduction_type = reduction_type
- self.numpy: FuncTuple = (numpy,) if numpy else (self.name,)
+ self.numpy: FuncTuple = (numpy,) if numpy is not None else (self.name,)
# initialize blockwise reduction
self.chunk: OptionalFuncTuple = _atleast_1d(chunk)
# how to aggregate results after first round of reduction
=====================================
flox/core.py
=====================================
@@ -45,6 +45,10 @@ from .aggregations import (
)
from .cache import memoize
from .xrutils import (
+ _contains_cftime_datetimes,
+ _datetime_nanmin,
+ _to_pytimedelta,
+ datetime_to_numeric,
is_chunked_array,
is_duck_array,
is_duck_cubed_array,
@@ -2473,7 +2477,8 @@ def groupby_reduce(
has_dask = is_duck_dask_array(array) or is_duck_dask_array(by_)
has_cubed = is_duck_cubed_array(array) or is_duck_cubed_array(by_)
- if _is_first_last_reduction(func):
+ is_first_last = _is_first_last_reduction(func)
+ if is_first_last:
if has_dask and nax != 1:
raise ValueError(
"For dask arrays: first, last, nanfirst, nanlast reductions are "
@@ -2486,6 +2491,24 @@ def groupby_reduce(
"along a single axis or when reducing across all dimensions of `by`."
)
+ # Flox's count works with non-numeric and its faster than converting.
+ is_npdatetime = array.dtype.kind in "Mm"
+ is_cftime = _contains_cftime_datetimes(array)
+ requires_numeric = (
+ (func not in ["count", "any", "all"] and not is_first_last)
+ or (func == "count" and engine != "flox")
+ or (is_first_last and is_cftime)
+ )
+ if requires_numeric:
+ if is_npdatetime:
+ offset = _datetime_nanmin(array)
+ # xarray always uses np.datetime64[ns] for np.datetime64 data
+ dtype = "timedelta64[ns]"
+ array = datetime_to_numeric(array, offset)
+ elif is_cftime:
+ offset = array.min()
+ array = datetime_to_numeric(array, offset, datetime_unit="us")
+
if nax == 1 and by_.ndim > 1 and expected_ is None:
# When we reduce along all axes, we are guaranteed to see all
# groups in the final combine stage, so everything works.
@@ -2671,6 +2694,14 @@ def groupby_reduce(
if is_bool_array and (_is_minmax_reduction(func) or _is_first_last_reduction(func)):
result = result.astype(bool)
+
+ # Output of count has an int dtype.
+ if requires_numeric and func != "count":
+ if is_npdatetime:
+ return result.astype(dtype) + offset
+ elif is_cftime:
+ return _to_pytimedelta(result, unit="us") + offset
+
return (result, *groups)
=====================================
flox/xarray.py
=====================================
@@ -7,7 +7,6 @@ import numpy as np
import pandas as pd
import xarray as xr
from packaging.version import Version
-from xarray.core.duck_array_ops import _datetime_nanmin
from .aggregations import Aggregation, Dim, _atleast_1d, quantile_new_dims_func
from .core import (
@@ -18,7 +17,6 @@ from .core import (
)
from .core import rechunk_for_blockwise as rechunk_array_for_blockwise
from .core import rechunk_for_cohorts as rechunk_array_for_cohorts
-from .xrutils import _contains_cftime_datetimes, _to_pytimedelta, datetime_to_numeric
if TYPE_CHECKING:
from xarray.core.types import T_DataArray, T_Dataset
@@ -366,22 +364,6 @@ def xarray_reduce(
if "nan" not in func and func not in ["all", "any", "count"]:
func = f"nan{func}"
- # Flox's count works with non-numeric and its faster than converting.
- requires_numeric = func not in ["count", "any", "all"] or (
- func == "count" and kwargs["engine"] != "flox"
- )
- if requires_numeric:
- is_npdatetime = array.dtype.kind in "Mm"
- is_cftime = _contains_cftime_datetimes(array)
- if is_npdatetime:
- offset = _datetime_nanmin(array)
- # xarray always uses np.datetime64[ns] for np.datetime64 data
- dtype = "timedelta64[ns]"
- array = datetime_to_numeric(array, offset)
- elif is_cftime:
- offset = array.min()
- array = datetime_to_numeric(array, offset, datetime_unit="us")
-
result, *groups = groupby_reduce(array, *by, func=func, **kwargs)
# Transpose the new quantile dimension to the end. This is ugly.
@@ -395,13 +377,6 @@ def xarray_reduce(
# output dim order: (*broadcast_dims, *group_dims, quantile_dim)
result = np.moveaxis(result, 0, -1)
- # Output of count has an int dtype.
- if requires_numeric and func != "count":
- if is_npdatetime:
- return result.astype(dtype) + offset
- elif is_cftime:
- return _to_pytimedelta(result, unit="us") + offset
-
return result
# These data variables do not have any of the core dimension,
=====================================
flox/xrutils.py
=====================================
@@ -345,6 +345,28 @@ def _contains_cftime_datetimes(array) -> bool:
return False
+def _datetime_nanmin(array):
+ """nanmin() function for datetime64.
+
+ Caveats that this function deals with:
+
+ - In numpy < 1.18, min() on datetime64 incorrectly ignores NaT
+ - numpy nanmin() don't work on datetime64 (all versions at the moment of writing)
+ - dask min() does not work on datetime64 (all versions at the moment of writing)
+ """
+ from .xrdtypes import is_datetime_like
+
+ dtype = array.dtype
+ assert is_datetime_like(dtype)
+ # (NaT).astype(float) does not produce NaN...
+ array = np.where(pd.isnull(array), np.nan, array.astype(float))
+ array = min(array, skipna=True)
+ if isinstance(array, float):
+ array = np.array(array)
+ # ...but (NaN).astype("M8") does produce NaT
+ return array.astype(dtype)
+
+
def _select_along_axis(values, idx, axis):
other_ind = np.ix_(*[np.arange(s) for s in idx.shape])
sl = other_ind[:axis] + (idx,) + other_ind[axis:]
=====================================
tests/test_core.py
=====================================
@@ -2006,3 +2006,19 @@ def test_blockwise_avoid_rechunk():
actual, groups = groupby_reduce(array, by, func="first")
assert_equal(groups, ["", "0", "1"])
assert_equal(actual, np.array([0, 0, 0], dtype=np.int64))
+
+
+@pytest.mark.parametrize("func", ["first", "last", "nanfirst", "nanlast"])
+def test_datetime_timedelta_first_last(engine, func):
+ import flox
+
+ idx = 0 if "first" in func else -1
+
+ dt = pd.date_range("2001-01-01", freq="d", periods=5).values
+ by = np.ones(dt.shape, dtype=int)
+ actual, _ = flox.groupby_reduce(dt, by, func=func, engine=engine)
+ assert_equal(actual, dt[[idx]])
+
+ dt = dt - dt[0]
+ actual, _ = flox.groupby_reduce(dt, by, func=func, engine=engine)
+ assert_equal(actual, dt[[idx]])
View it on GitLab: https://salsa.debian.org/debian-gis-team/flox/-/commit/7325aff589f4dee162bdc519417396071a05d069
--
View it on GitLab: https://salsa.debian.org/debian-gis-team/flox/-/commit/7325aff589f4dee162bdc519417396071a05d069
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/pkg-grass-devel/attachments/20241109/ad554053/attachment-0001.htm>
More information about the Pkg-grass-devel
mailing list