[med-svn] [Git][med-team/hdmf][master] 4 commits: New upstream version 3.14.5
Étienne Mollier (@emollier)
gitlab@salsa.debian.org
Sat Nov 2 14:26:06 GMT 2024
Étienne Mollier pushed to branch master at Debian Med / hdmf
Commits:
14ca7830 by Étienne Mollier at 2024-11-02T15:19:36+01:00
New upstream version 3.14.5
- - - - -
b1fd80bc by Étienne Mollier at 2024-11-02T15:20:02+01:00
Update upstream source from tag 'upstream/3.14.5'
Update to upstream version '3.14.5'
with Debian dir 687fac28d2558727f0adb688500da43e3f9fd077
- - - - -
b2c6e1fe by Étienne Mollier at 2024-11-02T15:23:39+01:00
d/s/lintian-overrides: delete: false positive has been fixed.
- - - - -
0536be3b by Étienne Mollier at 2024-11-02T15:25:30+01:00
d/changelog: ready for upload to unstable.
- - - - -
12 changed files:
- CHANGELOG.md
- PKG-INFO
- debian/changelog
- − debian/source/lintian-overrides
- src/hdmf/_version.py
- src/hdmf/backends/hdf5/h5tools.py
- src/hdmf/build/objectmapper.py
- src/hdmf/container.py
- src/hdmf/utils.py
- src/hdmf/validate/validator.py
- tests/unit/build_tests/test_io_map.py
- tests/unit/test_io_hdf5_h5tools.py
Changes:
=====================================
CHANGELOG.md
=====================================
@@ -1,5 +1,16 @@
# HDMF Changelog
+## HDMF 3.14.5 (October 6, 2024)
+
+### Enhancements
+- Added support for overriding backend configurations of `h5py.Dataset` objects in `Container.set_data_io`. @pauladkisson [#1172](https://github.com/hdmf-dev/hdmf/pull/1172)
+
+### Bug fixes
+- Fixed a bug, introduced in 3.14.4, in writing string arrays to an HDF5 file when those arrays were read from an HDF5 file. @rly @stephprince
+ [#1189](https://github.com/hdmf-dev/hdmf/pull/1189)
+- Fixed export of scalar datasets with a compound data type. @stephprince [#1185](https://github.com/hdmf-dev/hdmf/pull/1185)
+- Fixed mamba-related error in conda-based GitHub Actions. @rly [#1194](https://github.com/hdmf-dev/hdmf/pull/1194)
+
## HDMF 3.14.4 (September 4, 2024)
### Enhancements
=====================================
PKG-INFO
=====================================
@@ -1,6 +1,6 @@
Metadata-Version: 2.3
Name: hdmf
-Version: 3.14.4
+Version: 3.14.5
Summary: A hierarchical data modeling framework for modern science data standards
Project-URL: Homepage, https://github.com/hdmf-dev/hdmf
Project-URL: Bug Tracker, https://github.com/hdmf-dev/hdmf/issues
=====================================
debian/changelog
=====================================
@@ -1,3 +1,10 @@
+hdmf (3.14.5-1) unstable; urgency=medium
+
+ * New upstream version 3.14.5
+ * d/s/lintian-overrides: delete: false positive has been fixed.
+
+ -- Étienne Mollier <emollier@debian.org> Sat, 02 Nov 2024 15:25:18 +0100
+
hdmf (3.14.4-1) unstable; urgency=medium
* New upstream version 3.14.4
=====================================
debian/source/lintian-overrides deleted
=====================================
@@ -1,2 +0,0 @@
-# False positive caused by #1077324.
-uses-deprecated-python-stdlib uu (deprecated in Python 3.11, removed in Python 3.13) [*container.py:*]
=====================================
src/hdmf/_version.py
=====================================
@@ -12,5 +12,5 @@ __version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
-__version__ = version = '3.14.4'
-__version_tuple__ = version_tuple = (3, 14, 4)
+__version__ = version = '3.14.5'
+__version_tuple__ = version_tuple = (3, 14, 5)
=====================================
src/hdmf/backends/hdf5/h5tools.py
=====================================
@@ -700,6 +700,10 @@ class HDF5IO(HDMFIO):
kwargs['dtype'] = d.dtype
elif h5obj.dtype.kind == 'V': # scalar compound data type
kwargs['data'] = np.array(scalar, dtype=h5obj.dtype)
+ cpd_dt = h5obj.dtype
+ ref_cols = [check_dtype(ref=cpd_dt[i]) or check_dtype(vlen=cpd_dt[i]) for i in range(len(cpd_dt))]
+ d = BuilderH5TableDataset(h5obj, self, ref_cols)
+ kwargs['dtype'] = HDF5IO.__compound_dtype_to_list(h5obj.dtype, d.dtype)
else:
kwargs["data"] = scalar
else:
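
For context, a minimal sketch of the scalar compound case this branch handles; the file and field names are invented for illustration and are not taken from the change:

    import h5py
    import numpy as np

    # A scalar (shape ()) dataset with a compound dtype; its dtype.kind is 'V',
    # so reading it takes the compound branch added above.
    cpd = np.dtype([('id', 'i4'), ('label', h5py.special_dtype(vlen=str))])
    with h5py.File('scalar_cpd.h5', 'w', driver='core', backing_store=False) as f:
        dset = f.create_dataset('d', data=np.array((1, 'a'), dtype=cpd))
        print(dset.shape)       # ()
        print(dset.dtype.kind)  # 'V'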
=====================================
src/hdmf/build/objectmapper.py
=====================================
@@ -602,7 +602,10 @@ class ObjectMapper(metaclass=ExtenderMeta):
def __convert_string(self, value, spec):
"""Convert string types to the specified dtype."""
def __apply_string_type(value, string_type):
- if isinstance(value, (list, tuple, np.ndarray, DataIO)):
+ # NOTE: if a user passes an h5py.Dataset that is not wrapped with an hdmf.utils.StrDataset,
+ # then this conversion may not be correct. Users should unpack their string h5py.Datasets
+ # into a numpy array (or wrap them in StrDataset) before passing them to a container object.
+ if hasattr(value, '__iter__') and not isinstance(value, (str, bytes)):
return [__apply_string_type(item, string_type) for item in value]
else:
return string_type(value)
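
A standalone sketch of the recursion pattern the new check enables (an illustration, not HDMF's actual helper):

    import numpy as np

    def apply_string_type(value, string_type):
        # Recurse into any iterable except str/bytes, which are themselves iterable.
        if hasattr(value, '__iter__') and not isinstance(value, (str, bytes)):
            return [apply_string_type(item, string_type) for item in value]
        return string_type(value)

    print(apply_string_type(np.array([['aa', 'bb'], ['cc', 'dd']]), str))
    # [['aa', 'bb'], ['cc', 'dd']] -- handles nested iterables such as h5py.Dataset
    # objects, which are iterable but not instances of list, tuple, or np.ndarray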
=====================================
src/hdmf/container.py
=====================================
@@ -2,7 +2,7 @@ import types
from abc import abstractmethod
from collections import OrderedDict
from copy import deepcopy
-from typing import Type
+from typing import Type, Optional
from uuid import uuid4
from warnings import warn
import os
@@ -11,7 +11,7 @@ import h5py
import numpy as np
import pandas as pd
-from .data_utils import DataIO, append_data, extend_data
+from .data_utils import DataIO, append_data, extend_data, AbstractDataChunkIterator
from .utils import docval, get_docval, getargs, ExtenderMeta, get_data_shape, popargs, LabelledDict
from .term_set import TermSet, TermSetWrapper
@@ -826,7 +826,14 @@ class Container(AbstractContainer):
out += '\n' + indent + right_br
return out
- def set_data_io(self, dataset_name: str, data_io_class: Type[DataIO], data_io_kwargs: dict = None, **kwargs):
+ def set_data_io(
+ self,
+ dataset_name: str,
+ data_io_class: Type[DataIO],
+ data_io_kwargs: dict = None,
+ data_chunk_iterator_class: Optional[Type[AbstractDataChunkIterator]] = None,
+ data_chunk_iterator_kwargs: dict = None, **kwargs
+ ):
"""
Apply DataIO object to a dataset field of the Container.
@@ -838,9 +845,18 @@ class Container(AbstractContainer):
Class to use for DataIO, e.g. H5DataIO or ZarrDataIO
data_io_kwargs: dict
keyword arguments passed to the constructor of the DataIO class.
+ data_chunk_iterator_class: Type[AbstractDataChunkIterator]
+ Class to use for DataChunkIterator. If None, no DataChunkIterator is used.
+ data_chunk_iterator_kwargs: dict
+ keyword arguments passed to the constructor of the DataChunkIterator class.
**kwargs:
DEPRECATED. Use data_io_kwargs instead.
kwargs are passed to the constructor of the DataIO class.
+
+ Notes
+ -----
+ If data_chunk_iterator_class is not None, the data is wrapped in the DataChunkIterator before being wrapped in
+ the DataIO. This allows for rewriting the backend configuration of hdf5 datasets.
"""
if kwargs or (data_io_kwargs is None):
warn(
@@ -851,8 +867,11 @@ class Container(AbstractContainer):
)
data_io_kwargs = kwargs
data = self.fields.get(dataset_name)
+ data_chunk_iterator_kwargs = data_chunk_iterator_kwargs or dict()
if data is None:
raise ValueError(f"{dataset_name} is None and cannot be wrapped in a DataIO class")
+ if data_chunk_iterator_class is not None:
+ data = data_chunk_iterator_class(data=data, **data_chunk_iterator_kwargs)
self.fields[dataset_name] = data_io_class(data=data, **data_io_kwargs)
@@ -896,7 +915,13 @@ class Data(AbstractContainer):
dataio.data = self.__data
self.__data = dataio
- def set_data_io(self, data_io_class: Type[DataIO], data_io_kwargs: dict) -> None:
+ def set_data_io(
+ self,
+ data_io_class: Type[DataIO],
+ data_io_kwargs: dict,
+ data_chunk_iterator_class: Optional[Type[AbstractDataChunkIterator]] = None,
+ data_chunk_iterator_kwargs: dict = None,
+ ) -> None:
"""
Apply DataIO object to the data held by this Data object.
@@ -906,8 +931,21 @@ class Data(AbstractContainer):
The DataIO to apply to the data held by this Data.
data_io_kwargs: dict
The keyword arguments to pass to the DataIO.
+ data_chunk_iterator_class: Type[AbstractDataChunkIterator]
+ The DataChunkIterator to use for the DataIO. If None, no DataChunkIterator is used.
+ data_chunk_iterator_kwargs: dict
+ The keyword arguments to pass to the DataChunkIterator.
+
+ Notes
+ -----
+ If data_chunk_iterator_class is not None, the data is wrapped in the DataChunkIterator before being wrapped in
+ the DataIO. This allows for rewriting the backend configuration of hdf5 datasets.
"""
- self.__data = data_io_class(data=self.__data, **data_io_kwargs)
+ data_chunk_iterator_kwargs = data_chunk_iterator_kwargs or dict()
+ data = self.__data
+ if data_chunk_iterator_class is not None:
+ data = data_chunk_iterator_class(data=data, **data_chunk_iterator_kwargs)
+ self.__data = data_io_class(data=data, **data_io_kwargs)
@docval({'name': 'func', 'type': types.FunctionType, 'doc': 'a function to transform *data*'})
def transform(self, **kwargs):
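
A hedged usage sketch of the new keyword, mirroring the tests added below; it assumes `container` is a Container whose "data" field already holds an h5py.Dataset read from an existing file:

    from hdmf.backends.hdf5 import H5DataIO
    from hdmf.data_utils import DataChunkIterator

    # Wrapping the existing dataset in a DataChunkIterator lets H5DataIO apply a
    # new backend configuration (here, a different chunk shape) on the next write.
    container.set_data_io(
        "data",
        H5DataIO,
        data_io_kwargs=dict(chunks=(2,)),
        data_chunk_iterator_class=DataChunkIterator,
    )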
=====================================
src/hdmf/utils.py
=====================================
@@ -1140,7 +1140,7 @@ class LabelledDict(dict):
@docval_macro('array_data')
class StrDataset(h5py.Dataset):
- """Wrapper to decode strings on reading the dataset"""
+ """Wrapper to decode strings on reading the dataset. Use only for h5py 3+."""
def __init__(self, dset, encoding, errors='strict'):
self.dset = dset
if encoding is None:
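
For illustration, a minimal sketch of what the wrapper does under h5py 3+ (in-memory file; dataset names invented):

    import h5py
    import numpy as np
    from hdmf.utils import StrDataset

    with h5py.File('strs.h5', 'w', driver='core', backing_store=False) as f:
        dset = f.create_dataset('d', data=np.array(['aa', 'bb'], dtype=h5py.special_dtype(vlen=str)))
        print(dset[0])                    # b'aa' -- h5py 3+ returns bytes by default
        print(StrDataset(dset, None)[0])  # 'aa' -- decoded to str on read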
=====================================
src/hdmf/validate/validator.py
=====================================
@@ -147,7 +147,7 @@ def get_type(data, builder_dtype=None):
# Case for h5py.Dataset and other I/O specific array types
else:
# Compound dtype
- if builder_dtype and len(builder_dtype) > 1:
+ if builder_dtype and isinstance(builder_dtype, list):
dtypes = []
string_formats = []
for i in range(len(builder_dtype)):
@@ -441,7 +441,7 @@ class DatasetValidator(BaseStorageValidator):
except EmptyArrayError:
# do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
pass
- if builder.dtype is not None and len(builder.dtype) > 1 and len(np.shape(builder.data)) == 0:
+ if isinstance(builder.dtype, list) and len(np.shape(builder.data)) == 0:
shape = () # scalar compound dataset
elif isinstance(builder.dtype, list):
shape = (len(builder.data), ) # only 1D datasets with compound types are supported
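
A plausible illustration of why isinstance replaces the len() test (the values are invented, not HDMF internals): a compound dtype is represented as a list of field specs, while a plain string dtype also has a length greater than one.

    builder_dtype = 'text'                             # plain string dtype
    print(len(builder_dtype) > 1)                      # True  -- old check wrongly treats it as compound
    print(isinstance(builder_dtype, list))             # False -- new check does not

    builder_dtype = [{'name': 'x', 'dtype': 'int32'}]  # one-field compound dtype
    print(len(builder_dtype) > 1)                      # False -- old check misses it
    print(isinstance(builder_dtype, list))             # True  -- new check catches it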
=====================================
tests/unit/build_tests/test_io_map.py
=====================================
@@ -1,4 +1,4 @@
-from hdmf.utils import docval, getargs
+from hdmf.utils import StrDataset, docval, getargs
from hdmf import Container, Data
from hdmf.backends.hdf5 import H5DataIO
from hdmf.build import (GroupBuilder, DatasetBuilder, ObjectMapper, BuildManager, TypeMap, LinkBuilder,
@@ -7,12 +7,15 @@ from hdmf.build import (GroupBuilder, DatasetBuilder, ObjectMapper, BuildManager
from hdmf.spec import (GroupSpec, AttributeSpec, DatasetSpec, SpecCatalog, SpecNamespace, NamespaceCatalog, RefSpec,
LinkSpec)
from hdmf.testing import TestCase
+import h5py
from abc import ABCMeta, abstractmethod
import unittest
import numpy as np
from tests.unit.helpers.utils import CORE_NAMESPACE, create_test_type_map
+H5PY_3 = h5py.__version__.startswith('3')
+
class Bar(Container):
@@ -460,6 +463,132 @@ class TestMapStrings(TestCase):
np.testing.assert_array_equal(builder.get('data').data, str_array_3d)
np.testing.assert_array_equal(builder.get('attr_array'), str_array_3d)
+ @unittest.skipIf(not H5PY_3, "Use StrDataset only for h5py 3+")
+ def test_build_1d_h5py_3_dataset(self):
+ bar_spec = GroupSpec(
+ doc='A test group specification with a data type',
+ data_type_def='Bar',
+ datasets=[
+ DatasetSpec(
+ doc='an example dataset',
+ dtype='text',
+ name='data',
+ shape=(None, ),
+ attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')],
+ )
+ ],
+ attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text',
+ shape=(None, ))],
+ )
+ type_map = self.customSetUp(bar_spec)
+ type_map.register_map(Bar, BarMapper)
+ # create in-memory hdf5 file that is discarded after closing
+ with h5py.File("test.h5", "w", driver="core", backing_store=False) as f:
+ str_array_1d = np.array(
+ ['aa', 'bb', 'cc', 'dd'],
+ dtype=h5py.special_dtype(vlen=str)
+ )
+ # wrap the dataset in a StrDataset to mimic how HDF5IO would read this dataset with h5py 3+
+ dataset = StrDataset(f.create_dataset('data', data=str_array_1d), None)
+ bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset)
+ builder = type_map.build(bar_inst)
+ np.testing.assert_array_equal(builder.get('data').data, dataset[:])
+ np.testing.assert_array_equal(builder.get('attr_array'), dataset[:])
+
+ @unittest.skipIf(not H5PY_3, "Use StrDataset only for h5py 3+")
+ def test_build_3d_h5py_3_dataset(self):
+ bar_spec = GroupSpec(
+ doc='A test group specification with a data type',
+ data_type_def='Bar',
+ datasets=[
+ DatasetSpec(
+ doc='an example dataset',
+ dtype='text',
+ name='data',
+ shape=(None, None, None),
+ attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')],
+ )
+ ],
+ attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text',
+ shape=(None, None, None))],
+ )
+ type_map = self.customSetUp(bar_spec)
+ type_map.register_map(Bar, BarMapper)
+ # create in-memory hdf5 file that is discarded after closing
+ with h5py.File("test.h5", "w", driver="core", backing_store=False) as f:
+ str_array_3d = np.array(
+ [[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]],
+ dtype=h5py.special_dtype(vlen=str)
+ )
+ # wrap the dataset in a StrDataset to mimic how HDF5IO would read this dataset with h5py 3+
+ dataset = StrDataset(f.create_dataset('data', data=str_array_3d), None)
+ bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset)
+ builder = type_map.build(bar_inst)
+ np.testing.assert_array_equal(builder.get('data').data, dataset[:])
+ np.testing.assert_array_equal(builder.get('attr_array'), dataset[:])
+
+ @unittest.skipIf(H5PY_3, "Create dataset differently for h5py < 3")
+ def test_build_1d_h5py_2_dataset(self):
+ bar_spec = GroupSpec(
+ doc='A test group specification with a data type',
+ data_type_def='Bar',
+ datasets=[
+ DatasetSpec(
+ doc='an example dataset',
+ dtype='text',
+ name='data',
+ shape=(None, ),
+ attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')],
+ )
+ ],
+ attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text',
+ shape=(None, ))],
+ )
+ type_map = self.customSetUp(bar_spec)
+ type_map.register_map(Bar, BarMapper)
+ # create in-memory hdf5 file that is discarded after closing
+ with h5py.File("test.h5", "w", driver="core", backing_store=False) as f:
+ str_array_1d = np.array(
+ ['aa', 'bb', 'cc', 'dd'],
+ dtype=h5py.special_dtype(vlen=str)
+ )
+ dataset = f.create_dataset('data', data=str_array_1d)
+ bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset)
+ builder = type_map.build(bar_inst)
+ np.testing.assert_array_equal(builder.get('data').data, dataset[:])
+ np.testing.assert_array_equal(builder.get('attr_array'), dataset[:])
+
+ @unittest.skipIf(H5PY_3, "Create dataset differently for h5py < 3")
+ def test_build_3d_h5py_2_dataset(self):
+ bar_spec = GroupSpec(
+ doc='A test group specification with a data type',
+ data_type_def='Bar',
+ datasets=[
+ DatasetSpec(
+ doc='an example dataset',
+ dtype='text',
+ name='data',
+ shape=(None, None, None),
+ attributes=[AttributeSpec(name='attr2', doc='an example integer attribute', dtype='int')],
+ )
+ ],
+ attributes=[AttributeSpec(name='attr_array', doc='an example array attribute', dtype='text',
+ shape=(None, None, None))],
+ )
+ type_map = self.customSetUp(bar_spec)
+ type_map.register_map(Bar, BarMapper)
+ # create in-memory hdf5 file that is discarded after closing
+ with h5py.File("test.h5", "w", driver="core", backing_store=False) as f:
+ str_array_3d = np.array(
+ [[['aa', 'bb'], ['cc', 'dd']], [['ee', 'ff'], ['gg', 'hh']]],
+ dtype=h5py.special_dtype(vlen=str)
+ )
+ dataset = f.create_dataset('data', data=str_array_3d)
+ bar_inst = Bar('my_bar', dataset, 'value1', 10, attr_array=dataset)
+ builder = type_map.build(bar_inst)
+ np.testing.assert_array_equal(builder.get('data').data, dataset[:])
+ np.testing.assert_array_equal(builder.get('attr_array'), dataset[:])
+
def test_build_dataio(self):
bar_spec = GroupSpec('A test group specification with a data type',
data_type_def='Bar',
=====================================
tests/unit/test_io_hdf5_h5tools.py
=====================================
@@ -3801,6 +3801,11 @@ class TestContainerSetDataIO(TestCase):
self.data2 = kwargs["data2"]
self.obj = ContainerWithData("name", [1, 2, 3, 4, 5], None)
+ self.file_path = get_temp_filepath()
+
+ def tearDown(self):
+ if os.path.exists(self.file_path):
+ os.remove(self.file_path)
def test_set_data_io(self):
self.obj.set_data_io("data1", H5DataIO, data_io_kwargs=dict(chunks=True))
@@ -3823,6 +3828,31 @@ class TestContainerSetDataIO(TestCase):
self.assertIsInstance(self.obj.data1, H5DataIO)
self.assertTrue(self.obj.data1.io_settings["chunks"])
+ def test_set_data_io_h5py_dataset(self):
+ file = File(self.file_path, 'w')
+ data = file.create_dataset('data', data=[1, 2, 3, 4, 5], chunks=(3,))
+ class ContainerWithData(Container):
+ __fields__ = ('data',)
+
+ @docval(
+ {"name": "name", "doc": "name", "type": str},
+ {'name': 'data', 'doc': 'field1 doc', 'type': h5py.Dataset},
+ )
+ def __init__(self, **kwargs):
+ super().__init__(name=kwargs["name"])
+ self.data = kwargs["data"]
+
+ container = ContainerWithData("name", data)
+ container.set_data_io(
+ "data",
+ H5DataIO,
+ data_io_kwargs=dict(chunks=(2,)),
+ data_chunk_iterator_class=DataChunkIterator,
+ )
+
+ self.assertIsInstance(container.data, H5DataIO)
+ self.assertEqual(container.data.io_settings["chunks"], (2,))
+ file.close()
class TestDataSetDataIO(TestCase):
@@ -3831,8 +3861,30 @@ class TestDataSetDataIO(TestCase):
pass
self.data = MyData("my_data", [1, 2, 3])
+ self.file_path = get_temp_filepath()
+
+ def tearDown(self):
+ if os.path.exists(self.file_path):
+ os.remove(self.file_path)
def test_set_data_io(self):
self.data.set_data_io(H5DataIO, dict(chunks=True))
assert isinstance(self.data.data, H5DataIO)
assert self.data.data.io_settings["chunks"]
+
+ def test_set_data_io_h5py_dataset(self):
+ file = File(self.file_path, 'w')
+ data = file.create_dataset('data', data=[1, 2, 3, 4, 5], chunks=(3,))
+ class MyData(Data):
+ pass
+
+ my_data = MyData("my_data", data)
+ my_data.set_data_io(
+ H5DataIO,
+ data_io_kwargs=dict(chunks=(2,)),
+ data_chunk_iterator_class=DataChunkIterator,
+ )
+
+ self.assertIsInstance(my_data.data, H5DataIO)
+ self.assertEqual(my_data.data.io_settings["chunks"], (2,))
+ file.close()
View it on GitLab: https://salsa.debian.org/med-team/hdmf/-/compare/fa968f39a0e1a02f44d46800b3923be4e11b8261...0536be3b8bce2a1559c0dad3059fe1ba8a6d72a7