[med-svn] [Git][med-team/hdmf][master] 3 commits: New upstream version 3.1.1
Nilesh Patra (@nilesh)
gitlab at salsa.debian.org
Thu Aug 5 15:15:15 BST 2021
Nilesh Patra pushed to branch master at Debian Med / hdmf
Commits:
854d86ba by Nilesh Patra at 2021-08-05T14:14:28+00:00
New upstream version 3.1.1
- - - - -
7fe65d19 by Nilesh Patra at 2021-08-05T14:14:29+00:00
Update manpage
- - - - -
11cc62d7 by Nilesh Patra at 2021-08-05T14:14:30+00:00
Interim changelog entry
- - - - -
19 changed files:
- PKG-INFO
- debian/changelog
- debian/validate_hdmf_spec.1
- requirements-dev.txt
- requirements.txt
- setup.cfg
- setup.py
- src/hdmf.egg-info/PKG-INFO
- src/hdmf.egg-info/SOURCES.txt
- src/hdmf/_version.py
- src/hdmf/common/alignedtable.py
- + src/hdmf/common/hierarchicaltable.py
- src/hdmf/common/table.py
- src/hdmf/container.py
- src/hdmf/data_utils.py
- src/hdmf/utils.py
- tests/unit/common/test_alignedtable.py
- + tests/unit/common/test_linkedtables.py
- tests/unit/common/test_table.py
Changes:
=====================================
PKG-INFO
=====================================
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: hdmf
-Version: 3.0.1
+Version: 3.1.1
Summary: A package for standardizing hierarchical object data
Home-page: https://github.com/hdmf-dev/hdmf
Author: Andrew Tritt
@@ -20,6 +20,7 @@ Classifier: Operating System :: Microsoft :: Windows
Classifier: Operating System :: MacOS
Classifier: Operating System :: Unix
Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
+Requires-Python: >=3.7
Description-Content-Type: text/x-rst; charset=UTF-8
========================================
=====================================
debian/changelog
=====================================
@@ -1,4 +1,4 @@
-hdmf (3.0.1-1) UNRELEASED; urgency=medium
+hdmf (3.1.1-1) UNRELEASED; urgency=medium
* Team Upload.
* Fix copyright
@@ -6,8 +6,9 @@ hdmf (3.0.1-1) UNRELEASED; urgency=medium
* Update manpage
* d/tests: Add non-superficial autopkgtests
use same build time tests in autopkgtests
+ * New upstream version 3.1.1
- -- Nilesh Patra <nilesh at debian.org> Fri, 09 Jul 2021 22:29:50 +0530
+ -- Nilesh Patra <nilesh at debian.org> Thu, 05 Aug 2021 19:37:28 +0530
hdmf (2.3.0-1) unstable; urgency=medium
=====================================
debian/validate_hdmf_spec.1
=====================================
@@ -1,5 +1,5 @@
.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.16.
-.TH VALIDATE_HDMF_SPEC "1" "July 2021" "validate_hdmf_spec 3.0.1" "User Commands"
+.TH VALIDATE_HDMF_SPEC "1" "August 2021" "validate_hdmf_spec 3.1.1" "User Commands"
.SH NAME
validate_hdmf_spec \- Hierarchical Data Modeling Framework
.SH DESCRIPTION
=====================================
requirements-dev.txt
=====================================
@@ -6,5 +6,5 @@ flake8==3.9.2
flake8-debugger==4.0.0
flake8-print==4.0.0
importlib-metadata==4.6.1
-python-dateutil==2.8.1
-tox==3.23.1
+python-dateutil==2.8.2
+tox==3.24.0
=====================================
requirements.txt
=====================================
@@ -1,8 +1,8 @@
# pinned dependencies to reproduce an entire development environment to use HDMF
h5py==3.3.0
-numpy==1.21.0
+numpy==1.21.1
scipy==1.7.0
-pandas==1.3.0
+pandas==1.3.1
ruamel.yaml==0.17.10
jsonschema==3.2.0
-setuptools==57.1.0
+setuptools==57.4.0
=====================================
setup.cfg
=====================================
@@ -1,6 +1,3 @@
-[bdist_wheel]
-universal = 1
-
[versioneer]
vcs = git
versionfile_source = src/hdmf/_version.py
=====================================
setup.py
=====================================
@@ -38,6 +38,7 @@ setup_args = {
'packages': pkgs,
'package_dir': {'': 'src'},
'package_data': {'hdmf': ["%s/*.yaml" % schema_dir, "%s/*.json" % schema_dir]},
+ 'python_requires': '>=3.7',
'classifiers': [
"Programming Language :: Python",
"Programming Language :: Python :: 3.7",
=====================================
src/hdmf.egg-info/PKG-INFO
=====================================
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: hdmf
-Version: 3.0.1
+Version: 3.1.1
Summary: A package for standardizing hierarchical object data
Home-page: https://github.com/hdmf-dev/hdmf
Author: Andrew Tritt
@@ -20,6 +20,7 @@ Classifier: Operating System :: Microsoft :: Windows
Classifier: Operating System :: MacOS
Classifier: Operating System :: Unix
Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
+Requires-Python: >=3.7
Description-Content-Type: text/x-rst; charset=UTF-8
========================================
=====================================
src/hdmf.egg-info/SOURCES.txt
=====================================
@@ -44,6 +44,7 @@ src/hdmf/build/objectmapper.py
src/hdmf/build/warnings.py
src/hdmf/common/__init__.py
src/hdmf/common/alignedtable.py
+src/hdmf/common/hierarchicaltable.py
src/hdmf/common/multi.py
src/hdmf/common/resources.py
src/hdmf/common/sparse.py
@@ -99,6 +100,7 @@ tests/unit/common/test_alignedtable.py
tests/unit/common/test_common.py
tests/unit/common/test_common_io.py
tests/unit/common/test_generate_table.py
+tests/unit/common/test_linkedtables.py
tests/unit/common/test_multi.py
tests/unit/common/test_resources.py
tests/unit/common/test_sparse.py
=====================================
src/hdmf/_version.py
=====================================
@@ -8,11 +8,11 @@ import json
version_json = '''
{
- "date": "2021-07-07T09:42:02-0700",
+ "date": "2021-07-29T16:55:01-0700",
"dirty": false,
"error": null,
- "full-revisionid": "935d9838bb4268768e9eaab2e56f7d5c936ef1f4",
- "version": "3.0.1"
+ "full-revisionid": "df31c59aa396a9920077eb3970d966e9d0f7a75b",
+ "version": "3.1.1"
}
''' # END VERSION_JSON
=====================================
src/hdmf/common/alignedtable.py
=====================================
@@ -20,6 +20,10 @@ class AlignedDynamicTable(DynamicTable):
defines a 2-level table in which the main data is stored in the main table implemented by this type
and additional columns of the table are grouped into categories, with each category being
represented by a separate DynamicTable stored within the group.
+
+ NOTE: To remain compatible with DynamicTable, the attribute colnames represents only the
+ columns of the main table (not including the category tables). To get the full list of
+ column names, use the get_colnames() function instead.
"""
__fields__ = ({'name': 'category_tables', 'child': True}, )
@@ -209,6 +213,28 @@ class AlignedDynamicTable(DynamicTable):
for category, values in category_data.items():
self.category_tables[category].add_row(**values)
+ @docval({'name': 'include_category_tables', 'type': bool,
+ 'doc': "Ignore sub-category tables and just look at the main table", 'default': False},
+ {'name': 'ignore_category_ids', 'type': bool,
+ 'doc': "Ignore id columns of sub-category tables", 'default': False})
+ def get_colnames(self, **kwargs):
+ """Get the full list of names of columns for this table
+
+ :returns: List of tuples (str, str) where the first string is the name of the DynamicTable
+ that contains the column and the second string is the name of the column. If
+ include_category_tables is False, then a list of column names is returned.
+ """
+ if not getargs('include_category_tables', kwargs):
+ return self.colnames
+ else:
+ ignore_category_ids = getargs('ignore_category_ids', kwargs)
+ columns = [(self.name, c) for c in self.colnames]
+ for category in self.category_tables.values():
+ if not ignore_category_ids:
+ columns += [(category.name, 'id'), ]
+ columns += [(category.name, c) for c in category.colnames]
+ return columns
+
@docval({'name': 'ignore_category_ids', 'type': bool,
'doc': "Ignore id columns of sub-category tables", 'default': False})
def to_dataframe(self, **kwargs):
@@ -225,21 +251,62 @@ class AlignedDynamicTable(DynamicTable):
def __getitem__(self, item):
"""
- :param item: Selection defining the items of interest. This may be a
+ Called to implement standard array slicing syntax.
- * **int, list, array, slice** : Return one or multiple row of the table as a DataFrame
- * **string** : Return a single category table as a DynamicTable or a single column of the
- primary table as a
- * **tuple**: Get a column, row, or cell from a particular category. The tuple is expected to consist
- of (category, selection) where category may be a string with the name of the sub-category
- or None (or the name of this AlignedDynamicTable) if we want to slice into the main table.
+ Same as ``self.get(item)``. See :py:meth:`~hdmf.common.alignedtable.AlignedDynamicTable.get` for details.
+ """
+ return self.get(item)
- :returns: DataFrame when retrieving a row or category. Returns scalar when selecting a cell.
- Returns a VectorData/VectorIndex when retrieving a single column.
+ def get(self, item, **kwargs):
+ """
+ Access elements (rows, columns, category tables etc.) from the table. Instead of calling
+ this function directly, the class also implements standard array slicing syntax
+ via :py:meth:`~hdmf.common.alignedtable.AlignedDynamicTable.__getitem__`
+ (which calls this function). For example, instead of calling
+ ``self.get(item=slice(2,5))`` we may use the often more convenient form of ``self[2:5]`` instead.
+
+ :param item: Selection defining the items of interest. This may be either a:
+
+ * **int, list, array, slice** : Return one or multiple rows of the table as a pandas.DataFrame. For example:
+ * ``self[0]`` : Select the first row of the table
+ * ``self[[0,3]]`` : Select the first and fourth row of the table
+ * ``self[1:4]`` : Select the rows with index 1,2,3 from the table
+
+ * **string** : Return a column from the main table or a category table. For example:
+ * ``self['column']`` : Return the column from the main table.
+ * ``self['my_category']`` : Returns a DataFrame of the ``my_category`` category table.
+ This is a shorthand for ``self.get_category('my_category').to_dataframe()``.
+
+ * **tuple**: Get a column, row, or cell from a particular category table.
+ The tuple is expected to consist of the following elements:
+
+ * ``category``: string with the name of the category. To select from the main
+ table use ``self.name`` or ``None``.
+ * ``column``: string with the name of the column, and
+ * ``row``: integer index of the row.
+
+ The tuple itself then may take the following forms:
+
+ * Select a single column from a table via:
+ * ``self[category, column]``
+ * Select a single full row of a given category table via:
+ * ``self[row, category]`` (recommended, for consistency with DynamicTable)
+ * ``self[category, row]``
+ * Select a single cell via:
+ * ``self[row, (category, column)]`` (recommended, for consistency with DynamicTable)
+ * ``self[row, category, column]``
+ * ``self[category, column, row]``
+
+ :returns: Depending on the type of selection the function returns a:
+
+ * **pandas.DataFrame**: when retrieving a row or category table
+ * **array** : when retrieving a single column
+ * **single value** : when retrieving a single cell. The data type and shape will depend on the
+ data type and shape of the cell/column.
"""
if isinstance(item, (int, list, np.ndarray, slice)):
# get a single full row from all tables
- dfs = ([super().__getitem__(item).reset_index(), ] +
+ dfs = ([super().get(item, **kwargs).reset_index(), ] +
[category[item].reset_index() for category in self.category_tables.values()])
names = [self.name, ] + list(self.category_tables.keys())
res = pd.concat(dfs, axis=1, keys=names)
@@ -248,14 +315,101 @@ class AlignedDynamicTable(DynamicTable):
elif isinstance(item, str) or item is None:
if item in self.colnames:
# get a specific column
- return super().__getitem__(item)
+ return super().get(item, **kwargs)
else:
# get a single category
return self.get_category(item).to_dataframe()
elif isinstance(item, tuple):
if len(item) == 2:
- return self.get_category(item[0])[item[1]]
+ # DynamicTable allows selection of cells via the syntax [int, str], i.e., [row_index, columnname]
+ # We support this syntax here as well with the additional caveat that in AlignedDynamicTable
+ # columns are identified by tuples of strings. As such [int, str] refers not to a cell but
+ # a single row in a particular category table (i.e., [row_index, category]). To select a cell
+ # the second part of the item then is a tuple of strings, i.e., [row_index, (category, column)]
+ if isinstance(item[0], (int, np.integer)):
+ # Select a single cell or row of a sub-table based on row-index(item[0])
+ # and the category (if item[1] is a string) or column (if item[1] is a tuple of (category, column)
+ re = self[item[0]][item[1]]
+ # re is a pandas.Series or pandas.DataFrame. If we selected a single cell
+ # (i.e., item[1] was a tuple defining a particular column) then return the value of the cell
+ if re.size == 1:
+ re = re.values[0]
+ # If we selected a single cell from a ragged column then we need to change the list to a tuple
+ if isinstance(re, list):
+ re = tuple(re)
+ # We selected a row of a whole table (i.e., item[1] identified only the category table,
+ # but not a particular column).
+ # Change the result from a pandas.Series to a pandas.DataFrame for consistency with DynamicTable
+ if isinstance(re, pd.Series):
+ re = re.to_frame()
+ return re
+ else:
+ return self.get_category(item[0])[item[1]]
elif len(item) == 3:
- return self.get_category(item[0])[item[1]][item[2]]
+ if isinstance(item[0], (int, np.integer)):
+ return self.get_category(item[1])[item[2]][item[0]]
+ else:
+ return self.get_category(item[0])[item[1]][item[2]]
else:
- raise ValueError("Expected tuple of length 2 or 3 with (category, column, row) as value.")
+ raise ValueError("Expected tuple of length 2 of the form [category, column], [row, category], "
+ "[row, (category, column)] or a tuple of length 3 of the form "
+ "[category, column, row], [row, category, column]")
+
+ @docval({'name': 'ignore_category_tables', 'type': bool,
+ 'doc': "Ignore the category tables and only check in the main table columns", 'default': False},
+ allow_extra=False)
+ def has_foreign_columns(self, **kwargs):
+ """
+ Does the table contain DynamicTableRegion columns
+
+ :returns: True if the table or any of the category tables contains a DynamicTableRegion column, else False
+ """
+ ignore_category_tables = getargs('ignore_category_tables', kwargs)
+ if super().has_foreign_columns():
+ return True
+ if not ignore_category_tables:
+ for table in self.category_tables.values():
+ if table.has_foreign_columns():
+ return True
+ return False
+
+ @docval({'name': 'ignore_category_tables', 'type': bool,
+ 'doc': "Ignore the category tables and only check in the main table columns", 'default': False},
+ allow_extra=False)
+ def get_foreign_columns(self, **kwargs):
+ """
+ Determine the names of all columns that link to another DynamicTable, i.e.,
+ find all DynamicTableRegion type columns. Similar to a foreign key in a
+ database, a DynamicTableRegion column references elements in another table.
+
+ :returns: List of tuples (str, str) where the first string is the name of the
+ category table (or None if the column is in the main table) and the
+ second string is the column name.
+ """
+ ignore_category_tables = getargs('ignore_category_tables', kwargs)
+ col_names = [(None, col_name) for col_name in super().get_foreign_columns()]
+ if not ignore_category_tables:
+ for table in self.category_tables.values():
+ col_names += [(table.name, col_name) for col_name in table.get_foreign_columns()]
+ return col_names
+
+ @docval(*get_docval(DynamicTable.get_linked_tables),
+ {'name': 'ignore_category_tables', 'type': bool,
+ 'doc': "Ignore the category tables and only check in the main table columns", 'default': False},
+ allow_extra=False)
+ def get_linked_tables(self, **kwargs):
+ """
+ Get the full list of all tables that are linked to directly or indirectly from this table
+ via DynamicTableRegion columns included in this table or in any table that
+ can be reached through DynamicTableRegion columns
+
+
+ Returns: List of NamedTuple objects with the following fields:
+ * 'source_table' : The source table containing the DynamicTableRegion column
+ * 'source_column' : The relevant DynamicTableRegion column in the 'source_table'
+ * 'target_table' : The target DynamicTable; same as source_column.table.
+
+ """
+ ignore_category_tables = getargs('ignore_category_tables', kwargs)
+ other_tables = None if ignore_category_tables else list(self.category_tables.values())
+ return super().get_linked_tables(other_tables=other_tables)
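For orientation, a minimal sketch of how the new AlignedDynamicTable accessors added in this version can be used; the names adt, cat1, m1 and c1 below are illustrative and not taken from the commit:

    import numpy as np
    from hdmf.common import AlignedDynamicTable, DynamicTable, VectorData

    # one category table plus a main table, both with three rows
    category = DynamicTable(name='cat1', description='category table',
                            colnames=['c1'],
                            columns=[VectorData(name='c1', description='c1',
                                                data=np.arange(3) + 10)])
    adt = AlignedDynamicTable(name='adt', description='aligned table',
                              category_tables=[category],
                              colnames=['m1'],
                              columns=[VectorData(name='m1', description='m1',
                                                  data=np.arange(3))])

    adt.get_colnames()                              # main-table columns only, e.g. ('m1',)
    adt.get_colnames(include_category_tables=True)  # e.g. [('adt', 'm1'), ('cat1', 'id'), ('cat1', 'c1')]
    adt[0]                   # first row across main and category tables as a DataFrame
    adt['cat1']              # the whole category table as a DataFrame
    adt[1, 'cat1']           # row 1 of the category table as a DataFrame
    adt[1, ('cat1', 'c1')]   # a single cell, here 11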
=====================================
src/hdmf/common/hierarchicaltable.py
=====================================
@@ -0,0 +1,248 @@
+"""
+Module providing additional functionality for dealing with hierarchically nested tables, i.e.,
+tables containing DynamicTableRegion references.
+"""
+import pandas as pd
+import numpy as np
+from hdmf.common.table import DynamicTable, DynamicTableRegion, VectorIndex
+from hdmf.common.alignedtable import AlignedDynamicTable
+from hdmf.utils import docval, getargs
+
+
+@docval({'name': 'dynamic_table', 'type': DynamicTable,
+ 'doc': 'DynamicTable object to be converted to a hierarchical pandas.Dataframe'},
+ returns="Hierarchical pandas.DataFrame with usually a pandas.MultiIndex on both the index and columns.",
+ rtype='pandas.DataFrame',
+ is_method=False)
+def to_hierarchical_dataframe(dynamic_table):
+ """
+ Create a hierarchical pandas.DataFrame that represents all data from a collection of linked DynamicTables.
+
+ **LIMITATIONS:** Currently this function only supports DynamicTables with a single DynamicTableRegion column.
+ If a table has more than one DynamicTableRegion column then the function will expand only the
+ first DynamicTableRegion column found for each table. Any additional DynamicTableRegion columns will remain
+ nested.
+
+ **NOTE:** Some useful functions for further processing of the generated
+ DataFrame include:
+
+ * pandas.DataFrame.reset_index to turn the data from the pandas.MultiIndex into columns
+ * :py:meth:`~hdmf.common.hierarchicaltable.drop_id_columns` to remove all 'id' columns
+ * :py:meth:`~hdmf.common.hierarchicaltable.flatten_column_index` to flatten the column index
+ """
+ # TODO: Need to deal with the case where we have more than one DynamicTableRegion column in a given table
+ # Get the references column
+ foreign_columns = dynamic_table.get_foreign_columns()
+ # if table does not contain any DynamicTableRegion columns then we can just convert it to a dataframe
+ if len(foreign_columns) == 0:
+ return dynamic_table.to_dataframe()
+ hcol_name = foreign_columns[0] # We only denormalize the first foreign column for now
+ hcol = dynamic_table[hcol_name] # Either a VectorIndex pointing to a DynamicTableRegion or a DynamicTableRegion
+ # Get the target DynamicTable that hcol is pointing to. If hcol is a VectorIndex then we first need
+ # to get the target of it before we look up the table.
+ hcol_target = hcol.table if isinstance(hcol, DynamicTableRegion) else hcol.target.table
+
+ # Create the data variables we need to collect the data for our output dataframe and associated index
+ index = []
+ data = []
+ columns = None
+ index_names = None
+
+ # First we here get a list of DataFrames, one for each row of the column we need to process.
+ # If hcol is a VectorIndex (i.e., our column is a ragged array of row indices), then simply loading
+ # the data from the VectorIndex will do the trick. If we have a regular DynamicTableRegion column,
+ # then we need to load the elements ourselves (using slice syntax to make sure we get DataFrames)
+ # one-row-at-a-time
+ if isinstance(hcol, VectorIndex):
+ rows = hcol.get(slice(None), index=False, df=True)
+ else:
+ rows = [hcol[i:(i+1)] for i in range(len(hcol))]
+ # Retrieve the columns we need to iterate over from our input table. For AlignedDynamicTable we need to
+ # use the get_colnames function instead of the colnames property to ensure we get all columns not just
+ # the columns from the main table
+ dynamic_table_colnames = (dynamic_table.get_colnames(include_category_tables=True, ignore_category_ids=False)
+ if isinstance(dynamic_table, AlignedDynamicTable)
+ else dynamic_table.colnames)
+
+ # Case 1: Our DynamicTableRegion column points to a DynamicTable that itself does not contain
+ # any DynamicTableRegion references (i.e., we have reached the end of our table hierarchy).
+ # If this is the case then we need to de-normalize the data and flatten the hierarchy
+ if not hcol_target.has_foreign_columns():
+ # Iterate over all rows, where each row is described by a DataFrame with one-or-more rows
+ for row_index, row_df in enumerate(rows):
+ # Since each row contains a pandas.DataFrame (with possibly multiple rows), we
+ # next need to iterate over all rows in that table to denormalize our data
+ for row in row_df.itertuples(index=True):
+ # Determine the column data for our row. Each selected row from our target table
+ # becomes a row in our flattened table
+ data.append(row)
+ # Determine the multi-index tuple for our row, consisting of: i) id of the row in this
+ # table, ii) all columns (except the hierarchical column we are flattening), and
+ # iii) the index (i.e., id) from our target row
+ index_data = ([dynamic_table.id[row_index], ] +
+ [dynamic_table[row_index, colname]
+ for colname in dynamic_table_colnames if colname != hcol_name])
+ index.append(tuple(index_data))
+
+ # Determine the names for our index and columns of our output table
+ # We need to do this even if our table was empty (i.e., even if len(rows) == 0)
+ # NOTE: While for a regular DynamicTable the "colnames" property will give us the full list of column names,
+ # for AlignedDynamicTable we need to use the get_colnames() function instead to make sure we include
+ # the category table columns as well.
+ index_names = ([(dynamic_table.name, 'id')] +
+ [(dynamic_table.name, colname)
+ for colname in dynamic_table_colnames if colname != hcol_name])
+ # Determine the name of our columns
+ hcol_iter_columns = (hcol_target.get_colnames(include_category_tables=True, ignore_category_ids=False)
+ if isinstance(hcol_target, AlignedDynamicTable)
+ else hcol_target.colnames)
+ columns = pd.MultiIndex.from_tuples([(hcol_target.name, 'id'), ] +
+ [(hcol_target.name, c) for c in hcol_iter_columns],
+ names=('source_table', 'label'))
+
+ # Case 2: Our DynamicTableRegion column points to another table with a DynamicTableRegion, i.e.,
+ # we need to recursively resolve more levels of the table hierarchy
+ else:
+ # First we need to recursively flatten the hierarchy by calling 'to_hierarchical_dataframe()'
+ # (i.e., this function) on the target of our hierarchical column
+ hcol_hdf = to_hierarchical_dataframe(hcol_target)
+ # Iterate over all rows, where each row is described by a DataFrame with one-or-more rows
+ for row_index, row_df_level1 in enumerate(rows):
+ # Since each row contains a pandas.DataFrame (with possibly multiple rows), we
+ # next need to iterate over all rows in that table to denormalize our data
+ for row_df_level2 in row_df_level1.itertuples(index=True):
+ # Since our target is itself a DynamicTable with DynamicTableRegion columns,
+ # each target row itself may expand into multiple rows in the flattened hcol_hdf.
+ # So we now need to look up the rows in hcol_hdf that correspond to the rows in
+ # row_df_level2.
+ # NOTE: In this look-up we assume that the ids (and hence the index) of
+ # each row in the table are in fact unique.
+ for row_tuple_level3 in hcol_hdf.loc[[row_df_level2[0]]].itertuples(index=True):
+ # Determine the column data for our row.
+ data.append(row_tuple_level3[1:])
+ # Determine the multi-index tuple for our row,
+ index_data = ([dynamic_table.id[row_index], ] +
+ [dynamic_table[row_index, colname]
+ for colname in dynamic_table_colnames if colname != hcol_name] +
+ list(row_tuple_level3[0]))
+ index.append(tuple(index_data))
+ # Determine the names for our index and columns of our output table
+ # We need to do this even if our table was empty (i.e., even if len(rows) == 0)
+ index_names = ([(dynamic_table.name, "id")] +
+ [(dynamic_table.name, colname)
+ for colname in dynamic_table_colnames if colname != hcol_name] +
+ hcol_hdf.index.names)
+ columns = hcol_hdf.columns
+
+ # Construct the pandas dataframe with the hierarchical multi-index
+ multi_index = pd.MultiIndex.from_tuples(index, names=index_names)
+ out_df = pd.DataFrame(data=data, index=multi_index, columns=columns)
+ return out_df
+
+
+def __get_col_name(col):
+ """
+ Internal helper function to get the actual name of a pandas DataFrame column from a
+ column name that may consist of an arbitrary sequence of tuples. The function
+ will return the last value of the innermost tuple.
+ """
+ curr_val = col
+ while isinstance(curr_val, tuple):
+ curr_val = curr_val[-1]
+ return curr_val
+
+
+def __flatten_column_name(col):
+ """
+ Internal helper function used to iteratively flatten a nested tuple
+
+ :param col: Column name to flatten
+ :type col: Tuple or String
+
+ :returns: If col is a tuple then the result is a flat tuple otherwise col is returned as is
+ """
+ if isinstance(col, tuple):
+ re = col
+ while np.any([isinstance(v, tuple) for v in re]):
+ temp = []
+ for v in re:
+ if isinstance(v, tuple):
+ temp += list(v)
+ else:
+ temp += [v, ]
+ re = temp
+ return tuple(re)
+ else:
+ return col
+
+
+@docval({'name': 'dataframe', 'type': pd.DataFrame,
+ 'doc': 'Pandas dataframe to update (usually generated by the to_hierarchical_dataframe function)'},
+ {'name': 'inplace', 'type': 'bool', 'doc': 'Update the dataframe inplace or return a modified copy',
+ 'default': False},
+ returns="pandas.DataFrame with the id columns removed",
+ rtype='pandas.DataFrame',
+ is_method=False)
+def drop_id_columns(**kwargs):
+ """
+ Drop all columns named 'id' from the table.
+
+ In case a column name is a tuple the function will drop any column for which
+ the inner-most name is 'id'. The 'id' columns of a DynamicTable are in many cases
+ not necessary for analysis or display. This function allows us to easily filter
+ out all those columns.
+
+ :raises TypeError: If the dataframe parameter is not a pandas.DataFrame.
+ """
+ dataframe, inplace = getargs('dataframe', 'inplace', kwargs)
+ col_name = 'id'
+ drop_labels = []
+ for col in dataframe.columns:
+ if __get_col_name(col) == col_name:
+ drop_labels.append(col)
+ re = dataframe.drop(labels=drop_labels, axis=1, inplace=inplace)
+ return dataframe if inplace else re
+
+
+@docval({'name': 'dataframe', 'type': pd.DataFrame,
+ 'doc': 'Pandas dataframe to update (usually generated by the to_hierarchical_dataframe function)'},
+ {'name': 'max_levels', 'type': (int, np.integer),
+ 'doc': 'Maximum number of levels to use in the resulting column Index. NOTE: When '
+ 'limiting the number of levels the function simply removes levels from the '
+ 'beginning. As such, removing levels may result in columns with duplicate names. '
+ 'Value must be >0.',
+ 'default': None},
+ {'name': 'inplace', 'type': 'bool', 'doc': 'Update the dataframe inplace or return a modified copy',
+ 'default': False},
+ returns="pandas.DataFrame with a regular pandas.Index columns rather and a pandas.MultiIndex",
+ rtype='pandas.DataFrame',
+ is_method=False)
+def flatten_column_index(**kwargs):
+ """
+ Flatten the column index of a pandas DataFrame.
+
+ The function changes the dataframe.columns from a pandas.MultiIndex to a normal Index,
+ with each column usually being identified by a tuple of strings. This function is
+ typically used in conjunction with DataFrames generated
+ by :py:meth:`~hdmf.common.hierarchicaltable.to_hierarchical_dataframe`
+
+ :raises ValueError: If max_levels is not greater than 0
+ :raises TypeError: If the dataframe parameter is not a pandas.DataFrame.
+ """
+ dataframe, max_levels, inplace = getargs('dataframe', 'max_levels', 'inplace', kwargs)
+ if max_levels is not None and max_levels <= 0:
+ raise ValueError('max_levels must be greater than 0')
+ # Compute the new column names
+ col_names = [__flatten_column_name(col) for col in dataframe.columns.values]
+ # Apply the max_levels filter. Make sure to do this only for columns that are actually tuples
+ # in order not to accidentally shorten the actual string name of columns
+ if max_levels is None:
+ select_levels = slice(None)
+ elif max_levels == 1:
+ select_levels = -1
+ else: # max_levels > 1
+ select_levels = slice(-max_levels, None)
+ col_names = [col[select_levels] if isinstance(col, tuple) else col for col in col_names]
+ re = dataframe if inplace else dataframe.copy()
+ re.columns = col_names
+ return re
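As a rough usage sketch of the new hierarchicaltable module (the table and column names parent, child, value and child_ref are made up for illustration):

    import numpy as np
    from hdmf.common import DynamicTable, DynamicTableRegion, VectorData
    from hdmf.common.hierarchicaltable import (to_hierarchical_dataframe,
                                               drop_id_columns, flatten_column_index)

    # child table with a plain data column
    child = DynamicTable(name='child', description='child table',
                         colnames=['value'],
                         columns=[VectorData(name='value', description='value',
                                             data=np.arange(4))])
    # parent table whose DynamicTableRegion column points into the child table
    parent = DynamicTable(name='parent', description='parent table',
                          colnames=['child_ref'],
                          columns=[DynamicTableRegion(name='child_ref',
                                                      description='rows of child',
                                                      data=[0, 1, 2, 3], table=child)])

    df = to_hierarchical_dataframe(parent)  # MultiIndex on both rows and columns
    df = drop_id_columns(df)                # strip the 'id' columns
    df = flatten_column_index(df)           # column labels become flat tuples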
=====================================
src/hdmf/common/table.py
=====================================
@@ -5,6 +5,7 @@ the storage and use of dynamic data tables as part of the hdmf-common schema
import re
from collections import OrderedDict
+from typing import NamedTuple, Union
from warnings import warn
import numpy as np
@@ -47,9 +48,21 @@ class VectorData(Data):
self.append(val)
def get(self, key, **kwargs):
+ """
+ Retrieve elements from this VectorData
+
+ :param key: Selection of the elements
+ :param **kwargs: Ignored
+ """
return super().get(key)
def extend(self, ar, **kwargs):
+ """Add all elements of the iterable arg to the end of this VectorData.
+
+ Each subclass of VectorData should have its own extend method to ensure functionality and efficiency.
+
+ :param ar: The iterable to add to the end of this VectorData
+ """
#################################################################################
# Each subclass of VectorData should have its own extend method to ensure
# functionality AND efficiency of the extend operation. However, because currently
@@ -805,14 +818,14 @@ class DynamicTable(Container):
:param key: Key defining which elements of the table to select. This may be one of the following:
1) string with the name of the column to select
- 2) a tuple consisting of (str, int) where the string identifies the column to select by name
- and the int selects the row
+ 2) a tuple consisting of (int, str) where the int selects the row and the string identifies the
+ column to select by name
3) int, list of ints, array, or slice selecting a set of full rows in the table. If an int is used, then
scalars are returned for each column that has a single value. If a list, array, or slice is used and
df=False, then lists are returned for each column, even if the list, array, or slice resolves to a
single row.
- :return: 1) If key is a string, then return array with the data of the selected column
+ :return: 1) If key is a string, then return the VectorData object representing the column with the string name
2) If key is a tuple of (int, str), then return the scalar value of the selected cell
3) If key is an int, list, np.ndarray, or slice, then return pandas.DataFrame or lists
consisting of one or more rows
@@ -927,6 +940,7 @@ class DynamicTable(Container):
else: # scalar, don't wrap
df_input[k] = coldata[k]
ret = pd.DataFrame(df_input, index=pd.Index(name=self.id.name, data=id_index))
+ ret.name = self.name
return ret
def __get_selection_as_df(self, coldata):
@@ -951,6 +965,7 @@ class DynamicTable(Container):
else:
df_input[k] = coldata[k]
ret = pd.DataFrame(df_input, index=pd.Index(name=self.id.name, data=id_index))
+ ret.name = self.name
return ret
def __contains__(self, val):
@@ -959,6 +974,72 @@ class DynamicTable(Container):
"""
return val in self.__colids or val in self.__indices
+ def get_foreign_columns(self):
+ """
+ Determine the names of all columns that link to another DynamicTable, i.e.,
+ find all DynamicTableRegion type columns. Similar to a foreign key in a
+ database, a DynamicTableRegion column references elements in another table.
+
+ :returns: List of strings with the column names
+ """
+ col_names = []
+ for col_index, col in enumerate(self.columns):
+ if isinstance(col, DynamicTableRegion):
+ col_names.append(col.name)
+ return col_names
+
+ def has_foreign_columns(self):
+ """
+ Does the table contain DynamicTableRegion columns
+
+ :returns: True if the table contains a DynamicTableRegion column, else False
+ """
+ for col_index, col in enumerate(self.columns):
+ if isinstance(col, DynamicTableRegion):
+ return True
+ return False
+
+ @docval({'name': 'other_tables', 'type': (list, tuple, set),
+ 'doc': "List of additional tables to consider in the search. Usually this "
+ "parameter is used for internal purposes, e.g., when we need to "
+ "consider AlignedDynamicTable", 'default': None},
+ allow_extra=False)
+ def get_linked_tables(self, **kwargs):
+ """
+ Get the full list of all tables that are linked to directly or indirectly from this table
+ via DynamicTableRegion columns included in this table or in any table that
+ can be reached through DynamicTableRegion columns
+
+ Returns: List of NamedTuple objects with:
+ * 'source_table' : The source table containing the DynamicTableRegion column
+ * 'source_column' : The relevant DynamicTableRegion column in the 'source_table'
+ * 'target_table' : The target DynamicTable; same as source_column.table.
+ """
+ link_type = NamedTuple('DynamicTableLink',
+ [('source_table', DynamicTable),
+ ('source_column', Union[DynamicTableRegion, VectorIndex]),
+ ('target_table', DynamicTable)])
+ curr_tables = [self, ] # Set of tables
+ other_tables = getargs('other_tables', kwargs)
+ if other_tables is not None:
+ curr_tables += other_tables
+ curr_index = 0
+ foreign_cols = []
+ while curr_index < len(curr_tables):
+ for col_index, col in enumerate(curr_tables[curr_index].columns):
+ if isinstance(col, DynamicTableRegion):
+ foreign_cols.append(link_type(source_table=curr_tables[curr_index],
+ source_column=col,
+ target_table=col.table))
+ curr_table_visited = False
+ for t in curr_tables:
+ if t is col.table:
+ curr_table_visited = True
+ if not curr_table_visited:
+ curr_tables.append(col.table)
+ curr_index += 1
+ return foreign_cols
+
@docval({'name': 'exclude', 'type': set, 'doc': 'Set of column names to exclude from the dataframe',
'default': None},
{'name': 'index', 'type': bool,
@@ -1127,12 +1208,20 @@ class DynamicTableRegion(VectorData):
"""
Subset the DynamicTableRegion
- :param arg: 1) tuple consisting of (str, int) where the string defines the column to select
- and the int selects the row, 2) int or slice to select a subset of rows
+ :param arg: Key defining which elements of the table to select. This may be one of the following:
+
+ 1) string with the name of the column to select
+ 2) a tuple consisting of (int, str) where the int selects the row and the string identifies the
+ column to select by name
+ 3) int, list of ints, array, or slice selecting a set of full rows in the table. If an int is used, then
+ scalars are returned for each column that has a single value. If a list, array, or slice is used and
+ df=False, then lists are returned for each column, even if the list, array, or slice resolves to a
+ single row.
+
:param index: Boolean indicating whether to return indices of the DTR (default False)
:param df: Boolean indicating whether to return the result as a pandas DataFrame (default True)
- :return: Result from self.table[....] with the appropriate selection based on the
+ :return: Result from self.table[...] with the appropriate selection based on the
rows selected by this DynamicTableRegion
"""
if not df and not index:
@@ -1144,6 +1233,8 @@ class DynamicTableRegion(VectorData):
arg1 = arg[0]
arg2 = arg[1]
return self.table[self.data[arg1], arg2]
+ elif isinstance(arg, str):
+ return self.table[arg]
elif np.issubdtype(type(arg), np.integer):
if arg >= len(self.data):
raise IndexError('index {} out of bounds for data of length {}'.format(arg, len(self.data)))
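A short sketch of the new foreign-column introspection helpers and the string indexing added to DynamicTableRegion; the names targets, source, ref and c1 are illustrative only:

    import numpy as np
    from hdmf.common import DynamicTable, DynamicTableRegion, VectorData

    targets = DynamicTable(name='targets', description='target table',
                           colnames=['c1'],
                           columns=[VectorData(name='c1', description='c1',
                                               data=np.arange(4))])
    source = DynamicTable(name='source', description='source table',
                          colnames=['ref'],
                          columns=[DynamicTableRegion(name='ref', description='rows of targets',
                                                      data=[0, 2], table=targets)])

    source.has_foreign_columns()   # True
    source.get_foreign_columns()   # ['ref']
    link = source.get_linked_tables()[0]
    (link.source_table.name, link.source_column.name, link.target_table.name)
    # ('source', 'ref', 'targets')

    dtr = source['ref']            # the DynamicTableRegion column object
    dtr['c1']                      # new in this version: forwards to targets['c1']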
=====================================
src/hdmf/container.py
=====================================
@@ -550,6 +550,12 @@ class Data(AbstractContainer):
self.__data = append_data(self.__data, arg)
def extend(self, arg):
+ """
+ The extend method adds all the elements of the iterable arg to the
+ end of the data of this Data container.
+
+ :param arg: The iterable to add to the end of this Data container
+ """
self.__data = extend_data(self.__data, arg)
=====================================
src/hdmf/data_utils.py
=====================================
@@ -27,6 +27,11 @@ def append_data(data, arg):
def extend_data(data, arg):
+ """Add all the elements of the iterable arg to the end of data.
+
+ :param data: The array to extend
+ :type data: list, DataIO, np.ndarray, h5py.Dataset
+ """
if isinstance(data, (list, DataIO)):
data.extend(arg)
return data
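A small sketch of the extend behavior documented above; the column name vals is made up:

    from hdmf.common import VectorData
    from hdmf.data_utils import extend_data

    col = VectorData(name='vals', description='example column', data=[1, 2, 3])
    col.extend([4, 5])                   # appends all elements of the iterable to the column data

    data = [10, 11]
    data = extend_data(data, [12, 13])   # plain lists are extended in place and returned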
=====================================
src/hdmf/utils.py
=====================================
@@ -492,6 +492,8 @@ def docval(*validator, **options): # noqa: C901
:param is_method: True if this is decorating an instance or class method, False otherwise (Default=True)
:param enforce_shape: Enforce the dimensions of input arrays (Default=True)
:param validator: :py:func:`dict` objects specifying the method parameters
+ :param allow_extra: Allow extra arguments (Default=False)
+ :param allow_positional: Allow positional arguments (Default=True)
:param options: additional options for documenting and validating method parameters
'''
enforce_type = options.pop('enforce_type', True)
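A hedged sketch of the allow_extra option documented above, using a hypothetical free function (scale_values is not part of hdmf):

    from hdmf.utils import docval, getargs

    @docval({'name': 'data', 'type': (list, tuple), 'doc': 'values to scale'},
            {'name': 'scale', 'type': int, 'doc': 'scale factor', 'default': 1},
            is_method=False, allow_extra=True)
    def scale_values(**kwargs):
        data, scale = getargs('data', 'scale', kwargs)
        return [v * scale for v in data]

    scale_values(data=[1, 2, 3], scale=2, units='mV')  # extra 'units' kwarg is tolerated because allow_extra=True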
=====================================
tests/unit/common/test_alignedtable.py
=====================================
@@ -3,13 +3,19 @@ from pandas.testing import assert_frame_equal
import warnings
from hdmf.backends.hdf5 import HDF5IO
-from hdmf.common import DynamicTable, VectorData, get_manager, AlignedDynamicTable
+from hdmf.common import DynamicTable, VectorData, get_manager, AlignedDynamicTable, DynamicTableRegion
from hdmf.testing import TestCase, remove_test_file
class TestAlignedDynamicTableContainer(TestCase):
"""
Test the AlignedDynamicTable Container class.
+
+ NOTE: Functions specific to linked tables, specifically the:
+ * has_foreign_columns
+ * get_foreign_columns
+ * get_linked_tables
+ methods are tested in the test_linkedtables.TestLinkedAlignedDynamicTables class instead of here.
"""
def setUp(self):
warnings.simplefilter("always") # Trigger all warnings
@@ -410,10 +416,22 @@ class TestAlignedDynamicTableContainer(TestCase):
self.assertListEqual(temp['test1', 'c1'][:].tolist(), (np.arange(num_rows) + 3).tolist())
# Test getting a specific cell
self.assertEqual(temp[None, 'main_c1', 1], 3)
+ self.assertEqual(temp[1, None, 'main_c1'], 3)
# Test bad selection tuple
with self.assertRaisesWith(ValueError,
- "Expected tuple of length 2 or 3 with (category, column, row) as value."):
+ "Expected tuple of length 2 of the form [category, column], [row, category], "
+ "[row, (category, column)] or a tuple of length 3 of the form "
+ "[category, column, row], [row, category, column]"):
temp[('main_c1',)]
+ # Test selecting a single cell or row of a category table by having a
+ # [int, str] or [int, (str, str)] type selection
+ # Select row 0 from category 'test1'
+ re = temp[0, 'test1']
+ self.assertListEqual(re.columns.to_list(), ['id', 'c1', 'c2'])
+ self.assertListEqual(re.index.names, [('test_aligned_table', 'id')])
+ self.assertListEqual(re.values.tolist()[0], [0, 3, 4])
+ # Select a single cell from a column
+ self.assertEqual(temp[1, ('test_aligned_table', 'main_c1')], 3)
def test_to_dataframe(self):
"""Test that the to_dataframe method works"""
@@ -497,3 +515,74 @@ class TestAlignedDynamicTableContainer(TestCase):
msg = "Category is an AlignedDynamicTable. Nesting of AlignedDynamicTable is currently not supported."
with self.assertRaisesWith(ValueError, msg):
adt.add_category(adt_category)
+
+ def test_dynamictable_region_to_aligneddynamictable(self):
+ """
+ Test to ensure data is being retrieved correctly when pointing to an AlignedDynamicTable.
+ In particular, make sure that all columns are being used, including those of the
+ category tables, not just the ones from the main table.
+ """
+ temp_table = DynamicTable(name='t1', description='t1',
+ colnames=['c1', 'c2'],
+ columns=[VectorData(name='c1', description='c1', data=np.arange(4)),
+ VectorData(name='c2', description='c2', data=np.arange(4))])
+ temp_aligned_table = AlignedDynamicTable(name='my_aligned_table',
+ description='my test table',
+ category_tables=[temp_table],
+ colnames=['a1', 'a2'],
+ columns=[VectorData(name='a1', description='c1', data=np.arange(4)),
+ VectorData(name='a2', description='c1', data=np.arange(4))])
+ dtr = DynamicTableRegion(name='test', description='test', data=np.arange(4), table=temp_aligned_table)
+ dtr_df = dtr[:]
+ # Full number of rows
+ self.assertEqual(len(dtr_df), 4)
+ # Test num columns: 2 columns from the main table, 2 columns from the category, 1 id column from the category
+ self.assertEqual(len(dtr_df.columns), 5)
+ # Test that the data is correct
+ for i, v in enumerate([('my_aligned_table', 'a1'), ('my_aligned_table', 'a2'),
+ ('t1', 'id'), ('t1', 'c1'), ('t1', 'c2')]):
+ self.assertTupleEqual(dtr_df.columns[i], v)
+ # Test the column data
+ for c in dtr_df.columns:
+ self.assertListEqual(dtr_df[c].to_list(), list(range(4)))
+
+ def test_get_colnames(self):
+ """
+ Test the AlignedDynamicTable.get_colnames function
+ """
+ category_names = ['test1', 'test2', 'test3']
+ num_rows = 10
+ categories = [DynamicTable(name=val,
+ description=val+" description",
+ columns=[VectorData(name=t,
+ description=val+t+' description',
+ data=np.arange(num_rows)) for t in ['c1', 'c2', 'c3']]
+ ) for val in category_names]
+ adt = AlignedDynamicTable(
+ name='test_aligned_table',
+ description='Test aligned container',
+ category_tables=categories,
+ columns=[VectorData(name='main_' + t,
+ description='main_'+t+'_description',
+ data=np.arange(num_rows)) for t in ['c1', 'c2', 'c3']])
+ # Default, only get the colnames of the main table. Same as adt.colnames property
+ expected_colnames = ('main_c1', 'main_c2', 'main_c3')
+ self.assertTupleEqual(adt.get_colnames(), expected_colnames)
+ # Same as default because if we don't include the categories then ignore_category_ids has no effect
+ self.assertTupleEqual(adt.get_colnames(include_category_tables=False, ignore_category_ids=True),
+ expected_colnames)
+ # Full set of columns
+ expected_colnames = [('test_aligned_table', 'main_c1'), ('test_aligned_table', 'main_c2'),
+ ('test_aligned_table', 'main_c3'), ('test1', 'id'), ('test1', 'c1'),
+ ('test1', 'c2'), ('test1', 'c3'), ('test2', 'id'), ('test2', 'c1'),
+ ('test2', 'c2'), ('test2', 'c3'), ('test3', 'id'), ('test3', 'c1'),
+ ('test3', 'c2'), ('test3', 'c3')]
+ self.assertListEqual(adt.get_colnames(include_category_tables=True, ignore_category_ids=False),
+ expected_colnames)
+ # All columns without the id columns of the category tables
+ expected_colnames = [('test_aligned_table', 'main_c1'), ('test_aligned_table', 'main_c2'),
+ ('test_aligned_table', 'main_c3'), ('test1', 'c1'), ('test1', 'c2'),
+ ('test1', 'c3'), ('test2', 'c1'), ('test2', 'c2'), ('test2', 'c3'),
+ ('test3', 'c1'), ('test3', 'c2'), ('test3', 'c3')]
+ self.assertListEqual(adt.get_colnames(include_category_tables=True, ignore_category_ids=True),
+ expected_colnames)
=====================================
tests/unit/common/test_linkedtables.py
=====================================
@@ -0,0 +1,716 @@
+"""
+Module for testing functions specific to tables containing DynamicTableRegion columns
+"""
+
+import numpy as np
+from hdmf.common import DynamicTable, AlignedDynamicTable, VectorData, DynamicTableRegion, VectorIndex
+from hdmf.testing import TestCase
+from hdmf.utils import docval, popargs, get_docval, call_docval_func
+from hdmf.common.hierarchicaltable import to_hierarchical_dataframe, drop_id_columns, flatten_column_index
+from pandas.testing import assert_frame_equal
+
+
+class DynamicTableSingleDTR(DynamicTable):
+ """Test table class that references a single foreign table"""
+ __columns__ = (
+ {'name': 'child_table_ref1',
+ 'description': 'Column with a references to the next level in the hierarchy',
+ 'required': True,
+ 'index': True,
+ 'table': True},
+ )
+
+ @docval({'name': 'name', 'type': str, 'doc': 'The name of the table'},
+ {'name': 'child_table1',
+ 'type': DynamicTable,
+ 'doc': 'the child DynamicTable this DynamicTableSingleDTR point to.'},
+ *get_docval(DynamicTable.__init__, 'id', 'columns', 'colnames'))
+ def __init__(self, **kwargs):
+ # Define default name and description settings
+ kwargs['description'] = (kwargs['name'] + " DynamicTableSingleDTR")
+ # Initialize the DynamicTable
+ call_docval_func(super(DynamicTableSingleDTR, self).__init__, kwargs)
+ if self['child_table_ref1'].target.table is None:
+ self['child_table_ref1'].target.table = popargs('child_table1', kwargs)
+
+
+class DynamicTableMultiDTR(DynamicTable):
+ """Test table class that references multiple related tables"""
+ __columns__ = (
+ {'name': 'child_table_ref1',
+ 'description': 'Column with a references to the next level in the hierarchy',
+ 'required': True,
+ 'index': True,
+ 'table': True},
+ {'name': 'child_table_ref2',
+ 'description': 'Column with a references to the next level in the hierarchy',
+ 'required': True,
+ 'index': True,
+ 'table': True},
+ )
+
+ @docval({'name': 'name', 'type': str, 'doc': 'The name of the table'},
+ {'name': 'child_table1',
+ 'type': DynamicTable,
+ 'doc': 'the child DynamicTable this DynamicTableSingleDTR point to.'},
+ {'name': 'child_table2',
+ 'type': DynamicTable,
+ 'doc': 'the child DynamicTable this DynamicTableSingleDTR point to.'},
+ *get_docval(DynamicTable.__init__, 'id', 'columns', 'colnames'))
+ def __init__(self, **kwargs):
+ # Define default name and description settings
+ kwargs['description'] = (kwargs['name'] + " DynamicTableSingleDTR")
+ # Initialize the DynamicTable
+ call_docval_func(super(DynamicTableMultiDTR, self).__init__, kwargs)
+ if self['child_table_ref1'].target.table is None:
+ self['child_table_ref1'].target.table = popargs('child_table1', kwargs)
+ if self['child_table_ref2'].target.table is None:
+ self['child_table_ref2'].target.table = popargs('child_table2', kwargs)
+
+
+class TestLinkedAlignedDynamicTables(TestCase):
+ """
+ Test functionality specific to AlignedDynamicTables containing DynamicTableRegion columns.
+
+ Since these functions only implement front-end convenience functions for DynamicTable,
+ we do not need to worry about I/O here (that is tested elsewhere); it is sufficient to
+ test with the container classes. The only time I/O becomes relevant is on read, where, e.g., an
+ h5py.Dataset may behave differently than a numpy array.
+ """
+ def setUp(self):
+ """
+ Create basic set of linked tables consisting of
+
+ aligned_table
+ |
+ +--> category0 ---> table_level_0_0
+ |
+ +--> category1 ---> table_level_0_1
+ """
+ # Level 0 0 table. I.e., first table on level 0
+ self.table_level0_0 = DynamicTable(name='level0_0', description="level0_0 DynamicTable")
+ self.table_level0_0.add_row(id=10)
+ self.table_level0_0.add_row(id=11)
+ self.table_level0_0.add_row(id=12)
+ self.table_level0_0.add_row(id=13)
+ self.table_level0_0.add_column(data=['tag1', 'tag2', 'tag2', 'tag1', 'tag3', 'tag4', 'tag5'],
+ name='tags',
+ description='custom tags',
+ index=[1, 2, 4, 7])
+ self.table_level0_0.add_column(data=np.arange(4),
+ name='myid',
+ description='custom ids',
+ index=False)
+ # Level 0 1 table. I.e., second table on level 0
+ self.table_level0_1 = DynamicTable(name='level0_1', description="level0_1 DynamicTable")
+ self.table_level0_1.add_row(id=14)
+ self.table_level0_1.add_row(id=15)
+ self.table_level0_1.add_row(id=16)
+ self.table_level0_1.add_row(id=17)
+ self.table_level0_1.add_column(data=['tag1', 'tag1', 'tag2', 'tag2', 'tag3', 'tag3', 'tag4'],
+ name='tags',
+ description='custom tags',
+ index=[2, 4, 6, 7])
+ self.table_level0_1.add_column(data=np.arange(4),
+ name='myid',
+ description='custom ids',
+ index=False)
+
+ # category 0 table
+ self.category0 = DynamicTableSingleDTR(name='category0', child_table1=self.table_level0_0)
+ self.category0.add_row(id=0, child_table_ref1=[0, ])
+ self.category0.add_row(id=1, child_table_ref1=[1, 2])
+ self.category0.add_row(id=1, child_table_ref1=[3, ])
+ self.category0.add_column(data=[10, 11, 12],
+ name='filter',
+ description='filter value',
+ index=False)
+
+ # category 1 table
+ self.category1 = DynamicTableSingleDTR(name='category1', child_table1=self.table_level0_1)
+ self.category1.add_row(id=0, child_table_ref1=[0, 1])
+ self.category1.add_row(id=1, child_table_ref1=[2, 3])
+ self.category1.add_row(id=1, child_table_ref1=[1, 3])
+ self.category1.add_column(data=[1, 2, 3],
+ name='filter',
+ description='filter value',
+ index=False)
+ # Aligned table
+ self.aligned_table = AlignedDynamicTable(name='my_aligned_table',
+ description='my test table',
+ columns=[VectorData(name='a1', description='a1', data=np.arange(3)), ],
+ colnames=['a1', ],
+ category_tables=[self.category0, self.category1])
+
+ def tearDown(self):
+ del self.table_level0_0
+ del self.table_level0_1
+ del self.category0
+ del self.category1
+ del self.aligned_table
+
+ def test_to_hierarchical_dataframe(self):
+ """Test that converting an AlignedDynamicTable with links works"""
+ hier_df = to_hierarchical_dataframe(self.aligned_table)
+ self.assertListEqual(hier_df.columns.to_list(),
+ [('level0_0', 'id'), ('level0_0', 'tags'), ('level0_0', 'myid')])
+ self.assertListEqual(hier_df.index.names,
+ [('my_aligned_table', 'id'), ('my_aligned_table', ('my_aligned_table', 'a1')),
+ ('my_aligned_table', ('category0', 'id')), ('my_aligned_table', ('category0', 'filter')),
+ ('my_aligned_table', ('category1', 'id')),
+ ('my_aligned_table', ('category1', 'child_table_ref1')),
+ ('my_aligned_table', ('category1', 'filter'))])
+ self.assertListEqual(hier_df.index.to_list(),
+ [(0, 0, 0, 10, 0, (0, 1), 1),
+ (1, 1, 1, 11, 1, (2, 3), 2),
+ (1, 1, 1, 11, 1, (2, 3), 2),
+ (2, 2, 1, 12, 1, (1, 3), 3)])
+ self.assertListEqual(hier_df[('level0_0', 'tags')].values.tolist(),
+ [['tag1'], ['tag2'], ['tag2', 'tag1'], ['tag3', 'tag4', 'tag5']])
+
+ def test_has_foreign_columns_in_category_tables(self):
+ """Test confirming working order for DynamicTableRegions in subtables"""
+ self.assertTrue(self.aligned_table.has_foreign_columns())
+ self.assertFalse(self.aligned_table.has_foreign_columns(ignore_category_tables=True))
+
+ def test_has_foreign_columns_false(self):
+ """Test false if there are no DynamicTableRegionColumns"""
+ temp_table = DynamicTable(name='t1', description='t1',
+ colnames=['c1', 'c2'],
+ columns=[VectorData(name='c1', description='c1', data=np.arange(4)),
+ VectorData(name='c2', description='c2', data=np.arange(4))])
+ temp_aligned_table = AlignedDynamicTable(name='my_aligned_table',
+ description='my test table',
+ category_tables=[temp_table],
+ colnames=['a1', 'a2'],
+ columns=[VectorData(name='a1', description='c1', data=np.arange(4)),
+ VectorData(name='a2', description='c2', data=np.arange(4))])
+ self.assertFalse(temp_aligned_table.has_foreign_columns())
+ self.assertFalse(temp_aligned_table.has_foreign_columns(ignore_category_tables=True))
+
+ def test_has_foreign_column_in_main_table(self):
+ temp_table = DynamicTable(name='t1', description='t1',
+ colnames=['c1', 'c2'],
+ columns=[VectorData(name='c1', description='c1', data=np.arange(4)),
+ VectorData(name='c2', description='c2', data=np.arange(4))])
+ temp_aligned_table = AlignedDynamicTable(name='my_aligned_table',
+ description='my test table',
+ category_tables=[temp_table],
+ colnames=['a1', 'a2'],
+ columns=[VectorData(name='a1', description='c1', data=np.arange(4)),
+ DynamicTableRegion(name='a2', description='c2',
+ data=np.arange(4), table=temp_table)])
+ self.assertTrue(temp_aligned_table.has_foreign_columns())
+ self.assertTrue(temp_aligned_table.has_foreign_columns(ignore_category_tables=True))
+
+ def test_get_foreign_columns(self):
+ # check without subcategories
+ foreign_cols = self.aligned_table.get_foreign_columns(ignore_category_tables=True)
+ self.assertListEqual(foreign_cols, [])
+ # check with subcategories
+ foreign_cols = self.aligned_table.get_foreign_columns()
+ self.assertEqual(len(foreign_cols), 2)
+ for i, v in enumerate([('category0', 'child_table_ref1'), ('category1', 'child_table_ref1')]):
+ self.assertTupleEqual(foreign_cols[i], v)
+
+ def test_get_foreign_columns_none(self):
+ """Test false if there are no DynamicTableRegionColumns"""
+ temp_table = DynamicTable(name='t1', description='t1',
+ colnames=['c1', 'c2'],
+ columns=[VectorData(name='c1', description='c1', data=np.arange(4)),
+ VectorData(name='c2', description='c2', data=np.arange(4))])
+ temp_aligned_table = AlignedDynamicTable(name='my_aligned_table',
+ description='my test table',
+ category_tables=[temp_table],
+ colnames=['a1', 'a2'],
+ columns=[VectorData(name='a1', description='c1', data=np.arange(4)),
+ VectorData(name='a2', description='c2', data=np.arange(4))])
+ self.assertListEqual(temp_aligned_table.get_foreign_columns(), [])
+ self.assertListEqual(temp_aligned_table.get_foreign_columns(ignore_category_tables=True), [])
+
+ def test_get_foreign_column_in_main_and_category_table(self):
+ temp_table0 = DynamicTable(name='t0', description='t1',
+ colnames=['c1', 'c2'],
+ columns=[VectorData(name='c1', description='c1', data=np.arange(4)),
+ VectorData(name='c2', description='c2', data=np.arange(4))])
+ temp_table = DynamicTable(name='t1', description='t1',
+ colnames=['c1', 'c2'],
+ columns=[VectorData(name='c1', description='c1', data=np.arange(4)),
+ DynamicTableRegion(name='c2', description='c2',
+ data=np.arange(4), table=temp_table0)])
+ temp_aligned_table = AlignedDynamicTable(name='my_aligned_table',
+ description='my test table',
+ category_tables=[temp_table],
+ colnames=['a1', 'a2'],
+ columns=[VectorData(name='a1', description='c1', data=np.arange(4)),
+ DynamicTableRegion(name='a2', description='c2',
+ data=np.arange(4), table=temp_table)])
+ # We should get both the DynamicTableRegion from the main table and the category 't1'
+ self.assertListEqual(temp_aligned_table.get_foreign_columns(), [(None, 'a2'), ('t1', 'c2')])
+ # We should only get the column from the main table
+ self.assertListEqual(temp_aligned_table.get_foreign_columns(ignore_category_tables=True), [(None, 'a2')])
+
+ def test_get_linked_tables(self):
+ # check without subcategories
+ linked_table = self.aligned_table.get_linked_tables(ignore_category_tables=True)
+ self.assertListEqual(linked_table, [])
+ # check with subcategories
+ linked_tables = self.aligned_table.get_linked_tables()
+ self.assertEqual(len(linked_tables), 2)
+ self.assertTupleEqual((linked_tables[0].source_table.name,
+ linked_tables[0].source_column.name,
+ linked_tables[0].target_table.name),
+ ('category0', 'child_table_ref1', 'level0_0'))
+ self.assertTupleEqual((linked_tables[1].source_table.name,
+ linked_tables[1].source_column.name,
+ linked_tables[1].target_table.name),
+ ('category1', 'child_table_ref1', 'level0_1'))
+
+ def test_get_linked_tables_none(self):
+ """Test false if there are no DynamicTableRegionColumns"""
+ temp_table = DynamicTable(name='t1', description='t1',
+ colnames=['c1', 'c2'],
+ columns=[VectorData(name='c1', description='c1', data=np.arange(4)),
+ VectorData(name='c2', description='c2', data=np.arange(4))])
+ temp_aligned_table = AlignedDynamicTable(name='my_aligned_table',
+ description='my test table',
+ category_tables=[temp_table],
+ colnames=['a1', 'a2'],
+ columns=[VectorData(name='a1', description='c1', data=np.arange(4)),
+ VectorData(name='a2', description='c2', data=np.arange(4))])
+ self.assertListEqual(temp_aligned_table.get_linked_tables(), [])
+ self.assertListEqual(temp_aligned_table.get_linked_tables(ignore_category_tables=True), [])
+
+ def test_get_linked_tables_complex_link(self):
+ temp_table0 = DynamicTable(name='t0', description='t1',
+ colnames=['c1', 'c2'],
+ columns=[VectorData(name='c1', description='c1', data=np.arange(4)),
+ VectorData(name='c2', description='c2', data=np.arange(4))])
+ temp_table = DynamicTable(name='t1', description='t1',
+ colnames=['c1', 'c2'],
+ columns=[VectorData(name='c1', description='c1', data=np.arange(4)),
+ DynamicTableRegion(name='c2', description='c2',
+ data=np.arange(4), table=temp_table0)])
+ temp_aligned_table = AlignedDynamicTable(name='my_aligned_table',
+ description='my test table',
+ category_tables=[temp_table],
+ colnames=['a1', 'a2'],
+ columns=[VectorData(name='a1', description='c1', data=np.arange(4)),
+ DynamicTableRegion(name='a2', description='c2',
+ data=np.arange(4), table=temp_table)])
+ # NOTE: in this example temp_aligned_table both points to temp_table and at the
+ # same time contains temp_table as a category. This could lead to temp_table
+ # being visited multiple times and we want to make sure this doesn't happen
+ # We should get both the DynamicTableRegion from the main table and the category 't1'
+ linked_tables = temp_aligned_table.get_linked_tables()
+ self.assertEqual(len(linked_tables), 2)
+ for i, v in enumerate([('my_aligned_table', 'a2', 't1'), ('t1', 'c2', 't0')]):
+ self.assertTupleEqual((linked_tables[i].source_table.name,
+ linked_tables[i].source_column.name,
+ linked_tables[i].target_table.name), v)
+ # Now, since our main table links to the category table the result should remain the same
+ # even if we ignore the category table
+ linked_tables = temp_aligned_table.get_linked_tables(ignore_category_tables=True)
+ self.assertEqual(len(linked_tables), 2)
+ for i, v in enumerate([('my_aligned_table', 'a2', 't1'), ('t1', 'c2', 't0')]):
+ self.assertTupleEqual((linked_tables[i].source_table.name,
+ linked_tables[i].source_column.name,
+ linked_tables[i].target_table.name), v)
+
+ def test_get_linked_tables_simple_link(self):
+ temp_table0 = DynamicTable(name='t0', description='t1',
+ colnames=['c1', 'c2'],
+ columns=[VectorData(name='c1', description='c1', data=np.arange(4)),
+ VectorData(name='c2', description='c2', data=np.arange(4))])
+ temp_table = DynamicTable(name='t1', description='t1',
+ colnames=['c1', 'c2'],
+ columns=[VectorData(name='c1', description='c1', data=np.arange(4)),
+ DynamicTableRegion(name='c2', description='c2',
+ data=np.arange(4), table=temp_table0)])
+ temp_aligned_table = AlignedDynamicTable(name='my_aligned_table',
+ description='my test table',
+ category_tables=[temp_table],
+ colnames=['a1', 'a2'],
+ columns=[VectorData(name='a1', description='c1', data=np.arange(4)),
+ DynamicTableRegion(name='a2', description='c2',
+ data=np.arange(4), table=temp_table0)])
+ # NOTE: in this example temp_aligned_table and temp_table both point to temp_table0
+ # We should get both the DynamicTableRegion column from the main table and the one from category 't1'
+ linked_tables = temp_aligned_table.get_linked_tables()
+ self.assertEqual(len(linked_tables), 2)
+ for i, v in enumerate([('my_aligned_table', 'a2', 't0'), ('t1', 'c2', 't0')]):
+ self.assertTupleEqual((linked_tables[i].source_table.name,
+ linked_tables[i].source_column.name,
+ linked_tables[i].target_table.name), v)
+ # Since no table ever links to our category temp_table, ignoring the category tables should leave only
+ # the link from our main table, in contrast to what happens in the test_get_linked_tables_complex_link case
+ linked_tables = temp_aligned_table.get_linked_tables(ignore_category_tables=True)
+ self.assertEqual(len(linked_tables), 1)
+ for i, v in enumerate([('my_aligned_table', 'a2', 't0'), ]):
+ self.assertTupleEqual((linked_tables[i].source_table.name,
+ linked_tables[i].source_column.name,
+ linked_tables[i].target_table.name), v)
+
+
+class TestHierarchicalTable(TestCase):
+
+ def setUp(self):
+ """
+ Create basic set of linked tables consisting of
+
+ super_parent_table ---> parent_table ---> aligned_table
+ |
+ +--> category0
+ """
+ # Level 0 0 table. I.e., first table on level 0
+ self.category0 = DynamicTable(name='level0_0', description="level0_0 DynamicTable")
+ self.category0.add_row(id=10)
+ self.category0.add_row(id=11)
+ self.category0.add_row(id=12)
+ self.category0.add_row(id=13)
+ self.category0.add_column(data=['tag1', 'tag2', 'tag2', 'tag1', 'tag3', 'tag4', 'tag5'],
+ name='tags',
+ description='custom tags',
+ index=[1, 2, 4, 7])
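+ # The index [1, 2, 4, 7] makes 'tags' a ragged (indexed) column: the four rows hold
+ # ['tag1'], ['tag2'], ['tag2', 'tag1'], and ['tag3', 'tag4', 'tag5'] (cf. the assertions below)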
+ self.category0.add_column(data=np.arange(4),
+ name='myid',
+ description='custom ids',
+ index=False)
+
+ # Aligned table
+ self.aligned_table = AlignedDynamicTable(name='aligned_table',
+ description='parent_table',
+ columns=[VectorData(name='a1', description='a1', data=np.arange(4)), ],
+ colnames=['a1', ],
+ category_tables=[self.category0, ])
+
+ # Parent table
+ self.parent_table = DynamicTable(name='parent_table',
+ description='parent_table',
+ columns=[VectorData(name='p1', description='p1', data=np.arange(4)),
+ DynamicTableRegion(name='l1', description='l1',
+ data=np.arange(4), table=self.aligned_table)])
+ # Super-parent table
+ dtr_sp = DynamicTableRegion(name='sl1', description='sl1', data=np.arange(4), table=self.parent_table)
+ vi_dtr_sp = VectorIndex(name='sl1_index', data=[1, 2, 3], target=dtr_sp)
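+ # The VectorIndex holds cumulative end offsets, so rows 0-2 of super_parent_table select
+ # slices [0:1], [1:2], [2:3] of the 'sl1' region, i.e., one parent_table row each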
+ self.super_parent_table = DynamicTable(name='super_parent_table',
+ description='super_parent_table',
+ columns=[VectorData(name='sp1', description='sp1', data=np.arange(3)),
+ dtr_sp, vi_dtr_sp])
+
+ def tearDown(self):
+ del self.category0
+ del self.aligned_table
+ del self.parent_table
+ del self.super_parent_table
+
+ def test_to_hierarchical_dataframe_no_dtr_on_top_level(self):
+ # Cover the case where our top dtr is flat (i.e., without a VectorIndex)
+ dtr_sp = DynamicTableRegion(name='sl1', description='sl1', data=np.arange(4), table=self.parent_table)
+ spttable = DynamicTable(name='super_parent_table',
+ description='super_parent_table',
+ columns=[VectorData(name='sp1', description='sp1', data=np.arange(4)), dtr_sp])
+ hier_df = to_hierarchical_dataframe(spttable).reset_index()
+ expected_columns = [('super_parent_table', 'id'), ('super_parent_table', 'sp1'),
+ ('parent_table', 'id'), ('parent_table', 'p1'),
+ ('aligned_table', 'id'),
+ ('aligned_table', ('aligned_table', 'a1')), ('aligned_table', ('level0_0', 'id')),
+ ('aligned_table', ('level0_0', 'tags')), ('aligned_table', ('level0_0', 'myid'))]
+ self.assertListEqual(hier_df.columns.to_list(), expected_columns)
+
+ def test_to_hierarchical_dataframe_indexed_dtr_on_last_level(self):
+ # Parent table
+ dtr_p1 = DynamicTableRegion(name='l1', description='l1', data=np.arange(4), table=self.aligned_table)
+ vi_dtr_p1 = VectorIndex(name='sl1_index', data=[1, 2, 3], target=dtr_p1)
+ p1 = DynamicTable(name='parent_table', description='parent_table',
+ columns=[VectorData(name='p1', description='p1', data=np.arange(3)), dtr_p1, vi_dtr_p1])
+ # Super-parent table
+ dtr_sp = DynamicTableRegion(name='sl1', description='sl1', data=np.arange(4), table=p1)
+ vi_dtr_sp = VectorIndex(name='sl1_index', data=[1, 2, 3], target=dtr_sp)
+ spt = DynamicTable(name='super_parent_table', description='super_parent_table',
+ columns=[VectorData(name='sp1', description='sp1', data=np.arange(3)), dtr_sp, vi_dtr_sp])
+ hier_df = to_hierarchical_dataframe(spt).reset_index()
+ expected_columns = [('super_parent_table', 'id'), ('super_parent_table', 'sp1'),
+ ('parent_table', 'id'), ('parent_table', 'p1'),
+ ('aligned_table', 'id'),
+ ('aligned_table', ('aligned_table', 'a1')), ('aligned_table', ('level0_0', 'id')),
+ ('aligned_table', ('level0_0', 'tags')), ('aligned_table', ('level0_0', 'myid'))]
+ self.assertListEqual(hier_df.columns.to_list(), expected_columns) # make sure we have the right columns
+ self.assertListEqual(hier_df[('aligned_table', ('level0_0', 'tags'))].to_list(),
+ [['tag1'], ['tag2'], ['tag2', 'tag1']])
+
+ def test_to_hierarchical_dataframe_empty_tables(self):
+ # Setup empty tables with the following hierarchy
+ # super_parent_table ---> parent_table ---> child_table
+ a1 = DynamicTable(name='level0_0', description="level0_0 DynamicTable",
+ columns=[VectorData(name='l0', description='l0', data=[])])
+ p1 = DynamicTable(name='parent_table', description='parent_table',
+ columns=[DynamicTableRegion(name='l1', description='l1', data=[], table=a1),
+ VectorData(name='p1c', description='l0', data=[])])
+ dtr_sp = DynamicTableRegion(name='sl1', description='sl1', data=np.arange(4), table=p1)
+ vi_dtr_sp = VectorIndex(name='sl1_index', data=[], target=dtr_sp)
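+ # The index is empty, so the flattened dataframe should contain no rows; the assertions
+ # below therefore only verify the expected column structure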
+ spt = DynamicTable(name='super_parent_table', description='super_parent_table',
+ columns=[dtr_sp, vi_dtr_sp, VectorData(name='sptc', description='l0', data=[])])
+ # Convert to hierarchical dataframe and make sure we get the right columns
+ hier_df = to_hierarchical_dataframe(spt).reset_index()
+ expected_columns = [('super_parent_table', 'id'), ('super_parent_table', 'sptc'),
+ ('parent_table', 'id'), ('parent_table', 'p1c'),
+ ('level0_0', 'id'), ('level0_0', 'l0')]
+ self.assertListEqual(hier_df.columns.to_list(), expected_columns)
+
+ def test_to_hierarchical_dataframe_multilevel(self):
+ hier_df = to_hierarchical_dataframe(self.super_parent_table).reset_index()
+ expected_cols = [('super_parent_table', 'id'), ('super_parent_table', 'sp1'),
+ ('parent_table', 'id'), ('parent_table', 'p1'),
+ ('aligned_table', 'id'),
+ ('aligned_table', ('aligned_table', 'a1')),
+ ('aligned_table', ('level0_0', 'id')),
+ ('aligned_table', ('level0_0', 'tags')),
+ ('aligned_table', ('level0_0', 'myid'))]
+ # Check that we have all the columns
+ self.assertListEqual(hier_df.columns.to_list(), expected_cols)
+ # Spot-check the data in two columns
+ self.assertListEqual(hier_df[('aligned_table', ('level0_0', 'tags'))].to_list(),
+ [['tag1'], ['tag2'], ['tag2', 'tag1']])
+ self.assertListEqual(hier_df[('aligned_table', ('aligned_table', 'a1'))].to_list(), list(range(3)))
+
+ def test_to_hierarchical_dataframe(self):
+ hier_df = to_hierarchical_dataframe(self.parent_table)
+ self.assertEqual(len(hier_df), 4)
+ self.assertEqual(len(hier_df.columns), 5)
+ self.assertEqual(len(hier_df.index.names), 2)
+ columns = [('aligned_table', 'id'),
+ ('aligned_table', ('aligned_table', 'a1')),
+ ('aligned_table', ('level0_0', 'id')),
+ ('aligned_table', ('level0_0', 'tags')),
+ ('aligned_table', ('level0_0', 'myid'))]
+ for i, c in enumerate(hier_df.columns):
+ self.assertTupleEqual(c, columns[i])
+ index_names = [('parent_table', 'id'), ('parent_table', 'p1')]
+ self.assertListEqual(hier_df.index.names, index_names)
+ self.assertListEqual(hier_df.index.to_list(), [(i, i) for i in range(4)])
+ self.assertListEqual(hier_df[('aligned_table', ('aligned_table', 'a1'))].to_list(), list(range(4)))
+ self.assertListEqual(hier_df[('aligned_table', ('level0_0', 'id'))].to_list(), list(range(10, 14)))
+ self.assertListEqual(hier_df[('aligned_table', ('level0_0', 'myid'))].to_list(), list(range(4)))
+ tags = [['tag1'], ['tag2'], ['tag2', 'tag1'], ['tag3', 'tag4', 'tag5']]
+ for i, v in enumerate(hier_df[('aligned_table', ('level0_0', 'tags'))].to_list()):
+ self.assertListEqual(v, tags[i])
+
+ def test_to_hierarchical_dataframe_flat_table(self):
+ hier_df = to_hierarchical_dataframe(self.category0)
+ assert_frame_equal(hier_df, self.category0.to_dataframe())
+ hier_df = to_hierarchical_dataframe(self.aligned_table)
+ assert_frame_equal(hier_df, self.aligned_table.to_dataframe())
+
+ def test_drop_id_columns(self):
+ hier_df = to_hierarchical_dataframe(self.parent_table)
+ cols = hier_df.columns.to_list()
+ mod_df = drop_id_columns(hier_df, inplace=False)
+ expected_cols = [('aligned_table', ('aligned_table', 'a1')),
+ ('aligned_table', ('level0_0', 'tags')),
+ ('aligned_table', ('level0_0', 'myid'))]
+ self.assertListEqual(hier_df.columns.to_list(), cols) # Test that no columns are dropped with inplace=False
+ self.assertListEqual(mod_df.columns.to_list(), expected_cols) # Assert that we got back a modified dataframe
+ drop_id_columns(hier_df, inplace=True)
+ self.assertListEqual(hier_df.columns.to_list(),
+ expected_cols)
+ flat_df = to_hierarchical_dataframe(self.parent_table).reset_index(inplace=False)
+ drop_id_columns(flat_df, inplace=True)
+ self.assertListEqual(flat_df.columns.to_list(),
+ [('parent_table', 'p1'),
+ ('aligned_table', ('aligned_table', 'a1')),
+ ('aligned_table', ('level0_0', 'tags')),
+ ('aligned_table', ('level0_0', 'myid'))])
+
+ def test_flatten_column_index(self):
+ hier_df = to_hierarchical_dataframe(self.parent_table).reset_index()
+ cols = hier_df.columns.to_list()
+ expected_cols = [('parent_table', 'id'),
+ ('parent_table', 'p1'),
+ ('aligned_table', 'id'),
+ ('aligned_table', 'aligned_table', 'a1'),
+ ('aligned_table', 'level0_0', 'id'),
+ ('aligned_table', 'level0_0', 'tags'),
+ ('aligned_table', 'level0_0', 'myid')]
+ df = flatten_column_index(hier_df, inplace=False)
+ # Test that our columns have not changed with inplace=False
+ self.assertListEqual(hier_df.columns.to_list(), cols)
+ self.assertListEqual(df.columns.to_list(), expected_cols) # make sure we got back a modified dataframe
+ flatten_column_index(hier_df, inplace=True) # make sure we can also directly flatten inplace
+ self.assertListEqual(hier_df.columns.to_list(), expected_cols)
+ # Test that we can apply flatten_column_index again on our already modified dataframe to reduce the levels
+ flatten_column_index(hier_df, inplace=True, max_levels=2)
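+ # With max_levels=2 only the last two entries of each column tuple are kept, e.g.,
+ # ('aligned_table', 'aligned_table', 'a1') becomes ('aligned_table', 'a1')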
+ expected_cols = [('parent_table', 'id'), ('parent_table', 'p1'), ('aligned_table', 'id'),
+ ('aligned_table', 'a1'), ('level0_0', 'id'), ('level0_0', 'tags'), ('level0_0', 'myid')]
+ self.assertListEqual(hier_df.columns.to_list(), expected_cols)
+ # Test that we can directly reduce the max_levels to just 1
+ hier_df = to_hierarchical_dataframe(self.parent_table).reset_index()
+ flatten_column_index(hier_df, inplace=True, max_levels=1)
+ expected_cols = ['id', 'p1', 'id', 'a1', 'id', 'tags', 'myid']
+ self.assertListEqual(hier_df.columns.to_list(), expected_cols)
+
+ def test_flatten_column_index_already_flat_index(self):
+ hier_df = to_hierarchical_dataframe(self.parent_table).reset_index()
+ flatten_column_index(hier_df, inplace=True, max_levels=1)
+ expected_cols = ['id', 'p1', 'id', 'a1', 'id', 'tags', 'myid']
+ self.assertListEqual(hier_df.columns.to_list(), expected_cols)
+ # Now try to flatten the already flat columns again to make sure nothing changes
+ flatten_column_index(hier_df, inplace=True, max_levels=1)
+ self.assertListEqual(hier_df.columns.to_list(), expected_cols)
+
+ def test_flatten_column_index_bad_maxlevels(self):
+ hier_df = to_hierarchical_dataframe(self.parent_table)
+ with self.assertRaisesWith(ValueError, 'max_levels must be greater than 0'):
+ flatten_column_index(dataframe=hier_df, inplace=True, max_levels=-1)
+ with self.assertRaisesWith(ValueError, 'max_levels must be greater than 0'):
+ flatten_column_index(dataframe=hier_df, inplace=True, max_levels=0)
+
+
+class TestLinkedDynamicTables(TestCase):
+ """
+ Test functionality specific to DynamicTables containing DynamicTableRegion columns.
+
+ Since these functions only implement front-end convenience methods for DynamicTable,
+ we do not need to worry about I/O here (that is tested elsewhere); it is sufficient to
+ test with the container classes. The only time I/O becomes relevant is on read, where,
+ e.g., an h5py.Dataset may behave differently than a numpy array.
+ """
+ def setUp(self):
+ """
+ Create basic set of linked tables consisting of
+
+ table_level2 ---> table_level1 ----> table_level0_0
+ \
+ ------> table_level0_1
+
+ """
+ self.table_level0_0 = DynamicTable(name='level0_0', description="level0_0 DynamicTable")
+ self.table_level0_1 = DynamicTable(name='level0_1', description="level0_1 DynamicTable")
+ self.table_level1 = DynamicTableMultiDTR(name='level1',
+ child_table1=self.table_level0_0,
+ child_table2=self.table_level0_1)
+ self.table_level2 = DynamicTableSingleDTR(name='level2', child_table1=self.table_level1)
+
+ def tearDown(self):
+ del self.table_level0_0
+ del self.table_level0_1
+ del self.table_level1
+ del self.table_level2
+
+ def populate_tables(self):
+ """Helper function to populate the tables generated in setUp with some simple data"""
+ # Level 0 0 table. I.e., first table on level 0
+ self.table_level0_0.add_row(id=10)
+ self.table_level0_0.add_row(id=11)
+ self.table_level0_0.add_row(id=12)
+ self.table_level0_0.add_row(id=13)
+ self.table_level0_0.add_column(data=['tag1', 'tag2', 'tag2', 'tag1', 'tag3', 'tag4', 'tag5'],
+ name='tags',
+ description='custom tags',
+ index=[1, 2, 4, 7])
+ self.table_level0_0.add_column(data=np.arange(4),
+ name='myid',
+ description='custom ids',
+ index=False)
+ # Level 0 1 table. I.e., second table on level 0
+ self.table_level0_1.add_row(id=14)
+ self.table_level0_1.add_row(id=15)
+ self.table_level0_1.add_row(id=16)
+ self.table_level0_1.add_row(id=17)
+ self.table_level0_1.add_column(data=['tag1', 'tag1', 'tag2', 'tag2', 'tag3', 'tag3', 'tag4'],
+ name='tags',
+ description='custom tags',
+ index=[2, 4, 6, 7])
+ self.table_level0_1.add_column(data=np.arange(4),
+ name='myid',
+ description='custom ids',
+ index=False)
+ # Level 1 table
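+ # child_table_ref1 and child_table_ref2 hold row indices into level0_0 and level0_1,
+ # respectively, e.g., row 0 references rows [0, 1] of level0_0 and row [0] of level0_1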
+ self.table_level1.add_row(id=0, child_table_ref1=[0, 1], child_table_ref2=[0])
+ self.table_level1.add_row(id=1, child_table_ref1=[2], child_table_ref2=[1, 2])
+ self.table_level1.add_row(id=2, child_table_ref1=[3], child_table_ref2=[3])
+ self.table_level1.add_column(data=['tag1', 'tag2', 'tag2'],
+ name='tag',
+ description='custom tag',
+ index=False)
+ self.table_level1.add_column(data=['tag1', 'tag2', 'tag2', 'tag3', 'tag3', 'tag4', 'tag5'],
+ name='tags',
+ description='custom tags',
+ index=[2, 4, 7])
+ # Level 2 data
+ self.table_level2.add_row(id=0, child_table_ref1=[0, ])
+ self.table_level2.add_row(id=1, child_table_ref1=[1, 2])
+ self.table_level2.add_column(data=[10, 12],
+ name='filter',
+ description='filter value',
+ index=False)
+
+ def test_populate_table_hierarchy(self):
+ """Test that just checks that populating the tables with data works correctly"""
+ self.popolate_tables()
+ # Check level0 0 data
+ self.assertListEqual(self.table_level0_0.id[:], np.arange(10, 14, 1).tolist())
+ self.assertListEqual(self.table_level0_0['tags'][:],
+ [['tag1'], ['tag2'], ['tag2', 'tag1'], ['tag3', 'tag4', 'tag5']])
+ self.assertListEqual(self.table_level0_0['myid'][:].tolist(), np.arange(0, 4, 1).tolist())
+ # Check level0 1 data
+ self.assertListEqual(self.table_level0_1.id[:], np.arange(14, 18, 1).tolist())
+ self.assertListEqual(self.table_level0_1['tags'][:],
+ [['tag1', 'tag1'], ['tag2', 'tag2'], ['tag3', 'tag3'], ['tag4']])
+ self.assertListEqual(self.table_level0_1['myid'][:].tolist(), np.arange(0, 4, 1).tolist())
+ # Check level1 data
+ self.assertListEqual(self.table_level1.id[:], np.arange(0, 3, 1).tolist())
+ self.assertListEqual(self.table_level1['tag'][:], ['tag1', 'tag2', 'tag2'])
+ self.assertTrue(self.table_level1['child_table_ref1'].target.table is self.table_level0_0)
+ self.assertTrue(self.table_level1['child_table_ref2'].target.table is self.table_level0_1)
+ self.assertEqual(len(self.table_level1['child_table_ref1'].target.table), 4)
+ self.assertEqual(len(self.table_level1['child_table_ref2'].target.table), 4)
+ # Check level2 data
+ self.assertListEqual(self.table_level2.id[:], np.arange(0, 2, 1).tolist())
+ self.assertListEqual(self.table_level2['filter'][:], [10, 12])
+ self.assertTrue(self.table_level2['child_table_ref1'].target.table is self.table_level1)
+ self.assertEqual(len(self.table_level2['child_table_ref1'].target.table), 3)
+
+ def test_get_foreign_columns(self):
+ """Test DynamicTable.get_foreign_columns"""
+ self.populate_tables()
+ self.assertListEqual(self.table_level0_0.get_foreign_columns(), [])
+ self.assertListEqual(self.table_level0_1.get_foreign_columns(), [])
+ self.assertListEqual(self.table_level1.get_foreign_columns(), ['child_table_ref1', 'child_table_ref2'])
+ self.assertListEqual(self.table_level2.get_foreign_columns(), ['child_table_ref1'])
+
+ def test_has_foreign_columns(self):
+ """Test DynamicTable.get_foreign_columns"""
+ self.popolate_tables()
+ self.assertFalse(self.table_level0_0.has_foreign_columns())
+ self.assertFalse(self.table_level0_1.has_foreign_columns())
+ self.assertTrue(self.table_level1.has_foreign_columns())
+ self.assertTrue(self.table_level2.has_foreign_columns())
+
+ def test_get_linked_tables(self):
+ """Test DynamicTable.get_linked_tables"""
+ self.populate_tables()
+ # check level0_0
+ self.assertListEqual(self.table_level0_0.get_linked_tables(), [])
+ # check level0_1
+ self.assertListEqual(self.table_level0_1.get_linked_tables(), [])
+ # check level1
+ temp = self.table_level1.get_linked_tables()
+ self.assertEqual(len(temp), 2)
+ self.assertEqual(temp[0].source_table.name, self.table_level1.name)
+ self.assertEqual(temp[0].source_column.name, 'child_table_ref1')
+ self.assertEqual(temp[0].target_table.name, self.table_level0_0.name)
+ self.assertEqual(temp[1].source_table.name, self.table_level1.name)
+ self.assertEqual(temp[1].source_column.name, 'child_table_ref2')
+ self.assertEqual(temp[1].target_table.name, self.table_level0_1.name)
+ # check level2
+ temp = self.table_level2.get_linked_tables()
+ self.assertEqual(len(temp), 3)
+ self.assertEqual(temp[0].source_table.name, self.table_level2.name)
+ self.assertEqual(temp[0].source_column.name, 'child_table_ref1')
+ self.assertEqual(temp[0].target_table.name, self.table_level1.name)
+ self.assertEqual(temp[1].source_table.name, self.table_level1.name)
+ self.assertEqual(temp[1].source_column.name, 'child_table_ref1')
+ self.assertEqual(temp[1].target_table.name, self.table_level0_0.name)
+ self.assertEqual(temp[2].source_table.name, self.table_level1.name)
+ self.assertEqual(temp[2].source_column.name, 'child_table_ref2')
+ self.assertEqual(temp[2].target_table.name, self.table_level0_1.name)
=====================================
tests/unit/common/test_table.py
=====================================
@@ -799,7 +799,7 @@ class TestDynamicTableRegion(TestCase):
table = self.with_columns_and_data()
dynamic_table_region = DynamicTableRegion('dtr', [0, 1, 2, 2], 'desc', table=table)
with self.assertRaises(ValueError):
- _ = dynamic_table_region['bad index']
+ _ = dynamic_table_region[True]
def test_dynamic_table_region_table_prop(self):
table = self.with_columns_and_data()
@@ -1051,6 +1051,20 @@ class DynamicTableRegionRoundTrip(H5RoundTripMixin, TestCase):
exp = [np.array([0, 1]), np.array([1, 2]), np.array([10.0, 20.0]), np.array(['cat', 'dog']), np.array([0, 1])]
self._assert_list_of_ndarray_equal(exp, rec)
+ def test_getitem_int_str(self):
+ """Test DynamicTableRegion.__getitem__ with (int, str)."""
+ mc = self.roundtripContainer()
+ table = mc.containers['table_with_dtr']
+ rec = table['dtr'][0, 'qux']
+ self.assertEqual(rec, 'qux_1')
+
+ def test_getitem_str(self):
+ """Test DynamicTableRegion.__getitem__ with str."""
+ mc = self.roundtripContainer()
+ table = mc.containers['table_with_dtr']
+ rec = table['dtr']['qux']
+ self.assertIs(rec, mc.containers['target_table']['qux'])
+
class TestElementIdentifiers(TestCase):
View it on GitLab: https://salsa.debian.org/med-team/hdmf/-/compare/ddbbea37b32c87d9b9ad4bb70566aa2f108798ea...11cc62d7b77a1ebd5519b708637ca24d6ab8d874