[med-svn] [Git][med-team/heudiconv][upstream] New upstream version 0.11.4
Yaroslav Halchenko (@yoh)
gitlab at salsa.debian.org
Fri Sep 30 00:56:08 BST 2022
Yaroslav Halchenko pushed to branch upstream at Debian Med / heudiconv
Commits:
0717afb2 by Yaroslav Halchenko at 2022-09-29T19:12:05-04:00
New upstream version 0.11.4
- - - - -
10 changed files:
- PKG-INFO
- README.rst
- heudiconv.egg-info/PKG-INFO
- heudiconv/_version.py
- heudiconv/convert.py
- heudiconv/heuristics/reproin.py
- heudiconv/heuristics/test_reproin.py
- heudiconv/parser.py
- heudiconv/tests/test_heuristics.py
- heudiconv/utils.py
Changes:
=====================================
PKG-INFO
=====================================
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: heudiconv
-Version: 0.11.3
+Version: 0.11.4
Summary: Heuristic DICOM Converter
Author: HeuDiConv team and contributors
License: Apache 2.0
=====================================
README.rst
=====================================
@@ -44,3 +44,22 @@ How to cite
Please use `Zenodo record <https://doi.org/10.5281/zenodo.1012598>`_ for
your specific version of HeuDiConv. We also support gathering
all relevant citations via `DueCredit <http://duecredit.org>`_.
+
+
+How to contribute
+-----------------
+
+HeuDiConv sources are managed with Git on `GitHub <https://github.com/nipy/heudiconv/>`_.
+Please file issues and suggest changes via Pull Requests.
+
+HeuDiConv requires installation of
+`dcm2niix <https://github.com/rordenlab/dcm2niix/>`_ and optionally
+`DataLad <https://datalad.org>`_.
+
+For development you will need a non-shallow clone (so there is a
+recent released tag) of the aforementioned repository. You can then
+install all necessary development requirements using ``pip install -r
+dev-requirements.txt``. Testing is done using `pytest
+<https://docs.pytest.org/>`_. Releases are packaged using Intuit
+auto. Workflow for releases and preparation of Docker images is in
+``.github/workflows/release.yml``.
=====================================
heudiconv.egg-info/PKG-INFO
=====================================
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: heudiconv
-Version: 0.11.3
+Version: 0.11.4
Summary: Heuristic DICOM Converter
Author: HeuDiConv team and contributors
License: Apache 2.0
=====================================
heudiconv/_version.py
=====================================
@@ -1 +1 @@
-__version__ = "0.11.3"
+__version__ = "0.11.4"
=====================================
heudiconv/convert.py
=====================================
@@ -895,7 +895,7 @@ def add_taskname_to_infofile(infofiles):
for infofile in infofiles:
meta_info = load_json(infofile)
try:
- meta_info['TaskName'] = (re.search('(?<=_task-)\w+',
+ meta_info['TaskName'] = (re.search(r'(?<=_task-)\w+',
op.basename(infofile))
.group(0).split('_')[0])
except AttributeError:
=====================================
heudiconv/heuristics/reproin.py
=====================================
@@ -28,7 +28,7 @@ per each session.
Sequence names on the scanner must follow this specification to avoid manual
conversion/handling:
- [PREFIX:][WIP ]<seqtype[-label]>[_ses-<SESID>][_task-<TASKID>][_acq-<ACQLABEL>][_run-<RUNID>][_dir-<DIR>][<more BIDS>][__<custom>]
+ [PREFIX:][WIP ]<datatype[-<suffix>]>[_ses-<SESID>][_task-<TASKID>][_acq-<ACQLABEL>][_run-<RUNID>][_dir-<DIR>][<more BIDS>][__<custom>]
where
[PREFIX:] - leading capital letters followed by : are stripped/ignored
@@ -42,17 +42,19 @@ where
descriptive ones for e.g. SESID (_ses-movie, _ses-localizer)
-<seqtype[-label]>
- a known BIDS sequence type which is usually a name of the folder under
- subject's directory. And (optional) label is specific per sequence type
- (e.g. typical "bold" for func, or "T1w" for "anat"), which could often
- (but not always) be deduced from DICOM. Known to BIDS modalities are:
+<datatype[-suffix]>
+ a known BIDS sequence datatype which is usually a name of the folder under
+ subject's directory. And (optional) suffix is a specific sequence type
+ (e.g., "bold" for func, or "T1w" for "anat"), which could often
+ (but not always) be deduced from DICOM. Known to ReproIn BIDS modalities
+ are:
anat - anatomical data. Might also be collected multiple times across
runs (e.g. if subject is taken out of magnet etc), so could
(optionally) have "_run" definition attached. For "standard anat"
- labels, please consult to "8.3 Anatomy imaging data" but most
- common are 'T1w', 'T2w', 'angio'
+ suffixes, please consult "8.3 Anatomy imaging data" but most
+ common are 'T1w', 'T2w', 'angio'.
+ beh - behavioral data. known but not "treated".
func - functional (AKA task, including resting state) data.
Typically contains multiple runs, and might have multiple different
tasks different per each run
@@ -60,6 +62,13 @@ where
fmap - field maps
dwi - diffusion weighted imaging (also can as well have runs)
+ The other BIDS modalities are not known ATM and their data will not be
+ converted and will be just skipped (with a warning). Full list of datatypes
+ can be found at
+ https://github.com/bids-standard/bids-specification/blob/v1.7.0/src/schema/objects/datatypes.yaml
+ and their corresponding suffixes at
+ https://github.com/bids-standard/bids-specification/tree/v1.7.0/src/schema/rules/datatypes
+
_ses-<SESID> (optional)
a session. Having a single sequence within a study would make that study
follow "multi-session" layout. A common practice to have a _ses specifier
@@ -204,6 +213,10 @@ POPULATE_INTENDED_FOR_OPTS = {
'criterion': 'Closest'
}
+
+KNOWN_DATATYPES = {'anat', 'func', 'dwi', 'behav', 'fmap'}
+
+
def _delete_chars(from_str, deletechars):
""" Delete characters from string allowing for Python 2 / 3 difference
"""
@@ -404,9 +417,9 @@ def infotodict(seqinfo):
# 1 - PRIMARY/SECONDARY
# 3 - Image IOD specific specialization (optional)
dcm_image_iod_spec = s.image_type[2]
- image_type_seqtype = {
+ image_type_datatype = {
# Note: P and M are too generic to make a decision here, could be
- # for different seqtypes (bold, fmap, etc)
+ # for different datatypes (bold, fmap, etc)
'FMRI': 'func',
'MPR': 'anat',
'DIFFUSION': 'dwi',
@@ -415,7 +428,7 @@ def infotodict(seqinfo):
'MIP_TRA': 'anat', # angiography
}.get(dcm_image_iod_spec, None)
else:
- dcm_image_iod_spec = image_type_seqtype = None
+ dcm_image_iod_spec = image_type_datatype = None
series_info = {} # For please lintian and its friends
for sfield in series_spec_fields:
@@ -440,19 +453,19 @@ def infotodict(seqinfo):
if dcm_image_iod_spec and dcm_image_iod_spec.startswith('MIP'):
series_info['acq'] = series_info.get('acq', '') + sanitize_str(dcm_image_iod_spec)
- seqtype = series_info.pop('seqtype')
- seqtype_label = series_info.pop('seqtype_label', None)
+ datatype = series_info.pop('datatype')
+ datatype_suffix = series_info.pop('datatype_suffix', None)
- if image_type_seqtype and seqtype != image_type_seqtype:
+ if image_type_datatype and datatype != image_type_datatype:
lgr.warning(
- "Deduced seqtype to be %s from DICOM, but got %s out of %s",
- image_type_seqtype, seqtype, series_spec)
+ "Deduced datatype to be %s from DICOM, but got %s out of %s",
+ image_type_datatype, datatype, series_spec)
# if s.is_derived:
# # Let's for now stash those close to original images
# # TODO: we might want a separate tree for all of this!?
# # so more of a parameter to the create_key
- # #seqtype += '/derivative'
+ # #datatype += '/derivative'
# # just keep it lower case and without special characters
# # XXXX what for???
# #seq.append(s.series_description.lower())
@@ -462,26 +475,26 @@ def infotodict(seqinfo):
prefix = ''
#
- # Figure out the seqtype_label (BIDS _suffix)
+ # Figure out the datatype_suffix (BIDS _suffix)
#
# If none was provided -- let's deduce it from the information we find:
# analyze s.protocol_name (series_id is based on it) for full name mapping etc
- if not seqtype_label:
- if seqtype == 'func':
+ if not datatype_suffix:
+ if datatype == 'func':
if '_pace_' in series_spec:
- seqtype_label = 'pace' # or should it be part of seq-
+ datatype_suffix = 'pace' # or should it be part of seq-
elif 'P' in s.image_type:
- seqtype_label = 'phase'
+ datatype_suffix = 'phase'
elif 'M' in s.image_type:
- seqtype_label = 'bold'
+ datatype_suffix = 'bold'
else:
# assume bold by default
- seqtype_label = 'bold'
- elif seqtype == 'fmap':
+ datatype_suffix = 'bold'
+ elif datatype == 'fmap':
# TODO: support phase1 phase2 like in "Case 2: Two phase images ..."
if not dcm_image_iod_spec:
raise ValueError("Do not know image data type yet to make decision")
- seqtype_label = {
+ datatype_suffix = {
# might want explicit {file_index} ?
# _epi for pepolar fieldmaps, see
# https://bids-specification.readthedocs.io/en/stable/04-modality-specific-files/01-magnetic-resonance-imaging-data.html#case-4-multiple-phase-encoded-directions-pepolar
@@ -489,19 +502,19 @@ def infotodict(seqinfo):
'P': 'phasediff',
'DIFFUSION': 'epi', # according to KODI those DWI are the EPIs we need
}[dcm_image_iod_spec]
- elif seqtype == 'dwi':
+ elif datatype == 'dwi':
# label for dwi as well
- seqtype_label = 'dwi'
+ datatype_suffix = 'dwi'
#
- # Even if seqtype_label was provided, for some data we might need to override,
+ # Even if datatype_suffix was provided, for some data we might need to override,
# since they are complementary files produced along-side with original
# ones.
#
if s.series_description.endswith('_SBRef'):
- seqtype_label = 'sbref'
+ datatype_suffix = 'sbref'
- if not seqtype_label:
+ if not datatype_suffix:
# Might be provided by the bids ending within series_spec, we would
# just want to check that the last element is not a _key-value pair
bids_ending = series_info.get('bids', None)
@@ -559,7 +572,7 @@ def infotodict(seqinfo):
# assert s.is_derived, "Motion corrected images must be 'derived'"
if s.is_motion_corrected and 'rec-' in series_info.get('bids', ''):
- raise NotImplementedError("want to add _acq-moco but there is _acq- already")
+ raise NotImplementedError("want to add _rec-moco but there is _rec- already")
def from_series_info(name):
"""A little helper to provide _name-value if series_info knows it
@@ -571,7 +584,12 @@ def infotodict(seqinfo):
else:
return None
- suffix_parts = [
+ # TODO: get order from schema, do not hardcode. ATM could be checked at
+ # https://bids-specification.readthedocs.io/en/stable/99-appendices/04-entity-table.html
+ # https://github.com/bids-standard/bids-specification/blob/HEAD/src/schema/rules/entities.yaml
+ # ATM we at large rely on possible (re)ordering according to schema to be done
+ # by heudiconv, not reproin here.
+ filename_suffix_parts = [
from_series_info('task'),
from_series_info('acq'),
# But we want to add an indicator in case it was motion corrected
@@ -580,10 +598,10 @@ def infotodict(seqinfo):
from_series_info('dir'),
series_info.get('bids'),
run_label,
- seqtype_label,
+ datatype_suffix,
]
# filter those which are None, and join with _
- suffix = '_'.join(filter(bool, suffix_parts))
+ suffix = '_'.join(filter(bool, filename_suffix_parts))
# # .series_description in case of
# sdesc = s.study_description
@@ -602,12 +620,12 @@ def infotodict(seqinfo):
# For scouts -- we want only dicoms
# https://github.com/nipy/heudiconv/issues/145
if "_Scout" in s.series_description or \
- (seqtype == 'anat' and seqtype_label and seqtype_label.startswith('scout')):
+ (datatype == 'anat' and datatype_suffix and datatype_suffix.startswith('scout')):
outtype = ('dicom',)
else:
outtype = ('nii.gz', 'dicom')
- template = create_key(seqtype, suffix, prefix=prefix, outtype=outtype)
+ template = create_key(datatype, suffix, prefix=prefix, outtype=outtype)
# we wanted ordered dict for consistent demarcation of dups
if template not in info:
info[template] = []
@@ -849,17 +867,17 @@ def parse_series_spec(series_spec):
return s, None
# Let's analyze first element which should tell us sequence type
- seqtype, seqtype_label = split2(split[0])
- if seqtype not in {'anat', 'func', 'dwi', 'behav', 'fmap'}:
+ datatype, datatype_suffix = split2(split[0])
+ if datatype not in KNOWN_DATATYPES:
# It is not something we consume
if bids:
- lgr.warning("It was instructed to be BIDS sequence but unknown "
- "type %s found", seqtype)
+ lgr.warning("It was instructed to be BIDS datatype but unknown "
+ "%s found. Known are: %s", datatype, ', '.join(KNOWN_DATATYPES))
return {}
- regd = dict(seqtype=seqtype)
- if seqtype_label:
- regd['seqtype_label'] = seqtype_label
+ regd = dict(datatype=datatype)
+ if datatype_suffix:
+ regd['datatype_suffix'] = datatype_suffix
# now go through each to see if one which we care
bids_leftovers = []
for s in split[1:]:
@@ -886,12 +904,12 @@ def parse_series_spec(series_spec):
# TODO: might want to check for all known "standard" BIDS suffixes here
# among bids_leftovers, thus serve some kind of BIDS validator
- # if not regd.get('seqtype_label', None):
- # # might need to assign a default label for each seqtype if was not
+ # if not regd.get('datatype_suffix', None):
+ # # might need to assign a default label for each datatype if was not
# # given
- # regd['seqtype_label'] = {
+ # regd['datatype_suffix'] = {
# 'func': 'bold'
- # }.get(regd['seqtype'], None)
+ # }.get(regd['datatype'], None)
return regd
@@ -900,7 +918,7 @@ def fixup_subjectid(subjectid):
"""Just in case someone managed to miss a zero or added an extra one"""
# make it lowercase
subjectid = subjectid.lower()
- reg = re.match("sid0*(\d+)$", subjectid)
+ reg = re.match(r"sid0*(\d+)$", subjectid)
if not reg:
# some completely other pattern
# just filter out possible _- in it
=====================================
heudiconv/heuristics/test_reproin.py
=====================================
@@ -114,7 +114,7 @@ def test_fix_dbic_protocol():
seqinfos = [seq1, seq2]
protocols2fix = {
md5sum('mystudy'):
- [('scout_run\+', 'THESCOUT-runX'),
+ [(r'scout_run\+', 'THESCOUT-runX'),
('run-life[0-9]', 'run+_task-life')],
re.compile('^my.*'):
[('THESCOUT-runX', 'THESCOUT')],
@@ -169,7 +169,7 @@ def test_parse_series_spec():
assert pdpn("bids_func-bold") == \
pdpn("func-bold") == \
- {'seqtype': 'func', 'seqtype_label': 'bold'}
+ {'datatype': 'func', 'datatype_suffix': 'bold'}
# pdpn("bids_func_ses+_task-boo_run+") == \
# order and PREFIX: should not matter, as well as trailing spaces
@@ -179,8 +179,8 @@ def test_parse_series_spec():
pdpn("WIP func_ses+_task-boo_run+") == \
pdpn("bids_func_ses+_run+_task-boo") == \
{
- 'seqtype': 'func',
- # 'seqtype_label': 'bold',
+ 'datatype': 'func',
+ # 'datatype_suffix': 'bold',
'session': '+',
'run': '+',
'task': 'boo',
@@ -191,7 +191,7 @@ def test_parse_series_spec():
pdpn("bids_func-pace_ses-1_run-2_task-boo_acq-bu_bids-please__therest") == \
pdpn("func-pace_ses-1_task-boo_acq-bu_bids-please_run-2") == \
{
- 'seqtype': 'func', 'seqtype_label': 'pace',
+ 'datatype': 'func', 'datatype_suffix': 'pace',
'session': '1',
'run': '2',
'task': 'boo',
@@ -201,24 +201,24 @@ def test_parse_series_spec():
assert pdpn("bids_anat-scout_ses+") == \
{
- 'seqtype': 'anat',
- 'seqtype_label': 'scout',
+ 'datatype': 'anat',
+ 'datatype_suffix': 'scout',
'session': '+',
}
assert pdpn("anat_T1w_acq-MPRAGE_run+") == \
{
- 'seqtype': 'anat',
+ 'datatype': 'anat',
'run': '+',
'acq': 'MPRAGE',
- 'seqtype_label': 'T1w'
+ 'datatype_suffix': 'T1w'
}
# Check for currently used {date}, which should also get adjusted
# from (date) since Philips does not allow for {}
assert pdpn("func_ses-{date}") == \
pdpn("func_ses-(date)") == \
- {'seqtype': 'func', 'session': '{date}'}
+ {'datatype': 'func', 'session': '{date}'}
assert pdpn("fmap_dir-AP_ses-01") == \
- {'seqtype': 'fmap', 'session': '01', 'dir': 'AP'}
\ No newline at end of file
+ {'datatype': 'fmap', 'session': '01', 'dir': 'AP'}
=====================================
heudiconv/parser.py
=====================================
@@ -22,7 +22,7 @@ tempdirs = TempDirs()
# Ensure they are cleaned up upon exit
atexit.register(tempdirs.cleanup)
-_VCS_REGEX = '%s\.(?:git|gitattributes|svn|bzr|hg)(?:%s|$)' % (op.sep, op.sep)
+_VCS_REGEX = r'%s\.(?:git|gitattributes|svn|bzr|hg)(?:%s|$)' % (op.sep, op.sep)
@docstring_parameter(_VCS_REGEX)
@@ -161,7 +161,7 @@ def get_study_sessions(dicom_dir_template, files_opt, heuristic, outdir,
for f in files_opt:
if op.isdir(f):
files += sorted(find_files(
- '.*', topdir=f, exclude_vcs=True, exclude="/\.datalad/"))
+ '.*', topdir=f, exclude_vcs=True, exclude=r"/\.datalad/"))
else:
files.append(f)
=====================================
heudiconv/tests/test_heuristics.py
=====================================
@@ -90,7 +90,10 @@ def test_reproin_largely_smoke(tmpdir, heuristic, invocation):
# but there should be nothing new
assert not ds.repo.dirty
- assert head == ds.repo.get_hexsha()
+ # TODO: remove whenever https://github.com/datalad/datalad/issues/6843
+ # is fixed/released
+ buggy_datalad = (ds.pathobj / ".gitmodules").read_text().splitlines().count('[submodule "Halchenko"]') > 1
+ assert head == ds.repo.get_hexsha() or buggy_datalad
# unless we pass 'overwrite' flag
runner(args + ['--overwrite'])
@@ -98,7 +101,7 @@ def test_reproin_largely_smoke(tmpdir, heuristic, invocation):
# and at the same commit
assert ds.is_installed()
assert not ds.repo.dirty
- assert head == ds.repo.get_hexsha()
+ assert head == ds.repo.get_hexsha() or buggy_datalad
@pytest.mark.parametrize(
@@ -121,7 +124,7 @@ def test_scans_keys_reproin(tmpdir, invocation):
if i != 0:
assert(os.path.exists(pjoin(dirname(scans_keys[0]), row[0])))
assert(re.match(
- '^[\d]{4}-[\d]{2}-[\d]{2}T[\d]{2}:[\d]{2}:[\d]{2}.[\d]{6}$',
+ r'^[\d]{4}-[\d]{2}-[\d]{2}T[\d]{2}:[\d]{2}:[\d]{2}.[\d]{6}$',
row[1]))
=====================================
heudiconv/utils.py
=====================================
@@ -259,20 +259,20 @@ def json_dumps_pretty(j, indent=2, sort_keys=True):
'[\n ]+("?[-+.0-9e]+"?,?) *\n(?= *"?[-+.0-9e]+"?)', r' \1',
js, flags=re.MULTILINE)
# uniform no spaces before ]
- js_ = re.sub(" *\]", "]", js_)
+ js_ = re.sub(r" *\]", "]", js_)
# uniform spacing before numbers
# But that thing could screw up dates within strings which would have 2 spaces
# in a date like Mar 3 2017, so we do negative lookahead to avoid changing
# in those cases
#import pdb; pdb.set_trace()
js_ = re.sub(
- '(?<!\w{3})' # negative lookbehind for the month
- ' *("?[-+.0-9e]+"?)'
- '(?! [123]\d{3})' # negative lookahead for a year
- '(?P<space> ?)[ \n]*',
+ r'(?<!\w{3})' # negative lookbehind for the month
+ r' *("?[-+.0-9e]+"?)'
+ r'(?! [123]\d{3})' # negative lookahead for a year
+ r'(?P<space> ?)[ \n]*',
r' \1\g<space>', js_)
# no spaces after [
- js_ = re.sub('\[ ', '[', js_)
+ js_ = re.sub(r'\[ ', '[', js_)
# the load from the original dump and reload from tuned up
# version should result in identical values since no value
# must be changed, just formatting.
View it on GitLab: https://salsa.debian.org/med-team/heudiconv/-/commit/0717afb292787bf581d55d57b55199a4642ddfb9
--
View it on GitLab: https://salsa.debian.org/med-team/heudiconv/-/commit/0717afb292787bf581d55d57b55199a4642ddfb9
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20220929/382b50d2/attachment-0001.htm>
More information about the debian-med-commit
mailing list