[med-svn] [fast5] 02/12: Imported Upstream version 0.5.6
Afif Elghraoui
afif at moszumanska.debian.org
Fri Aug 12 06:13:41 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif pushed a commit to branch master
in repository fast5.
commit 7d9396b7acf4b1596477480f73d99f39a819c16f
Author: Afif Elghraoui <afif at ghraoui.name>
Date: Thu Aug 11 18:55:40 2016 -0700
Imported Upstream version 0.5.6
---
.VERSION.in | 1 +
.gitignore | 2 +
.travis.Dockerfile.in | 37 +
.travis.yml | 19 +
.version_files | 2 +
LICENSE | 22 +
README.md | 11 -
README.org | 48 ++
VERSION | 1 +
python/.gitignore | 5 +
python/Makefile | 35 +
python/fast5/.version.py.in | 1 +
python/fast5/__init__.py | 10 +
python/fast5/source/fast5.cpp | 182 ++++
python/fast5/version.py | 1 +
python/setup.py | 94 ++
src/.gitignore | 4 +
src/Makefile | 35 +-
src/a.cpp | 86 --
src/f5-mod.cpp | 92 ++
src/f5dump-full.cpp | 273 ++++++
src/f5dump.cpp | 197 +++++
src/fast5.hpp | 928 ++++++++++++++++----
src/hdf5-mod.cpp | 308 +++++++
src/hdf5_tools.hpp | 1894 ++++++++++++++++++++++++++++++++---------
src/tmp.cpp | 207 +++++
26 files changed, 3847 insertions(+), 648 deletions(-)
diff --git a/.VERSION.in b/.VERSION.in
new file mode 100644
index 0000000..4640e9f
--- /dev/null
+++ b/.VERSION.in
@@ -0,0 +1 @@
+${VERSION}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..113a175
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/build*
+/local*
diff --git a/.travis.Dockerfile.in b/.travis.Dockerfile.in
new file mode 100644
index 0000000..a63b01d
--- /dev/null
+++ b/.travis.Dockerfile.in
@@ -0,0 +1,37 @@
+FROM debian:unstable
+MAINTAINER Matei David <matei.david.at.oicr.on.ca>
+ARG DEBIAN_FRONTEND=noninteractive
+
+# use host timezone
+ENV TZ=${TZ}
+RUN ln -snf /usr/share/zoneinfo/${TZ} /etc/localtime && echo ${TZ} > /etc/timezone
+
+# install prerequisites
+RUN apt-get update && \
+ apt-get install -y \
+ build-essential \
+ libhdf5-dev \
+ libboost-dev \
+ libboost-python-dev \
+ python2.7-minimal \
+ python-setuptools \
+ python-virtualenv
+
+# expose prerequisites settings
+ENV HDF5_INCLUDE_DIR=/usr/include/hdf5/serial
+ENV HDF5_LIB_DIR=/usr/lib/x86_64-linux-gnu/hdf5/serial
+ENV BOOST_INCLUDE_DIR=/usr/include
+ENV BOOST_LIB_DIR=/usr/lib/x86_64-linux-gnu
+
+# if necessary, specify compiler
+#RUN apt-get install -y g++-4.9 g++-5 g++-6
+#ENV CC=gcc-4.9
+#ENV CXX=g++-4.9
+
+# use host id
+RUN groupadd --gid ${GROUP_ID} ${GROUP_NAME}
+RUN useradd --create-home --uid ${USER_ID} --gid ${GROUP_ID} ${USER_NAME}
+USER ${USER_NAME}
+
+VOLUME /data
+WORKDIR /data
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..58c3113
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,19 @@
+# travis.yml for github.com/mateidavid/fast5
+
+sudo: required
+
+services:
+ - docker
+
+before_install:
+ - sudo apt-get update -y
+ - sudo apt-get install -y -o Dpkg::Options::="--force-confnew" docker-engine
+ - TZ=$(cat /etc/timezone) USER_ID=$(id -u) USER_NAME=$(id -un) GROUP_ID=$(id -g) GROUP_NAME=$(id -gn) envsubst <.travis.Dockerfile.in | docker build -t fast5 -
+
+install:
+ - docker run --rm -v $PWD:/data fast5 make -C src -e
+ - docker run --rm -v $PWD:/data fast5 bash -c 'virtualenv build-venv && source build-venv/bin/activate && make -C python -e develop'
+
+script:
+ - docker run --rm -v $PWD:/data fast5 bash -c 'src/hdf5-mod -f file.000.fast5 && src/f5-mod file.000.fast5 && src/f5dump file.000.fast5 && src/f5dump-full file.000.fast5'
+ - docker run --rm -v $PWD:/data fast5 bash -c 'source build-venv/bin/activate && python -c "import fast5; f = fast5.File(\"file.000.fast5\"); print(f.file_version()); print(f.have_eventdetection_events())"'
diff --git a/.version_files b/.version_files
new file mode 100644
index 0000000..c822d11
--- /dev/null
+++ b/.version_files
@@ -0,0 +1,2 @@
+VERSION
+python/fast5/version.py
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..ecd41e2
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Matei David, Ontario Institute for Cancer Research
+Copyright (c) 2015 Jared Simpson, Ontario Institute for Cancer Research
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
deleted file mode 100644
index e7bdea4..0000000
--- a/README.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# FAST5
-
-A lightweight C++11 library to read raw signal data from Oxford Nanopore's FAST5 files.
-
-## Installation instructions
-
-This library is provided as header files only, so you only need to copy ```fast5.hpp``` and ```hdf5_tools.hpp``` into your project.
-
-## Usage instructions
-
-See ```a.cpp``` for an example.
diff --git a/README.org b/README.org
new file mode 100644
index 0000000..a62c402
--- /dev/null
+++ b/README.org
@@ -0,0 +1,48 @@
+# -*- mode:org; mode:visual-line; coding:utf-8; -*-
+
+** Fast5 Library
+
+[[http://travis-ci.org/mateidavid/fast5][http://travis-ci.org/mateidavid/fast5.svg?branch=master]]
+
+A lightweight C++11 library to read raw signal data from Oxford Nanopore's Fast5 files.
+
+*** C++
+
+**** Installation
+
+This is a header-only library. You only need to copy [[file:src/fast5.hpp][src/fast5.hpp]] and [[file:src/hdf5_tools.hpp][src/hdf5_tools.hpp]] into your C++ project.
+
+**** Usage
+
+See [[file:src/f5dump.cpp][src/f5dump.cpp]] for an example.
+
+*** Python Wrapper
+
+An optional python wrapper for this library is available through Boost.Python. The wrapper currently implements only read-only access.
+
+**** Installation
+
+#+BEGIN_EXAMPLE
+cd python
+HDF5_DIR=/usr/local BOOST_DIR=/usr/local make develop-user
+#+END_EXAMPLE
+
+Notes:
+
+- HDF5 and Boost.Python must be available, and their locations can be passed on to the Python setup process using the environment variables =HDF5_DIR= and =BOOST_DIR=. Alternatively, the respective include directories, library directories, and library names may be specified explicitly with: =HDF5_INCLUDE_DIR=, =HDF5_LIB_DIR=, =HDF5_LIB=, =BOOST_INCLUDE_DIR=, =BOOST_LIB_DIR=, =BOOST_PYTHON_LIB=. For details, see [[file:python/setup.py][python/setup.py]] and [[file:.travis.yml][.travis.yml]].
+
+- To install =fast5= as a package in a virtualenv, use the target =develop=. To install as a user package, use the target =develop-user=. For details, see [[file:python/Makefile][python/Makefile]].
+
+**** Usage
+
+#+BEGIN_EXAMPLE
+import fast5
+f = fast5.File("file.000.fast5")
+print(f.file_version())
+print(f.have_eventdetection_events())
+#+END_EXAMPLE
+
+*** License
+
+[[file:LICENSE][MIT License]].
+
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..b49b253
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.5.6
diff --git a/python/.gitignore b/python/.gitignore
new file mode 100644
index 0000000..44dafa0
--- /dev/null
+++ b/python/.gitignore
@@ -0,0 +1,5 @@
+build/
+dist/
+*.egg-info/
+*.pyc
+*.so
diff --git a/python/Makefile b/python/Makefile
new file mode 100755
index 0000000..8c4047e
--- /dev/null
+++ b/python/Makefile
@@ -0,0 +1,35 @@
+.SUFFIXES:
+MAKEFLAGS += -r
+SHELL := /bin/bash
+.DELETE_ON_ERROR:
+.PHONY: all help clean check_virtualenv develop develop-user develop-uninstall develop-uninstall-user
+
+PYTHON = $(shell which python)
+
+all: help
+
+print-%:
+ @echo '$*=$($*)'
+
+help: ## This help.
+ @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
+
+clean: ## Remove build products
+ ${PYTHON} setup.py clean
+ rm -rf fast5.egg-info build dist
+ find fast5/ \( -name '*.pyc' -o -name '*.so' \) -delete
+
+check_virtualenv:
+ @[ "$$VIRTUAL_ENV" ] || { echo "not in a virtualenv" >&2; exit 1; }
+
+develop: check_virtualenv clean ## Install in develop mode to current virtualenv
+ ${PYTHON} setup.py develop
+
+develop-user: clean ## Install in develop mode to current user
+ ${PYTHON} setup.py develop --user
+
+develop-uninstall: check_virtualenv clean ## Uninstall from current virtualenv
+ ${PYTHON} setup.py develop --uninstall
+
+develop-uninstall-user: clean ## Uninstall from current user
+ ${PYTHON} setup.py develop --uninstall --user
diff --git a/python/fast5/.version.py.in b/python/fast5/.version.py.in
new file mode 100644
index 0000000..d8ed4d2
--- /dev/null
+++ b/python/fast5/.version.py.in
@@ -0,0 +1 @@
+__version__ = '${VERSION}'
diff --git a/python/fast5/__init__.py b/python/fast5/__init__.py
new file mode 100755
index 0000000..14e4b5d
--- /dev/null
+++ b/python/fast5/__init__.py
@@ -0,0 +1,10 @@
+"""
+fast5.__init__.py
+(c) 2016: Matei David, Ontario Institute for Cancer Research
+MIT License
+"""
+
+from .version import __version__
+from .fast5 import *  # explicit relative import of the compiled extension module (valid on Python 2 and 3)
+
+__version_info__ = tuple([int(num) for num in __version__.split('.')])
diff --git a/python/fast5/source/fast5.cpp b/python/fast5/source/fast5.cpp
new file mode 100644
index 0000000..2e51dd8
--- /dev/null
+++ b/python/fast5/source/fast5.cpp
@@ -0,0 +1,182 @@
+#include <boost/python.hpp>
+#include <boost/python/suite/indexing/map_indexing_suite.hpp>
+#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
+#include <boost/python/overloads.hpp>
+
+#include "fast5.hpp"
+
+namespace bp = boost::python;
+
+// member functions with default arguments
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_raw_samples_params_overloads, get_raw_samples_params, 0, 1)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_raw_samples_overloads, get_raw_samples, 0, 1)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_eventdetection_read_name_list_overloads, get_eventdetection_read_name_list, 0, 1)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_eventdetection_events_overloads, have_eventdetection_events, 0, 1)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_eventdetection_params_overloads, get_eventdetection_params, 0, 1)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_eventdetection_event_params_overloads, get_eventdetection_event_params, 0, 2)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_eventdetection_events_overloads, get_eventdetection_events, 0, 2)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_fastq_overlords, have_basecall_fastq, 1, 2)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_fastq_overlords, get_basecall_fastq, 1, 2)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_seq_overlords, have_basecall_seq, 1, 2)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_seq_overlords, get_basecall_seq, 1, 2)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_model_overlords, have_basecall_model, 1, 2)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_model_file_overlords, get_basecall_model_file, 1, 2)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_model_params_overlords, get_basecall_model_params, 1, 2)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_model_overlords, get_basecall_model, 1, 2)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_events_overlords, have_basecall_events, 1, 2)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_events_overlords, get_basecall_events, 1, 2)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_event_alignment_overlords, have_basecall_event_alignment, 0, 1)
+BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_event_alignment_overlords, get_basecall_event_alignment, 0, 1)
+
+BOOST_PYTHON_MODULE(fast5)
+{
+ bp::class_<fast5::Channel_Id_Parameters>("Channel_Id_Parameters")
+ .def_readwrite("channel_number", &fast5::Channel_Id_Parameters::channel_number)
+ .def_readwrite("digitisation", &fast5::Channel_Id_Parameters::digitisation)
+ .def_readwrite("offset", &fast5::Channel_Id_Parameters::offset)
+ .def_readwrite("range", &fast5::Channel_Id_Parameters::range)
+ .def_readwrite("sampling_rate", &fast5::Channel_Id_Parameters::sampling_rate)
+ ;
+ bp::class_<fast5::Raw_Samples_Parameters>("Raw_Samples_Parameters")
+ .def_readwrite("read_id", &fast5::Raw_Samples_Parameters::read_id)
+ .def_readwrite("read_number", &fast5::Raw_Samples_Parameters::read_number)
+ .def_readwrite("start_mux", &fast5::Raw_Samples_Parameters::start_mux)
+ .def_readwrite("start_time", &fast5::Raw_Samples_Parameters::start_time)
+ .def_readwrite("duration", &fast5::Raw_Samples_Parameters::duration)
+ ;;
+ bp::class_<fast5::EventDetection_Event_Parameters>("EventDetection_Event_Parameters")
+ .def_readwrite("read_id", &fast5::EventDetection_Event_Parameters::read_id)
+ .def_readwrite("read_number", &fast5::EventDetection_Event_Parameters::read_number)
+ .def_readwrite("scaling_used", &fast5::EventDetection_Event_Parameters::scaling_used)
+ .def_readwrite("start_mux", &fast5::EventDetection_Event_Parameters::start_mux)
+ .def_readwrite("start_time", &fast5::EventDetection_Event_Parameters::start_time)
+ .def_readwrite("duration", &fast5::EventDetection_Event_Parameters::duration)
+ .def_readwrite("median_before", &fast5::EventDetection_Event_Parameters::median_before)
+ .def_readwrite("abasic_found", &fast5::EventDetection_Event_Parameters::abasic_found)
+ ;
+ bp::class_<fast5::EventDetection_Event_Entry>("EventDetection_Event_Entry")
+ .def_readwrite("mean", &fast5::EventDetection_Event_Entry::mean)
+ .def_readwrite("stdv", &fast5::EventDetection_Event_Entry::stdv)
+ .def_readwrite("start", &fast5::EventDetection_Event_Entry::start)
+ .def_readwrite("length", &fast5::EventDetection_Event_Entry::length)
+ ;
+ bp::class_<fast5::Model_Entry>("Model_Entry")
+ .def_readwrite("variant", &fast5::Model_Entry::variant)
+ .def_readwrite("level_mean", &fast5::Model_Entry::level_mean)
+ .def_readwrite("level_stdv", &fast5::Model_Entry::level_stdv)
+ .def_readwrite("sd_mean", &fast5::Model_Entry::sd_mean)
+ .def_readwrite("sd_stdv", &fast5::Model_Entry::sd_stdv)
+ .def_readwrite("weight", &fast5::Model_Entry::weight)
+ .def_readwrite("kmer", &fast5::Model_Entry::kmer)
+ ;
+ bp::class_<fast5::Model_Parameters>("Model_Parameters")
+ .def_readwrite("scale", &fast5::Model_Parameters::scale)
+ .def_readwrite("shift", &fast5::Model_Parameters::shift)
+ .def_readwrite("drift", &fast5::Model_Parameters::drift)
+ .def_readwrite("var", &fast5::Model_Parameters::var)
+ .def_readwrite("scale_sd", &fast5::Model_Parameters::scale_sd)
+ .def_readwrite("var_sd", &fast5::Model_Parameters::var_sd)
+ ;
+ bp::class_<fast5::Event_Entry>("Event_Entry")
+ .def_readwrite("mean", &fast5::Event_Entry::mean)
+ .def_readwrite("stdv", &fast5::Event_Entry::stdv)
+ .def_readwrite("start", &fast5::Event_Entry::start)
+ .def_readwrite("length", &fast5::Event_Entry::length)
+ .def_readwrite("p_model_state", &fast5::Event_Entry::p_model_state)
+ .def_readwrite("p_mp_state", &fast5::Event_Entry::p_mp_state)
+ .def_readwrite("p_A", &fast5::Event_Entry::p_A)
+ .def_readwrite("p_C", &fast5::Event_Entry::p_C)
+ .def_readwrite("p_G", &fast5::Event_Entry::p_G)
+ .def_readwrite("p_T", &fast5::Event_Entry::p_T)
+ .def_readwrite("move", &fast5::Event_Entry::move)
+ .def_readwrite("model_state", &fast5::Event_Entry::model_state)
+ .def_readwrite("mp_state", &fast5::Event_Entry::mp_state)
+ ;;
+ bp::class_<fast5::Event_Alignment_Entry>("Event_Alignment_Entry")
+ .def_readwrite("template_index", &fast5::Event_Alignment_Entry::template_index)
+ .def_readwrite("complement_index", &fast5::Event_Alignment_Entry::complement_index)
+ .def("get_kmer", &fast5::Event_Alignment_Entry::get_kmer)
+ ;;
+
+ bp::class_<std::map<std::string, std::string>>("Map_Str_Str")
+ .def(bp::map_indexing_suite<std::map<std::string, std::string>>())
+ ;
+ bp::class_<std::vector<std::string>>("Vec_Str")
+ .def(bp::vector_indexing_suite<std::vector<std::string>>())
+ ;
+ bp::class_<std::vector<fast5::Raw_Samples_Entry>>("Vec_Raw_Samples_Entry")
+ .def(bp::vector_indexing_suite<std::vector<fast5::Raw_Samples_Entry>>())
+ ;
+ bp::class_<std::vector<fast5::EventDetection_Event_Entry>>("Vec_EventDetection_Event_Entry")
+ .def(bp::vector_indexing_suite<std::vector<fast5::EventDetection_Event_Entry>>())
+ ;
+ bp::class_<std::vector<fast5::Model_Entry>>("Vec_Model_Entry")
+ .def(bp::vector_indexing_suite<std::vector<fast5::Model_Entry>>())
+ ;
+ bp::class_<std::vector<fast5::Event_Entry>>("Vec_Event_Entry")
+ .def(bp::vector_indexing_suite<std::vector<fast5::Event_Entry>>())
+ ;
+ bp::class_<std::vector<fast5::Event_Alignment_Entry>>("Vec_Event_Alignment_Entry")
+ .def(bp::vector_indexing_suite<std::vector<fast5::Event_Alignment_Entry>>())
+ ;
+
+ bp::class_<fast5::File, boost::noncopyable>("File")
+ .def(bp::init<std::string, bp::optional<bool>>())
+ .def("is_open", &fast5::File::is_open)
+ .def("is_rw", &fast5::File::is_rw)
+ .def("file_name", &fast5::File::file_name, bp::return_value_policy<bp::copy_const_reference>())
+ .def("open", &fast5::File::open)
+ .def("create", &fast5::File::create)
+ .def("close", &fast5::File::close)
+ .def("is_valid_file", &hdf5_tools::File::is_valid_file).staticmethod("is_valid_file")
+ .def("get_object_count", &hdf5_tools::File::get_object_count).staticmethod("get_object_count")
+ //
+ .def("file_version", &fast5::File::file_version)
+ //
+ .def("have_channel_id_params", &fast5::File::have_channel_id_params)
+ .def("get_channel_id_params", &fast5::File::get_channel_id_params)
+ //
+ .def("have_sampling_rate", &fast5::File::have_sampling_rate)
+ .def("get_sampling_rate", &fast5::File::get_sampling_rate)
+ //
+ .def("have_tracking_id_params", &fast5::File::have_tracking_id_params)
+ .def("get_tracking_id_params", &fast5::File::get_tracking_id_params)
+ //
+ .def("have_sequences_params", &fast5::File::have_sequences_params)
+ .def("get_sequences_params", &fast5::File::get_sequences_params)
+ //
+ .def("get_raw_samples_read_name_list", &fast5::File::get_raw_samples_read_name_list, bp::return_value_policy<bp::copy_const_reference>())
+ .def("have_raw_samples", &fast5::File::have_raw_samples)
+ .def("get_raw_samples_params", &fast5::File::get_raw_samples_params, get_raw_samples_params_overloads())
+ .def("get_raw_samples", &fast5::File::get_raw_samples, get_raw_samples_overloads())
+ //
+ .def("get_eventdetection_group_list", &fast5::File::get_eventdetection_group_list, bp::return_value_policy<bp::copy_const_reference>())
+ .def("have_eventdetection_groups", &fast5::File::have_eventdetection_groups)
+ .def("get_eventdetection_read_name_list", &fast5::File::get_eventdetection_read_name_list, get_eventdetection_read_name_list_overloads())
+ .def("have_eventdetection_events", &fast5::File::have_eventdetection_events, have_eventdetection_events_overloads())
+ .def("get_eventdetection_params", &fast5::File::get_eventdetection_params, get_eventdetection_params_overloads())
+ .def("get_eventdetection_event_params", &fast5::File::get_eventdetection_event_params, get_eventdetection_event_params_overloads())
+ .def("get_eventdetection_events", &fast5::File::get_eventdetection_events, get_eventdetection_events_overloads())
+ //
+ .def("get_basecall_group_list", &fast5::File::get_basecall_group_list, bp::return_value_policy<bp::copy_const_reference>())
+ .def("have_basecall_groups", &fast5::File::have_basecall_groups)
+ .def("get_basecall_strand_group_list", &fast5::File::get_basecall_strand_group_list, bp::return_value_policy<bp::copy_const_reference>())
+ .def("have_basecall_strand_groups", &fast5::File::have_basecall_strand_groups)
+ .def("have_basecall_log", &fast5::File::have_basecall_log)
+ .def("get_basecall_log", &fast5::File::get_basecall_log)
+ .def("have_basecall_fastq", &fast5::File::have_basecall_fastq, have_basecall_fastq_overlords())
+ .def("get_basecall_fastq", &fast5::File::get_basecall_fastq, get_basecall_fastq_overlords())
+ .def("add_basecall_fastq", &fast5::File::add_basecall_fastq)
+ .def("have_basecall_seq", &fast5::File::have_basecall_seq, have_basecall_seq_overlords())
+ .def("get_basecall_seq", &fast5::File::get_basecall_seq, get_basecall_seq_overlords())
+ .def("add_basecall_seq", &fast5::File::add_basecall_seq)
+ .def("have_basecall_model", &fast5::File::have_basecall_model, have_basecall_model_overlords())
+ .def("get_basecall_model_file", &fast5::File::get_basecall_model_file, get_basecall_model_file_overlords())
+ .def("get_basecall_model_params", &fast5::File::get_basecall_model_params, get_basecall_model_params_overlords())
+ .def("get_basecall_model", &fast5::File::get_basecall_model, get_basecall_model_overlords())
+ .def("have_basecall_events", &fast5::File::have_basecall_events, have_basecall_events_overlords())
+ .def("get_basecall_events", &fast5::File::get_basecall_events, get_basecall_events_overlords())
+ .def("have_basecall_event_alignment", &fast5::File::have_basecall_event_alignment, have_basecall_event_alignment_overlords())
+ .def("get_basecall_event_alignment", &fast5::File::get_basecall_event_alignment, get_basecall_event_alignment_overlords())
+ ;
+}
diff --git a/python/fast5/version.py b/python/fast5/version.py
new file mode 100644
index 0000000..8701e4d
--- /dev/null
+++ b/python/fast5/version.py
@@ -0,0 +1 @@
+__version__ = '0.5.6'
diff --git a/python/setup.py b/python/setup.py
new file mode 100755
index 0000000..77dbc70
--- /dev/null
+++ b/python/setup.py
@@ -0,0 +1,94 @@
+"""
+fast5.setup.py
+(c) 2016: Matei David, Ontario Institute for Cancer Research
+MIT License
+"""
+
+import os
+import re
+import pkg_resources
+import sys
+from setuptools import setup, Extension
+
+exec(open('fast5/version.py').read())
+
+# check HDF5 include and lib dirs; library name comes from HDF5_LIB (legacy HDF_LIB still honored)
+hdf5_dir = os.environ.get('HDF5_DIR', '/usr')
+hdf5_include_dir = os.environ.get('HDF5_INCLUDE_DIR', os.path.join(hdf5_dir, 'include'))
+hdf5_lib_dir = os.environ.get('HDF5_LIB_DIR', os.path.join(hdf5_dir, 'lib'))
+hdf5_lib = os.environ.get('HDF5_LIB', os.environ.get('HDF_LIB', 'hdf5'))
+if not os.path.isfile(os.path.join(hdf5_include_dir, 'H5pubconf.h')):
+    sys.exit(hdf5_include_dir + ': could not find HDF5 header files; use HDF5_DIR or HDF5_INCLUDE_DIR')
+if (not os.path.isfile(os.path.join(hdf5_lib_dir, 'lib' + hdf5_lib + '.so'))
+    and not os.path.isfile(os.path.join(hdf5_lib_dir, 'lib' + hdf5_lib + '.a'))):
+    sys.exit(hdf5_lib_dir + ': could not find HDF5 library file; use HDF5_DIR or HDF5_LIB_DIR/HDF5_LIB')
+
+# check Boost.Python include and lib dirs
+boost_dir = os.environ.get('BOOST_DIR', '/usr')
+boost_include_dir = os.environ.get('BOOST_INCLUDE_DIR', os.path.join(boost_dir, 'include'))
+boost_lib_dir = os.environ.get('BOOST_LIB_DIR', os.path.join(boost_dir, 'lib'))
+boost_python_lib = os.environ.get('BOOST_PYTHON_LIB', 'boost_python')
+if not os.path.isfile(os.path.join(boost_include_dir, 'boost', 'python.hpp')):
+ sys.exit(boost_include_dir + ': could not find Boost Python header files; use BOOST_DIR or BOOST_INCLUDE_DIR')
+if (not os.path.isfile(os.path.join(boost_lib_dir, 'lib' + boost_python_lib + '.so'))
+ and not os.path.isfile(os.path.join(boost_lib_dir, 'lib' + boost_python_lib + '.a'))):
+ sys.exit(boost_lib_dir + ': could not find Boost Python library file; use BOOST_DIR or BOOST_LIB_DIR/BOOST_PYTHON_LIB')
+
+fast5_dir = os.environ.get('FAST5_DIR', os.path.join('..', 'src'))
+
+extra_compile_args = [
+ '-std=c++11',
+ '-Wall', '-Wextra', '-Wpedantic',
+ '-isystem', hdf5_include_dir,
+ '-isystem', boost_include_dir,
+]
+#extra_compile_args += ['-O0', '-g3', '-ggdb', '-fno-eliminate-unused-debug-types', '-v']
+extra_link_args = []
+#extra_link_args += ['-v']
+
+extensions = [
+ Extension(
+ 'fast5.fast5',
+ include_dirs=[
+ fast5_dir,
+ ],
+ sources=[
+ os.path.join('fast5', 'source', 'fast5.cpp'),
+ ],
+ depends=[
+ os.path.join(fast5_dir, fn)
+ for fn in ['fast5.hpp', 'hdf5_tools.hpp']
+ ],
+ extra_compile_args=extra_compile_args,
+ extra_link_args=extra_link_args,
+ library_dirs=[
+ hdf5_lib_dir,
+ boost_lib_dir,
+ ],
+ runtime_library_dirs=[
+ hdf5_lib_dir,
+ boost_lib_dir,
+ ],
+ libraries=[
+ hdf5_lib,
+ boost_python_lib,
+ ],
+ ),
+]
+
+setup(
+ name='fast5',
+ description='Fast5 file interface.',
+ version=__version__,
+ #long_description=open('README').read(),
+ author='Matei David, Ontario Institute for Cancer Research',
+ author_email='matei.david at oicr.on.ca',
+ license='MIT',
+ url='https://github.com/mateidavid/fast5',
+ packages=['fast5'],
+ exclude_package_data={
+ '': ['*.c', '*.cpp', '*.h', '*.hpp'],
+ },
+ ext_modules=extensions,
+ scripts=[],
+)
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 0000000..869f46c
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,4 @@
+f5dump
+f5dump-full
+hdf5-mod
+f5-mod
diff --git a/src/Makefile b/src/Makefile
index 0b3456c..f9e2325 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,2 +1,33 @@
-a: a.cpp fast5.hpp hdf5_tools.hpp
- g++ -std=c++11 -O0 -g3 -ggdb -fno-eliminate-unused-debug-types -Wall -Wextra -pedantic -Wno-unused-parameter -o $@ $^ -L /usr/local/lib -lhdf5
+.SUFFIXES:
+MAKEFLAGS += -r
+SHELL := /bin/bash
+.DELETE_ON_ERROR:
+.PHONY: all help list clean check_hdf5
+
+HDF5_DIR = /usr/local
+HDF5_INCLUDE_DIR = ${HDF5_DIR}/include
+HDF5_LIB_DIR = ${HDF5_DIR}/lib
+HDF5_LIB = hdf5
+
+TARGETS = f5dump f5dump-full hdf5-mod f5-mod
+
+all: ${TARGETS}
+
+print-%:
+ @echo '$*=$($*)'
+
+help: ## This help.
+ @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
+
+list:
+ @echo "TARGETS=${TARGETS}"
+
+clean:
+ rm -rf ${TARGETS}
+
+check_hdf5:
+ @[ -f "${HDF5_INCLUDE_DIR}/H5pubconf.h" ] || { echo "HDF5 headers not found" >&2; exit 1; }
+ @[ -f "${HDF5_LIB_DIR}/lib${HDF5_LIB}.so" ] || [ -f "${HDF5_LIB_DIR}/lib${HDF5_LIB}.a" ] || { echo "HDF5 library not found" >&2; exit 1; }
+
+%: %.cpp fast5.hpp hdf5_tools.hpp | check_hdf5
+ ${CXX} -std=c++11 -O0 -g3 -ggdb -fno-eliminate-unused-debug-types -Wall -Wextra -Wpedantic -isystem ${HDF5_INCLUDE_DIR} -o $@ $< -L${HDF5_LIB_DIR} -Wl,--rpath=${HDF5_LIB_DIR} -l${HDF5_LIB} -lpthread -lz -ldl
diff --git a/src/a.cpp b/src/a.cpp
deleted file mode 100644
index 2c89933..0000000
--- a/src/a.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-#include <cassert>
-#include <iostream>
-#include <string>
-
-#include "fast5.hpp"
-
-using namespace std;
-
-
-int main(int argc, char* argv[])
-{
- assert(argc == 2);
- string file_name(argv[1]);
- //string ds_name(argv[2]);
-
- // Open the FAST5 file for reading
- fast5::File* f_p;
- f_p = new fast5::File(file_name);
-
- // Check that it opened successfully
- assert(f_p->is_open());
-
- // Extract version information for the ONT software used to generate this dataset
- cout << "file_version=" << f_p->file_version() << endl;
- cout << "basecall_version=" << f_p->basecall_version() << endl;
- cout << "eventdetection_version=" << f_p->eventdetection_version() << endl;
- cout << "sequences_version=" << f_p->sequences_version() << endl;
-
- // This function checks to see if 2D basecalls are available
- if(f_p->have_basecalled_2D())
- {
- cout << "basecalled_2D=" << f_p->basecalled_2D() << endl;
-
- // Extract the alignment between template and complement events
- // which were generated by the 2D basecaller
- auto v = f_p->get_event_alignments();
- cout << "event_alignment().size()=" << v.size() << endl;
- for (const auto& e : v)
- {
- cout << "(template=" << e.template_index << ", complement=" << e.complement_index << ", kmer=" << e.kmer << ")" << endl;
- }
- }
-
- // Iterate over the template/complement strands
- for (size_t i = 0; i < 2; ++i)
- {
- // Check if a pore model for this strand exists
- if (f_p->have_model(i))
- {
- // Print the name of ONT's reference model used to basecall
- cout << "Model file: " << f_p->get_model_file(i) << endl;
-
- // Extract the global scaling parameters for the pore model
- auto params = f_p->get_model_parameters(i);
- cout << "model drift=" << params.drift <<
- ", scale=" << params.scale <<
- ", scale_sd=" << params.scale_sd <<
- ", shift=" << params.shift <<
- ", var=" << params.var <<
- ", var_sd=" << params.var_sd << endl;
-
- // Extract the expected current levels for each k-mer
- auto v = f_p->get_model(i);
- cout << "model(" << i << ").size()=" << v.size() << endl;
- for (const auto& e : v)
- {
- cout << "(kmer=" << e.kmer << ", level_mean=" << e.level_mean << ", level_stdv=" << e.level_stdv << ")" << endl;
- }
- }
-
- // Check if this strand has event observations
- if (f_p->have_events(i))
- {
- // Extract each event
- auto v = f_p->get_events(i);
- cout << "events(" << i << ").size()=" << v.size() << endl;
- for (const auto& e : v)
- {
- cout << "(mean=" << e.mean << ", start=" << e.start << ", stdv=" << e.stdv << ", length=" << e.length << ")" << endl;
- }
- }
- }
-
- // Cleanup the file pointer, which closes the file
- delete f_p;
-}
diff --git a/src/f5-mod.cpp b/src/f5-mod.cpp
new file mode 100644
index 0000000..278ba57
--- /dev/null
+++ b/src/f5-mod.cpp
@@ -0,0 +1,92 @@
+#include <cassert>
+#include <iostream>
+#include <string>
+
+#include "fast5.hpp"
+
+using namespace std;
+
+int main(int argc, char* argv[])
+{
+    if (argc != 2)
+    {
+        cerr << "use: " << argv[0] << " <fast5_file>" << endl;
+        return EXIT_FAILURE;
+    }
+    string file_name(argv[1]);
+    {
+        fast5::File f;
+        //
+        // All fast5 operations are performed inside a try-catch block. This should
+        // resist various hdf5 errors without leaking memory.
+        //
+        try
+        {
+            //
+            // open file in rw mode
+            //
+            f.open(file_name, true);
+            assert(f.is_open());
+            assert(f.is_rw());
+            //
+            // find next available basecall group with given prefix
+            //
+            string test_bc_grp_prefix = "Test_";
+            auto bc_grp_l = f.get_basecall_group_list();
+            set< string > test_bc_grp_suffix_s;
+            for (const auto& bc_grp : bc_grp_l)
+            {
+                if (bc_grp.compare(0, test_bc_grp_prefix.size(), test_bc_grp_prefix) == 0)
+                {
+                    cerr << "found group: " << test_bc_grp_prefix + bc_grp.substr(test_bc_grp_prefix.size()) << endl;
+                    test_bc_grp_suffix_s.insert(bc_grp.substr(test_bc_grp_prefix.size())); // record suffixes of prefixed groups only
+                }
+            }
+            string test_bc_grp_suffix;
+            for (unsigned i = 0; i < 1000; ++i)
+            {
+                ostringstream os;
+                os << setw(3) << setfill('0') << i;
+                if (test_bc_grp_suffix_s.count(os.str()) == 0)
+                {
+                    test_bc_grp_suffix = os.str();
+                    break;
+                }
+            }
+            assert(not test_bc_grp_suffix.empty());
+            clog << "using group: " << test_bc_grp_prefix + test_bc_grp_suffix << endl;
+            //
+            // add basecall seq
+            //
+            f.add_basecall_seq(0, test_bc_grp_prefix + test_bc_grp_suffix, "test_name", "ACGT");
+            //
+            // add basecall events
+            //
+            vector< fast5::Event_Entry > ev(3, {55.0, 1.0, 0.05, 0.01, .5, .5, .7, .1, .1, .1, 0,
+                        array< char, 8 >{"ACGTA"}, array< char, 8 >{"CGTAC"}});
+            f.add_basecall_events(0, test_bc_grp_prefix + test_bc_grp_suffix, ev);
+            //
+            // add basecall pore model
+            //
+            vector< fast5::Model_Entry > mod(3, {0, 56.0, 1.0, 42.0, 1.0, 5.0, array< char, 8 >{"ACGTA"}});
+            f.add_basecall_model(0, test_bc_grp_prefix + test_bc_grp_suffix, mod);
+            //
+            // add basecall pore model params
+            //
+            fast5::Model_Parameters params{1.0, 0.0, 0.0, 1.0, .9, .9};
+            f.add_basecall_model_params(0, test_bc_grp_prefix + test_bc_grp_suffix, params);
+            //
+            // add basecall model file
+            //
+            f.add_basecall_model_file(0, test_bc_grp_prefix + test_bc_grp_suffix, "/dev/null");
+        }
+        catch (hdf5_tools::Exception& e)
+        {
+            cout << "hdf5 error: " << e.what() << endl;
+        }
+        //
+        // fast5 file is closed by its destructor at the end of this scope
+        //
+    }
+    assert(hdf5_tools::File::get_object_count() == 0);
+}
diff --git a/src/f5dump-full.cpp b/src/f5dump-full.cpp
new file mode 100644
index 0000000..d29000a
--- /dev/null
+++ b/src/f5dump-full.cpp
@@ -0,0 +1,273 @@
+#include <cassert>
+#include <iostream>
+#include <string>
+
+#include "fast5.hpp"
+
+using namespace std;
+
+//
+// Write the elements of v to os in order, separated by delim.
+// No delimiter is written before the first or after the last element;
+// an empty vector produces no output.
+//
+template < typename T >
+void print_vector(ostream& os, const vector< T >& v, const string& delim)
+{
+    bool first = true;
+    for (const auto& elem : v)
+    {
+        if (not first)
+        {
+            os << delim;
+        }
+        os << elem;
+        first = false;
+    }
+}
+//
+// Write every entry of m to os as "<prefix><key>=<value>", one entry per
+// line, in the map's iteration order. The stream is flushed after each line.
+//
+template < typename U, typename V >
+void print_map(ostream& os, const map< U, V >& m, const string& prefix)
+{
+    for (auto it = m.begin(); it != m.end(); ++it)
+    {
+        os << prefix << it->first << "=" << it->second << endl;
+    }
+}
+
+//
+// f5dump-full: print a detailed "key=value" dump of one fast5 file to stdout.
+//
+// Usage: <program> <fast5_file>
+//
+// Exit status: EXIT_FAILURE when the argument count is wrong; EXIT_SUCCESS
+// (after printing a message) when the argument is not a valid fast5 file;
+// 0 otherwise. hdf5_tools exceptions raised while dumping are caught and
+// reported on stdout, and do not change the exit status.
+//
+int main(int argc, char* argv[])
+{
+    if (argc != 2)
+    {
+        cerr << "use: " << argv[0] << " <fast5_file>" << endl;
+        return EXIT_FAILURE;
+    }
+    string file_name(argv[1]);
+    //
+    // open the FAST5 file for reading
+    //
+    if (not fast5::File::is_valid_file(file_name))
+    {
+        cout << "not a fast5 file [" << file_name << "]" << endl;
+        return EXIT_SUCCESS;
+    }
+    {
+        fast5::File f;
+        //
+        // All fast5 operations are performed inside a try-catch block. This should
+        // resist various hdf5 errors without leaking memory.
+        //
+        try
+        {
+            //
+            // open file
+            //
+            f.open(file_name);
+            assert(f.is_open());
+            //
+            // extract version information for the ONT software used to generate this dataset
+            //
+            cout << "file_version=" << f.file_version() << endl;
+            //
+            // inspect channel_id params
+            //
+            bool have_channel_id_params = f.have_channel_id_params();
+            cout << "have_channel_id_params=" << have_channel_id_params << endl;
+            if (have_channel_id_params)
+            {
+                auto channel_id_params = f.get_channel_id_params();
+                cout << "channel_id/channel_number=" << channel_id_params.channel_number << endl
+                     << "channel_id/digitisation=" << channel_id_params.digitisation << endl
+                     << "channel_id/offset=" << channel_id_params.offset << endl
+                     << "channel_id/range=" << channel_id_params.range << endl
+                     << "channel_id/sampling_rate=" << channel_id_params.sampling_rate << endl;
+            }
+            //
+            // inspect tracking_id params
+            //
+            bool have_tracking_id_params = f.have_tracking_id_params();
+            cout << "have_tracking_id_params=" << have_tracking_id_params << endl;
+            if (have_tracking_id_params)
+            {
+                auto tracking_id_params = f.get_tracking_id_params();
+                print_map(cout, tracking_id_params, "tracking_id/");
+            }
+            //
+            // inspect sequences params
+            //
+            bool have_sequences_params = f.have_sequences_params();
+            cout << "have_sequences_params=" << have_sequences_params << endl;
+            if (have_sequences_params)
+            {
+                auto sequences_params = f.get_sequences_params();
+                print_map(cout, sequences_params, "sequences/");
+            }
+            //
+            // inspect raw samples
+            //
+            bool have_raw_samples = f.have_raw_samples();
+            cout << "have_raw_samples=" << have_raw_samples << endl;
+            if (have_raw_samples)
+            {
+                auto rs_rn_list = f.get_raw_samples_read_name_list();
+                cout << "raw_samples_read_name_list=";
+                print_vector(cout, rs_rn_list, ",");
+                cout << endl;
+                for (const auto& rn : rs_rn_list)
+                {
+                    // query the read being iterated; calling the getters without
+                    // an argument would always return the first read in the list
+                    auto rs_params = f.get_raw_samples_params(rn);
+                    auto rs = f.get_raw_samples(rn);
+                    cout << "raw_samples/" << rn << "/read_id=" << rs_params.read_id << endl
+                         << "raw_samples/" << rn << "/read_number=" << rs_params.read_number << endl
+                         << "raw_samples/" << rn << "/start_mux=" << rs_params.start_mux << endl
+                         << "raw_samples/" << rn << "/start_time=" << rs_params.start_time << endl
+                         << "raw_samples/" << rn << "/duration=" << rs_params.duration << endl
+                         << "raw_samples/" << rn << "/size=" << rs.size() << endl;
+                    // show only the first sample; front() on an empty vector is undefined
+                    if (not rs.empty())
+                    {
+                        cout << " (" << rs.front() << ")" << endl;
+                    }
+                }
+            }
+            //
+            // inspect eventdetection groups
+            //
+            bool have_eventdetection_events = f.have_eventdetection_events();
+            cout << "have_eventdetection_events=" << have_eventdetection_events << endl;
+            bool have_eventdetection_groups = f.have_eventdetection_groups();
+            cout << "have_eventdetection_groups=" << have_eventdetection_groups << endl;
+            if (have_eventdetection_groups)
+            {
+                auto ed_gr_list = f.get_eventdetection_group_list();
+                cout << "eventdetection_group_list=";
+                print_vector(cout, ed_gr_list, ",");
+                cout << endl;
+                for (const auto& ed_gr : ed_gr_list)
+                {
+                    auto ed_params = f.get_eventdetection_params(ed_gr);
+                    print_map(cout, ed_params, "eventdetection/");
+                    auto rn_list = f.get_eventdetection_read_name_list(ed_gr);
+                    cout << "eventdetection/" << ed_gr << "/read_name_list=";
+                    print_vector(cout, rn_list, ",");
+                    cout << endl;
+                    have_eventdetection_events = f.have_eventdetection_events(ed_gr);
+                    cout << "eventdetection/" << ed_gr << "/have_eventdetection_events=" << have_eventdetection_events << endl;
+                    for (const auto& rn : rn_list)
+                    {
+                        std::ostringstream tmp;
+                        tmp << "eventdetection/" << ed_gr << "/" << rn;
+                        auto ed_ev_params = f.get_eventdetection_event_params(ed_gr, rn);
+                        auto ed_ev = f.get_eventdetection_events(ed_gr, rn);
+                        cout << tmp.str() << "/abasic_found=" << ed_ev_params.abasic_found << endl
+                             << tmp.str() << "/duration=" << ed_ev_params.duration << endl
+                             << tmp.str() << "/median_before=" << ed_ev_params.median_before << endl
+                             << tmp.str() << "/read_id=" << ed_ev_params.read_id << endl
+                             << tmp.str() << "/read_number=" << ed_ev_params.read_number << endl
+                             << tmp.str() << "/scaling_used=" << ed_ev_params.scaling_used << endl
+                             << tmp.str() << "/start_mux=" << ed_ev_params.start_mux << endl
+                             << tmp.str() << "/start_time=" << ed_ev_params.start_time << endl
+                             << tmp.str() << "/size=" << ed_ev.size() << endl;
+                        // loop-and-break prints the first event only, and nothing if there are none
+                        for (const auto& e : ed_ev)
+                        {
+                            cout << " (mean=" << e.mean
+                                 << ", stdv=" << e.stdv
+                                 << ", start=" << e.start
+                                 << ", length=" << e.length << ")" << endl;
+                            break;
+                        }
+                    } // for rn : rn_list
+                } // for ed_gr : ed_gr_list
+            } // if have_eventdetection_groups
+            //
+            // inspect basecall groups
+            //
+            bool have_basecall_groups = f.have_basecall_groups();
+            cout << "have_basecall_groups=" << have_basecall_groups << endl;
+            if (have_basecall_groups)
+            {
+                auto bc_gr_list = f.get_basecall_group_list();
+                cout << "basecall_group_list=";
+                print_vector(cout, bc_gr_list, ",");
+                cout << endl;
+                for (unsigned st = 0; st < 3; ++st)
+                {
+                    auto bc_st_gr_list = f.get_basecall_strand_group_list(st);
+                    cout << "basecall_strand_group_list(" << st << ")=";
+                    print_vector(cout, bc_st_gr_list, ",");
+                    cout << endl;
+                }
+                for (const auto& bc_gr : bc_gr_list)
+                {
+                    // dump basecall params
+                    auto bc_params = f.get_basecall_params(bc_gr);
+                    std::ostringstream tmp;
+                    tmp << "basecall/" << bc_gr << "/";
+                    print_map(cout, bc_params, tmp.str());
+                    // check if basecall log exists
+                    cout << "basecall/" << bc_gr << "/have_log=" << f.have_basecall_log(bc_gr) << endl;
+                }
+                for (unsigned st = 0; st < 3; ++st)
+                {
+                    bool have_seq = f.have_basecall_seq(st);
+                    cout << "basecall(" << st << ")/have_seq=" << have_seq << endl;
+                    if (have_seq)
+                    {
+                        cout << "basecall(" << st << ")/seq=" << f.get_basecall_seq(st).substr(0, 10) << "..." << endl;
+                    }
+                    bool have_model = f.have_basecall_model(st);
+                    cout << "basecall(" << st << ")/have_model=" << have_model << endl;
+                    if (have_model)
+                    {
+                        cout << "basecall(" << st << ")/model_file=" << f.get_basecall_model_file(st) << endl;
+                        auto m_params = f.get_basecall_model_params(st);
+                        auto m = f.get_basecall_model(st);
+                        cout << "basecall(" << st << ")/model/scale=" << m_params.scale << endl
+                             << "basecall(" << st << ")/model/shift=" << m_params.shift << endl
+                             << "basecall(" << st << ")/model/drift=" << m_params.drift << endl
+                             << "basecall(" << st << ")/model/var=" << m_params.var << endl
+                             << "basecall(" << st << ")/model/scale_sd=" << m_params.scale_sd << endl
+                             << "basecall(" << st << ")/model/var_sd=" << m_params.var_sd << endl
+                             << "basecall(" << st << ")/model/size=" << m.size() << endl;
+                        for (const auto& e : m)
+                        {
+                            cout << " (kmer=" << e.get_kmer()
+                                 << ", level_mean=" << e.level_mean
+                                 << ", level_stdv=" << e.level_stdv << ")" << endl;
+                            break;
+                        }
+                    }
+                    bool have_events = f.have_basecall_events(st);
+                    cout << "basecall(" << st << ")/have_events=" << have_events << endl;
+                    if (have_events)
+                    {
+                        auto ev = f.get_basecall_events(st);
+                        cout << "basecall(" << st << ")/events/size=" << ev.size() << endl;
+                        for (const auto& e : ev)
+                        {
+                            cout << " (mean=" << e.mean
+                                 << ", stdv=" << e.stdv
+                                 << ", start=" << e.start
+                                 << ", length=" << e.length
+                                 << ", model_state=" << e.get_model_state()
+                                 << ", p_model_state=" << e.p_model_state
+                                 << ", move=" << e.move << ")" << endl;
+                            break;
+                        }
+                    }
+                    if (st == 2)
+                    {
+                        bool have_event_alignment = f.have_basecall_event_alignment();
+                        cout << "basecall(2)/have_event_alignment=" << have_event_alignment << endl;
+                        if (have_event_alignment)
+                        {
+                            auto al = f.get_basecall_event_alignment();
+                            cout << "basecall(2)/event_alignment/size=" << al.size() << endl;
+                            for (const auto& e : al)
+                            {
+                                cout << " (template_index=" << e.template_index
+                                     << ", complement_index=" << e.complement_index
+                                     << ", kmer=" << e.get_kmer() << ")" << endl;
+                                break;
+                            }
+                        }
+                    }
+                }
+            } // have_basecall_groups
+        }
+        catch (hdf5_tools::Exception& e)
+        {
+            cout << "hdf5 error: " << e.what() << endl;
+        }
+        //
+        // fast5 file is closed by its destructor at the end of this scope
+        //
+    }
+    assert(fast5::File::get_object_count() == 0);
+}
diff --git a/src/f5dump.cpp b/src/f5dump.cpp
new file mode 100644
index 0000000..af655dd
--- /dev/null
+++ b/src/f5dump.cpp
@@ -0,0 +1,197 @@
+#include <cassert>
+#include <iostream>
+#include <string>
+
+#include "fast5.hpp"
+
+using namespace std;
+
+//
+// Write the elements of v to os in order, with delim between consecutive
+// elements (never leading or trailing). Writes nothing for an empty vector.
+//
+template < typename T >
+void print_vector(ostream& os, const vector< T >& v, const string& delim)
+{
+    for (decltype(v.size()) i = 0; i < v.size(); ++i)
+    {
+        if (i > 0)
+        {
+            os << delim;
+        }
+        os << v[i];
+    }
+}
+//
+// Write each (key, value) pair of m to os on its own line, formatted as
+// "<prefix><key>=<value>", in the map's iteration order. Each line is
+// terminated with endl, so the stream is flushed per entry.
+//
+template < typename U, typename V >
+void print_map(ostream& os, const map< U, V >& m, const string& prefix)
+{
+    auto cur = m.begin();
+    const auto last = m.end();
+    while (cur != last)
+    {
+        os << prefix << cur->first << "=" << cur->second << endl;
+        ++cur;
+    }
+}
+
+//
+// f5dump: print a summary "key=value" dump of one fast5 file to stdout,
+// using the default (first) read and group for each kind of data.
+//
+// Usage: <program> <fast5_file>
+//
+// Exit status: EXIT_FAILURE when the argument count is wrong; EXIT_SUCCESS
+// (after printing a message) when the argument is not a valid fast5 file;
+// 0 otherwise. hdf5_tools exceptions raised while dumping are caught and
+// reported on stdout, and do not change the exit status.
+//
+int main(int argc, char* argv[])
+{
+    if (argc != 2)
+    {
+        cerr << "use: " << argv[0] << " <fast5_file>" << endl;
+        return EXIT_FAILURE;
+    }
+    string file_name(argv[1]);
+    //
+    // open the FAST5 file for reading
+    //
+    if (not fast5::File::is_valid_file(file_name))
+    {
+        cout << "not a fast5 file [" << file_name << "]" << endl;
+        return EXIT_SUCCESS;
+    }
+    {
+        fast5::File f;
+        //
+        // All fast5 operations are performed inside a try-catch block. This should
+        // resist various hdf5 errors without leaking memory.
+        //
+        try
+        {
+            //
+            // open file
+            //
+            f.open(file_name);
+            assert(f.is_open());
+            //
+            // extract version information for the ONT software used to generate this dataset
+            //
+            cout << "file_version=" << f.file_version() << endl;
+            //
+            // inspect channel_id params
+            //
+            if (f.have_channel_id_params())
+            {
+                auto channel_id_params = f.get_channel_id_params();
+                cout << "channel_id/channel_number=" << channel_id_params.channel_number << endl
+                     << "channel_id/digitisation=" << channel_id_params.digitisation << endl
+                     << "channel_id/offset=" << channel_id_params.offset << endl
+                     << "channel_id/range=" << channel_id_params.range << endl
+                     << "channel_id/sampling_rate=" << channel_id_params.sampling_rate << endl;
+            }
+            //
+            // inspect tracking_id params
+            //
+            if (f.have_tracking_id_params())
+            {
+                auto tracking_id_params = f.get_tracking_id_params();
+                print_map(cout, tracking_id_params, "tracking_id/");
+            }
+            //
+            // inspect sequences params
+            //
+            if (f.have_sequences_params())
+            {
+                auto sequences_params = f.get_sequences_params();
+                print_map(cout, sequences_params, "sequences/");
+            }
+            //
+            // inspect raw samples
+            //
+            if (f.have_raw_samples())
+            {
+                auto rs_params = f.get_raw_samples_params();
+                auto rs = f.get_raw_samples();
+                cout << "raw_samples/read_id=" << rs_params.read_id << endl
+                     << "raw_samples/read_number=" << rs_params.read_number << endl
+                     << "raw_samples/start_mux=" << rs_params.start_mux << endl
+                     << "raw_samples/start_time=" << rs_params.start_time << endl
+                     << "raw_samples/duration=" << rs_params.duration << endl
+                     << "raw_samples/size=" << rs.size() << endl;
+                // show only the first sample; front() on an empty vector is undefined
+                if (not rs.empty())
+                {
+                    cout << " (" << rs.front() << ")" << endl;
+                }
+            }
+            //
+            // inspect eventdetection events
+            //
+            cout << "eventdetection_group_list=";
+            print_vector(cout, f.get_eventdetection_group_list(), ",");
+            cout << endl;
+            if (f.have_eventdetection_events())
+            {
+                auto ed_params = f.get_eventdetection_params();
+                print_map(cout, ed_params, "eventdetection/");
+                auto ed_ev_params = f.get_eventdetection_event_params();
+                auto ed_ev = f.get_eventdetection_events();
+                cout << "eventdetection/events/abasic_found=" << ed_ev_params.abasic_found << endl
+                     << "eventdetection/events/duration=" << ed_ev_params.duration << endl
+                     << "eventdetection/events/median_before=" << ed_ev_params.median_before << endl
+                     << "eventdetection/events/read_id=" << ed_ev_params.read_id << endl
+                     << "eventdetection/events/read_number=" << ed_ev_params.read_number << endl
+                     << "eventdetection/events/scaling_used=" << ed_ev_params.scaling_used << endl
+                     << "eventdetection/events/start_mux=" << ed_ev_params.start_mux << endl
+                     << "eventdetection/events/start_time=" << ed_ev_params.start_time << endl
+                     << "eventdetection/events/size=" << ed_ev.size() << endl;
+                // the event table may exist but hold no rows
+                if (not ed_ev.empty())
+                {
+                    const auto& e = ed_ev.front();
+                    cout << " (mean=" << e.mean
+                         << ", stdv=" << e.stdv
+                         << ", start=" << e.start
+                         << ", length=" << e.length << ")" << endl;
+                }
+            } // if have_eventdetection_events
+            //
+            // inspect basecall groups
+            //
+            for (unsigned st = 0; st < 3; ++st)
+            {
+                cout << "basecall(" << st << ")/group_list=";
+                print_vector(cout, f.get_basecall_strand_group_list(st), ",");
+                cout << endl;
+                // basecall sequence
+                if (f.have_basecall_seq(st))
+                {
+                    cout << "basecall(" << st << ")/seq_size=" << f.get_basecall_seq(st).size() << endl;
+                }
+                // basecall model
+                if (f.have_basecall_model(st))
+                {
+                    cout << "basecall(" << st << ")/model_file=" << f.get_basecall_model_file(st) << endl;
+                    auto m_params = f.get_basecall_model_params(st);
+                    auto m = f.get_basecall_model(st);
+                    cout << "basecall(" << st << ")/model/scale=" << m_params.scale << endl
+                         << "basecall(" << st << ")/model/shift=" << m_params.shift << endl
+                         << "basecall(" << st << ")/model/drift=" << m_params.drift << endl
+                         << "basecall(" << st << ")/model/var=" << m_params.var << endl
+                         << "basecall(" << st << ")/model/scale_sd=" << m_params.scale_sd << endl
+                         << "basecall(" << st << ")/model/var_sd=" << m_params.var_sd << endl
+                         << "basecall(" << st << ")/model/size=" << m.size() << endl;
+                    // print the first model entry only, if there is one
+                    if (not m.empty())
+                    {
+                        const auto& e = m.front();
+                        cout << " (kmer=" << e.get_kmer()
+                             << ", level_mean=" << e.level_mean
+                             << ", level_stdv=" << e.level_stdv << ")" << endl;
+                    }
+                }
+                // basecall events
+                if (f.have_basecall_events(st))
+                {
+                    auto ev = f.get_basecall_events(st);
+                    cout << "basecall(" << st << ")/events/size=" << ev.size() << endl;
+                    // print the first event only, if there is one
+                    if (not ev.empty())
+                    {
+                        const auto& e = ev.front();
+                        cout << " (mean=" << e.mean
+                             << ", stdv=" << e.stdv
+                             << ", start=" << e.start
+                             << ", length=" << e.length
+                             << ", model_state=" << e.get_model_state()
+                             << ", p_model_state=" << e.p_model_state
+                             << ", move=" << e.move << ")" << endl;
+                    }
+                }
+                // basecall event alignment
+                if (st == 2 and f.have_basecall_event_alignment())
+                {
+                    auto al = f.get_basecall_event_alignment();
+                    cout << "basecall(2)/event_alignment/size=" << al.size() << endl;
+                    // print the first alignment entry only, if there is one
+                    if (not al.empty())
+                    {
+                        const auto& e = al.front();
+                        cout << " (template_index=" << e.template_index
+                             << ", complement_index=" << e.complement_index
+                             << ", kmer=" << e.get_kmer() << ")" << endl;
+                    }
+                }
+            } // for st
+        }
+        catch (hdf5_tools::Exception& e)
+        {
+            cout << "hdf5 error: " << e.what() << endl;
+        }
+        //
+        // fast5 file is closed by its destructor at the end of this scope
+        //
+    }
+    assert(fast5::File::get_object_count() == 0);
+}
diff --git a/src/fast5.hpp b/src/fast5.hpp
index 9f03de3..53d3913 100644
--- a/src/fast5.hpp
+++ b/src/fast5.hpp
@@ -1,20 +1,84 @@
#ifndef __FAST5_HPP
#define __FAST5_HPP
+#include <algorithm>
#include <cassert>
+#include <cmath>
#include <exception>
#include <iostream>
#include <sstream>
#include <iomanip>
#include <string>
#include <vector>
+#include <array>
+#include <set>
+#include <map>
#include "hdf5_tools.hpp"
#define MAX_K_LEN 8
+namespace
+{
+    //
+    // Convert a fixed-size char array into a std::string holding the
+    // characters that precede the first '\0'. If the array contains no
+    // '\0', all MAX_K_LEN characters are copied.
+    //
+    inline static std::string array_to_string(const std::array< char, MAX_K_LEN >& a)
+    {
+        const auto terminator = std::find(a.begin(), a.end(), '\0');
+        return std::string(a.begin(), terminator);
+    }
+}
+
namespace fast5
{
+struct Channel_Id_Parameters // values filled in by File::get_channel_id_params()
+{
+ std::string channel_number; // read from the "channel_number" attribute
+ double digitisation; // divisor applied by File::get_raw_samples() when scaling raw levels
+ double offset; // added to each raw integer level by File::get_raw_samples() before scaling
+ double range; // multiplier applied by File::get_raw_samples() when scaling raw levels
+ double sampling_rate; // value returned by File::get_sampling_rate()
+}; // struct Channel_Id_Parameters
+
+typedef std::map< std::string, std::string > Tracking_Id_Parameters;
+
+typedef std::map< std::string, std::string > Sequences_Parameters;
+
+typedef float Raw_Samples_Entry;
+
+struct Raw_Samples_Parameters // values filled in by File::get_raw_samples_params() for one read
+{
+ std::string read_id; // read from the "read_id" attribute
+ long long read_number; // read from the "read_number" attribute
+ long long start_mux; // read from the "start_mux" attribute
+ long long start_time; // read from the "start_time" attribute
+ long long duration; // read from the "duration" attribute
+}; // struct Raw_Samples_Parameters
+
+//
+// One row of an EventDetection events table, as returned by
+// File::get_eventdetection_events(). When the table stores a "variance"
+// column instead of "stdv", that function stores its square root in stdv.
+//
+struct EventDetection_Event_Entry
+{
+    double mean;
+    double stdv;
+    long long start;
+    long long length;
+    // Member-wise equality; floating point fields are compared exactly.
+    friend bool operator == (const EventDetection_Event_Entry& lhs, const EventDetection_Event_Entry& rhs)
+    {
+        const bool same_stats = (lhs.mean == rhs.mean) && (lhs.stdv == rhs.stdv);
+        const bool same_span = (lhs.start == rhs.start) && (lhs.length == rhs.length);
+        return same_stats && same_span;
+    }
+}; // struct EventDetection_Event_Entry
+
+struct EventDetection_Event_Parameters // values filled in by File::get_eventdetection_event_params()
+{
+ std::string read_id; // optional attribute; left empty when absent
+ long long read_number; // always read
+ long long scaling_used; // always read
+ long long start_mux; // always read
+ long long start_time; // always read
+ long long duration; // always read
+ double median_before; // optional attribute; set to -1 when absent
+ unsigned abasic_found; // optional attribute; set to 0 when absent
+}; // struct EventDetection_Event_Parameters
+
//
// This struct represents the expected signal measured
// given the kmer sequence that is in the pore when the
@@ -24,13 +88,24 @@ namespace fast5
//
struct Model_Entry
{
- char kmer[MAX_K_LEN];
long long variant;
double level_mean;
double level_stdv;
double sd_mean;
double sd_stdv;
double weight;
+ std::array< char, MAX_K_LEN > kmer;
+ std::string get_kmer() const { return array_to_string(kmer); }
+ friend bool operator == (const Model_Entry& lhs, const Model_Entry& rhs)
+ {
+ return lhs.variant == rhs.variant
+ and lhs.level_mean == rhs.level_mean
+ and lhs.level_stdv == rhs.level_stdv
+ and lhs.sd_mean == rhs.sd_mean
+ and lhs.sd_stdv == rhs.sd_stdv
+ and lhs.weight == rhs.weight
+ and lhs.kmer == rhs.kmer;
+ }
}; // struct Model_Entry
//
@@ -39,35 +114,52 @@ struct Model_Entry
//
struct Model_Parameters
{
- double drift;
double scale;
- double scale_sd;
double shift;
+ double drift;
double var;
+ double scale_sd;
double var_sd;
}; // struct Model_Parameters
//
// This struct represents an observed event.
-// The members of the struct are the same as
+// The members of the struct are the same as
// the fields encoded in the FAST5 file.
//
struct Event_Entry
{
double mean;
- double start;
double stdv;
+ double start;
double length;
- char model_state[MAX_K_LEN];
- double model_level;
- long long move;
double p_model_state;
- char mp_state[MAX_K_LEN];
double p_mp_state;
double p_A;
double p_C;
double p_G;
double p_T;
+ long long move;
+ std::array< char, MAX_K_LEN > model_state;
+ std::array< char, MAX_K_LEN > mp_state;
+ std::string get_model_state() const { return array_to_string(model_state); }
+ std::string get_mp_state() const { return array_to_string(mp_state); }
+ friend bool operator == (const Event_Entry& lhs, const Event_Entry& rhs)
+ {
+ return lhs.mean == rhs.mean
+ and lhs.stdv == rhs.stdv
+ and lhs.start == rhs.start
+ and lhs.length == rhs.length
+ and lhs.p_model_state == rhs.p_model_state
+ and lhs.p_mp_state == rhs.p_mp_state
+ and lhs.p_A == rhs.p_A
+ and lhs.p_C == rhs.p_C
+ and lhs.p_G == rhs.p_G
+ and lhs.p_T == rhs.p_T
+ and lhs.move == rhs.move
+ and lhs.model_state == rhs.model_state
+ and lhs.mp_state == rhs.mp_state;
+ }
}; // struct Event_Entry
//
@@ -78,234 +170,772 @@ struct Event_Alignment_Entry
{
long long template_index;
long long complement_index;
- char kmer[MAX_K_LEN];
+ std::array< char, MAX_K_LEN > kmer;
+ std::string get_kmer() const { return array_to_string(kmer); }
+ friend bool operator == (const Event_Alignment_Entry& lhs, const Event_Alignment_Entry& rhs)
+ {
+ return lhs.template_index == rhs.template_index
+ and lhs.complement_index == rhs.complement_index
+ and lhs.kmer == rhs.kmer;
+ }
}; // struct Event_Alignment_Entry
+
class File
- : private hdf5_tools::File_Reader
+ : private hdf5_tools::File
{
private:
- typedef hdf5_tools::File_Reader Base;
+ typedef hdf5_tools::File Base;
public:
- using Base::Base;
-
- using Base::is_open;
- using Base::file_name;
- using Base::open;
- using Base::close;
-
- std::string file_version() const
+ //using Base::is_open;
+ //using Base::is_rw;
+ //using Base::file_name;
+ //using Base::create;
+ //using Base::close;
+ using Base::get_object_count;
+ using Base::is_valid_file;
+ //using Base::write;
+
+ File() = default;
+ File(const std::string& file_name, bool rw = false) { open(file_name, rw); }
+
+ bool is_open() const { return static_cast< const Base* >(this)->is_open(); }
+ bool is_rw() const { return static_cast< const Base* >(this)->is_rw(); }
+ const std::string& file_name() const { return static_cast< const Base* >(this)->file_name(); }
+ void create(const std::string& file_name, bool truncate = false) { static_cast< Base* >(this)->create(file_name, truncate); }
+ void close() { static_cast< Base* >(this)->close(); }
+
+ void open(const std::string& file_name, bool rw = false)
{
- double v;
- assert(Base::exists("/file_version"));
- Base::read< double >("/file_version", v);
- // convert it to string
- std::ostringstream os;
- os << v;
- return os.str();
+ Base::open(file_name, rw);
+ if (is_open())
+ {
+ // detect raw samples read name
+ detect_raw_samples_read_name_list();
+ // detect eventdetection groups
+ detect_eventdetection_group_list();
+ // detect basecall groups
+ detect_basecall_group_list();
+ }
}
- std::string basecall_version() const
+ /**
+ * Extract "/file_version" attribute. This must exist.
+ */
+ std::string file_version() const
{
std::string res;
- std::string path = get_bc_2d_root() + "/version";
- assert(Base::exists(path));
- Base::read< std::string >(path, res);
+ assert(Base::exists(file_version_path()));
+ Base::read(file_version_path(), res);
return res;
}
- std::string eventdetection_version() const
+ /**
+ * Check if "/UniqueGlobalKey/channel_id" attributes exist.
+ */
+ bool have_channel_id_params() const
{
- std::string res;
- // only support eventdetection group 000 for now
- std::string path = "/Analyses/EventDetection_000/version";
- assert(Base::exists(path));
- Base::read< std::string >(path, res);
- return res;
+ return Base::group_exists(channel_id_path());
}
-
- std::string get_log() const
+ /**
+ * Extract "/UniqueGlobalKey/channel_id" attributes.
+ */
+ Channel_Id_Parameters get_channel_id_params() const
{
- std::string res;
- std::string path = get_bc_2d_root() + "/Log";
- assert(Base::exists(path));
- Base::read< std::string >(path, res);
+ Channel_Id_Parameters res;
+ Base::read(channel_id_path() + "/channel_number", res.channel_number);
+ Base::read(channel_id_path() + "/digitisation", res.digitisation);
+ Base::read(channel_id_path() + "/offset", res.offset);
+ Base::read(channel_id_path() + "/range", res.range);
+ Base::read(channel_id_path() + "/sampling_rate", res.sampling_rate);
return res;
}
-
+ /**
+ * Check if sampling rate exists.
+ */
+ bool have_sampling_rate() const
+ {
+ return have_channel_id_params();
+ }
+ /**
+ * Get sampling rate.
+ */
double get_sampling_rate() const
{
- assert(have_sampling_rate());
-
- auto lg = get_log();
- auto idx = lg.find("Sampling rate is");
-
- std::string line;
- std::stringstream ss1(lg.substr(idx));
- std::getline(ss1,line,'\n');
-
- std::stringstream ss2(line);
-
- std::string token;
- std::getline(ss2,token,' '); //Sampling
- std::getline(ss2,token,' '); //rate
- std::getline(ss2,token,' '); //is
- std::getline(ss2,token,' '); //Hz value
+ auto channel_id_params = get_channel_id_params();
+ return channel_id_params.sampling_rate;
+ }
- return std::atof(token.c_str());
+ /**
+ * Check if "/UniqueGlobalKey/tracking_id" attributes exist.
+ */
+ bool have_tracking_id_params() const
+ {
+ return Base::group_exists(tracking_id_path());
+ }
+ /**
+ * Extract "/UniqueGlobalKey/tracking_id" attributes.
+ */
+ Tracking_Id_Parameters get_tracking_id_params() const
+ {
+ return get_attr_map(tracking_id_path());
}
- bool have_sampling_rate() const
+ /**
+ * Check if sequences attributes exist.
+ */
+ bool have_sequences_params() const
+ {
+ return Base::group_exists(sequences_path());
+ }
+ /**
+ * Get sequences attributes.
+ */
+ Sequences_Parameters get_sequences_params() const
{
- auto lg = get_log();
- auto idx = lg.find("Sampling rate is");
- return idx != std::string::npos;
+ return get_attr_map(sequences_path());
}
- std::string get_model_file(size_t i) const
+ /**
+ * Get list of raw samples read names.
+ */
+ const std::vector< std::string >& get_raw_samples_read_name_list() const
{
- std::string res;
- assert(Base::exists(model_file_path(i)));
- Base::read< std::string >(model_file_path(i), res);
+ return _raw_samples_read_name_list;
+ }
+ /**
+ * Check if raw samples exist.
+ */
+ bool have_raw_samples() const
+ {
+ return have_channel_id_params() and not get_raw_samples_read_name_list().empty();
+ }
+ /**
+ * Get raw samples attributes for given read name (default: first read name).
+ */
+ Raw_Samples_Parameters get_raw_samples_params(const std::string& _rn = std::string()) const
+ {
+     // An empty argument selects the first detected read name; in that case
+     // the read name list must not be empty (front() is called on it).
+     const std::string& rn = _rn.empty()? get_raw_samples_read_name_list().front() : _rn;
+     const std::string attr_root = raw_samples_params_path(rn);
+     Raw_Samples_Parameters res;
+     Base::read(attr_root + "/read_id", res.read_id);
+     Base::read(attr_root + "/read_number", res.read_number);
+     Base::read(attr_root + "/start_mux", res.start_mux);
+     Base::read(attr_root + "/start_time", res.start_time);
+     Base::read(attr_root + "/duration", res.duration);
return res;
}
-
- std::string sequences_version() const
+ /**
+ * Get raw samples for given read name (default: first read name).
+ */
+ std::vector< Raw_Samples_Entry > get_raw_samples(const std::string& _rn = std::string()) const
{
- std::vector< std::string > tmp;
- assert(Base::exists("/Sequences/Meta/version"));
- Base::read< std::string >("/Sequences/Meta/version", tmp);
- std::string res;
- for (const auto& s: tmp)
+ // get raw samples
+ std::vector< uint16_t > raw_samples;
+ const std::string& rn = not _rn.empty()? _rn : get_raw_samples_read_name_list().front();
+ Base::read(raw_samples_path(rn), raw_samples);
+ // get scaling parameters
+ auto channel_id_params = get_channel_id_params();
+ // decode levels
+ std::vector< Raw_Samples_Entry > res;
+ res.reserve(raw_samples.size());
+ for (auto int_level : raw_samples)
{
- res += s;
+ res.push_back((static_cast< float >(int_level) + channel_id_params.offset)
+ * channel_id_params.range / channel_id_params.digitisation);
}
return res;
}
- bool have_basecalled_2D() const
+ /**
+ * Get list of EventDetection groups.
+ */
+ const std::vector< std::string >& get_eventdetection_group_list() const
{
- return Base::exists(get_bc_2d_root() + "/BaseCalled_2D/Fastq");
+ return _eventdetection_group_list;
}
-
- std::string basecalled_2D() const
+ /**
+ * Check if any EventDetection groups exist.
+ */
+ bool have_eventdetection_groups() const
{
- std::string res;
- Base::read< std::string >(get_bc_2d_root() + "/BaseCalled_2D/Fastq", res);
-
- // Split the FASTQ record on newlines
- size_t nl1 = res.find_first_of('\n');
- size_t nl2 = res.find_first_of('\n', nl1 + 1);
-
- if(nl1 == std::string::npos || nl2 == std::string::npos)
- return "";
+ return not get_eventdetection_group_list().empty();
+ }
+ /**
+ * Get list of reads for given EventDetection group (default: first EventDetection group).
+ */
+ std::vector< std::string > get_eventdetection_read_name_list(const std::string& _ed_gr = std::string()) const
+ {
+     if (_ed_gr.empty())
+     {
+         // no group given: use the first detected EventDetection group
+         // (the group list must not be empty, since front() is called on it)
+         return detect_eventdetection_read_name_list(get_eventdetection_group_list().front());
+     }
+     return detect_eventdetection_read_name_list(_ed_gr);
+ }
+ /**
+ * Check if EventDetection events exist for given EventDetection group (default: first EventDetection group).
+ */
+ bool have_eventdetection_events(const std::string& _ed_gr = std::string()) const
+ {
+ std::string ed_gr;
+ if (_ed_gr.empty())
+ {
+ auto ed_gr_l = get_eventdetection_group_list();
+ if (ed_gr_l.empty()) return false;
+ ed_gr = ed_gr_l.front();
+ }
else
- return res.substr(nl1 + 1, nl2 - nl1 - 1);
+ {
+ ed_gr = _ed_gr;
+ }
+ return not get_eventdetection_read_name_list(ed_gr).empty();
}
-
- std::vector< Event_Alignment_Entry > get_event_alignments() const
+ /**
+ * Get EventDetection params for given EventDetection group (default: first EventDetection group).
+ */
+ std::map< std::string, std::string > get_eventdetection_params(const std::string& _ed_gr = std::string()) const
{
- std::vector< Event_Alignment_Entry > res;
- hdf5_tools::Compound_Map m;
- m.add_member("template", &Event_Alignment_Entry::template_index);
- m.add_member("complement", &Event_Alignment_Entry::complement_index);
- m.add_member("kmer", &Event_Alignment_Entry::kmer);
- Base::read< Event_Alignment_Entry >(get_bc_2d_root() + "/BaseCalled_2D/Alignment", res, &m);
+ const std::string& ed_gr = not _ed_gr.empty()? _ed_gr : get_eventdetection_group_list().front();
+ return get_attr_map(eventdetection_params_path(ed_gr));
+ }
+ /**
+ * Get EventDetection event params for given EventDetection group, and given read name
+ * (default: first EventDetection group, and first read name in it).
+ */
+ EventDetection_Event_Parameters get_eventdetection_event_params(
+     const std::string& _ed_gr = std::string(), const std::string& _rn = std::string()) const
+ {
+     EventDetection_Event_Parameters res;
+     // empty arguments select the first group, and the first read in that group
+     const std::string& ed_gr = _ed_gr.empty()? get_eventdetection_group_list().front() : _ed_gr;
+     // rn is a copy, not a reference: the read name list is a temporary
+     const std::string rn = _rn.empty()? get_eventdetection_read_name_list(ed_gr).front() : _rn;
+     auto attr_root = eventdetection_event_params_path(ed_gr, rn);
+     auto attr_names = Base::get_attr_list(attr_root);
+     std::set< std::string > present(attr_names.begin(), attr_names.end());
+     // mandatory fields
+     Base::read(attr_root + "/read_number", res.read_number);
+     Base::read(attr_root + "/scaling_used", res.scaling_used);
+     Base::read(attr_root + "/start_mux", res.start_mux);
+     Base::read(attr_root + "/start_time", res.start_time);
+     Base::read(attr_root + "/duration", res.duration);
+     // optional fields: read when present, otherwise fall back to a default
+     if (present.count("read_id") != 0)
+     {
+         Base::read(attr_root + "/read_id", res.read_id);
+     }
+     if (present.count("median_before") == 0)
+     {
+         res.median_before = -1;
+     }
+     else
+     {
+         Base::read(attr_root + "/median_before", res.median_before);
+     }
+     if (present.count("abasic_found") == 0)
+     {
+         res.abasic_found = 0;
+     }
+     else
+     {
+         Base::read(attr_root + "/abasic_found", res.abasic_found);
+     }
return res;
}
+ /**
+ * Get EventDetection events for given EventDetection group, and given read name.
+ */
+ std::vector< EventDetection_Event_Entry > get_eventdetection_events(
+     const std::string& _ed_gr = std::string(), const std::string& _rn = std::string()) const
+ {
+     std::vector< EventDetection_Event_Entry > res;
+     // empty arguments select the first group, and the first read in that group
+     const std::string& ed_gr = _ed_gr.empty()? get_eventdetection_group_list().front() : _ed_gr;
+     // rn is a copy, not a reference: the read name list is a temporary
+     const std::string rn = _rn.empty()? get_eventdetection_read_name_list(ed_gr).front() : _rn;
+     auto dataset_path = eventdetection_events_path(ed_gr, rn);
+     // the spread of each event is stored under the name "stdv" or "variance"
+     auto member_names = Base::get_struct_members(dataset_path);
+     assert(member_names.size() >= 4);
+     const bool have_stdv =
+         std::find(member_names.begin(), member_names.end(), "stdv") != member_names.end();
+     const bool have_variance =
+         std::find(member_names.begin(), member_names.end(), "variance") != member_names.end();
+     hdf5_tools::Compound_Map m;
+     m.add_member("mean", &EventDetection_Event_Entry::mean);
+     m.add_member("start", &EventDetection_Event_Entry::start);
+     m.add_member("length", &EventDetection_Event_Entry::length);
+     if (not have_stdv and not have_variance)
+     {
+         // must have stdv or variance
+         abort();
+     }
+     if (have_stdv)
+     {
+         m.add_member("stdv", &EventDetection_Event_Entry::stdv);
+     }
+     else
+     {
+         // variances are read into the stdv field and converted below
+         m.add_member("variance", &EventDetection_Event_Entry::stdv);
+     }
+     Base::read(dataset_path, res, m);
+     if (not have_stdv)
+     {
+         // have read variances: replace each by its square root
+         for (auto it = res.begin(); it != res.end(); ++it)
+         {
+             it->stdv = std::sqrt(it->stdv);
+         }
+     }
+     return res;
+ } // get_eventdetection_events()
- bool have_model(size_t i) const
+ /**
+ * Get list of all Basecall groups.
+ *
+ * Returns a reference to the cached _basecall_group_list member; entries
+ * are group names with the "Basecall_" prefix stripped, as stored by
+ * detect_basecall_group_list().
+ */
+ const std::vector< std::string >& get_basecall_group_list() const
{
- return Base::exists(model_path(i));
+ return _basecall_group_list;
}
- bool have_events(size_t i) const
+ /**
+ * Check if any Basecall groups exist.
+ *
+ * True exactly when get_basecall_group_list() is non-empty.
+ */
+ bool have_basecall_groups() const
{
- return Base::exists(events_path(i));
+ return not get_basecall_group_list().empty();
}
-
- std::vector< Model_Entry > get_model(size_t i) const
+    /**
+     * Get list of Basecall groups for given strand.
+     *
+     * @param st Strand index: 0/1/2 = template/complement/2d.
+     * @return Reference to the cached per-strand group list.
+     * @throws std::out_of_range if st is not 0, 1 or 2.
+     */
+    const std::vector< std::string >& get_basecall_strand_group_list(unsigned st) const
+    {
+        // at() rather than operator[]: st comes straight from the caller
+        return _basecall_strand_group_list.at(st);
+    }
+    /**
+     * Tell whether at least one Basecall group is listed for strand st.
+     */
+    bool have_basecall_strand_groups(unsigned st) const
+    {
+        const auto& strand_groups = get_basecall_strand_group_list(st);
+        return !strand_groups.empty();
+    }
+    /**
+     * Collect the attributes of the given Basecall group as a name -> value map.
+     */
+    std::map< std::string, std::string > get_basecall_params(const std::string& bc_gr) const
+    {
+        const std::string group_path = basecall_root_path() + "/" + basecall_group_prefix() + bc_gr;
+        return get_attr_map(group_path);
+    }
+    /**
+     * Tell whether a "Log" entry exists under the given Basecall group.
+     */
+    bool have_basecall_log(const std::string& bc_gr) const
+    {
+        return Base::exists(basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/Log");
+    }
+    /**
+     * Read the "Log" entry of the given Basecall group and return it as a string.
+     */
+    std::string get_basecall_log(const std::string& bc_gr) const
+    {
+        const std::string log_path = basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/Log";
+        std::string log_text;
+        Base::read(log_path, log_text);
+        return log_text;
+    }
+    /**
+     * Tell whether a Fastq dataset exists for strand st.
+     *
+     * With an empty _bc_gr the first group listed for the strand is
+     * consulted; if no group is listed the answer is false.
+     */
+    bool have_basecall_fastq(unsigned st, const std::string& _bc_gr = std::string()) const
+    {
+        if (not _bc_gr.empty())
+        {
+            return Base::dataset_exists(basecall_fastq_path(_bc_gr, st));
+        }
+        const auto& strand_groups = get_basecall_strand_group_list(st);
+        if (strand_groups.empty())
+        {
+            return false;
+        }
+        return Base::dataset_exists(basecall_fastq_path(strand_groups.front(), st));
+    }
+    /**
+     * Read the Fastq entry for strand st.
+     *
+     * With an empty _bc_gr the first group listed for the strand is used;
+     * that list must then be non-empty (front() is called unchecked).
+     */
+    std::string get_basecall_fastq(unsigned st, const std::string& _bc_gr = std::string()) const
+    {
+        const std::string& bc_gr = _bc_gr.empty()? get_basecall_strand_group_list(st).front() : _bc_gr;
+        std::string fq;
+        Base::read(basecall_fastq_path(bc_gr, st), fq);
+        return fq;
+    }
+    /**
+     * Store fq as the Fastq entry of Basecall group bc_gr, strand st.
+     *
+     * NOTE(review): the `true` flag presumably selects dataset (rather than
+     * attribute) storage -- confirm against hdf5_tools::File::write().
+     */
+    void add_basecall_fastq(unsigned st, const std::string& bc_gr, const std::string& fq) const
+    {
+        const std::string fq_path = basecall_fastq_path(bc_gr, st);
+        Base::write(fq_path, true, fq);
+    }
+    /**
+     * Tell whether a sequence is available for strand st; the sequence is
+     * taken from the Fastq entry, so this is the same test as
+     * have_basecall_fastq().
+     */
+    bool have_basecall_seq(unsigned st, const std::string& _bc_gr = std::string()) const
+    {
+        const bool fastq_present = have_basecall_fastq(st, _bc_gr);
+        return fastq_present;
+    }
+    /**
+     * Return the sequence line of the Fastq entry for strand st.
+     */
+    std::string get_basecall_seq(unsigned st, const std::string& _bc_gr = std::string()) const
+    {
+        const std::string fq = get_basecall_fastq(st, _bc_gr);
+        return fq2seq(fq);
+    }
+    /**
+     * Build a 4-line fastq record from name and seq and store it with
+     * add_basecall_fastq().
+     *
+     * Every base gets the same quality character, static_cast< char >(default_qual).
+     * The record carries no newline after the quality line.
+     */
+    void add_basecall_seq(unsigned st, const std::string& bc_gr,
+                          const std::string& name, const std::string& seq, int default_qual = 33) const
+    {
+        std::string fq;
+        fq += '@';
+        fq += name;
+        fq += '\n';
+        fq += seq;
+        fq += '\n';
+        fq += '+';
+        fq += '\n';
+        fq += std::string(seq.size(), static_cast< char >(default_qual));
+        add_basecall_fastq(st, bc_gr, fq);
+    }
+    /**
+     * Tell whether a Model dataset exists for strand st.
+     *
+     * With an empty _bc_gr the first group listed for the strand is
+     * consulted; if no group is listed the answer is false.
+     */
+    bool have_basecall_model(unsigned st, const std::string& _bc_gr = std::string()) const
+    {
+        if (not _bc_gr.empty())
+        {
+            return Base::dataset_exists(basecall_model_path(_bc_gr, st));
+        }
+        const auto& strand_groups = get_basecall_strand_group_list(st);
+        if (strand_groups.empty())
+        {
+            return false;
+        }
+        return Base::dataset_exists(basecall_model_path(strand_groups.front(), st));
+    }
+    /**
+     * Read the model file name recorded for strand st (0 or 1).
+     *
+     * With an empty _bc_gr the first group listed for the strand is used;
+     * that list must then be non-empty (front() is called unchecked).
+     * Existence of the entry is only asserted, not checked in release builds.
+     */
+    std::string get_basecall_model_file(unsigned st, const std::string& _bc_gr = std::string()) const
+    {
+        const std::string& bc_gr = _bc_gr.empty()? get_basecall_strand_group_list(st).front() : _bc_gr;
+        const std::string file_path = basecall_model_file_path(bc_gr, st);
+        assert(Base::exists(file_path));
+        std::string file_name;
+        Base::read(file_path, file_name);
+        return file_name;
+    }
+    /**
+     * Record file_name as the model file of Basecall group bc_gr, strand st.
+     *
+     * NOTE(review): the `false` flag presumably selects attribute (rather
+     * than dataset) storage -- confirm against hdf5_tools::File::write().
+     */
+    void add_basecall_model_file(unsigned st, const std::string& bc_gr, const std::string& file_name) const
+    {
+        Base::write(basecall_model_file_path(bc_gr, st), false, file_name);
+    }
+    /**
+     * Read the six model parameters (scale, shift, drift, var, scale_sd,
+     * var_sd) stored under the Model entry of strand st.
+     *
+     * With an empty _bc_gr the first group listed for the strand is used;
+     * that list must then be non-empty (front() is called unchecked).
+     */
+    Model_Parameters get_basecall_model_params(unsigned st, const std::string& _bc_gr = std::string()) const
+    {
+        const std::string& bc_gr = _bc_gr.empty()? get_basecall_strand_group_list(st).front() : _bc_gr;
+        const std::string model_base = basecall_model_path(bc_gr, st);
+        Model_Parameters params;
+        Base::read(model_base + "/scale", params.scale);
+        Base::read(model_base + "/shift", params.shift);
+        Base::read(model_base + "/drift", params.drift);
+        Base::read(model_base + "/var", params.var);
+        Base::read(model_base + "/scale_sd", params.scale_sd);
+        Base::read(model_base + "/var_sd", params.var_sd);
+        return params;
+    }
+    /**
+     * Write the six model parameters of params (scale, shift, drift, var,
+     * scale_sd, var_sd) under the Model entry of group bc_gr, strand st.
+     *
+     * T only needs to expose data members with those six names.
+     */
+    template < typename T >
+    void add_basecall_model_params(unsigned st, const std::string& bc_gr, const T& params) const
+    {
+        const std::string model_base = basecall_model_path(bc_gr, st);
+        Base::write(model_base + "/scale", false, params.scale);
+        Base::write(model_base + "/shift", false, params.shift);
+        Base::write(model_base + "/drift", false, params.drift);
+        Base::write(model_base + "/var", false, params.var);
+        Base::write(model_base + "/scale_sd", false, params.scale_sd);
+        Base::write(model_base + "/var_sd", false, params.var_sd);
+    }
+ /**
+ * Get Basecall model for given Basecall group and given strand.
+ *
+ * If _bc_gr is empty, the first group listed for strand st is used; that
+ * list must then be non-empty, since front() is called unchecked.
+ * The compound map binds the members kmer, level_mean, level_stdv,
+ * sd_mean and sd_stdv to the Model_Entry fields of the same name.
+ */
+ std::vector< Model_Entry > get_basecall_model(unsigned st, const std::string& _bc_gr = std::string()) const
{
std::vector< Model_Entry > res;
+ const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front();
hdf5_tools::Compound_Map m;
m.add_member("kmer", &Model_Entry::kmer);
m.add_member("level_mean", &Model_Entry::level_mean);
m.add_member("level_stdv", &Model_Entry::level_stdv);
m.add_member("sd_mean", &Model_Entry::sd_mean);
m.add_member("sd_stdv", &Model_Entry::sd_stdv);
- Base::read< Model_Entry >(model_path(i), res, &m);
+ Base::read(basecall_model_path(bc_gr, st), res, m);
return res;
}
-
- Model_Parameters get_model_parameters(size_t i) const
+ /**
+ * Add Basecall model
+ *
+ * Writes the entries of m to the Model path of group bc_gr, strand st.
+ * T only needs to expose data members named kmer, level_mean, level_stdv,
+ * sd_mean and sd_stdv; they are written under those same member names.
+ */
+ template < typename T >
+ void add_basecall_model(unsigned st, const std::string& bc_gr, const std::vector< T >& m) const
{
- Model_Parameters res;
- std::string path = model_path(i);
- Base::read< double >(path + "/drift", res.drift);
- Base::read< double >(path + "/scale", res.scale);
- Base::read< double >(path + "/scale_sd", res.scale_sd);
- Base::read< double >(path + "/shift", res.shift);
- Base::read< double >(path + "/var", res.var);
- Base::read< double >(path + "/var_sd", res.var_sd);
- return res;
+ hdf5_tools::Compound_Map cm;
+ cm.add_member("kmer", &T::kmer);
+ cm.add_member("level_mean", &T::level_mean);
+ cm.add_member("level_stdv", &T::level_stdv);
+ cm.add_member("sd_mean", &T::sd_mean);
+ cm.add_member("sd_stdv", &T::sd_stdv);
+ Base::write(basecall_model_path(bc_gr, st), true, m, cm);
}
-
- std::vector< Event_Entry > get_events(size_t i) const
+    /**
+     * Tell whether an Events dataset exists for strand st.
+     *
+     * With an empty _bc_gr the first group listed for the strand is
+     * consulted; if no group is listed the answer is false.
+     */
+    bool have_basecall_events(unsigned st, const std::string& _bc_gr = std::string()) const
+    {
+        if (not _bc_gr.empty())
+        {
+            return Base::dataset_exists(basecall_events_path(_bc_gr, st));
+        }
+        const auto& strand_groups = get_basecall_strand_group_list(st);
+        if (strand_groups.empty())
+        {
+            return false;
+        }
+        return Base::dataset_exists(basecall_events_path(strand_groups.front(), st));
+    }
+ /**
+ * Get Basecall events for given Basecall group and given strand.
+ *
+ * If _bc_gr is empty, the first group listed for strand st is used; that
+ * list must then be non-empty, since front() is called unchecked.
+ * Seven members are mapped: mean, start, stdv, length, p_model_state,
+ * model_state and move.
+ */
+ std::vector< Event_Entry > get_basecall_events(unsigned st, const std::string& _bc_gr = std::string()) const
{
std::vector< Event_Entry > res;
+ const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front();
hdf5_tools::Compound_Map m;
m.add_member("mean", &Event_Entry::mean);
m.add_member("start", &Event_Entry::start);
m.add_member("stdv", &Event_Entry::stdv);
m.add_member("length", &Event_Entry::length);
- Base::read< Event_Entry >(events_path(i), res, &m);
+ m.add_member("p_model_state", &Event_Entry::p_model_state);
+ m.add_member("model_state", &Event_Entry::model_state);
+ m.add_member("move", &Event_Entry::move);
+ Base::read(basecall_events_path(bc_gr, st), res, m);
return res;
}
-
- void set_basecalled_group_id(size_t i)
+    /**
+     * Write the events in ev to the Events path of group bc_gr, strand st.
+     *
+     * T only needs to expose data members named mean, start, stdv, length,
+     * p_model_state, model_state and move; they are written under those
+     * same member names.
+     */
+    template < typename T >
+    void add_basecall_events(unsigned st, const std::string& bc_gr, const std::vector< T >& ev) const
+    {
+        hdf5_tools::Compound_Map ev_map;
+        ev_map.add_member("mean", &T::mean);
+        ev_map.add_member("start", &T::start);
+        ev_map.add_member("stdv", &T::stdv);
+        ev_map.add_member("length", &T::length);
+        ev_map.add_member("p_model_state", &T::p_model_state);
+        ev_map.add_member("model_state", &T::model_state);
+        ev_map.add_member("move", &T::move);
+        const std::string ev_path = basecall_events_path(bc_gr, st);
+        Base::write(ev_path, true, ev, ev_map);
+    }
+    /**
+     * Tell whether an event Alignment dataset exists.
+     *
+     * With an empty _bc_gr the first group listed for strand 2 (2d) is
+     * consulted; if no such group is listed the answer is false.
+     */
+    bool have_basecall_event_alignment(const std::string& _bc_gr = std::string()) const
+    {
+        if (not _bc_gr.empty())
+        {
+            return Base::dataset_exists(basecall_event_alignment_path(_bc_gr));
+        }
+        const auto& groups_2d = get_basecall_strand_group_list(2);
+        if (groups_2d.empty())
+        {
+            return false;
+        }
+        return Base::dataset_exists(basecall_event_alignment_path(groups_2d.front()));
+    }
+ /**
+ * Get Basecall event alignment for given Basecall group.
+ *
+ * If _bc_gr is empty, the first group listed for strand 2 (2d) is used;
+ * that list must then be non-empty, since front() is called unchecked.
+ * The members "template" and "complement" are mapped to the fields
+ * template_index and complement_index; "kmer" is mapped to kmer.
+ */
+ std::vector< Event_Alignment_Entry > get_basecall_event_alignment(const std::string& _bc_gr = std::string()) const
{
- assert(i <= 999);
- std::stringstream ss;
- ss << std::setfill('0') << std::setw(3) << i;
- _basecalled_group_id = ss.str();
+ std::vector< Event_Alignment_Entry > res;
+ const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(2).front();
+ hdf5_tools::Compound_Map m;
+ m.add_member("template", &Event_Alignment_Entry::template_index);
+ m.add_member("complement", &Event_Alignment_Entry::complement_index);
+ m.add_member("kmer", &Event_Alignment_Entry::kmer);
+ Base::read(basecall_event_alignment_path(bc_gr), res, m);
+ return res;
}
+    /**
+     * Extract the sequence (second line) from a fastq record.
+     *
+     * Returns the text strictly between the first and the second newline of
+     * fq, or an empty string when fq holds fewer than two newlines.
+     */
+    static std::string fq2seq(const std::string& fq)
+    {
+        const std::string::size_type header_end = fq.find('\n');
+        if (header_end != std::string::npos)
+        {
+            const std::string::size_type seq_begin = header_end + 1;
+            const std::string::size_type seq_end = fq.find('\n', seq_begin);
+            if (seq_end != std::string::npos)
+            {
+                return fq.substr(seq_begin, seq_end - seq_begin);
+            }
+        }
+        return std::string();
+    }
private:
-
- // Returns the root path of the form:
- // Analyses/Basecall_2D_ddd/ where ddd is the group
- std::string get_bc_2d_root() const
+    /**
+     * Fill _raw_samples_read_name_list with every entry of the raw samples
+     * root group for which a Signal dataset exists. Does nothing when the
+     * root group is absent.
+     */
+    void detect_raw_samples_read_name_list()
+    {
+        const std::string& root = raw_samples_root_path();
+        if (not Base::group_exists(root)) return;
+        for (const auto& rn : Base::list_group(root))
+        {
+            if (Base::dataset_exists(raw_samples_path(rn)))
+            {
+                _raw_samples_read_name_list.push_back(rn);
+            }
+        }
+    }
+
+ /**
+ * Fill _eventdetection_group_list with the suffixes of all entries of the
+ * EventDetection root group whose name starts with the EventDetection
+ * group prefix. Does nothing when the root group is absent.
+ */
+ void detect_eventdetection_group_list()
{
- return "/Analyses/Basecall_2D_" + _basecalled_group_id;
+ if (not Base::group_exists(eventdetection_root_path())) return;
+ auto g_list = Base::list_group(eventdetection_root_path());
+ for (const auto& g : g_list)
+ {
+ // names no longer than the prefix cannot carry a suffix; the size test
+ // also keeps std::mismatch from reading past the end of g
+ if (g.size() <= eventdetection_group_prefix().size()) continue;
+ auto p = std::mismatch(eventdetection_group_prefix().begin(),
+ eventdetection_group_prefix().end(),
+ g.begin());
+ // skip names that do not start with the full prefix
+ if (p.first != eventdetection_group_prefix().end()) continue;
+ // keep only the part of the name that follows the prefix
+ _eventdetection_group_list.emplace_back(p.second, g.end());
+ }
}
- std::string model_path(size_t i) const
+ /**
+ * List the entries of the "Reads" subgroup of EventDetection group ed_gr
+ * that contain an "Events" dataset. Returns an empty list when the
+ * "Reads" subgroup does not exist.
+ */
+ std::vector< std::string > detect_eventdetection_read_name_list(const std::string& ed_gr) const
{
- static std::vector< std::string > _model_path =
- { "/BaseCalled_template/Model",
- "/BaseCalled_complement/Model" };
- return get_bc_2d_root() + _model_path.at(i);
+ std::vector< std::string > res;
+ std::string p = eventdetection_root_path() + "/" + eventdetection_group_prefix() + ed_gr + "/Reads";
+ if (not Base::group_exists(p)) return res;
+ auto rn_list = Base::list_group(p);
+ for (const auto& rn : rn_list)
+ {
+ // only reads that actually carry events are reported
+ if (not Base::dataset_exists(p + "/" + rn + "/Events")) continue;
+ res.push_back(rn);
+ }
+ return res;
}
- std::string events_path(size_t i) const
+ /**
+ * Fill _basecall_group_list with the suffixes of all entries of the
+ * Basecall root group whose name starts with the Basecall group prefix,
+ * and record each suffix in _basecall_strand_group_list[st] for every
+ * strand st (0/1/2) whose strand subgroup exists inside that group.
+ * Does nothing when the root group is absent.
+ */
+ void detect_basecall_group_list()
{
- static std::vector< std::string > _events_path =
- { "/BaseCalled_template/Events",
- "/BaseCalled_complement/Events" };
- return get_bc_2d_root() + _events_path.at(i);
+ if (not Base::group_exists(basecall_root_path())) return;
+ auto g_list = Base::list_group(basecall_root_path());
+ for (const auto& g : g_list)
+ {
+ // names no longer than the prefix cannot carry a suffix; the size test
+ // also keeps std::mismatch from reading past the end of g
+ if (g.size() <= basecall_group_prefix().size()) continue;
+ auto p = std::mismatch(basecall_group_prefix().begin(),
+ basecall_group_prefix().end(),
+ g.begin());
+ // skip names that do not start with the full prefix
+ if (p.first != basecall_group_prefix().end()) continue;
+ _basecall_group_list.emplace_back(p.second, g.end());
+ for (unsigned st = 0; st < 3; ++st)
+ {
+ if (Base::group_exists(basecall_root_path() + "/" + g + "/" + basecall_strand_subgroup(st)))
+ {
+ _basecall_strand_group_list[st].emplace_back(p.second, g.end());
+ }
+ }
+ }
}
- std::string model_file_path(size_t i) const
+ /**
+ * Read every attribute reported by Base::get_attr_list(path) into a
+ * std::string and return them as an attribute-name -> value map.
+ */
+ std::map< std::string, std::string > get_attr_map(const std::string& path) const
{
- static std::vector< std::string > _model_file_path =
- { "/Summary/basecall_1d_template/model_file",
- "/Summary/basecall_1d_complement/model_file" };
- return get_bc_2d_root() + _model_file_path.at(i);
+ std::map< std::string, std::string > res;
+ auto a_list = Base::get_attr_list(path);
+ for (const auto& a : a_list)
+ {
+ std::string tmp;
+ // each attribute is addressed as <path>/<attribute name>
+ Base::read(path + "/" + a, tmp);
+ res[a] = tmp;
+ }
+ return res;
}
- // default to using the 000 analysis group
- std::string _basecalled_group_id = "000";
+ // list of read names for which we have raw samples
+ std::vector< std::string > _raw_samples_read_name_list;
+
+ // list of EventDetection groups
+ std::vector< std::string > _eventdetection_group_list;
+
+ // list of Basecall groups
+ std::vector< std::string > _basecall_group_list;
+
+ // list of per-strand Basecall groups; 0/1/2 = template/complement/2d
+ std::array< std::vector< std::string >, 3 > _basecall_strand_group_list;
+    // static paths
+    /// Location of the file version entry: "/file_version".
+    static const std::string& file_version_path()
+    {
+        static const std::string path("/file_version");
+        return path;
+    }
+
+    /// Location of the channel id entry: "/UniqueGlobalKey/channel_id".
+    static const std::string& channel_id_path()
+    {
+        static const std::string path("/UniqueGlobalKey/channel_id");
+        return path;
+    }
+    /// Location of the tracking id entry: "/UniqueGlobalKey/tracking_id".
+    static const std::string& tracking_id_path()
+    {
+        static const std::string path("/UniqueGlobalKey/tracking_id");
+        return path;
+    }
+    /// Root group under which raw sample reads are looked up: "/Raw/Reads".
+    static const std::string& raw_samples_root_path()
+    {
+        static const std::string path("/Raw/Reads");
+        return path;
+    }
+    /// Path of the raw samples group of read rn: <raw root>/<rn>.
+    static std::string raw_samples_params_path(const std::string& rn)
+    {
+        std::string res = raw_samples_root_path();
+        res += "/";
+        res += rn;
+        return res;
+    }
+    /// Path of the raw samples dataset of read rn: <raw root>/<rn>/Signal.
+    static std::string raw_samples_path(const std::string& rn)
+    {
+        std::string res = raw_samples_root_path();
+        res += "/";
+        res += rn;
+        res += "/Signal";
+        return res;
+    }
+    /// Location of the sequences entry: "/Sequences/Meta".
+    static const std::string& sequences_path()
+    {
+        static const std::string path("/Sequences/Meta");
+        return path;
+    }
+    /// Group that holds the EventDetection groups: "/Analyses".
+    static const std::string& eventdetection_root_path()
+    {
+        static const std::string path("/Analyses");
+        return path;
+    }
+    /// Name prefix shared by all EventDetection groups: "EventDetection_".
+    static const std::string& eventdetection_group_prefix()
+    {
+        static const std::string prefix("EventDetection_");
+        return prefix;
+    }
+    /// Path of EventDetection group ed_gr: <root>/<prefix><ed_gr>.
+    static std::string eventdetection_params_path(const std::string& ed_gr)
+    {
+        std::string res = eventdetection_root_path();
+        res += "/";
+        res += eventdetection_group_prefix();
+        res += ed_gr;
+        return res;
+    }
+    /// Path of read rn inside EventDetection group ed_gr: <root>/<prefix><ed_gr>/Reads/<rn>.
+    static std::string eventdetection_event_params_path(const std::string& ed_gr, const std::string& rn)
+    {
+        std::string res = eventdetection_root_path();
+        res += "/";
+        res += eventdetection_group_prefix();
+        res += ed_gr;
+        res += "/Reads/";
+        res += rn;
+        return res;
+    }
+    /// Path of the events of read rn: <root>/<prefix><ed_gr>/Reads/<rn>/Events.
+    static std::string eventdetection_events_path(const std::string& ed_gr, const std::string& rn)
+    {
+        std::string res = eventdetection_root_path();
+        res += "/";
+        res += eventdetection_group_prefix();
+        res += ed_gr;
+        res += "/Reads/";
+        res += rn;
+        res += "/Events";
+        return res;
+    }
+
+    /// Group that holds the Basecall groups: "/Analyses".
+    static const std::string& basecall_root_path()
+    {
+        static const std::string path("/Analyses");
+        return path;
+    }
+    /// Name prefix shared by all Basecall groups: "Basecall_".
+    static const std::string& basecall_group_prefix()
+    {
+        static const std::string prefix("Basecall_");
+        return prefix;
+    }
+    /**
+     * Name of the subgroup holding the data of strand st.
+     *
+     * @param st 0, 1 or 2, selecting "BaseCalled_template",
+     *        "BaseCalled_complement" or "BaseCalled_2D".
+     * @throws std::out_of_range for any other value of st.
+     */
+    static const std::string& basecall_strand_subgroup(unsigned st)
+    {
+        static const std::array< std::string, 3 > _basecall_strand_subgroup =
+            {{ "BaseCalled_template", "BaseCalled_complement", "BaseCalled_2D" }};
+        // at() rather than operator[]: st ultimately comes from the caller
+        return _basecall_strand_subgroup.at(st);
+    }
+    /// Fastq path: <root>/<prefix><bc_gr>/<strand subgroup>/Fastq.
+    static std::string basecall_fastq_path(const std::string& bc_gr, unsigned st)
+    {
+        std::string res = basecall_root_path();
+        res += "/";
+        res += basecall_group_prefix();
+        res += bc_gr;
+        res += "/";
+        res += basecall_strand_subgroup(st);
+        res += "/Fastq";
+        return res;
+    }
+    /// Model path: <root>/<prefix><bc_gr>/<strand subgroup>/Model.
+    static std::string basecall_model_path(const std::string& bc_gr, unsigned st)
+    {
+        std::string res = basecall_root_path();
+        res += "/";
+        res += basecall_group_prefix();
+        res += bc_gr;
+        res += "/";
+        res += basecall_strand_subgroup(st);
+        res += "/Model";
+        return res;
+    }
+    /**
+     * Model file path: <root>/<prefix><bc_gr>/Summary/basecall_1d_<strand>/model_file,
+     * where <strand> is "template" for st == 0 and "complement" otherwise.
+     * Only st < 2 is meaningful; this is asserted.
+     */
+    static std::string basecall_model_file_path(const std::string& bc_gr, unsigned st)
+    {
+        assert(st < 2);
+        const char* const strand_name = (st == 0)? "template" : "complement";
+        std::string res = basecall_root_path();
+        res += "/";
+        res += basecall_group_prefix();
+        res += bc_gr;
+        res += "/Summary/basecall_1d_";
+        res += strand_name;
+        res += "/model_file";
+        return res;
+    }
+    /// Events path: <root>/<prefix><bc_gr>/<strand subgroup>/Events.
+    static std::string basecall_events_path(const std::string& bc_gr, unsigned st)
+    {
+        std::string res = basecall_root_path();
+        res += "/";
+        res += basecall_group_prefix();
+        res += bc_gr;
+        res += "/";
+        res += basecall_strand_subgroup(st);
+        res += "/Events";
+        return res;
+    }
+    /// Event alignment path: <root>/<prefix><bc_gr>/<2D strand subgroup>/Alignment.
+    static std::string basecall_event_alignment_path(const std::string& bc_gr)
+    {
+        std::string res = basecall_root_path();
+        res += "/";
+        res += basecall_group_prefix();
+        res += bc_gr;
+        res += "/";
+        res += basecall_strand_subgroup(2);
+        res += "/Alignment";
+        return res;
+    }
}; // class File
} // namespace fast5
diff --git a/src/hdf5-mod.cpp b/src/hdf5-mod.cpp
new file mode 100644
index 0000000..60dc01c
--- /dev/null
+++ b/src/hdf5-mod.cpp
@@ -0,0 +1,308 @@
+#include <cassert>
+#include <iostream>
+#include <string>
+
+#include "hdf5_tools.hpp"
+
+using namespace std;
+using namespace hdf5;
+
+/// Inner test compound: an int, a fixed length string and a variable length string.
+struct B
+{
+    int val_1;
+    std::array< char, 6 > val_2;
+    std::string val_3;
+    /// Print as (val_1=N,val_2="...",val_3="...").
+    friend std::ostream& operator << (std::ostream& os, const B& b)
+    {
+        const std::string val_2_str(b.val_2.begin(), b.val_2.end());
+        return os << "(val_1=" << b.val_1
+                  << ",val_2=\"" << val_2_str
+                  << "\",val_3=\"" << b.val_3 << "\")";
+    }
+};
+
+/// Outer test compound: numeric, fixed length string, variable length string
+/// and nested compound (B) members.
+struct A
+{
+    int val_1;
+    int val_1a;
+    float val_2;
+    char val_3[6];
+    std::array< char, 6 > val_4;
+    std::string val_5;
+    B val_6;
+    /// Print all members as (name=value,...); val_3 is printed as a C string.
+    friend std::ostream& operator << (std::ostream& os, const A& a)
+    {
+        const std::string val_4_str(a.val_4.begin(), a.val_4.end());
+        os << "(val_1=" << a.val_1;
+        os << ",val_1a=" << a.val_1a;
+        os << ",val_2=" << a.val_2;
+        os << ",val_3=\"" << a.val_3;
+        os << "\",val_4=\"" << val_4_str;
+        os << "\",val_5=\"" << a.val_5;
+        os << "\",val_6=" << a.val_6 << ")";
+        return os;
+    }
+};
+
+/// Variant of B in which every member is a std::string.
+struct B_string
+{
+    std::string val_1;
+    std::string val_2;
+    std::string val_3;
+    /// Print as (val_1="...",val_2="...",val_3="...").
+    friend std::ostream& operator << (std::ostream& os, const B_string& b)
+    {
+        return os << "(val_1=\"" << b.val_1
+                  << "\",val_2=\"" << b.val_2
+                  << "\",val_3=\"" << b.val_3 << "\")";
+    }
+};
+/// Variant of A in which every non-compound member is a std::string and the
+/// nested compound is a B_string.
+struct A_string
+{
+    std::string val_1;
+    std::string val_1a;
+    std::string val_2;
+    std::string val_3;
+    std::string val_4;
+    std::string val_5;
+    B_string val_6;
+    /// Print all members as (name="value",...,val_6=<nested>).
+    friend std::ostream& operator << (std::ostream& os, const A_string& a)
+    {
+        os << "(val_1=\"" << a.val_1;
+        os << "\",val_1a=\"" << a.val_1a;
+        os << "\",val_2=\"" << a.val_2;
+        os << "\",val_3=\"" << a.val_3;
+        os << "\",val_4=\"" << a.val_4;
+        os << "\",val_5=\"" << a.val_5;
+        os << "\",val_6=" << a.val_6 << ")";
+        return os;
+    }
+};
+
+/// Variant of B that keeps only the string members, both as fixed length
+/// char arrays.
+struct B_char_array
+{
+    array< char, 6 > val_2;
+    array< char, 6 > val_3;
+    /// Print as (val_2="...",val_3="...").
+    friend ostream & operator << (ostream & os, const B_char_array & b)
+    {
+        // val_3 gets its closing quote, matching the other operator<< overloads
+        os << "(val_2=\"" << string(b.val_2.begin(), b.val_2.end())
+           << "\",val_3=\"" << string(b.val_3.begin(), b.val_3.end()) << "\")";
+        return os;
+    }
+};
+
+/// Variant of A that keeps only the string members, as fixed length char
+/// arrays, plus a nested B_char_array.
+struct A_char_array
+{
+    std::array< char, 6 > val_3;
+    std::array< char, 6 > val_4;
+    std::array< char, 6 > val_5;
+    B_char_array val_6;
+    /// Print as (val_3="...",val_4="...",val_5="...",val_6=<nested>).
+    friend std::ostream& operator << (std::ostream& os, const A_char_array& a)
+    {
+        const std::string val_3_str(a.val_3.begin(), a.val_3.end());
+        const std::string val_4_str(a.val_4.begin(), a.val_4.end());
+        const std::string val_5_str(a.val_5.begin(), a.val_5.end());
+        return os << "(val_3=\"" << val_3_str
+                  << "\",val_4=\"" << val_4_str
+                  << "\",val_5=\"" << val_5_str
+                  << "\",val_6=" << a.val_6 << ")";
+    }
+};
+
+/**
+ * Exercise hdf5_tools::File: create a file, write numeric, string and
+ * compound values, then read the compound vector back through three
+ * different compound maps.
+ *
+ * use: <prog> [-f] <fast5_file>
+ *
+ * Returns EXIT_FAILURE on a usage error. hdf5_tools::Exception is caught
+ * and reported on stdout.
+ */
+int main(int argc, char* argv[])
+{
+    // "-f" must be followed by a file name; without it exactly one argument
+    // is expected. Indexing argv[2] when argc == 2 would build a string from
+    // a null pointer.
+    const bool force = argc >= 2 and string(argv[1]) == "-f";
+    if (argc != (force? 3 : 2))
+    {
+        cerr << "use: " << argv[0] << " [-f] <fast5_file>" << endl;
+        return EXIT_FAILURE;
+    }
+    string file_name(argv[force? 2 : 1]);
+    {
+        hdf5_tools::File f;
+        //
+        // All fast5 operations are performed inside a try-catch block. This should
+        // resist various hdf5 errors without leaking memory.
+        //
+        try
+        {
+            //
+            // create file; without -f, fail if it exists
+            //
+            f.create(file_name, force);
+            assert(f.is_open());
+            assert(f.is_rw());
+            //
+            // write a /file_version to allow f5dump to work
+            //
+            string file_version("42");
+            f.write("/file_version", false, file_version);
+            int val_1 = 42;
+            float val_2 = 3.14;
+            char val_3[6] = "ACGTA";
+            array< char, 6 > val_4 = { "AACCG" };
+            string val_5("CCCGG");
+            static_assert(hdf5_tools::detail::mem_type_class< void >::value == 0, "");
+            static_assert(hdf5_tools::detail::mem_type_class< decltype(val_1) >::value == 1, "");
+            static_assert(hdf5_tools::detail::mem_type_class< decltype(val_2) >::value == 1, "");
+            static_assert(hdf5_tools::detail::mem_type_class< decltype(val_3) >::value == 2, "");
+            static_assert(hdf5_tools::detail::mem_type_class< decltype(val_4) >::value == 2, "");
+            static_assert(hdf5_tools::detail::mem_type_class< decltype(val_5) >::value == 3, "");
+            static_assert(hdf5_tools::detail::mem_type_class< std::true_type >::value == 4, "");
+            //
+            // write integer
+            //
+            f.write("/val_1", false, val_1);
+            f.write("/val_1_as_64", false, val_1, H5T_STD_I64LE);
+            f.write("/val_1_v", false, vector< int >(3, val_1));
+            //
+            // write float
+            //
+            f.write("/val_2", false, val_2);
+            f.write("/val_2_as_64", false, val_2, H5T_IEEE_F64LE);
+            f.write("/val_2_v", false, vector< float >(3, val_2));
+            //
+            // write fixlen string: char[]
+            //
+            f.write("/val_3", false, val_3);
+            f.write("/val_3_as_len_3", false, val_3, 3);
+            f.write("/val_3_as_varlen", false, val_3, -1);
+            //
+            // write fixlen string: std::array< char >
+            //
+            f.write("/val_4", false, val_4);
+            f.write("/val_4_as_len_3", false, val_4, 3);
+            f.write("/val_4_as_varlen", false, val_4, -1);
+            f.write("/val_4_v", false, vector < decltype(val_4) >(3, val_4));
+            f.write("/val_4_v_as_len_3", false, vector < decltype(val_4) >(3, val_4), 3);
+            f.write("/val_4_v_as_varlen", false, vector < decltype(val_4) >(3, val_4), -1);
+            //
+            // write varlen string
+            //
+            f.write("/val_5", false, val_5);
+            f.write("/val_5_as_len_3", false, val_5, 3);
+            f.write("/val_5_as_fixlen", false, val_5, 0);
+            f.write("/val_5_v", false, vector< decltype(val_5) >(3, val_5));
+            f.write("/val_5_v_as_len_3", false, vector< decltype(val_5) >(3, val_5), 3);
+            f.write("/val_5_v_as_fixlen", false, vector< decltype(val_5) >(1, val_5), 0); // only size 1
+            //
+            // write compound
+            //
+            A val_6{ 1, 2, 3.14, "ACGTA", "CGTAC", "CCGGT", { 42, "GTTAC", "TTATT" } };
+            hdf5_tools::Compound_Map cm_A;
+            hdf5_tools::Compound_Map cm_B;
+            cm_B.add_member("val_1", &B::val_1);
+            cm_B.add_member("val_2", &B::val_2);
+            cm_B.add_member("val_3", &B::val_3);
+            for (const auto& e : cm_B.members())
+            {
+                clog << "cm_B: (" << (void*)&e << ")" << e << endl;
+            }
+            // note: A::val_1a is not part of the map
+            cm_A.add_member("val_1", &A::val_1);
+            cm_A.add_member("val_2", &A::val_2);
+            cm_A.add_member("val_3", &A::val_3);
+            cm_A.add_member("val_4", &A::val_4);
+            cm_A.add_member("val_5", &A::val_5);
+            cm_A.add_member("val_6", &A::val_6, &cm_B);
+            for (const auto& e : cm_A.members())
+            {
+                clog << "cm_A: (" << (void*)&e << ")" << e << endl;
+            }
+            auto l = cm_A.get_member_ptr_list();
+            for (const auto& p : l)
+            {
+                clog << "member:";
+                for (const auto& e_ptr : p.first)
+                {
+                    clog << " " << *e_ptr;
+                }
+                clog << "; total_offset=" << p.second << endl;
+            }
+            //f.write("/val_6a", false, val_6, cm_A);
+            f.write("/val_6d", true, val_6, cm_A);
+            vector< A > src(3, val_6);
+            f.write("/val_6d_v", true, src, cm_A);
+            clog << "wrote val_6d_v:" << endl;
+            for (const auto& a : src)
+            {
+                clog << a << endl;
+            }
+
+            //
+            // test reading compound
+            //
+            // using original map
+            {
+                std::vector< A > dest;
+                f.read("/val_6d_v", dest, cm_A);
+                clog << "read val_6d_v:" << endl;
+                for (const auto& a : dest)
+                {
+                    clog << a << endl;
+                }
+            }
+            // using all strings
+            {
+                hdf5_tools::Compound_Map cm_A_string;
+                hdf5_tools::Compound_Map cm_B_string;
+                cm_B_string.add_member("val_1", &B_string::val_1);
+                cm_B_string.add_member("val_2", &B_string::val_2);
+                cm_B_string.add_member("val_3", &B_string::val_3);
+                for (const auto& e : cm_B_string.members())
+                {
+                    clog << "cm_B_string: (" << (void*)&e << ")" << e << endl;
+                }
+                cm_A_string.add_member("val_1", &A_string::val_1);
+                cm_A_string.add_member("val_2", &A_string::val_2);
+                cm_A_string.add_member("val_3", &A_string::val_3);
+                cm_A_string.add_member("val_4", &A_string::val_4);
+                cm_A_string.add_member("val_5", &A_string::val_5);
+                cm_A_string.add_member("val_6", &A_string::val_6, &cm_B_string);
+                for (const auto& e : cm_A_string.members())
+                {
+                    clog << "cm_A_string: (" << (void*)&e << ")" << e << endl;
+                }
+                std::vector< A_string > dest;
+                f.read("/val_6d_v", dest, cm_A_string);
+                clog << "read val_6d_v using all-strings:" << endl;
+                for (const auto& a : dest)
+                {
+                    clog << a << endl;
+                }
+            }
+            // using char arrays
+            {
+                hdf5_tools::Compound_Map cm_A_char_array;
+                hdf5_tools::Compound_Map cm_B_char_array;
+                cm_B_char_array.add_member("val_2", &B_char_array::val_2);
+                cm_B_char_array.add_member("val_3", &B_char_array::val_3);
+                for (const auto& e : cm_B_char_array.members())
+                {
+                    clog << "cm_B_char_array: (" << (void*)&e << ")" << e << endl;
+                }
+                cm_A_char_array.add_member("val_3", &A_char_array::val_3);
+                cm_A_char_array.add_member("val_4", &A_char_array::val_4);
+                cm_A_char_array.add_member("val_5", &A_char_array::val_5);
+                cm_A_char_array.add_member("val_6", &A_char_array::val_6, &cm_B_char_array);
+                for (const auto& e : cm_A_char_array.members())
+                {
+                    clog << "cm_A_char_array: (" << (void*)&e << ")" << e << endl;
+                }
+                std::vector< A_char_array > dest;
+                f.read("/val_6d_v", dest, cm_A_char_array);
+                clog << "read val_6d_v using char arrays:" << endl;
+                for (const auto& a : dest)
+                {
+                    clog << a << endl;
+                }
+
+            }
+        }
+        catch (hdf5_tools::Exception& e)
+        {
+            cout << "hdf5 error: " << e.what() << endl;
+        }
+        //
+        // fast5 file is closed by its destructor at the end of this scope
+        //
+    }
+    assert(hdf5_tools::File::get_object_count() == 0);
+}
diff --git a/src/hdf5_tools.hpp b/src/hdf5_tools.hpp
index 6532d74..252b1a2 100644
--- a/src/hdf5_tools.hpp
+++ b/src/hdf5_tools.hpp
@@ -1,14 +1,27 @@
+//
+// The MIT License (MIT)
+//
+// Copyright (c) 2015 Matei David, Ontario Institute for Cancer Research
+//
+
#ifndef __HDF5_TOOLS_HPP
#define __HDF5_TOOLS_HPP
#include <cassert>
+#include <cstring>
#include <exception>
#include <functional>
+#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <tuple>
#include <vector>
+#include <deque>
+#include <set>
+#include <map>
+#include <limits>
+#include <type_traits>
namespace hdf5
{
@@ -24,8 +37,13 @@ class Exception
: public std::exception
{
public:
- Exception(const std::string& msg) : _msg(msg) {}
+ Exception(const std::string& msg) : _msg(active_path() + ": " + msg) {}
const char* what() const noexcept { return _msg.c_str(); }
+    /// Per-thread string that the constructor prepends (followed by ": ") to
+    /// every exception message; returned by reference so callers can set it.
+    static std::string& active_path()
+    {
+        static thread_local std::string path;
+        return path;
+    }
private:
std::string _msg;
}; // class Exception
@@ -36,6 +54,13 @@ class Compound_Map;
namespace detail
{
+/// Byte offset, within an object of type T, of the member designated by
+/// mem_ptr (computed at run time from a null object pointer).
+template < typename T, typename U >
+std::size_t offset_of(U T::* mem_ptr)
+{
+    T* const null_obj = nullptr;
+    return reinterpret_cast< std::size_t >(&(null_obj->*mem_ptr));
+}
+
/// TempMetaFunc: Given destination type, deduce memory type to be used in hdf5 read operation.
/// Only useful for numeric types.
/// HDF5 idiosyncracy:
@@ -58,77 +83,473 @@ template <> struct get_mem_type< float > { static hid_t id() { retu
template <> struct get_mem_type< double > { static hid_t id() { return H5T_NATIVE_DOUBLE; } };
template <> struct get_mem_type< long double > { static hid_t id() { return H5T_NATIVE_LDOUBLE; } };
-/// TempMetaFunc: Given destination type, can we read it
-template < typename Out_Data_Type >
-struct can_read
+/**
+ * Class of memory type, exposed as the compile-time constant mem_type_class< T >::value:
+ * 0 - unknown (T is neither integral, floating point, nor a class type)
+ * 1 - numeric (signed/unsigned integer or float)
+ * 2 - fixed length string (char array)
+ * 3 - variable length string (std::string)
+ * 4 - class
+ * This primary template only ever yields 0, 1 or 4; the values 2 and 3 come from the
+ * specializations of mem_type_class defined after it.
+ */
+template < typename T >
+struct mem_type_class
 {
- static const bool value =
- std::is_integral< Out_Data_Type >::value
- or std::is_floating_point< Out_Data_Type >::value
- or std::is_same< typename std::remove_extent< Out_Data_Type >::type, char >::value
- or std::is_same< Out_Data_Type, std::string >::value
- or std::is_class< Out_Data_Type >:: value;
+ // nested std::conditional: numeric -> 1, otherwise class -> 4, otherwise 0
+ static const int value =
+ std::conditional< std::is_integral< T >::value or std::is_floating_point< T >::value,
+ std::integral_constant< int, 1 >,
+ typename std::conditional< std::is_class< T >::value,
+ std::integral_constant< int, 4 >,
+ std::integral_constant< int, 0 > >::type >::type::value;
 };
-
-/// TempMetaFunc: Given a destination type, does it need a compound map
-template < typename Out_Data_Type >
-struct read_as_atomic
+// Specialization: a plain char array of known extent is a fixed length string (class 2).
+template < size_t Size >
+struct mem_type_class< char[Size] >
 {
- static const bool value =
- std::is_integral< Out_Data_Type >::value
- or std::is_floating_point< Out_Data_Type >::value
- or std::is_same< typename std::remove_extent< Out_Data_Type >::type, char >::value
- or std::is_same< Out_Data_Type, std::string >::value;
+ static const int value = 2;
 };
-
-/// Compute offset of a struct member from a member pointer (runtime version).
-template < typename T, typename U >
-std::size_t offset_of(U T::* mem_ptr)
+// Specialization: same as above for an array of const char.
+template < size_t Size >
+struct mem_type_class< const char[Size] >
 {
- return reinterpret_cast< std::size_t >(&(((T*)0)->*mem_ptr));
-}
+ static const int value = 2;
+};
+// Specialization: std::array of char is also a fixed length string (class 2);
+// without it std::array, being a class type, would fall into class 4.
+// NOTE(review): <array> is not among the standard headers included at the top of this
+// file as shown in this patch -- confirm it is pulled in some other way.
+template < size_t Size >
+struct mem_type_class< std::array< char, Size > >
+{
+ static const int value = 2;
+};
+// Specialization: same as above for std::array of const char.
+template < size_t Size >
+struct mem_type_class< std::array< const char, Size > >
+{
+ static const int value = 2;
+};
+// Specialization: std::string is a variable length string (class 3);
+// without it std::string, being a class type, would fall into class 4.
+template <>
+struct mem_type_class< std::string >
+{
+ static const int value = 3;
+};
-/// Description of a member inside a compound
-/// Only works with numeric, string, and struct types.
-struct Compound_Member_Description
+// Struct whose purpose is to destroy the HDF object during destruction.
+// It stores an HDF5 id together with the function used to release it; the destructor calls
+// that function on the id. Holders are move-only: copying is deleted, moving exchanges the
+// id and its closer with the source holder.
+// NOTE(review): the destructor is declared noexcept(false) and the closers produced by
+// Util::wrapped_closer throw Exception on failure, so an exception can leave this destructor;
+// if that happens while another exception is propagating the program terminates -- confirm
+// this is the intended policy.
+struct HDF_Object_Holder
 {
-public:
- Compound_Member_Description(const std::string& _name, size_t _offset, hid_t _numeric_type_id)
- : name(_name), offset(_offset), numeric_type_id(_numeric_type_id)
+ // id currently held; the destructor only acts on values > 0
+ hid_t id;
+ // closer invoked with id by the destructor; may be empty, in which case nothing is called
+ std::function< herr_t(hid_t) > dtor;
+ // empty holder: id 0, no closer
+ HDF_Object_Holder()
+ : id(0) {}
+ HDF_Object_Holder(const HDF_Object_Holder&) = delete;
+ // move construction: start empty, then swap contents with other (other is left empty)
+ HDF_Object_Holder(HDF_Object_Holder&& other)
+ : id(0)
 {
- type = numeric;
+ load(std::move(other));
 }
- Compound_Member_Description(const std::string& _name, size_t _offset, size_t _char_array_size)
- : name(_name), offset(_offset), char_array_size(_char_array_size)
+ // hold _id and release it with _dtor on destruction (pass an empty _dtor to never release it)
+ HDF_Object_Holder(hid_t _id, std::function< herr_t(hid_t) > _dtor)
 {
- type = char_array;
+ load(_id, _dtor);
 }
- Compound_Member_Description(const std::string& _name, size_t _offset)
- : name(_name), offset(_offset)
+ // release the held id (if > 0 and a closer is set); whatever dtor throws propagates
+ ~HDF_Object_Holder() noexcept(false)
+ {
+ if (id > 0)
+ {
+ if (dtor)
+ {
+ dtor(id);
+ }
+ id = 0;
+ }
+ }
+ HDF_Object_Holder& operator = (const HDF_Object_Holder&) = delete;
+ // move assignment is a swap: the previous contents of *this end up in other and are
+ // released when other is destroyed
+ HDF_Object_Holder& operator = (HDF_Object_Holder&& other)
+ {
+ if (&other != this)
+ {
+ std::swap(id, other.id);
+ std::swap(dtor, other.dtor);
+ }
+ return *this;
+ }
+ // overwrite id and closer.
+ // NOTE(review): an id already held is not released first; the calls visible in this part
+ // of the file are all made on empty holders -- confirm no caller re-loads a live holder.
+ void load(hid_t _id, std::function< herr_t(hid_t) > _dtor)
+ {
+ id = _id;
+ dtor = _dtor;
+ }
+ // take over the contents of other (implemented through move assignment, i.e. a swap)
+ void load(HDF_Object_Holder&& other)
 {
- type = string;
+ *this = std::move(other);
 }
- Compound_Member_Description(const std::string& _name, size_t _offset, const Compound_Map* _compound_map_ptr)
- : name(_name), offset(_offset), compound_map_ptr(_compound_map_ptr)
+}; // struct HDF_Object_Holder
+
+/// Static helpers shared by the rest of this header: construction of HDF5 string datatypes,
+/// and a checked-call wrapper that turns failing HDF5 return values into Exception.
+struct Util
+{
+ /**
+ * Make hdf5 string type.
+ * The type is a copy of H5T_C_S1 whose size is then set as requested.
+ * @param sz If negative, make varlen string; else make fixlen string of size sz.
+ * Must not be 0 (asserted).
+ * @return Holder that closes the new type with H5Tclose.
+ */
+ static HDF_Object_Holder make_str_type(long sz)
+ {
+ assert(sz != 0);
+ HDF_Object_Holder res(
+ wrap(H5Tcopy, H5T_C_S1),
+ wrapped_closer(H5Tclose));
+ size_t tmp = sz < 0? H5T_VARIABLE : sz;
+ wrap(H5Tset_size, res.id, tmp);
+ return res;
+ } // make_str_type
+
+ /**
+ * Get name and return value checker for hdf5 function.
+ * @param fcn_ptr Address of the hdf5 function, cast to void(*)().
+ * @return Pair (function name, checker). The checker receives a pointer to the value the
+ * function returned and yields true when that value does not signal an error.
+ * The table below uses these conventions:
+ * herr_t, htri_t, ssize_t and int results must be >= 0;
+ * hid_t results must be > 0; the size_t result of H5Tget_size must be > 0;
+ * enum results must differ from their error value (H5S_NO_CLASS, H5T_NO_CLASS, H5T_SGN_ERROR);
+ * the char* result of H5Tget_member_name must be non-null.
+ * The table is a function-local static, so it is built once on first use.
+ * A function that is not listed makes std::map::at throw std::out_of_range.
+ */
+ static const std::pair< const char *, std::function< bool(void *) > >&
+ get_fcn_info(void (*fcn_ptr)())
+ {
+ static const std::map< void (*)(), std::pair< const char *, std::function< bool(void *) > > > fcn_info_m =
+ {
+ { (void(*)())&H5Aclose,
+ { "H5Aclose",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Acreate2,
+ { "H5Acreate2",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Aexists_by_name,
+ { "H5Aexists_by_name",
+ [] (void * vp) { return *reinterpret_cast< htri_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Aget_name_by_idx,
+ { "H5Aget_name_by_idx",
+ [] (void * vp) { return *reinterpret_cast< ssize_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Aget_space,
+ { "H5Aget_space",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Aget_type,
+ { "H5Aget_type",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Aopen,
+ { "H5Aopen",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Aopen_by_name,
+ { "H5Aopen_by_name",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Aread,
+ { "H5Aread",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Awrite,
+ { "H5Awrite",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+
+ { (void(*)())&H5Dclose,
+ { "H5Dclose",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Dcreate2,
+ { "H5Dcreate2",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Dget_space,
+ { "H5Dget_space",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Dget_type,
+ { "H5Dget_type",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Dopen,
+ { "H5Dopen",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Dread,
+ { "H5Dread",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Dvlen_reclaim,
+ { "H5Dvlen_reclaim",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Dwrite,
+ { "H5Dwrite",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+
+ { (void(*)())&H5Gclose,
+ { "H5Gclose",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Gcreate2,
+ { "H5Gcreate2",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Gget_info,
+ { "H5Gget_info",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Gopen2,
+ { "H5Gopen2",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+
+ { (void(*)())&H5Lexists,
+ { "H5Lexists",
+ [] (void * vp) { return *reinterpret_cast< htri_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Lget_name_by_idx,
+ { "H5Lget_name_by_idx",
+ [] (void * vp) { return *reinterpret_cast< ssize_t * >(vp) >= 0; }
+ }
+ },
+
+ { (void(*)())&H5Oclose,
+ { "H5Oclose",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Oexists_by_name,
+ { "H5Oexists_by_name",
+ [] (void * vp) { return *reinterpret_cast< htri_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Oget_info,
+ { "H5Oget_info",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Oopen,
+ { "H5Oopen",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+
+ { (void(*)())&H5Pclose,
+ { "H5Pclose",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Pcreate,
+ { "H5Pcreate",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Pset_create_intermediate_group,
+ { "H5Pset_create_intermediate_group",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+
+ { (void(*)())&H5Sclose,
+ { "H5Sclose",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Screate,
+ { "H5Screate",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Screate_simple,
+ { "H5Screate_simple",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Sget_simple_extent_dims,
+ { "H5Sget_simple_extent_dims",
+ [] (void * vp) { return *reinterpret_cast< int * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Sget_simple_extent_ndims,
+ { "H5Sget_simple_extent_ndims",
+ [] (void * vp) { return *reinterpret_cast< int * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Sget_simple_extent_type,
+ { "H5Sget_simple_extent_type",
+ [] (void * vp) { return *reinterpret_cast< H5S_class_t * >(vp) != H5S_NO_CLASS; }
+ }
+ },
+
+ { (void(*)())&H5Tclose,
+ { "H5Tclose",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Tcopy,
+ { "H5Tcopy",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Tcreate,
+ { "H5Tcreate",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Tget_class,
+ { "H5Tget_class",
+ [] (void * vp) { return *reinterpret_cast< H5T_class_t * >(vp) != H5T_NO_CLASS; }
+ }
+ },
+ { (void(*)())&H5Tget_member_index,
+ { "H5Tget_member_index",
+ [] (void * vp) { return *reinterpret_cast< int * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Tget_member_name,
+ { "H5Tget_member_name",
+ [] (void * vp) { return *reinterpret_cast< char* * >(vp) != nullptr; }
+ }
+ },
+ { (void(*)())&H5Tget_member_type,
+ { "H5Tget_member_type",
+ [] (void * vp) { return *reinterpret_cast< hid_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Tget_nmembers,
+ { "H5Tget_nmembers",
+ [] (void * vp) { return *reinterpret_cast< int * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Tget_sign,
+ { "H5Tget_sign",
+ [] (void * vp) { return *reinterpret_cast< H5T_sign_t * >(vp) != H5T_SGN_ERROR; }
+ }
+ },
+ { (void(*)())&H5Tget_size,
+ { "H5Tget_size",
+ [] (void * vp) { return *reinterpret_cast< size_t * >(vp) > 0; }
+ }
+ },
+ { (void(*)())&H5Tinsert,
+ { "H5Tinsert",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Tis_variable_str,
+ { "H5Tis_variable_str",
+ [] (void * vp) { return *reinterpret_cast< htri_t * >(vp) >= 0; }
+ }
+ },
+ { (void(*)())&H5Tset_size,
+ { "H5Tset_size",
+ [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+ }
+ },
+ };
+ return fcn_info_m.at(fcn_ptr);
+ }
+
+ /**
+ * General-purpose wrapper of hdf5 calls that checks return value for validity.
+ * Calls f(args...), looks up the checker registered for f in get_fcn_info(), and throws
+ * Exception("error in <function name>") when the checker rejects the result.
+ * @return The value returned by f when it passes the check.
+ * Note that f is called before the lookup, so a function missing from the table is still
+ * executed before std::out_of_range is raised by the lookup.
+ */
+ template < typename Function, typename... Args >
+ static typename std::result_of< Function(Args...) >::type
+ wrap(Function&& f, Args&& ...args)
 {
- type = compound;
+ auto res = f(args...);
+ const auto& f_info = get_fcn_info((void(*)())&f);
+ if (not f_info.second((void*)&res)) throw Exception(std::string("error in ") + f_info.first);
+ return res;
 }
- bool is_numeric() const { return type == numeric; }
+ /**
+ * Wrap closer function.
+ * @return A callable taking an id and forwarding it to wrap(f, id), i.e. a closer that
+ * throws Exception when the close call reports failure.
+ * The lambda captures f by reference, so f must refer to something that outlives the
+ * returned callable; the uses visible in this part of the file pass hdf5 close functions by name.
+ */
+ template < typename Function >
+ static std::function< herr_t(hid_t) > wrapped_closer(Function&& f)
+ {
+ return [&] (hid_t id) { return wrap(f, id); };
+ }
+}; // struct Util
+
+/// Description of a member inside a compound
+/// Only works with numeric, string, and struct types.
+/// Each description records the member's name, its byte offset inside the enclosing struct,
+/// its kind (numeric, char array, std::string or nested compound) and the kind-specific datum:
+/// the hdf5 memory type id, the char array size, or the map (and size) of the nested struct.
+struct Compound_Member_Description
+{
+public:
+ // compound_size is only meaningful for compound members; the other constructors set it
+ // to 0 so that copying a description (e.g. when the owning vector grows) never reads an
+ // uninitialized value.
+ /// Numeric member: _numeric_type_id is the hdf5 memory type id to use for it.
+ Compound_Member_Description(const std::string& _name, size_t _offset, hid_t _numeric_type_id)
+ : type(numeric),
+ name(_name),
+ offset(_offset),
+ numeric_type_id(_numeric_type_id),
+ compound_size(0) {}
+ /// Fixed length string member stored in a char array of _char_array_size bytes.
+ Compound_Member_Description(const std::string& _name, size_t _offset, size_t _char_array_size)
+ : type(char_array),
+ name(_name),
+ offset(_offset),
+ char_array_size(_char_array_size),
+ compound_size(0) {}
+ /// Variable length string member (std::string).
+ Compound_Member_Description(const std::string& _name, size_t _offset)
+ : type(string),
+ name(_name),
+ offset(_offset),
+ compound_size(0) {}
+ /// Nested struct member described by *_compound_map_ptr; only the pointer is stored,
+ /// so the map must stay alive for as long as this description is used.
+ Compound_Member_Description(const std::string& _name, size_t _offset,
+ const Compound_Map* _compound_map_ptr, size_t _compound_size)
+ : type(compound),
+ name(_name),
+ offset(_offset),
+ compound_map_ptr(_compound_map_ptr),
+ compound_size(_compound_size) {}
+
+ bool is_numeric() const { return type == numeric; }
 bool is_char_array() const { return type == char_array; }
- bool is_string() const { return type == string; }
- bool is_compound() const { return type == compound; }
+ bool is_string() const { return type == string; }
+ bool is_compound() const { return type == compound; }
- std::string name;
- size_t offset;
- union
+ /// Build the hdf5 memory datatype of a non-compound member (asserted).
+ /// Numeric members return their stored type id with no closer attached; char array and
+ /// string members return a freshly made string type that is closed by the holder.
+ HDF_Object_Holder get_type() const
 {
- hid_t numeric_type_id;
- size_t char_array_size;
- const Compound_Map* compound_map_ptr;
- };
+ assert(not is_compound());
+ HDF_Object_Holder res;
+ if (is_numeric())
+ {
+ res.load(numeric_type_id, nullptr);
+ }
+ else if (is_char_array())
+ {
+ res.load(Util::make_str_type(char_array_size));
+ }
+ else if (is_string())
+ {
+ res.load(Util::make_str_type(-1));
+ }
+ return res;
+ }
+
+ /// Debug printer: address, name, kind and offset of the description.
+ friend std::ostream& operator << (std::ostream& os, const Compound_Member_Description& e)
+ {
+ os << "(&=" << (void*)&e
+ << ",name=\"" << e.name
+ << "\",type=" << (e.is_numeric()
+ ? "numeric"
+ : (e.is_char_array()
+ ? "char_array"
+ : (e.is_string()
+ ? "string" : "compound")))
+ << ",offset=" << e.offset << ")";
+ return os;
+ }
-private:
 enum member_type
 {
 numeric,
@@ -137,6 +558,15 @@ private:
 compound
 };
 member_type type;
+ std::string name;
+ size_t offset;
+ // kind-specific datum; which member is valid is determined by `type`
+ union
+ {
+ hid_t numeric_type_id;
+ size_t char_array_size;
+ const Compound_Map* compound_map_ptr;
+ };
+ // sizeof the nested struct for compound members, 0 otherwise
+ size_t compound_size;
 }; // Compound_Member_Description
} // namespace detail
@@ -155,20 +585,19 @@ public:
template < typename T, typename U >
void add_member(const std::string& name, U T::* mem_ptr)
{
- static_assert(std::is_integral< U >::value
- or std::is_floating_point< U >::value
- or std::is_same< typename std::remove_extent< U >::type, char >::value
- or std::is_same< U, std::string >::value,
- "add_member(name, mem_ptr) overload expects numerical or string types only ");
- if (std::is_integral< U >::value or std::is_floating_point< U >::value)
+ static_assert(detail::mem_type_class< U >::value == 1
+ or detail::mem_type_class< U >::value == 2
+ or detail::mem_type_class< U >::value == 3,
+ "add_member(name, mem_ptr) overload expects numerical or string types only");
+ if (detail::mem_type_class< U >::value == 1)
{
_members.emplace_back(name, detail::offset_of(mem_ptr), detail::get_mem_type< U >::id());
}
- else if (std::is_same< typename std::remove_extent< U >::type, char >::value)
+ else if (detail::mem_type_class< U >::value == 2)
{
_members.emplace_back(name, detail::offset_of(mem_ptr), sizeof(U));
}
- else if (std::is_same< U, std::string >::value)
+ else if (detail::mem_type_class< U >::value == 3)
{
_members.emplace_back(name, detail::offset_of(mem_ptr));
}
@@ -177,14 +606,159 @@ public:
+ /// Register a member of class type U (checked at compile time) that is itself described
+ /// by another compound map. The member's offset and sizeof(U) are recorded; only the
+ /// pointer compound_map_ptr is stored, so the pointed-to map must outlive this one.
 template < typename T, typename U >
 void add_member(const std::string& name, U T::* mem_ptr, const Compound_Map* compound_map_ptr)
 {
- assert(false); // not currently implemented
- static_assert(std::is_class< U >::value,
- "add_member(name, mem_ptr, compound_map_ptr) overload expects class types only ");
- _members.emplace_back(name, detail::offset_of(mem_ptr), compound_map_ptr);
+ static_assert(detail::mem_type_class< U >::value == 4,
+ "add_member(name, mem_ptr, compound_map_ptr) overload expects class types only");
+ _members.emplace_back(name, detail::offset_of(mem_ptr), compound_map_ptr, sizeof(U));
 }
const std::vector< detail::Compound_Member_Description >& members() const { return _members; }
+ /**
+ * Get list of non-compound member types.
+ * Nested compound members are expanded recursively, so every entry describes one leaf
+ * (numeric or string) member reachable from this map.
+ * @return A list of pairs; first: list of member ptrs followed, outermost member first and
+ * the leaf member last; second: absolute offset, i.e. the sum of the offsets along that list.
+ * The stored pointers refer to descriptions owned by this map and by the nested maps, so the
+ * result is only usable while those maps are alive and unchanged.
+ */
+ typedef std::deque< std::pair< std::deque< const detail::Compound_Member_Description* >,
+ size_t > > member_ptr_list_type;
+ member_ptr_list_type get_member_ptr_list() const
+ {
+ member_ptr_list_type res;
+ for (const auto& e : members())
+ {
+ if (not e.is_compound())
+ {
+ // leaf member: path consists of the member itself
+ member_ptr_list_type::value_type p;
+ p.first = { &e };
+ p.second = e.offset;
+ res.emplace_back(std::move(p));
+ }
+ else
+ {
+ // nested compound: take the leaves of the nested map, prefix each path with
+ // this member and shift each offset by this member's offset
+ auto tmp = e.compound_map_ptr->get_member_ptr_list();
+ for (auto& tmp_p : tmp)
+ {
+ member_ptr_list_type::value_type p;
+ p.first = std::move(tmp_p.first);
+ p.first.push_front(&e);
+ p.second = tmp_p.second + e.offset;
+ res.emplace_back(std::move(p));
+ }
+ }
+ }
+ return res;
+ }
+
+ /**
+ * Produce hdf5 compound datatype for this map.
+ * Nested compound members are built recursively with the same selector and fill setting.
+ * @param compound_size Externally-tracked compound size
+ * @param selector If empty, use all elements; if not empty, use only elements that pass selection.
+ * The selector is only consulted for non-compound members.
+ * @param fill If true, type offsets follow compound map offsets, allowing for gaps;
+ * if false: type offsets are minimal values required to fit members.
+ * @return Holder of the new compound type, or an empty holder (id 0) when no member was selected.
+ * Throws Exception if an hdf5 call fails.
+ */
+ detail::HDF_Object_Holder build_type(
+ size_t compound_size,
+ std::function< bool(const detail::Compound_Member_Description&) > selector = nullptr,
+ bool fill = true) const
+ {
+ //std::clog << "===== build_type (" << (void*)this << ") start" << std::endl;
+ // (name, member type, offset) of every member that made it into the type
+ std::deque< std::tuple< std::string, detail::HDF_Object_Holder, size_t > > stype_id_holder_l;
+ // running total of member sizes; doubles as the next offset when fill is false
+ size_t compressed_size = 0;
+ for (const auto& e : members())
+ {
+ detail::HDF_Object_Holder stype_id_holder;
+ if (selector and not e.is_compound() and not selector(e)) continue;
+ if (not e.is_compound())
+ {
+ stype_id_holder = e.get_type();
+ }
+ else
+ {
+ stype_id_holder = e.compound_map_ptr->build_type(e.compound_size, selector, fill);
+ }
+ // a nested compound with no selected members yields an empty holder and is dropped
+ if (stype_id_holder.id > 0)
+ {
+ stype_id_holder_l.emplace_back(
+ std::string(e.name),
+ std::move(stype_id_holder),
+ fill? e.offset : compressed_size);
+ // checked like every other hdf5 call here: a failing H5Tget_size (returns 0)
+ // would otherwise silently produce overlapping offsets
+ compressed_size += detail::Util::wrap(H5Tget_size, std::get<1>(stype_id_holder_l.back()).id);
+ }
+ }
+ if (stype_id_holder_l.empty())
+ {
+ //std::clog << "===== build_type (" << (void*)this << ") empty" << std::endl;
+ return detail::HDF_Object_Holder();
+ }
+ //std::clog << "===== build_type (" << (void*)this << ") compound size: " << (fill? compound_size : compressed_size) << std::endl;
+ detail::HDF_Object_Holder res(
+ detail::Util::wrap(H5Tcreate, H5T_COMPOUND, fill? compound_size : compressed_size),
+ detail::Util::wrapped_closer(H5Tclose));
+ for (const auto& t : stype_id_holder_l)
+ {
+ //std::clog << "===== build_type (" << (void*)this << ") adding name=\"" << std::get<0>(t) << "\", offset=" << std::get<2>(t) << std::endl;
+ detail::Util::wrap(H5Tinsert, res.id, std::get<0>(t).c_str(), std::get<2>(t), std::get<1>(t).id);
+ }
+ //std::clog << "===== build_type (" << (void*)this << ") end" << std::endl;
+ return res;
+ }
+
+ /**
+ * Build a datatype that addresses one leaf member through a chain of single-member compounds.
+ * The list is walked from its last entry (the leaf, which must be non-compound) to its first:
+ * the leaf's type is taken first, and then for every entry, leaf included, the type built so
+ * far is inserted under the entry's name, at offset 0, into a new compound type of the
+ * leaf type's size.
+ * @param l Path of member descriptions, outermost first, leaf last.
+ * @param id If non-zero, a copy of this type is used as the leaf type instead of the type
+ * derived from the leaf description.
+ * @return Holder of the outermost type; an empty holder if l is empty.
+ * Throws Exception if an hdf5 call fails.
+ */
+ static detail::HDF_Object_Holder build_flat_type(
+ const member_ptr_list_type::value_type::first_type& l, hid_t id = 0)
+ {
+ detail::HDF_Object_Holder res;
+ size_t sz = 0;
+ for (auto it = l.rbegin(); it != l.rend(); ++it)
+ {
+ const detail::Compound_Member_Description& e = **it;
+ // only the leaf (first entry visited) may be non-compound, and only then are res and sz unset
+ assert((it == l.rbegin()) == (not e.is_compound()));
+ assert((it == l.rbegin()) == (res.id == 0));
+ assert((it == l.rbegin()) == (sz == 0));
+ if (it == l.rbegin())
+ {
+ if (id == 0)
+ {
+ res.load(e.get_type());
+ }
+ else
+ {
+ res.load(
+ detail::Util::wrap(H5Tcopy, id),
+ detail::Util::wrapped_closer(H5Tclose));
+ }
+ sz = detail::Util::wrap(H5Tget_size, res.id);
+ }
+ detail::HDF_Object_Holder tmp(
+ detail::Util::wrap(H5Tcreate, H5T_COMPOUND, sz),
+ detail::Util::wrapped_closer(H5Tclose));
+ detail::Util::wrap(H5Tinsert, tmp.id, e.name.c_str(), 0, res.id);
+ // res now holds the new wrapper; the inner type moves to tmp and is released at the end of this iteration
+ std::swap(res, tmp);
+ }
+ return res;
+ }
+
+ /**
+ * Get compound member from an existing compound type.
+ * Starting from a copy of id, each entry of l is looked up by name in the current type and
+ * the current type is replaced by that member's type.
+ * @param id Compound datatype to start from.
+ * @param l Path of member descriptions, outermost first; only their names are used.
+ * @return Holder of the datatype reached at the end of the path, which is asserted not to be compound.
+ * Throws Exception if an hdf5 call fails, which includes a name that cannot be found.
+ */
+ static detail::HDF_Object_Holder get_compound_member(
+ hid_t id, const member_ptr_list_type::value_type::first_type& l)
+ {
+ detail::HDF_Object_Holder res(
+ detail::Util::wrap(H5Tcopy, id),
+ detail::Util::wrapped_closer(H5Tclose));
+ for (auto it = l.begin(); it != l.end(); ++it)
+ {
+ const detail::Compound_Member_Description& e = **it;
+ assert(detail::Util::wrap(H5Tget_class, res.id) == H5T_COMPOUND);
+ unsigned idx = detail::Util::wrap(H5Tget_member_index, res.id, e.name.c_str());
+ detail::HDF_Object_Holder tmp(
+ detail::Util::wrap(H5Tget_member_type, res.id, idx),
+ detail::Util::wrapped_closer(H5Tclose));
+ // descend: res becomes the member type, the enclosing type is released with tmp
+ std::swap(res, tmp);
+ }
+ assert(detail::Util::wrap(H5Tget_class, res.id) != H5T_COMPOUND);
+ return res;
+ }
+
private:
std::vector< detail::Compound_Member_Description > _members;
}; // Compound_Map
@@ -192,377 +766,673 @@ private:
namespace detail
{
-// TempSpec: reading numerics
-template < typename Out_Data_Type, typename Out_Data_Storage >
-struct Extent_Atomic_Reader
+// Open the object called `name` under `grp_id` for reading and record what is needed to read it.
+// If H5Aexists_by_name reports that grp_id has no attribute of that name, the name is opened as a
+// dataset (is_ds is true); otherwise it is opened as an attribute.
+// After construction the members hold: the object, dataspace and file datatype ids, a `reader`
+// callback performing the actual read, the dataspace class and number of elements, and the file
+// datatype class and size (plus whether it is a variable length string).
+// Throws Exception if an hdf5 call fails, if the dataspace is neither SCALAR nor SIMPLE, or if a
+// SIMPLE dataspace does not have exactly one dimension.
+// NOTE(review): the `reader` lambdas use [&] inside the constructor and therefore refer to this
+// very object; a Reader_Base that gets moved would keep a callback pointing at the moved-from
+// object -- confirm instances are never moved.
+struct Reader_Base
 {
- void operator () (const std::string& loc_full_name, Out_Data_Storage& dest,
- const Compound_Map*, hid_t obj_id, hid_t,
- const std::string&, std::function< hid_t(hid_t) >,
- const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn)
+ Reader_Base(hid_t grp_id, const std::string& name)
 {
- hid_t mem_type_id = get_mem_type< Out_Data_Type >::id();
- assert(mem_type_id != -1);
- int status = read_fcn(obj_id, mem_type_id, static_cast< void* >(dest.data()));
- if (status < 0) throw Exception(loc_full_name + ": error in " + read_fcn_name);
+ // status == 0: no attribute with this name, so treat the name as a dataset
+ int status = Util::wrap(H5Aexists_by_name, grp_id, ".", name.c_str(), H5P_DEFAULT);
+ is_ds = status == 0;
+ if (is_ds)
+ {
+ obj_id_holder.load(
+ Util::wrap(H5Dopen, grp_id, name.c_str(), H5P_DEFAULT),
+ Util::wrapped_closer(H5Dclose));
+ dspace_id_holder.load(
+ Util::wrap(H5Dget_space, obj_id_holder.id),
+ Util::wrapped_closer(H5Sclose));
+ file_dtype_id_holder.load(
+ Util::wrap(H5Dget_type, obj_id_holder.id),
+ Util::wrapped_closer(H5Tclose));
+ reader = [&] (hid_t mem_dtype_id, void* dest) {
+ return Util::wrap(H5Dread, obj_id_holder.id, mem_dtype_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, dest);
+ };
+ }
+ else
+ {
+ obj_id_holder.load(
+ Util::wrap(H5Aopen, grp_id, name.c_str(), H5P_DEFAULT),
+ Util::wrapped_closer(H5Aclose));
+ dspace_id_holder.load(
+ Util::wrap(H5Aget_space, obj_id_holder.id),
+ Util::wrapped_closer(H5Sclose));
+ file_dtype_id_holder.load(
+ Util::wrap(H5Aget_type, obj_id_holder.id),
+ Util::wrapped_closer(H5Tclose));
+ reader = [&] (hid_t mem_dtype_id, void* dest) {
+ return Util::wrap(H5Aread, obj_id_holder.id, mem_dtype_id, dest);
+ };
+ }
+ // dataspace class and size
+ dspace_class = Util::wrap(H5Sget_simple_extent_type, dspace_id_holder.id);
+ if (dspace_class == H5S_SCALAR)
+ {
+ dspace_size = 1;
+ }
+ else if (dspace_class == H5S_SIMPLE)
+ {
+ auto ndims = Util::wrap(H5Sget_simple_extent_ndims, dspace_id_holder.id);
+ if (ndims != 1) throw Exception("reading multi-dimensional extents is not supported");
+ // exactly one dimension, so a single hsize_t receives the extent
+ hsize_t tmp;
+ Util::wrap(H5Sget_simple_extent_dims, dspace_id_holder.id, &tmp, nullptr);
+ dspace_size = tmp;
+ }
+ else
+ {
+ throw Exception("reading dataspaces other than SCALAR and SIMPLE is not supported");
+ }
+ // datatype class
+ file_dtype_class = Util::wrap(H5Tget_class, file_dtype_id_holder.id);
+ if (file_dtype_class == H5T_STRING)
+ {
+ file_dtype_is_vlen_str = Util::wrap(H5Tis_variable_str, file_dtype_id_holder.id);
+ }
+ else
+ {
+ file_dtype_is_vlen_str = false;
+ }
+ file_dtype_size = Util::wrap(H5Tget_size, file_dtype_id_holder.id);
 }
-}; // struct Extent_Atomic_Reader
+ // opened dataset or attribute
+ HDF_Object_Holder obj_id_holder;
+ // dataspace of the opened object
+ HDF_Object_Holder dspace_id_holder;
+ // datatype of the opened object as stored in the file
+ HDF_Object_Holder file_dtype_id_holder;
+ // reads the whole object into dest using the given memory datatype (H5Dread or H5Aread)
+ std::function< void(hid_t, void*) > reader;
+ H5S_class_t dspace_class;
+ // number of elements: 1 for SCALAR, the single extent for SIMPLE
+ size_t dspace_size;
+ H5T_class_t file_dtype_class;
+ // only queried for string datatypes; false otherwise
+ htri_t file_dtype_is_vlen_str;
+ size_t file_dtype_size;
+ // true if the object was opened as a dataset, false if as an attribute
+ bool is_ds;
+}; // struct Reader_Base
-// TempSpec: for reading strings
-template < typename Out_Data_Storage >
-struct Extent_Atomic_Reader< std::string, Out_Data_Storage >
+// Read every element of the object opened by a Reader_Base and return the values as strings.
+// When the file datatype is compound, mptr_l_ptr must point to the path of the member to
+// extract; otherwise it must be null (asserted). String values (variable or fixed length) are
+// copied, integer and floating point values are formatted with operator << on an ostringstream.
+// NOTE(review): for any other datatype class no branch below runs and the result is a vector
+// of empty strings -- confirm this silent fallback is intended.
+struct String_reader
 {
- void operator () (const std::string& loc_full_name, Out_Data_Storage& dest,
- const Compound_Map*, hid_t obj_id, hid_t obj_space_id,
- const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn,
- const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn)
+ std::vector< std::string > operator () (
+ Reader_Base& reader_base,
+ const Compound_Map::member_ptr_list_type::value_type::first_type* mptr_l_ptr = nullptr) const
 {
- int status;
- int file_type_id = get_type_fcn(obj_id);
- if (file_type_id < 0) throw Exception(loc_full_name + ": error in " + get_type_fcn_name);
- int is_vlen_str = H5Tis_variable_str(file_type_id);
- if (is_vlen_str < 0) throw Exception(loc_full_name + ": error in H5Tis_variable_str");
- hid_t mem_type_id = H5Tcopy(H5T_C_S1);
- if (mem_type_id < 0) throw Exception(loc_full_name + ": error in H5Tcopy");
- if (is_vlen_str) // stored as variable-length string
+ std::vector< std::string > res(reader_base.dspace_size);
+ assert((mptr_l_ptr != nullptr) == (reader_base.file_dtype_class == H5T_COMPOUND));
+ // file_stype_id: datatype, in the file, of the values actually being read
+ // (the selected member for compounds, the object's own datatype otherwise)
+ HDF_Object_Holder file_stype_id_holder;
+ hid_t file_stype_id = 0;
+ if (reader_base.file_dtype_class == H5T_COMPOUND)
 {
- // compute mem_type
- status = H5Tset_size(mem_type_id, H5T_VARIABLE);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Tset_size(variable)");
- // prepare buffer to receive data
- std::vector< char* > char_p_buff(dest.size(), nullptr);
- // perform the read
- status = read_fcn(obj_id, mem_type_id, static_cast< void* >(char_p_buff.data()));
- if (status < 0) throw Exception(loc_full_name + ": error in " + read_fcn_name);
- // transfer strings to destination
- for (size_t i = 0; i < dest.size(); ++i)
+ file_stype_id_holder = Compound_Map::get_compound_member(
+ reader_base.file_dtype_id_holder.id,
+ *mptr_l_ptr);
+ file_stype_id = file_stype_id_holder.id;
+ }
+ else
+ {
+ file_stype_id = reader_base.file_dtype_id_holder.id;
+ }
+ // turn an atomic memory type into the type handed to the read: for compound members it is
+ // wrapped by build_flat_type along the member path, otherwise it is used as is
+ auto mem_type_wrapper = [&] (HDF_Object_Holder&& id_holder) {
+ HDF_Object_Holder tmp(std::move(id_holder));
+ return (mptr_l_ptr != nullptr
+ ? Compound_Map::build_flat_type(*mptr_l_ptr, tmp.id)
+ : std::move(tmp));
+ };
+ assert(Util::wrap(H5Tget_class, file_stype_id) != H5T_COMPOUND);
+ auto file_stype_class = Util::wrap(H5Tget_class, file_stype_id);
+ HDF_Object_Holder mem_dtype_id_holder;
+ if (file_stype_class == H5T_STRING) // stored as a string
+ {
+ if (Util::wrap(H5Tis_variable_str, file_stype_id)) // stored as a varlen string
+ {
+ // compute mem_type
+ mem_dtype_id_holder = mem_type_wrapper(Util::make_str_type(-1));
+ // prepare buffer to receive data
+ std::vector< char * > charptr_buff(res.size(), nullptr);
+ // perform the read
+ reader_base.reader(mem_dtype_id_holder.id, charptr_buff.data());
+ // transfer strings to destination
+ for (size_t i = 0; i < res.size(); ++i)
+ {
+ if (not charptr_buff[i]) throw Exception("read did not fill buffer");
+ res[i] = charptr_buff[i];
+ }
+ // reclaim memory allocated by libhdf5
+ Util::wrap(H5Dvlen_reclaim, mem_dtype_id_holder.id, reader_base.dspace_id_holder.id,
+ H5P_DEFAULT, charptr_buff.data());
+ }
+ else // stored as a fixlen string
+ {
+ // compute mem_type
+ // (one byte larger than the file type; the buffer below starts out zero-filled)
+ size_t file_stype_size = Util::wrap(H5Tget_size, file_stype_id);
+ mem_dtype_id_holder = mem_type_wrapper(Util::make_str_type(file_stype_size + 1));
+ // prepare buffer to receive data
+ std::vector< char > char_buff(res.size() * (file_stype_size + 1), '\0');
+ // perform the read
+ reader_base.reader(mem_dtype_id_holder.id, char_buff.data());
+ // transfer strings to destination
+ for (size_t i = 0; i < res.size(); ++i)
+ {
+ res[i] = std::string(&char_buff[i * (file_stype_size + 1)], file_stype_size);
+ // trim trailing '\0'-s
+ while (not res[i].empty() and res[i].back() == '\0')
+ {
+ res[i].resize(res[i].size() - 1);
+ }
+ }
+ }
+ }
+ else if (file_stype_class == H5T_INTEGER) // stored as an integer
+ {
+ if (Util::wrap(H5Tget_sign, file_stype_id) == H5T_SGN_NONE) // stored as an unsigned integer
+ {
+ // compute mem_type
+ mem_dtype_id_holder = mem_type_wrapper(
+ HDF_Object_Holder(get_mem_type< unsigned long long >::id(), nullptr));
+ // prepare buffer to read data
+ std::vector< unsigned long long > ull_buff(res.size());
+ // perform the read
+ reader_base.reader(mem_dtype_id_holder.id, ull_buff.data());
+ // transfer to destination
+ for (size_t i = 0; i < res.size(); ++i)
+ {
+ std::ostringstream oss;
+ oss << ull_buff[i];
+ res[i] = oss.str();
+ }
+ }
+ else // stored as a signed integer
 {
- if (not char_p_buff[i]) throw Exception(loc_full_name + ": " + read_fcn_name + " did not fill buffer");
- dest[i] = char_p_buff[i];
+ // compute mem_type
+ mem_dtype_id_holder = mem_type_wrapper(
+ HDF_Object_Holder(get_mem_type< long long >::id(), nullptr));
+ // prepare buffer to read data
+ std::vector< long long > ll_buff(res.size());
+ // perform the read
+ reader_base.reader(mem_dtype_id_holder.id, ll_buff.data());
+ // transfer to destination
+ for (size_t i = 0; i < res.size(); ++i)
+ {
+ std::ostringstream oss;
+ oss << ll_buff[i];
+ res[i] = oss.str();
+ }
 }
- // reclaim memory allocated by libhdf5
- status = H5Dvlen_reclaim(mem_type_id, obj_space_id, H5P_DEFAULT, char_p_buff.data());
- if (status < 0) throw Exception(loc_full_name + ": error in H5Dvlen_reclaim");
 }
- else // stored as fixed-length string
+ else if (file_stype_class == H5T_FLOAT) // stored as a float
 {
 // compute mem_type
- size_t sz = H5Tget_size(file_type_id);
- if (sz == 0) throw Exception(loc_full_name + ": H5Tget_size returned 0; is this an error?!");
- status = H5Tset_size(mem_type_id, sz + 1);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Tset_size(fixed)");
- // prepare buffer to receieve data
- std::vector< char > char_buff(dest.size() * (sz + 1));
+ mem_dtype_id_holder = mem_type_wrapper(
+ HDF_Object_Holder(get_mem_type< double >::id(), nullptr));
+ // prepare buffer to read data
+ std::vector< double > d_buff(res.size());
 // perform the read
- status = read_fcn(obj_id, mem_type_id, static_cast< void* >(char_buff.data()));
- if (status < 0) throw Exception(loc_full_name + ": error in " + read_fcn_name);
- // transfer strings to destination
- for (size_t i = 0; i < dest.size(); ++i)
+ reader_base.reader(mem_dtype_id_holder.id, d_buff.data());
+ // transfer to destination
+ for (size_t i = 0; i < res.size(); ++i)
 {
- dest[i] = std::string(&char_buff[i * (sz + 1)], sz);
+ std::ostringstream oss;
+ oss << d_buff[i];
+ res[i] = oss.str();
 }
 }
- status = H5Tclose(mem_type_id);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(mem_type_id)");
- status = H5Tclose(file_type_id);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(file_type_id)");
+ return res;
 }
-}; // struct Extent_Atomic_Reader< std::string >
+};
-template < typename Out_Data_Type, typename Out_Data_Storage >
-struct Extent_Compound_Reader
+// Reader_helper
+// Branch on memory type classes
+template < int, typename >
+struct Reader_helper;
+// numeric
+template < typename Data_Type >
+struct Reader_helper< 1, Data_Type >
{
- void operator () (const std::string& loc_full_name, Out_Data_Storage& dest,
- const Compound_Map* compound_map_ptr, hid_t obj_id, hid_t,
- const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn,
- const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn)
+ void operator () (Reader_Base& reader_base, Data_Type * out) const
{
- int status;
- assert(compound_map_ptr);
- hid_t file_type_id = get_type_fcn(obj_id);
- if (file_type_id < 0) throw Exception(loc_full_name + ": error in " + get_type_fcn_name);
- H5T_class_t file_type_class = H5Tget_class(file_type_id);
- if (file_type_class == H5T_NO_CLASS) throw Exception(loc_full_name + ": error in H5Tget_class(file_type)");
- if (file_type_class != H5T_COMPOUND) throw Exception(loc_full_name + ": expected H5T_COMPOUND datatype");
-
- // pass 1
- // read numeric and char_array members only
- hid_t mem_type_id = H5Tcreate(H5T_COMPOUND, sizeof(Out_Data_Type));
- std::vector< hid_t > mem_stype_id_v;
- for (const auto& e : compound_map_ptr->members())
- {
- assert(not e.is_compound()); // not implemented yet
- if (e.is_string()) continue;
- int file_stype_idx = H5Tget_member_index(file_type_id, e.name.c_str());
- if (file_stype_idx < 0) throw Exception(loc_full_name + ": missing member \"" + e.name + "\"");
- hid_t file_stype_id = H5Tget_member_type(file_type_id, file_stype_idx);
- if (file_stype_id < 0) throw Exception(loc_full_name + ": error in H5Tget_member_type");
- H5T_class_t file_stype_class = H5Tget_class(file_stype_id);
- if (file_stype_class == H5T_NO_CLASS) throw Exception(loc_full_name + ": error in H5Tget_class(file_stype)");
- if (e.is_numeric())
+ assert(std::is_integral< Data_Type >::value or std::is_floating_point< Data_Type >::value);
+ hid_t mem_dtype_id = get_mem_type< Data_Type >::id();
+ reader_base.reader(mem_dtype_id, out);
+ }
+};
+// char array
+// Reads into fixed-size character arrays (Data_Type must be indexable, e.g.
+// std::array< char, N >). A fixed-length string in the file is read directly using a
+// string memory type of sizeof(Data_Type) bytes; anything else is first read as
+// strings by String_reader, then copied in, truncated to sizeof(Data_Type) - 1
+// characters and zero-padded.
+template < typename Data_Type >
+struct Reader_helper< 2, Data_Type >
+{
+    void operator () (Reader_Base& reader_base, Data_Type * out) const
+    {
+        const bool direct_read =
+            reader_base.file_dtype_class == H5T_STRING
+            and not reader_base.file_dtype_is_vlen_str;
+        if (direct_read)
+        {
+            HDF_Object_Holder str_dtype_holder(Util::make_str_type(sizeof(Data_Type)));
+            reader_base.reader(str_dtype_holder.id, out);
+            return;
+        }
+        // conversion needed: go through the string representation
+        auto strings = String_reader()(reader_base);
+        const size_t max_chars = sizeof(Data_Type) - 1; // keep room for the terminating '\0'
+        for (size_t k = 0; k != strings.size(); ++k)
+        {
+            auto dest = &out[k][0];
+            std::memset(dest, '\0', sizeof(Data_Type));
+            std::memcpy(dest, strings[k].data(), std::min(strings[k].size(), max_chars));
+        }
+    }
+};
+// string
+// Reads every element as a std::string through String_reader and hands the results
+// over to the destination array by swapping, so no character data is copied twice.
+template < typename Data_Type >
+struct Reader_helper< 3, Data_Type >
+{
+    void operator () (Reader_Base& reader_base, Data_Type * out) const
+    {
+        static_assert(std::is_same< Data_Type, std::string >::value, "Data_Type not std::string");
+        auto strings = String_reader()(reader_base);
+        const size_t n_strings = strings.size();
+        for (size_t k = 0; k != n_strings; ++k)
+        {
+            out[k].swap(strings[k]);
+        }
+    }
+};
+// compound
+template < typename Data_Type >
+struct Reader_helper< 4, Data_Type >
+{
+ void operator () (Reader_Base& reader_base, Data_Type * out, const Compound_Map & cm) const
+ {
+ // get member list
+ auto mptr_l = cm.get_member_ptr_list();
+ // go through members, check they exist, decide if they need conversion
+ std::set< const detail::Compound_Member_Description * > conversion_needed_s;
+ for (const auto& p : mptr_l)
+ {
+ HDF_Object_Holder file_stype_id_holder(
+ Compound_Map::get_compound_member(reader_base.file_dtype_id_holder.id, p.first));
+ if (p.first.back()->is_string()
+ or (p.first.back()->is_char_array()
+ and Util::wrap(H5Tget_class, file_stype_id_holder.id) == H5T_STRING
+ and Util::wrap(H5Tis_variable_str, file_stype_id_holder.id)))
{
- if (file_stype_class != H5T_INTEGER and file_stype_class != H5T_FLOAT)
- throw Exception(loc_full_name + ": member \"" + e.name + "\" is numeric, but file_stype is not numeric");
- status = H5Tinsert(mem_type_id, e.name.c_str(), e.offset, e.numeric_type_id);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Tinsert(\"" + e.name + "\")");
+ conversion_needed_s.insert(p.first.back());
}
+ }
+ // read all members that do not need conversion all-at-once
+ auto implicit_conversion = [&] (const detail::Compound_Member_Description& e) {
+ return conversion_needed_s.count(&e) == 0;
+ };
+ HDF_Object_Holder mem_dtype_id_holder(cm.build_type(sizeof(Data_Type), implicit_conversion, true));
+ if (mem_dtype_id_holder.id > 0)
+ {
+ reader_base.reader(mem_dtype_id_holder.id, out);
+ }
+ // read members that need conversion one-by-one
+ for (const auto& p : mptr_l)
+ {
+ const detail::Compound_Member_Description& e = *p.first.back();
+ if (implicit_conversion(e)) continue;
+ // read member into vector of strings
+ auto tmp = String_reader()(reader_base, &p.first);
+ assert(tmp.size() == reader_base.dspace_size);
+ // write it to destination
+ assert(e.is_char_array() or e.is_string());
if (e.is_char_array())
{
- if (file_stype_class != H5T_STRING)
- throw Exception(loc_full_name + ": member \"" + e.name + "\" is char_array, but file_stype is not H5T_STRING");
- status = H5Tis_variable_str(file_stype_id);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Tis_variable_str(\"" + e.name + "\")");
- if (status) throw Exception(loc_full_name + ": member \"" + e.name + "\" is a char_array, but file_stype is a variable len string");
- //size_t file_stype_size = H5Tget_size(file_stype_id);
- //if (file_stype_size == 0) throw Exception(loc_full_name + ": H5Tget_size(\"" + e.name + "\") returned 0");
- hid_t mem_stype_id = H5Tcopy(H5T_C_S1);
- if (mem_stype_id < 0) throw Exception(loc_full_name + ": member \"" + e.name + "\": error in H5Tcopy");
- status = H5Tset_size(mem_stype_id, e.char_array_size);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Tset_size(\"" + e.name + "\")");
- status = H5Tinsert(mem_type_id, e.name.c_str(), e.offset, mem_stype_id);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Tinsert(\"" + e.name + "\")");
- mem_stype_id_v.push_back(mem_stype_id);
+ for (size_t i = 0; i < tmp.size(); ++i)
+ {
+ std::memset(reinterpret_cast< char * >(&out[i]) + p.second, '\0', e.char_array_size);
+ std::memcpy(reinterpret_cast< char * >(&out[i]) + p.second,
+ tmp[i].data(),
+ std::min(tmp[i].size(), e.char_array_size - 1));
+ }
}
- status = H5Tclose(file_stype_id);
- if (status < 0) throw Exception(loc_full_name + ": member \"" + e.name + "\": error in H5Tclose(file_stype)");
- }
- // perform the actual read
- status = read_fcn(obj_id, mem_type_id, static_cast< void* >(dest.data()));
- if (status < 0) throw Exception(loc_full_name + ": pass 1: error in " + read_fcn_name);
- // release the memory types
- for (const auto& mem_stype_id : mem_stype_id_v)
- {
- status = H5Tclose(mem_stype_id);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(mem_stype)");
- }
- mem_stype_id_v.clear();
- status = H5Tclose(mem_type_id);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(mem_type)");
-
- // pass 2
- // read strings
- for (const auto& e : compound_map_ptr->members())
- {
- assert(not e.is_compound()); // not implemented yet
- if (e.is_numeric() or e.is_char_array()) continue;
- //TODO
- assert(false);
- }
-
- status = H5Tclose(file_type_id);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(file_type_id)");
- }
-}; //struct Extent_Compound_Reader
-
-// TempSpec: read extent of atomic types
-template < typename Out_Data_Type, typename Out_Data_Storage, bool = true >
-struct Extent_Reader_as_atomic
- : Extent_Atomic_Reader< Out_Data_Type, Out_Data_Storage >
-{};
-
-// TempSpec: read extent of compound types
-template < typename Out_Data_Type, typename Out_Data_Storage >
-struct Extent_Reader_as_atomic< Out_Data_Type, Out_Data_Storage, false >
- : Extent_Compound_Reader< Out_Data_Type, Out_Data_Storage >
-{};
-
-// branch on atomic/compound destination
-template < typename Out_Data_Type, typename Out_Data_Storage >
-struct Extent_Reader
- : public Extent_Reader_as_atomic< Out_Data_Type, Out_Data_Storage, read_as_atomic< Out_Data_Type >::value >
-{};
-
-template < typename, typename, bool >
-struct Object_Reader_impl;
-
-// TempSpec: reading scalars
-template < typename Out_Data_Type >
-struct Object_Reader_impl< Out_Data_Type, Out_Data_Type, true >
-{
- void operator () (const std::string& loc_full_name, Out_Data_Type& dest,
- const Compound_Map* compound_map_ptr, hid_t obj_id, hid_t obj_space_id,
- const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn,
- const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn)
- {
- H5S_class_t obj_class_t = H5Sget_simple_extent_type(obj_space_id);
- if (obj_class_t == H5S_NO_CLASS) throw Exception(loc_full_name + ": error in H5Sget_simple_extent_type");
- if (obj_class_t != H5S_SCALAR)
- throw Exception(loc_full_name + ": reading as scalar, but dataspace not H5S_SCALAR");
- std::vector< Out_Data_Type > tmp(1);
- Extent_Reader< Out_Data_Type, std::vector< Out_Data_Type > >()(
- loc_full_name, tmp, compound_map_ptr, obj_id, obj_space_id,
- get_type_fcn_name, get_type_fcn,
- read_fcn_name, read_fcn);
- dest = std::move(tmp[0]);
+ else if (e.is_string())
+ {
+ for (size_t i = 0; i < tmp.size(); ++i)
+ {
+ std::swap(
+ *reinterpret_cast< std::string * >(reinterpret_cast< char * >(&out[i]) + p.second),
+ tmp[i]);
+ }
+ }
+ }
}
};
-// TempSpec: reading vectors
-template < typename Out_Data_Type, typename Out_Data_Storage >
-struct Object_Reader_impl< Out_Data_Type, Out_Data_Storage, false >
-{
- void operator () (const std::string& loc_full_name, Out_Data_Storage& dest,
- const Compound_Map* compound_map_ptr, hid_t obj_id, hid_t obj_space_id,
- const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn,
- const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn)
- {
- H5S_class_t obj_class_t = H5Sget_simple_extent_type(obj_space_id);
- if (obj_class_t == H5S_NO_CLASS) throw Exception(loc_full_name + ": error in H5Sget_simple_extent_type");
- if (obj_class_t != H5S_SIMPLE)
- throw Exception(loc_full_name + ": reading as vector, but dataspace not H5S_SIMPLE");
- int status = H5Sget_simple_extent_dims(obj_space_id, nullptr, nullptr);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Sget_simple_extent_dims");
- if (status != 1) throw Exception(loc_full_name + ": expected extent of dimension 1");
- hsize_t sz;
- H5Sget_simple_extent_dims(obj_space_id, &sz, nullptr);
- dest.clear();
- dest.resize(sz);
- Extent_Reader< Out_Data_Type, Out_Data_Storage >()(
- loc_full_name, dest, compound_map_ptr, obj_id, obj_space_id,
- get_type_fcn_name, get_type_fcn,
- read_fcn_name, read_fcn);
+// Reader
+// Reads a single value. The dataspace must normally hold exactly one element.
+// The one exception: a std::string destination may be filled from an extent of
+// fixed-length strings of size 1, which is read element-wise and concatenated.
+// Any other combination throws.
+template < typename Data_Type >
+struct Reader
+{
+    template < typename ...Args >
+    void operator () (hid_t grp_id, const std::string& name,
+                      Data_Type & out, Args&& ...args) const
+    {
+        Reader_Base reader_base(grp_id, name);
+        if (reader_base.dspace_size == 1)
+        {
+            typedef Reader_helper< mem_type_class< Data_Type >::value, Data_Type > scalar_helper;
+            scalar_helper()(reader_base, &out, std::forward< Args >(args)...);
+            return;
+        }
+        const bool char_extent_to_string =
+            std::is_same< Data_Type, std::string >::value
+            and reader_base.file_dtype_class == H5T_STRING
+            and not reader_base.file_dtype_is_vlen_str
+            and reader_base.file_dtype_size == 1;
+        if (not char_extent_to_string)
+        {
+            throw Exception("reading scalar, but dataspace size is not 1");
+        }
+        typedef std::array< char, 1 > one_char;
+        std::vector< one_char > char_buff(reader_base.dspace_size);
+        Reader_helper< 2, one_char >()(
+            reader_base, char_buff.data(), std::forward< Args >(args)...);
+        // this point is only reached when Data_Type is std::string; the cast merely keeps
+        // the statement well-formed when the template is instantiated for other types
+        reinterpret_cast< std::string& >(out).assign(&char_buff[0][0], reader_base.dspace_size);
+    }
+};
+// Reader specialization for std::vector destinations:
+// clears out, resizes it to the dataspace size reported by Reader_Base, then lets the
+// Reader_helper selected by mem_type_class< Data_Type > fill out.data().
+// Extra arguments (e.g. a Compound_Map for compound types) are forwarded to the helper.
+template < typename Data_Type >
+struct Reader< std::vector< Data_Type > >
+{
+ template < typename ...Args >
+ void operator () (hid_t grp_id, const std::string& name,
+ std::vector< Data_Type> & out, Args&& ...args) const
+ {
+ Reader_Base reader_base(grp_id, name);
+ out.clear();
+ out.resize(reader_base.dspace_size);
+ Reader_helper< mem_type_class< Data_Type >::value, Data_Type >()(
+ reader_base, out.data(), std::forward< Args >(args)...);
}
};
-// TempMetaFunc: split scalar & vector reading branches
-template < typename Out_Data_Type, typename Out_Data_Storage >
-struct Object_Reader
- : public Object_Reader_impl< Out_Data_Type, Out_Data_Storage, std::is_same< Out_Data_Type, Out_Data_Storage >::value > {};
-
-// open object and object space, then delegate
-template < typename Out_Data_Type, typename Out_Data_Storage >
-void read_obj_helper(const std::string& loc_full_name, Out_Data_Storage& dest, const Compound_Map* compound_map_ptr,
- const std::string& open_fcn_name, std::function< hid_t(void) > open_fcn,
- const std::string& close_fcn_name, std::function< herr_t(hid_t) > close_fcn,
- const std::string& get_space_fcn_name, std::function< hid_t(hid_t) > get_space_fcn,
- const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn,
- const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn)
-{
- int status;
- // open object
- hid_t obj_id = open_fcn();
- if (obj_id < 0) throw Exception(loc_full_name + ": error in " + open_fcn_name);
- // open object space, check reading ode matches storage mode (scalar/vector)
- hid_t obj_space_id = get_space_fcn(obj_id);
- if (obj_space_id < 0) throw Exception(loc_full_name + ": error in " + get_space_fcn_name);
- // read object
- Object_Reader< Out_Data_Type, Out_Data_Storage >()(
- loc_full_name, dest, compound_map_ptr, obj_id, obj_space_id,
- get_type_fcn_name, get_type_fcn,
- read_fcn_name, read_fcn);
- // close object space & object
- status = H5Sclose(obj_space_id);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Sclose");
- status = close_fcn(obj_id);
- if (status < 0) throw Exception(loc_full_name + ": error in " + close_fcn_name);
-}
+// Writer_helper_base
+// Shared by all Writer_helper specializations: creates the target object under grp_id
+// (a dataset when as_ds is true, an attribute otherwise) and writes a buffer to it.
+struct Writer_helper_base
+{
+    // Create dataset/attribute loc_name with the given dataspace and file datatype.
+    // The returned holder is loaded with the matching closer (H5Dclose / H5Aclose).
+    static HDF_Object_Holder create(hid_t grp_id, const std::string& loc_name, bool as_ds,
+                                    hid_t dspace_id, hid_t file_dtype_id)
+    {
+        HDF_Object_Holder new_obj_holder;
+        if (not as_ds)
+        {
+            new_obj_holder.load(
+                Util::wrap(H5Acreate2, grp_id, loc_name.c_str(), file_dtype_id, dspace_id,
+                           H5P_DEFAULT, H5P_DEFAULT),
+                Util::wrapped_closer(H5Aclose));
+            return new_obj_holder;
+        }
+        new_obj_holder.load(
+            Util::wrap(H5Dcreate2, grp_id, loc_name.c_str(), file_dtype_id, dspace_id,
+                       H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT),
+            Util::wrapped_closer(H5Dclose));
+        return new_obj_holder;
+    }
+    // Write the buffer in, described by mem_dtype_id, to an already created object.
+    // Datasets are written in full (H5S_ALL for both memory and file selections).
+    static void write(hid_t obj_id, bool as_ds, hid_t mem_dtype_id, const void* in)
+    {
+        if (not as_ds)
+        {
+            Util::wrap(H5Awrite, obj_id, mem_dtype_id, in);
+            return;
+        }
+        Util::wrap(H5Dwrite, obj_id, mem_dtype_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, in);
+    }
+    // Convenience: create() followed by write() on the freshly created object.
+    static void create_and_write(hid_t grp_id, const std::string& loc_name, bool as_ds,
+                                 hid_t dspace_id, hid_t mem_dtype_id, hid_t file_dtype_id,
+                                 const void* in)
+    {
+        HDF_Object_Holder new_obj_holder(create(grp_id, loc_name, as_ds, dspace_id, file_dtype_id));
+        write(new_obj_holder.id, as_ds, mem_dtype_id, in);
+    }
+}; // struct Writer_helper_base
+
+// Writer_helper
+// Branch on memory type classes
+template < int, typename >
+struct Writer_helper;
+
+// numeric
+// Writes integral / floating point data. The memory datatype is the one get_mem_type
+// reports for In_Data_Type; the file datatype is file_dtype_id, or the memory datatype
+// when file_dtype_id is left at 0. The element count argument is not needed here.
+template < typename In_Data_Type >
+struct Writer_helper< 1, In_Data_Type >
+    : public Writer_helper_base
+{
+    void operator () (hid_t grp_id, const std::string& loc_name, bool as_ds,
+                      hid_t dspace_id, size_t,
+                      const In_Data_Type * in, hid_t file_dtype_id = 0) const
+    {
+        assert(std::is_integral< In_Data_Type >::value or std::is_floating_point< In_Data_Type >::value);
+        const hid_t native_dtype_id = get_mem_type< In_Data_Type >::id();
+        const hid_t stored_dtype_id = (file_dtype_id != 0) ? file_dtype_id : native_dtype_id;
+        Writer_helper_base::create_and_write(
+            grp_id, loc_name, as_ds,
+            dspace_id, native_dtype_id, stored_dtype_id,
+            in);
+    }
+};
-// determine if address is attribute or dataset, then delegate
-template < typename Out_Data_Type, typename Out_Data_Storage >
-void read_addr(hid_t root_id, const std::string& loc_path, const std::string& loc_name,
- Out_Data_Storage& dest, const Compound_Map* compound_map_ptr)
-{
- assert(root_id > 0);
- std::string loc_full_name = loc_path + loc_name;
- // determine if object is an attribute; otherwise, assume it's a dataset
- int status;
- status = H5Aexists_by_name(root_id, loc_path.c_str(), loc_name.c_str(), H5P_DEFAULT);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Aexists_by_name");
- bool is_attr = status > 0;
- if (is_attr)
- {
- read_obj_helper< Out_Data_Type, Out_Data_Storage >(
- loc_full_name, dest, compound_map_ptr,
- "H5Aopen_by_name",
- [&root_id, &loc_path, &loc_name] ()
+// fixed-length string
+template < typename In_Data_Type >
+struct Writer_helper< 2, In_Data_Type >
+ : public Writer_helper_base
+{
+ void operator () (hid_t grp_id, const std::string& loc_name, bool as_ds,
+ hid_t dspace_id, size_t sz,
+ const In_Data_Type * in, hid_t file_dtype_id = 0) const
+ {
+ HDF_Object_Holder mem_dtype_id_holder;
+ HDF_Object_Holder file_dtype_id_holder;
+ std::vector< const char * > charptr_buff;
+ const void * vptr_in = in;
+ if (file_dtype_id >= 0)
+ {
+ mem_dtype_id_holder = Util::make_str_type(sizeof(In_Data_Type));
+ if (file_dtype_id == 0)
{
- return H5Aopen_by_name(root_id, loc_path.c_str(), loc_name.c_str(), H5P_DEFAULT, H5P_DEFAULT);
- },
- "H5Aclose", &H5Aclose,
- "H5Aget_space", &H5Aget_space,
- "H5Aget_type", &H5Aget_type,
- "H5Aread",
- [] (hid_t id, hid_t mem_type_id, void* dest_p)
+ file_dtype_id = mem_dtype_id_holder.id;
+ }
+ else // file_dtype_id > 0
{
- return H5Aread(id, mem_type_id, dest_p);
- });
+ file_dtype_id_holder = Util::make_str_type(file_dtype_id);
+ file_dtype_id = file_dtype_id_holder.id;
+ }
+ }
+ else // file_dtype_id < 0: write as varlen strings
+ {
+ mem_dtype_id_holder = Util::make_str_type(-1);
+ file_dtype_id = mem_dtype_id_holder.id;
+ // prepare array of pointers
+ charptr_buff.resize(sz);
+ for (hsize_t i = 0; i < sz; ++i)
+ {
+ charptr_buff[i] = &in[i][0];
+ }
+ vptr_in = charptr_buff.data();
+ }
+ Writer_helper_base::create_and_write(
+ grp_id, loc_name, as_ds,
+ dspace_id, mem_dtype_id_holder.id, file_dtype_id,
+ vptr_in);
}
- else
+};
+
+// variable-length string
+template <>
+struct Writer_helper< 3, std::string >
+ : public Writer_helper_base
+{
+ void operator () (hid_t grp_id, const std::string& loc_name, bool as_ds,
+ hid_t dspace_id, size_t sz,
+ const std::string * in, hid_t file_dtype_id = -1) const
{
- read_obj_helper< Out_Data_Type, Out_Data_Storage >(
- loc_full_name, dest, compound_map_ptr,
- "H5Dopen",
- [&root_id, &loc_full_name] ()
+ HDF_Object_Holder mem_dtype_id_holder;
+ std::vector< const char * > charptr_buff;
+ std::vector< char > char_buff;
+ const void * vptr_in;
+ if (file_dtype_id == -1) // varlen to varlen
+ {
+ mem_dtype_id_holder = Util::make_str_type(-1);
+ charptr_buff.resize(sz);
+ for (hsize_t i = 0; i < sz; ++i)
{
- return H5Dopen(root_id, loc_full_name.c_str(), H5P_DEFAULT);
- },
- "H5Dclose", &H5Dclose,
- "H5Dget_space", &H5Dget_space,
- "H5Dget_type", &H5Dget_type,
- "H5Dread",
- [] (hid_t id, hid_t mem_type_id, void* dest_p)
+ charptr_buff[i] = in[i].data();
+ }
+ vptr_in = charptr_buff.data();
+ }
+ else // varlen to fixlen
+ {
+ assert(file_dtype_id > 0 or sz == 1); // file_dtype_id == 0 only allowed for single strings
+ size_t slen = file_dtype_id > 0 ? file_dtype_id : in[0].size() + 1;
+ assert(slen <= std::numeric_limits< long >::max());
+ mem_dtype_id_holder = Util::make_str_type(slen);
+ char_buff.resize(sz * slen);
+ for (hsize_t i = 0; i < sz; ++i)
{
- return H5Dread(id, mem_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, dest_p);
- });
+ for (size_t j = 0; j < slen - 1; ++j)
+ {
+ char_buff[i * slen + j] = j < in[i].size()? in[i][j] : '\0';
+ }
+ char_buff[i * slen + slen - 1] = '\0';
+ }
+ vptr_in = char_buff.data();
+ }
+ Writer_helper_base::create_and_write(
+ grp_id, loc_name, as_ds,
+ dspace_id, mem_dtype_id_holder.id, mem_dtype_id_holder.id,
+ vptr_in);
}
-} // read_addr
+};
-// TempSpec: for atomic types
-template < typename Out_Data_Type, bool = true >
-struct Reader_as_atomic
+// compound
+template < typename In_Data_Type >
+struct Writer_helper< 4, In_Data_Type >
+ : public Writer_helper_base
{
- template < typename Out_Data_Storage >
- void operator () (hid_t root_id, const std::string& loc_path, const std::string& loc_name,
- Out_Data_Storage& dest)
+ void operator () (hid_t grp_id, const std::string& loc_name, bool as_ds,
+ hid_t dspace_id, size_t sz,
+ const In_Data_Type * in, const Compound_Map& cm) const
{
- static_assert(can_read< Out_Data_Type >::value,
- "Reader_impl<Out_Data_Type,true>: expected a readable destination");
- static_assert(read_as_atomic< Out_Data_Type >::value,
- "Reader_impl<Out_Data_Type,true>: expected a type readable as atomic");
- read_addr< Out_Data_Type, Out_Data_Storage >(root_id, loc_path, loc_name, dest, nullptr);
+        // Writes an array of sz compound values described by cm:
+        //  1. members writable with implicit conversion (numeric, char array) are written
+        //     in a single call;
+        //  2. std::string members are written one member at a time, as arrays of char*.
+        // Throws Exception if a member needs explicit conversion and the target is an attribute.
+
+        // functor that selects members which can be written with implicit conversion
+        auto implicit_conversion = [] (const detail::Compound_Member_Description& e) {
+            return (e.is_numeric()
+                    or e.is_char_array());
+        };
+        auto mptr_l = cm.get_member_ptr_list();
+        // Members needing explicit conversion are only supported in datasets. Reject the
+        // request before anything is created, so a failed call does not leave a partially
+        // written attribute behind in the file.
+        if (not as_ds)
+        {
+            for (const auto& p : mptr_l)
+            {
+                if (not implicit_conversion(*p.first.back()))
+                {
+                    throw Exception("string in compound is supported in datasets, but not attributes");
+                }
+            }
+        }
+        HDF_Object_Holder obj_id_holder;
+        // create object
+        {
+            // create the file type
+            HDF_Object_Holder file_dtype_id_holder(
+                cm.build_type(sizeof(In_Data_Type), nullptr, false));
+            obj_id_holder = Writer_helper_base::create(
+                grp_id, loc_name, as_ds,
+                dspace_id, file_dtype_id_holder.id);
+        }
+        // write fields which do not need conversion, all-in-one
+        {
+            HDF_Object_Holder mem_dtype_id_holder(
+                cm.build_type(sizeof(In_Data_Type), implicit_conversion, true));
+            // same guard as Reader_helper< 4 >: skip the call when no member was selected
+            // NOTE(review): assumes build_type yields a non-positive id in that case -- confirm
+            if (mem_dtype_id_holder.id > 0)
+            {
+                Writer_helper_base::write(obj_id_holder.id, as_ds, mem_dtype_id_holder.id, in);
+            }
+        }
+        // write fields which need conversion, one-by-one
+        for (const auto& p : mptr_l)
+        {
+            const detail::Compound_Member_Description& e = *p.first.back();
+            if (implicit_conversion(e)) continue;
+            size_t mem_offset = p.second;
+            if (e.is_string())
+            {
+                // prepare memory vector of char*, one per element, pointing into the
+                // std::string found at mem_offset inside each input struct
+                std::vector< const char * > charptr_buff(sz);
+                for (size_t i = 0; i < sz; ++i)
+                {
+                    charptr_buff[i] = reinterpret_cast< const std::string * >(
+                        reinterpret_cast< const char * >(&in[i]) + mem_offset)->data();
+                }
+                // build a memory type that selects this member only
+                HDF_Object_Holder mem_dtype_id_holder(
+                    cm.build_type(sizeof(In_Data_Type),
+                                  [&e] (const detail::Compound_Member_Description& _e) {
+                                      return &_e == &e;
+                                  },
+                                  false));
+                Writer_helper_base::write(obj_id_holder.id, as_ds, mem_dtype_id_holder.id, charptr_buff.data());
+            }
+        }
}
};
-// TempSpec: for compound types
-template < typename Out_Data_Type >
-struct Reader_as_atomic< Out_Data_Type, false >
+// Writer
+// Struct branches on data argument type:
+// if std::vector, it writes a simple extent;
+// if not std::vector, it writes a scalar.
+template < typename In_Data_Type >
+struct Writer
{
- template < typename Out_Data_Storage >
- void operator () (hid_t root_id, const std::string& loc_path, const std::string& loc_name,
- Out_Data_Storage& dest, const Compound_Map* compound_map_ptr)
+ template < typename ...Args >
+ void operator () (hid_t grp_id, const std::string& loc_name, bool as_ds,
+ const In_Data_Type & in, Args&& ...args) const
{
- static_assert(can_read< Out_Data_Type >::value,
- "Reader_impl<Out_Data_Type,false>: expected a readable destination");
- static_assert(not read_as_atomic< Out_Data_Type >::value,
- "Reader_impl<Out_Data_Type,false>: expected a type readable as compound");
- read_addr< Out_Data_Type, Out_Data_Storage >(root_id, loc_path, loc_name, dest, compound_map_ptr);
+ // create dataspace
+ HDF_Object_Holder dspace_id_holder(
+ Util::wrap(H5Screate, H5S_SCALAR),
+ Util::wrapped_closer(H5Sclose));
+ Writer_helper< mem_type_class< In_Data_Type >::value, In_Data_Type >()(
+ grp_id, loc_name, as_ds,
+ dspace_id_holder.id, 1,
+ &in, std::forward< Args >(args)...);
}
};
-template < typename Out_Data_Type >
-struct Reader : public Reader_as_atomic< Out_Data_Type, read_as_atomic< Out_Data_Type >::value >
-{};
+
+// Writer specialization for std::vector: writes a one-dimensional simple extent with
+// one element per vector entry. The vector is expected to be non-empty (asserted).
+// Extra arguments are forwarded to the Writer_helper selected by mem_type_class.
+template < typename In_Data_Type >
+struct Writer< std::vector< In_Data_Type > >
+{
+    template < typename ...Args >
+    void operator () (hid_t grp_id, const std::string& loc_name, bool as_ds,
+                      const std::vector< In_Data_Type > & in, Args&& ...args) const
+    {
+        assert(not in.empty());
+        // 1-d dataspace sized after the input
+        hsize_t n_elem = in.size();
+        HDF_Object_Holder dspace_id_holder(
+            Util::wrap(H5Screate_simple, 1, &n_elem, nullptr),
+            Util::wrapped_closer(H5Sclose));
+        typedef Writer_helper< mem_type_class< In_Data_Type >::value, In_Data_Type > helper_type;
+        helper_type()(
+            grp_id, loc_name, as_ds,
+            dspace_id_holder.id, n_elem,
+            in.data(), std::forward< Args >(args)...);
+    }
+};
} // namespace detail
/// An HDF5 file reader
-class File_Reader
+class File
{
public:
- File_Reader() : _file_id(0) {}
- File_Reader(const std::string& file_name) : _file_id(0) { open(file_name); }
- File_Reader(const File_Reader&) = delete;
- File_Reader& operator = (const File_Reader&) = delete;
- ~File_Reader() { if (is_open()) close(); }
+    /// Construct a closed file object. _rw is initialized as well, so that is_rw()
+    /// never reads an indeterminate value on a default-constructed object.
+    File() : _file_id(0), _rw(false) {}
+ File(const std::string& file_name, bool rw = false) : _file_id(0) { open(file_name, rw); }
+ File(const File&) = delete;
+ File& operator = (const File&) = delete;
+ ~File() { if (is_open()) close(); }
bool is_open() const { return _file_id > 0; }
+ bool is_rw() const { return _rw; }
const std::string& file_name() const { return _file_name; }
- void open(const std::string& file_name)
+ void create(const std::string& file_name, bool truncate = false)
{
assert(not is_open());
_file_name = file_name;
- _file_id = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+ _rw = true;
+ _file_id = H5Fcreate(file_name.c_str(), truncate? H5F_ACC_TRUNC : H5F_ACC_EXCL, H5P_DEFAULT, H5P_DEFAULT);
+ if (not is_open()) throw Exception(_file_name + ": error in H5Fcreate");
+ }
+ void open(const std::string& file_name, bool rw = false)
+ {
+ assert(not is_open());
+ _file_name = file_name;
+ _rw = rw;
+ _file_id = H5Fopen(file_name.c_str(), not rw? H5F_ACC_RDONLY : H5F_ACC_RDWR, H5P_DEFAULT);
if (not is_open()) throw Exception(_file_name + ": error in H5Fopen");
}
void close()
@@ -574,9 +1444,55 @@ public:
_file_id = 0;
_file_name.clear();
}
+    /// Check whether file_name can be opened as an HDF5 file.
+    /// Returns false if the file cannot be opened as a stream, if the stream is in a
+    /// failed state after peeking at its first character, if H5Fis_hdf5 does not
+    /// recognize it, or if H5Fopen fails. Throws Exception only if closing the
+    /// successfully opened file fails.
+    static bool is_valid_file(const std::string& file_name)
+    {
+        {
+            std::ifstream probe(file_name);
+            if (not probe) return false;
+            (void)probe.peek();
+            if (not probe) return false;
+        } // the stream is closed here, before libhdf5 touches the file
+        const auto is_hdf5 = H5Fis_hdf5(file_name.c_str());
+        if (is_hdf5 <= 0) return false;
+        const auto file_id = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT); // error if file is truncated
+        if (file_id < 0) return false;
+        if (H5Fclose(file_id) < 0) throw Exception(file_name + ": error in H5Fclose");
+        return true;
+    }
- /// Determine if address is an attribute or dataset
- bool exists(const std::string& loc_full_name) const
+ /// Return the result of H5Fget_obj_count queried with H5F_OBJ_ALL for both the file
+ /// and the object types, i.e. not restricted to this File object.
+ /// NOTE(review): the library result is narrowed to int, and a negative (error) value
+ /// is passed through unchecked -- confirm callers expect that.
+ static int get_object_count()
+ {
+ return H5Fget_obj_count(H5F_OBJ_ALL, H5F_OBJ_ALL);
+ }
+
+    /// Check if a group exists
+    /// loc_full_name must be an absolute path (starting with '/') and the file must be open.
+    bool group_exists(const std::string& loc_full_name) const
+    {
+        assert(is_open());
+        assert(not loc_full_name.empty() and loc_full_name[0] == '/');
+        // check all path elements exist, except for what is to the right of the last '/'
+        // sets active path
+        std::string parent_path = std::get< 0 >(split_full_name(loc_full_name));
+        return path_exists(parent_path)
+            and check_object_type(loc_full_name, H5O_TYPE_GROUP);
+    }
+    /// Check if a dataset exists
+    /// loc_full_name must be an absolute path (starting with '/') and the file must be open.
+    bool dataset_exists(const std::string& loc_full_name) const
+    {
+        assert(is_open());
+        assert(not loc_full_name.empty() and loc_full_name[0] == '/');
+        // check all path elements exist, except for what is to the right of the last '/'
+        // sets active path
+        std::string parent_path = std::get< 0 >(split_full_name(loc_full_name));
+        return path_exists(parent_path)
+            and check_object_type(loc_full_name, H5O_TYPE_DATASET);
+    }
+ /// Check if attribute exists
+ bool attribute_exists(const std::string& loc_full_name) const
{
assert(is_open());
assert(not loc_full_name.empty() and loc_full_name[0] == '/');
@@ -585,45 +1501,173 @@ public:
std::tie(loc_path, loc_name) = split_full_name(loc_full_name);
int status;
// check all path elements exist, except for what is to the right of the last '/'
- size_t pos = 0;
- while (true)
- {
- ++pos;
- pos = loc_full_name.find('/', pos);
- if (pos == std::string::npos) break;
- std::string tmp = loc_full_name.substr(0, pos);
- status = H5Lexists(_file_id, tmp.c_str(), H5P_DEFAULT);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Lexists");
- if (not status) return false;
- status = H5Oexists_by_name(_file_id, tmp.c_str(), H5P_DEFAULT);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Oexists_by_name");
- if (not status) return false;
- }
+ // sets active path
+ if (not path_exists(loc_path)) return false;
+ // check if target is an attribute
status = H5Aexists_by_name(_file_id, loc_path.c_str(), loc_name.c_str(), H5P_DEFAULT);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Aexists_by_name");
- if (status) return true;
- // not an attribute: try to open as a dataset
- hid_t ds_id = H5Dopen(_file_id, loc_full_name.c_str(), H5P_DEFAULT);
- if (ds_id < 0) return false;
- status = H5Dclose(ds_id);
- if (status < 0) throw Exception(loc_full_name + ": error in H5Dclose");
- return true;
+ if (status < 0) throw Exception("error in H5Aexists_by_name");
+ return status > 0;
+ }
+ /// Check whether the address names an attribute or a dataset.
+ /// Groups are not considered: a path that only names a group yields false.
+ bool exists(const std::string& loc_full_name) const
+ {
+ return attribute_exists(loc_full_name) or dataset_exists(loc_full_name);
+ }
+
/// Read attribute or dataset at address
- template < typename Out_Data_Type, typename ...Args >
- void read(const std::string& loc_full_name, Args&& ...args) const
+ template < typename Data_Storage, typename ...Args >
+ void read(const std::string& loc_full_name, Data_Storage& out, Args&& ...args) const
{
assert(is_open());
assert(not loc_full_name.empty() and loc_full_name[0] == '/');
std::string loc_path;
std::string loc_name;
std::tie(loc_path, loc_name) = split_full_name(loc_full_name);
- detail::Reader< Out_Data_Type >()(_file_id, loc_path, loc_name, std::forward< Args >(args)...);
+ Exception::active_path() = loc_full_name;
+ detail::HDF_Object_Holder grp_id_holder(
+ detail::Util::wrap(H5Oopen, _file_id, loc_path.c_str(), H5P_DEFAULT),
+ detail::Util::wrapped_closer(H5Oclose));
+ detail::Reader< Data_Storage >()(grp_id_holder.id, loc_name,
+ out, std::forward< Args >(args)...);
}
+ /// Write attribute or dataset at address; as_ds selects a dataset (true) or an attribute (false)
+ template < typename In_Data_Storage, typename ...Args >
+ void write(const std::string& loc_full_name, bool as_ds, const In_Data_Storage& in, Args&& ...args) const
+ {
+ assert(is_open());
+ assert(is_rw());
+ assert(not loc_full_name.empty() and loc_full_name[0] == '/');
+ assert(not exists(loc_full_name)); // overwriting an existing attribute/dataset is not supported
+ std::string loc_path;
+ std::string loc_name;
+ std::tie(loc_path, loc_name) = split_full_name(loc_full_name);
+ Exception::active_path() = loc_full_name;
+ detail::HDF_Object_Holder grp_id_holder;
+ std::string grp_path = loc_path != "/"? loc_path.substr(0, loc_path.size() - 1) : "/"; // parent path without its last character (presumably the trailing '/'), root kept as "/"
+ if (group_exists(grp_path) or dataset_exists(grp_path)) // parent object already exists: open it
+ {
+ grp_id_holder.load(
+ detail::Util::wrap(H5Oopen, _file_id, grp_path.c_str(), H5P_DEFAULT),
+ detail::Util::wrapped_closer(H5Oclose));
+ }
+ else // parent is missing: create it as a group, together with any missing intermediate groups
+ {
+ detail::HDF_Object_Holder lcpl_id_holder(
+ detail::Util::wrap(H5Pcreate, H5P_LINK_CREATE),
+ detail::Util::wrapped_closer(H5Pclose));
+ detail::Util::wrap(H5Pset_create_intermediate_group, lcpl_id_holder.id, 1);
+ grp_id_holder.load(
+ detail::Util::wrap(H5Gcreate2, _file_id, grp_path.c_str(), lcpl_id_holder.id, H5P_DEFAULT, H5P_DEFAULT),
+ detail::Util::wrapped_closer(H5Gclose));
+ }
+ detail::Writer< In_Data_Storage >()(grp_id_holder.id, loc_name, as_ds, in, std::forward< Args >(args)...); // the write itself is delegated, relative to the parent object
+ }
+ template < typename In_Data_Storage, typename ...Args >
+ void write_dataset(const std::string& loc_full_name, const In_Data_Storage& in, Args&& ...args) const
+ {
+ write(loc_full_name, true, in, std::forward< Args >(args)...); // as_ds = true: store as a dataset
+ }
+ template < typename In_Data_Storage, typename ...Args >
+ void write_attribute(const std::string& loc_full_name, const In_Data_Storage& in, Args&& ...args) const
+ {
+ write(loc_full_name, false, in, std::forward< Args >(args)...); // as_ds = false: store as an attribute
+ }
+
+ /// Return the names of all links in the given group, one entry per link, in the library's native iteration order
+ std::vector< std::string > list_group(const std::string& group_full_name) const
+ {
+ std::vector< std::string > res;
+ Exception::active_path() = group_full_name;
+ assert(group_exists(group_full_name));
+ detail::HDF_Object_Holder g_id_holder(
+ detail::Util::wrap(H5Gopen2, _file_id, group_full_name.c_str(), H5P_DEFAULT),
+ detail::Util::wrapped_closer(H5Gclose));
+ H5G_info_t g_info;
+ detail::Util::wrap(H5Gget_info, g_id_holder.id, &g_info);
+ res.resize(g_info.nlinks); // one (initially empty) name per link in the group
+ for (unsigned i = 0; i < res.size(); ++i)
+ {
+ // first call, with a null buffer, only reports the length of the i-th name
+ long sz1 = detail::Util::wrap(H5Lget_name_by_idx, _file_id, group_full_name.c_str(),
+ H5_INDEX_NAME, H5_ITER_NATIVE, i, nullptr, 0, H5P_DEFAULT);
+ res[i].resize(sz1);
+ long sz2 = detail::Util::wrap(H5Lget_name_by_idx, _file_id, group_full_name.c_str(),
+ H5_INDEX_NAME, H5_ITER_NATIVE, i, &res[i][0], sz1+1, H5P_DEFAULT); // second call fills the string; sz1+1 presumably leaves room for the terminating NUL
+ if (sz1 != sz2) throw Exception("error in H5Lget_name_by_idx: sz1!=sz2");
+ }
+ return res;
+ } // list_group
+ /// Return the names of all attributes attached to the given object (group or dataset)
+ std::vector< std::string > get_attr_list(const std::string& loc_full_name) const
+ {
+ std::vector< std::string > res;
+ Exception::active_path() = loc_full_name;
+ assert(group_exists(loc_full_name) or dataset_exists(loc_full_name));
+ detail::HDF_Object_Holder id_holder(
+ detail::Util::wrap(H5Oopen, _file_id, loc_full_name.c_str(), H5P_DEFAULT),
+ detail::Util::wrapped_closer(H5Oclose));
+ H5O_info_t info;
+ detail::Util::wrap(H5Oget_info, id_holder.id, &info);
+ // the number of attributes of the object is reported in info.num_attrs
+ for (unsigned i = 0; i < (unsigned)info.num_attrs; ++i)
+ {
+ int name_sz = detail::Util::wrap(H5Aget_name_by_idx, id_holder.id, ".",
+ H5_INDEX_NAME, H5_ITER_NATIVE, i, nullptr, 0, H5P_DEFAULT); // null buffer: only query the name length
+ std::string tmp(name_sz, '\0');
+ detail::Util::wrap(H5Aget_name_by_idx, id_holder.id, ".",
+ H5_INDEX_NAME, H5_ITER_NATIVE, i, &tmp[0], name_sz + 1, H5P_DEFAULT); // name_sz + 1 presumably leaves room for the terminating NUL
+ res.emplace_back(std::move(tmp));
+ }
+ return res;
+ } // get_attr_list
+ /// Return a list of struct field names in the given dataset/attribute (empty if its type is not compound)
+ std::vector< std::string > get_struct_members(const std::string& loc_full_name) const
+ {
+ std::vector< std::string > res;
+ Exception::active_path() = loc_full_name;
+ assert(attribute_exists(loc_full_name) or dataset_exists(loc_full_name));
+ detail::HDF_Object_Holder attr_id_holder;
+ detail::HDF_Object_Holder ds_id_holder;
+ detail::HDF_Object_Holder type_id_holder;
+ if (attribute_exists(loc_full_name)) // target is an attribute: take the type from the attribute
+ {
+ std::string loc_path;
+ std::string loc_name;
+ std::tie(loc_path, loc_name) = split_full_name(loc_full_name);
+ attr_id_holder.load(
+ detail::Util::wrap(H5Aopen_by_name, _file_id, loc_path.c_str(), loc_name.c_str(),
+ H5P_DEFAULT, H5P_DEFAULT),
+ detail::Util::wrapped_closer(H5Aclose));
+ type_id_holder.load(
+ detail::Util::wrap(H5Aget_type, attr_id_holder.id),
+ detail::Util::wrapped_closer(H5Tclose));
+ }
+ else // otherwise the target is treated as a dataset
+ {
+ ds_id_holder.load(
+ detail::Util::wrap(H5Oopen, _file_id, loc_full_name.c_str(), H5P_DEFAULT),
+ detail::Util::wrapped_closer(H5Oclose));
+ type_id_holder.load(
+ detail::Util::wrap(H5Dget_type, ds_id_holder.id),
+ detail::Util::wrapped_closer(H5Tclose));
+ }
+ if (detail::Util::wrap(H5Tget_class, type_id_holder.id) == H5T_COMPOUND)
+ {
+ // type is indeed a struct: collect the name of each member, in declaration order
+ int nmem = detail::Util::wrap(H5Tget_nmembers, type_id_holder.id);
+ for (int i = 0; i < nmem; ++i)
+ {
+ char* s = detail::Util::wrap(H5Tget_member_name, type_id_holder.id, i);
+ res.emplace_back(s);
+ H5free_memory(s); // the buffer was allocated by the HDF5 library, so it must be released by the library, not by free()
+ }
+ }
+ return res;
+ } // get_struct_members
private:
std::string _file_name;
hid_t _file_id;
+ bool _rw;
/// Split a full name into path and name
static std::pair< std::string, std::string > split_full_name(const std::string& full_name)
@@ -633,7 +1677,57 @@ private:
std::string name = last_slash_pos != std::string::npos? full_name.substr(last_slash_pos + 1) : full_name;
return std::make_pair(path, name);
} // split_full_name
-}; // class File_Reader
+
+ /// Determine if a path to an element exists: every '/'-terminated prefix of the path must name an existing group
+ bool path_exists(const std::string& full_path_name) const
+ {
+ assert(is_open());
+ assert(not full_path_name.empty()
+ and full_path_name[0] == '/'
+ and full_path_name[full_path_name.size() - 1] == '/');
+ Exception::active_path() = full_path_name;
+ // check all path elements exist, except for what is to the right of the last '/'
+ size_t pos = 0;
+ while (true)
+ {
+ ++pos; // step past the '/' found in the previous iteration (initially the leading one)
+ pos = full_path_name.find('/', pos);
+ if (pos == std::string::npos) break;
+ std::string tmp = full_path_name.substr(0, pos); // prefix up to, but not including, this '/'
+ // check link exists
+ if (not detail::Util::wrap(H5Lexists, _file_id, tmp.c_str(), H5P_DEFAULT)) return false;
+ // check object exists
+ if (not detail::Util::wrap(H5Oexists_by_name, _file_id, tmp.c_str(), H5P_DEFAULT)) return false;
+ // open object in order to check type
+ detail::HDF_Object_Holder o_id_holder(
+ detail::Util::wrap(H5Oopen, _file_id, tmp.c_str(), H5P_DEFAULT),
+ detail::Util::wrapped_closer(H5Oclose));
+ // check object is a group
+ H5O_info_t o_info;
+ detail::Util::wrap(H5Oget_info, o_id_holder.id, &o_info);
+ if (o_info.type != H5O_TYPE_GROUP) return false;
+ }
+ return true;
+ } // path_exists()
+
+ /// Check if an object exists at the given location and has the given type
+ bool check_object_type(const std::string& loc_full_name, H5O_type_t type_id) const
+ {
+ // check link exists (skipped for the root "/")
+ if (loc_full_name != "/"
+ and not detail::Util::wrap(H5Lexists, _file_id, loc_full_name.c_str(), H5P_DEFAULT)) return false;
+ // check object exists
+ if (not detail::Util::wrap(H5Oexists_by_name, _file_id, loc_full_name.c_str(), H5P_DEFAULT)) return false;
+ // open object in order to check type
+ detail::HDF_Object_Holder o_id_holder(
+ detail::Util::wrap(H5Oopen, _file_id, loc_full_name.c_str(), H5P_DEFAULT),
+ detail::Util::wrapped_closer(H5Oclose));
+ // check object has the requested type
+ H5O_info_t o_info;
+ detail::Util::wrap(H5Oget_info, o_id_holder.id, &o_info);
+ return o_info.type == type_id;
+ }
+}; // class File
} // namespace hdf5_tools
diff --git a/src/tmp.cpp b/src/tmp.cpp
new file mode 100644
index 0000000..fb9a363
--- /dev/null
+++ b/src/tmp.cpp
@@ -0,0 +1,207 @@
+#include <cassert>
+#include <exception>
+#include <functional>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+#include <deque>
+
+#include <hdf5.h>
+
+using namespace std;
+
+template < typename T, typename U >
+std::size_t offset_of(U T::* mem_ptr)
+{
+ return reinterpret_cast< std::size_t >(&(((T*)0)->*mem_ptr)); // address of the member in an object placed at address 0, i.e. the member's byte offset within T
+}
+
+struct A // record type used by main() to build HDF5 compound types
+{
+ int val_1;
+ unsigned val_2;
+ float val_3;
+ int val_4; // never inserted into any compound type in main()
+ string val_5; // stored by main() as a variable-length string member
+};
+
+int main(int argc, char * argv[])
+{
+ if (argc != 2)
+ {
+ cerr << "use: " << argv[0] << " <file>" << endl;
+ exit(EXIT_FAILURE);
+ }
+ //
+ // create file, truncating it if it already exists (H5F_ACC_TRUNC)
+ //
+ auto file_id = H5Fcreate(argv[1], H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
+ assert(file_id > 0);
+ auto scalar_space_id = H5Screate(H5S_SCALAR);
+ assert(scalar_space_id > 0);
+ auto lcpl_id = H5Pcreate(H5P_LINK_CREATE);
+ H5Pset_create_intermediate_group(lcpl_id, 1); // lets the H5Gcreate2/H5Dcreate2 calls below create missing parent groups
+
+ //
+ // write numeric scalar attribute
+ //
+ // create group that will hold the attribute
+ auto grp_id = H5Gcreate2(file_id, "/Group_1/Subgroup_1_1", lcpl_id, H5P_DEFAULT, H5P_DEFAULT);
+ assert(grp_id > 0);
+ auto attr1_id = H5Acreate2(grp_id, "Attribute_1_1_1", H5T_NATIVE_INT, scalar_space_id,
+ H5P_DEFAULT, H5P_DEFAULT);
+ assert(attr1_id > 0);
+ int i = 42;
+ auto status = H5Awrite(attr1_id, H5T_NATIVE_INT, &i);
+ assert(status >= 0);
+ H5Gclose(grp_id);
+ H5Aclose(attr1_id);
+
+ //
+ // write numeric vector dataset
+ //
+ {
+ vector< float > v = { 1.0, 2.0, 3.0 };
+ hsize_t v_size = v.size();
+ auto v_space_id = H5Screate_simple(1, &v_size, nullptr);
+ auto ds1_id = H5Dcreate2(file_id, "/Group_2/Subgroup_2_1/Dataset_2_1_1", H5T_NATIVE_FLOAT, v_space_id,
+ lcpl_id, H5P_DEFAULT, H5P_DEFAULT);
+ status = H5Dwrite(ds1_id, H5T_NATIVE_FLOAT, H5S_ALL, H5S_ALL, H5P_DEFAULT, v.data());
+ assert(status >= 0);
+ H5Dclose(ds1_id);
+ H5Sclose(v_space_id);
+ }
+
+ //
+ // write compound scalar dataset (members val_2, val_3, val_1, inserted in that order)
+ //
+ {
+ //A a{ 1, 2, 3.2, 4, true };
+ A a{ 1, 2, 3.2, 4, "xoxo" };
+ auto a_type_id = H5Tcreate(H5T_COMPOUND, sizeof(A));
+ vector< hid_t > a_stype_id; // unused
+ status = H5Tinsert(a_type_id, "val_2", offset_of(&A::val_2), H5T_NATIVE_UINT);
+ assert(status >= 0);
+ status = H5Tinsert(a_type_id, "val_3", offset_of(&A::val_3), H5T_NATIVE_FLOAT);
+ assert(status >= 0);
+ status = H5Tinsert(a_type_id, "val_1", offset_of(&A::val_1), H5T_NATIVE_INT);
+ assert(status >= 0);
+ auto ds2_id = H5Dcreate2(file_id, "/Group_2/Subgroup_2_1/Dataset_2_1_2", a_type_id, scalar_space_id,
+ lcpl_id, H5P_DEFAULT, H5P_DEFAULT);
+ status = H5Dwrite(ds2_id, a_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, &a);
+ assert(status >= 0);
+ H5Dclose(ds2_id);
+ H5Tclose(a_type_id);
+ }
+
+ //
+ // write compound vector dataset (only members val_1 and val_3 are stored)
+ //
+ {
+ //vector< A > a_v{{ 1, 2, 3.1, 4, true }, { 11, 12, 13.1, 14, false }, { 21, 22, 23.1, 24, true }};
+ vector< A > a_v{{ 1, 2, 3.1, 4, "xoxo" }, { 11, 12, 13.1, 14, "xexe" }, { 21, 22, 23.1, 24, "xixi" }};
+ auto a_type_id = H5Tcreate(H5T_COMPOUND, sizeof(A));
+ status = H5Tinsert(a_type_id, "val_1", offset_of(&A::val_1), H5T_NATIVE_INT);
+ assert(status >= 0);
+ //status = H5Tinsert(a_type_id, "val_2", offset_of(&A::val_2), H5T_NATIVE_UINT);
+ //assert(status >= 0);
+ status = H5Tinsert(a_type_id, "val_3", offset_of(&A::val_3), H5T_NATIVE_FLOAT);
+ assert(status >= 0);
+ hsize_t a_v_size = a_v.size();
+ auto a_v_space_id = H5Screate_simple(1, &a_v_size, nullptr);
+ auto ds3_id = H5Dcreate2(file_id, "/Group_2/Subgroup_2_1/Dataset_2_1_3", a_type_id, a_v_space_id,
+ lcpl_id, H5P_DEFAULT, H5P_DEFAULT);
+ status = H5Dwrite(ds3_id, a_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, a_v.data());
+ assert(status >= 0);
+ H5Dclose(ds3_id);
+ H5Sclose(a_v_space_id);
+ H5Tclose(a_type_id);
+ }
+
+ //
+ // write compound vector dataset in 2 steps: create the dataset, then write it one member at a time
+ //
+ {
+ //vector< A > a_v{{ 1, 2, 3.1, 4, true }, { 11, 12, 13.1, 14, false }, { 21, 22, 23.1, 24, true }};
+ vector< A > a_v{{ 100, 2, 3.1, 4, "xoxo" }, { 111, 12, 13.1, 14, "xexe" }, { 121, 22, 23.1, 24, "xixi" }};
+ hid_t ds4_id;
+ // create dataset with members val_1, val_2, val_3 and val_5 (variable-length string)
+ {
+ auto a_type_id = H5Tcreate(H5T_COMPOUND, sizeof(A));
+ status = H5Tinsert(a_type_id, "val_1", offset_of(&A::val_1), H5T_NATIVE_INT);
+ assert(status >= 0);
+ status = H5Tinsert(a_type_id, "val_2", offset_of(&A::val_2), H5T_NATIVE_UINT);
+ assert(status >= 0);
+ status = H5Tinsert(a_type_id, "val_3", offset_of(&A::val_3), H5T_NATIVE_FLOAT);
+ assert(status >= 0);
+ hid_t val_5_type_id = H5Tcopy(H5T_C_S1);
+ status = H5Tset_size(val_5_type_id, H5T_VARIABLE);
+ assert(status >= 0);
+ status = H5Tinsert(a_type_id, "val_5", offset_of(&A::val_5), val_5_type_id);
+ assert(status >= 0);
+ hsize_t a_v_size = a_v.size();
+ auto a_v_space_id = H5Screate_simple(1, &a_v_size, nullptr);
+ ds4_id = H5Dcreate2(file_id, "/Group_2/Subgroup_2_1/Dataset_2_1_4", a_type_id, a_v_space_id,
+ lcpl_id, H5P_DEFAULT, H5P_DEFAULT);
+ H5Sclose(a_v_space_id);
+ H5Tclose(val_5_type_id);
+ H5Tclose(a_type_id);
+ }
+ // write val_1 (memory type names only this member)
+ {
+ auto a_type_id = H5Tcreate(H5T_COMPOUND, sizeof(A));
+ status = H5Tinsert(a_type_id, "val_1", offset_of(&A::val_1), H5T_NATIVE_INT);
+ assert(status >= 0);
+ status = H5Dwrite(ds4_id, a_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, a_v.data());
+ assert(status >= 0);
+ H5Tclose(a_type_id);
+ }
+ // write val_2 (memory type names only this member)
+ {
+ auto a_type_id = H5Tcreate(H5T_COMPOUND, sizeof(A));
+ status = H5Tinsert(a_type_id, "val_2", offset_of(&A::val_2), H5T_NATIVE_UINT);
+ assert(status >= 0);
+ status = H5Dwrite(ds4_id, a_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, a_v.data());
+ assert(status >= 0);
+ H5Tclose(a_type_id);
+ }
+ // val_3: the H5Dwrite below is commented out, so val_3 is never written; the second assert re-checks the H5Tinsert status
+ {
+ auto a_type_id = H5Tcreate(H5T_COMPOUND, sizeof(A));
+ status = H5Tinsert(a_type_id, "val_3", offset_of(&A::val_3), H5T_NATIVE_FLOAT);
+ assert(status >= 0);
+ //status = H5Dwrite(ds4_id, a_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, a_v.data());
+ assert(status >= 0);
+ H5Tclose(a_type_id);
+ }
+ // write val_5: the strings are passed as an array of char pointers, with a one-member memory type at offset 0
+ {
+ auto a_type_id = H5Tcreate(H5T_COMPOUND, sizeof(const char *));
+ hid_t val_5_type_id = H5Tcopy(H5T_C_S1);
+ status = H5Tset_size(val_5_type_id, H5T_VARIABLE);
+ assert(status >= 0);
+ status = H5Tinsert(a_type_id, "val_5", 0, val_5_type_id);
+ assert(status >= 0);
+ H5Tclose(val_5_type_id);
+ vector< const char * > charptr_buff(a_v.size());
+ for (size_t i = 0; i < a_v.size(); ++i)
+ {
+ charptr_buff[i] = a_v[i].val_5.data();
+ }
+ status = H5Dwrite(ds4_id, a_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, charptr_buff.data());
+ assert(status >= 0);
+ H5Tclose(a_type_id);
+ }
+ H5Dclose(ds4_id);
+ }
+
+ //
+ // clean up
+ //
+ H5Sclose(scalar_space_id);
+ H5Pclose(lcpl_id);
+ H5Fclose(file_id);
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fast5.git
More information about the debian-med-commit
mailing list