[med-svn] [fast5] 01/02: Imported Upstream version 0~20150918
Afif Elghraoui
afif-guest at moszumanska.debian.org
Tue Jan 19 09:16:36 UTC 2016
This is an automated email from the git hooks/post-receive script.
afif-guest pushed a commit to branch master
in repository fast5.
commit 4cdda2a659f26ffa52fac876a5ea7cbec2811c4d
Author: Afif Elghraoui <afif at ghraoui.name>
Date: Mon Jan 18 14:26:10 2016 -0800
Imported Upstream version 0~20150918
---
README.md | 11 +
src/Makefile | 2 +
src/a.cpp | 86 +++++++
src/fast5.hpp | 313 ++++++++++++++++++++++++++
src/hdf5_tools.hpp | 640 +++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 1052 insertions(+)
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e7bdea4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,11 @@
+# FAST5
+
+A lightweight C++11 library to read raw signal data from Oxford Nanopore's FAST5 files.
+
+## Installation instructions
+
+This library is provided as header files only, so you only need to copy ```fast5.hpp``` and ```hdf5_tools.hpp``` into your project.
+
+## Usage instructions
+
+See ```a.cpp``` for an example.
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..0b3456c
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,2 @@
+a: a.cpp fast5.hpp hdf5_tools.hpp
+ g++ -std=c++11 -O0 -g3 -ggdb -fno-eliminate-unused-debug-types -Wall -Wextra -pedantic -Wno-unused-parameter -o $@ $^ -L /usr/local/lib -lhdf5
diff --git a/src/a.cpp b/src/a.cpp
new file mode 100644
index 0000000..2c89933
--- /dev/null
+++ b/src/a.cpp
@@ -0,0 +1,86 @@
+#include <cassert>
+#include <iostream>
+#include <string>
+
+#include "fast5.hpp"
+
+using namespace std;
+
+
+int main(int argc, char* argv[])
+{
+ assert(argc == 2);
+ string file_name(argv[1]);
+ //string ds_name(argv[2]);
+
+ // Open the FAST5 file for reading
+ fast5::File* f_p;
+ f_p = new fast5::File(file_name);
+
+ // Check that it opened successfully
+ assert(f_p->is_open());
+
+ // Extract version information for the ONT software used to generate this dataset
+ cout << "file_version=" << f_p->file_version() << endl;
+ cout << "basecall_version=" << f_p->basecall_version() << endl;
+ cout << "eventdetection_version=" << f_p->eventdetection_version() << endl;
+ cout << "sequences_version=" << f_p->sequences_version() << endl;
+
+ // This function checks to see if 2D basecalls are available
+ if(f_p->have_basecalled_2D())
+ {
+ cout << "basecalled_2D=" << f_p->basecalled_2D() << endl;
+
+ // Extract the alignment between template and complement events
+ // which were generated by the 2D basecaller
+ auto v = f_p->get_event_alignments();
+ cout << "event_alignment().size()=" << v.size() << endl;
+ for (const auto& e : v)
+ {
+ cout << "(template=" << e.template_index << ", complement=" << e.complement_index << ", kmer=" << e.kmer << ")" << endl;
+ }
+ }
+
+ // Iterate over the template/complement strands
+ for (size_t i = 0; i < 2; ++i)
+ {
+ // Check if a pore model for this strand exists
+ if (f_p->have_model(i))
+ {
+ // Print the name of ONT's reference model used to basecall
+ cout << "Model file: " << f_p->get_model_file(i) << endl;
+
+ // Extract the global scaling parameters for the pore model
+ auto params = f_p->get_model_parameters(i);
+ cout << "model drift=" << params.drift <<
+ ", scale=" << params.scale <<
+ ", scale_sd=" << params.scale_sd <<
+ ", shift=" << params.shift <<
+ ", var=" << params.var <<
+ ", var_sd=" << params.var_sd << endl;
+
+ // Extract the expected current levels for each k-mer
+ auto v = f_p->get_model(i);
+ cout << "model(" << i << ").size()=" << v.size() << endl;
+ for (const auto& e : v)
+ {
+ cout << "(kmer=" << e.kmer << ", level_mean=" << e.level_mean << ", level_stdv=" << e.level_stdv << ")" << endl;
+ }
+ }
+
+ // Check if this strand has event observations
+ if (f_p->have_events(i))
+ {
+ // Extract each event
+ auto v = f_p->get_events(i);
+ cout << "events(" << i << ").size()=" << v.size() << endl;
+ for (const auto& e : v)
+ {
+ cout << "(mean=" << e.mean << ", start=" << e.start << ", stdv=" << e.stdv << ", length=" << e.length << ")" << endl;
+ }
+ }
+ }
+
+ // Cleanup the file pointer, which closes the file
+ delete f_p;
+}
diff --git a/src/fast5.hpp b/src/fast5.hpp
new file mode 100644
index 0000000..9f03de3
--- /dev/null
+++ b/src/fast5.hpp
@@ -0,0 +1,313 @@
+#ifndef __FAST5_HPP
+#define __FAST5_HPP
+
+#include <cassert>
+#include <exception>
+#include <iostream>
+#include <sstream>
+#include <iomanip>
+#include <string>
+#include <vector>
+
+#include "hdf5_tools.hpp"
+#define MAX_K_LEN 8
+
+namespace fast5
+{
+
+//
+// This struct represents the expected signal measured
+// given the kmer sequence that is in the pore when the
+// the observations are made. A pore model consists
+// of 1024 of these entries (one per 5-mer) and global
+// shift/scaling parameters.
+//
+struct Model_Entry
+{
+ char kmer[MAX_K_LEN];
+ long long variant;
+ double level_mean;
+ double level_stdv;
+ double sd_mean;
+ double sd_stdv;
+ double weight;
+}; // struct Model_Entry
+
+//
+// This struct represents the global transformations
+// that must be applied to each Model_Entry
+//
+struct Model_Parameters
+{
+ double drift;
+ double scale;
+ double scale_sd;
+ double shift;
+ double var;
+ double var_sd;
+}; // struct Model_Parameters
+
+//
+// This struct represents an observed event.
+// The members of the struct are the same as
+// the fields encoded in the FAST5 file.
+//
+struct Event_Entry
+{
+ double mean;
+ double start;
+ double stdv;
+ double length;
+ char model_state[MAX_K_LEN];
+ double model_level;
+ long long move;
+ double p_model_state;
+ char mp_state[MAX_K_LEN];
+ double p_mp_state;
+ double p_A;
+ double p_C;
+ double p_G;
+ double p_T;
+}; // struct Event_Entry
+
+//
+// This struct represents a template-to-complement
+// match that is emitted by ONT's 2D basecaller
+//
+struct Event_Alignment_Entry
+{
+ long long template_index;
+ long long complement_index;
+ char kmer[MAX_K_LEN];
+}; // struct Event_Alignment_Entry
+
+class File
+ : private hdf5_tools::File_Reader
+{
+private:
+ typedef hdf5_tools::File_Reader Base;
+public:
+ using Base::Base;
+
+ using Base::is_open;
+ using Base::file_name;
+ using Base::open;
+ using Base::close;
+
+ std::string file_version() const
+ {
+ double v;
+ assert(Base::exists("/file_version"));
+ Base::read< double >("/file_version", v);
+ // convert it to string
+ std::ostringstream os;
+ os << v;
+ return os.str();
+ }
+
+ std::string basecall_version() const
+ {
+ std::string res;
+ std::string path = get_bc_2d_root() + "/version";
+ assert(Base::exists(path));
+ Base::read< std::string >(path, res);
+ return res;
+ }
+
+ std::string eventdetection_version() const
+ {
+ std::string res;
+ // only support eventdetection group 000 for now
+ std::string path = "/Analyses/EventDetection_000/version";
+ assert(Base::exists(path));
+ Base::read< std::string >(path, res);
+ return res;
+ }
+
+ std::string get_log() const
+ {
+ std::string res;
+ std::string path = get_bc_2d_root() + "/Log";
+ assert(Base::exists(path));
+ Base::read< std::string >(path, res);
+ return res;
+ }
+
+ double get_sampling_rate() const
+ {
+ assert(have_sampling_rate());
+
+ auto lg = get_log();
+ auto idx = lg.find("Sampling rate is");
+
+ std::string line;
+ std::stringstream ss1(lg.substr(idx));
+ std::getline(ss1,line,'\n');
+
+ std::stringstream ss2(line);
+
+ std::string token;
+ std::getline(ss2,token,' '); //Sampling
+ std::getline(ss2,token,' '); //rate
+ std::getline(ss2,token,' '); //is
+ std::getline(ss2,token,' '); //Hz value
+
+ return std::atof(token.c_str());
+ }
+
+ bool have_sampling_rate() const
+ {
+ auto lg = get_log();
+ auto idx = lg.find("Sampling rate is");
+ return idx != std::string::npos;
+ }
+
+ std::string get_model_file(size_t i) const
+ {
+ std::string res;
+ assert(Base::exists(model_file_path(i)));
+ Base::read< std::string >(model_file_path(i), res);
+ return res;
+ }
+
+ std::string sequences_version() const
+ {
+ std::vector< std::string > tmp;
+ assert(Base::exists("/Sequences/Meta/version"));
+ Base::read< std::string >("/Sequences/Meta/version", tmp);
+ std::string res;
+ for (const auto& s: tmp)
+ {
+ res += s;
+ }
+ return res;
+ }
+
+ bool have_basecalled_2D() const
+ {
+ return Base::exists(get_bc_2d_root() + "/BaseCalled_2D/Fastq");
+ }
+
+ std::string basecalled_2D() const
+ {
+ std::string res;
+ Base::read< std::string >(get_bc_2d_root() + "/BaseCalled_2D/Fastq", res);
+
+ // Split the FASTQ record on newlines
+ size_t nl1 = res.find_first_of('\n');
+ size_t nl2 = res.find_first_of('\n', nl1 + 1);
+
+ if(nl1 == std::string::npos || nl2 == std::string::npos)
+ return "";
+ else
+ return res.substr(nl1 + 1, nl2 - nl1 - 1);
+ }
+
+ std::vector< Event_Alignment_Entry > get_event_alignments() const
+ {
+ std::vector< Event_Alignment_Entry > res;
+ hdf5_tools::Compound_Map m;
+ m.add_member("template", &Event_Alignment_Entry::template_index);
+ m.add_member("complement", &Event_Alignment_Entry::complement_index);
+ m.add_member("kmer", &Event_Alignment_Entry::kmer);
+ Base::read< Event_Alignment_Entry >(get_bc_2d_root() + "/BaseCalled_2D/Alignment", res, &m);
+ return res;
+ }
+
+ bool have_model(size_t i) const
+ {
+ return Base::exists(model_path(i));
+ }
+ bool have_events(size_t i) const
+ {
+ return Base::exists(events_path(i));
+ }
+
+ std::vector< Model_Entry > get_model(size_t i) const
+ {
+ std::vector< Model_Entry > res;
+ hdf5_tools::Compound_Map m;
+ m.add_member("kmer", &Model_Entry::kmer);
+ m.add_member("level_mean", &Model_Entry::level_mean);
+ m.add_member("level_stdv", &Model_Entry::level_stdv);
+ m.add_member("sd_mean", &Model_Entry::sd_mean);
+ m.add_member("sd_stdv", &Model_Entry::sd_stdv);
+ Base::read< Model_Entry >(model_path(i), res, &m);
+ return res;
+ }
+
+ Model_Parameters get_model_parameters(size_t i) const
+ {
+ Model_Parameters res;
+ std::string path = model_path(i);
+ Base::read< double >(path + "/drift", res.drift);
+ Base::read< double >(path + "/scale", res.scale);
+ Base::read< double >(path + "/scale_sd", res.scale_sd);
+ Base::read< double >(path + "/shift", res.shift);
+ Base::read< double >(path + "/var", res.var);
+ Base::read< double >(path + "/var_sd", res.var_sd);
+ return res;
+ }
+
+ std::vector< Event_Entry > get_events(size_t i) const
+ {
+ std::vector< Event_Entry > res;
+ hdf5_tools::Compound_Map m;
+ m.add_member("mean", &Event_Entry::mean);
+ m.add_member("start", &Event_Entry::start);
+ m.add_member("stdv", &Event_Entry::stdv);
+ m.add_member("length", &Event_Entry::length);
+ Base::read< Event_Entry >(events_path(i), res, &m);
+ return res;
+ }
+
+ void set_basecalled_group_id(size_t i)
+ {
+ assert(i <= 999);
+ std::stringstream ss;
+ ss << std::setfill('0') << std::setw(3) << i;
+ _basecalled_group_id = ss.str();
+ }
+
+
+private:
+
+ // Returns the root path of the form:
+ // Analyses/Basecall_2D_ddd/ where ddd is the group
+ std::string get_bc_2d_root() const
+ {
+ return "/Analyses/Basecall_2D_" + _basecalled_group_id;
+ }
+
+ std::string model_path(size_t i) const
+ {
+ static std::vector< std::string > _model_path =
+ { "/BaseCalled_template/Model",
+ "/BaseCalled_complement/Model" };
+ return get_bc_2d_root() + _model_path.at(i);
+ }
+
+ std::string events_path(size_t i) const
+ {
+ static std::vector< std::string > _events_path =
+ { "/BaseCalled_template/Events",
+ "/BaseCalled_complement/Events" };
+ return get_bc_2d_root() + _events_path.at(i);
+ }
+
+ std::string model_file_path(size_t i) const
+ {
+ static std::vector< std::string > _model_file_path =
+ { "/Summary/basecall_1d_template/model_file",
+ "/Summary/basecall_1d_complement/model_file" };
+ return get_bc_2d_root() + _model_file_path.at(i);
+ }
+
+ // default to using the 000 analysis group
+ std::string _basecalled_group_id = "000";
+
+}; // class File
+
+} // namespace fast5
+
+#endif
diff --git a/src/hdf5_tools.hpp b/src/hdf5_tools.hpp
new file mode 100644
index 0000000..6532d74
--- /dev/null
+++ b/src/hdf5_tools.hpp
@@ -0,0 +1,640 @@
+#ifndef __HDF5_TOOLS_HPP
+#define __HDF5_TOOLS_HPP
+
+#include <cassert>
+#include <exception>
+#include <functional>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <tuple>
+#include <vector>
+
+namespace hdf5
+{
+#include <hdf5.h>
+}
+
+namespace hdf5_tools
+{
+using namespace hdf5;
+
+/// Exception class thrown by failed hdf5 operations.
+class Exception
+ : public std::exception
+{
+public:
+ Exception(const std::string& msg) : _msg(msg) {}
+ const char* what() const noexcept { return _msg.c_str(); }
+private:
+ std::string _msg;
+}; // class Exception
+
+// Forward declaration
+class Compound_Map;
+
+namespace detail
+{
+
+/// TempMetaFunc: Given destination type, deduce memory type to be used in hdf5 read operation.
+/// Only useful for numeric types.
+/// HDF5 idiosyncracy:
+/// Types such as H5T_NATIVE_INT are not constants(!?), so id() is not a constexpr.
+template < typename T > struct get_mem_type { static hid_t id() { return -1; } };
+// signed integral:
+template <> struct get_mem_type< char > { static hid_t id() { return H5T_NATIVE_CHAR; } };
+template <> struct get_mem_type< short > { static hid_t id() { return H5T_NATIVE_SHORT; } };
+template <> struct get_mem_type< int > { static hid_t id() { return H5T_NATIVE_INT; } };
+template <> struct get_mem_type< long > { static hid_t id() { return H5T_NATIVE_LONG; } };
+template <> struct get_mem_type< long long > { static hid_t id() { return H5T_NATIVE_LLONG; } };
+// unsigned integral:
+template <> struct get_mem_type< unsigned char > { static hid_t id() { return H5T_NATIVE_UCHAR; } };
+template <> struct get_mem_type< unsigned short > { static hid_t id() { return H5T_NATIVE_USHORT; } };
+template <> struct get_mem_type< unsigned > { static hid_t id() { return H5T_NATIVE_UINT; } };
+template <> struct get_mem_type< unsigned long > { static hid_t id() { return H5T_NATIVE_ULONG; } };
+template <> struct get_mem_type< unsigned long long > { static hid_t id() { return H5T_NATIVE_ULLONG; } };
+// float:
+template <> struct get_mem_type< float > { static hid_t id() { return H5T_NATIVE_FLOAT; } };
+template <> struct get_mem_type< double > { static hid_t id() { return H5T_NATIVE_DOUBLE; } };
+template <> struct get_mem_type< long double > { static hid_t id() { return H5T_NATIVE_LDOUBLE; } };
+
+/// TempMetaFunc: Given destination type, can we read it
+template < typename Out_Data_Type >
+struct can_read
+{
+ static const bool value =
+ std::is_integral< Out_Data_Type >::value
+ or std::is_floating_point< Out_Data_Type >::value
+ or std::is_same< typename std::remove_extent< Out_Data_Type >::type, char >::value
+ or std::is_same< Out_Data_Type, std::string >::value
+ or std::is_class< Out_Data_Type >:: value;
+};
+
+/// TempMetaFunc: Given a destination type, does it need a compound map
+template < typename Out_Data_Type >
+struct read_as_atomic
+{
+ static const bool value =
+ std::is_integral< Out_Data_Type >::value
+ or std::is_floating_point< Out_Data_Type >::value
+ or std::is_same< typename std::remove_extent< Out_Data_Type >::type, char >::value
+ or std::is_same< Out_Data_Type, std::string >::value;
+};
+
+/// Compute offset of a struct member from a member pointer (runtime version).
+template < typename T, typename U >
+std::size_t offset_of(U T::* mem_ptr)
+{
+ return reinterpret_cast< std::size_t >(&(((T*)0)->*mem_ptr));
+}
+
+/// Description of a member inside a compound
+/// Only works with numeric, string, and struct types.
+struct Compound_Member_Description
+{
+public:
+ Compound_Member_Description(const std::string& _name, size_t _offset, hid_t _numeric_type_id)
+ : name(_name), offset(_offset), numeric_type_id(_numeric_type_id)
+ {
+ type = numeric;
+ }
+ Compound_Member_Description(const std::string& _name, size_t _offset, size_t _char_array_size)
+ : name(_name), offset(_offset), char_array_size(_char_array_size)
+ {
+ type = char_array;
+ }
+ Compound_Member_Description(const std::string& _name, size_t _offset)
+ : name(_name), offset(_offset)
+ {
+ type = string;
+ }
+ Compound_Member_Description(const std::string& _name, size_t _offset, const Compound_Map* _compound_map_ptr)
+ : name(_name), offset(_offset), compound_map_ptr(_compound_map_ptr)
+ {
+ type = compound;
+ }
+
+ bool is_numeric() const { return type == numeric; }
+ bool is_char_array() const { return type == char_array; }
+ bool is_string() const { return type == string; }
+ bool is_compound() const { return type == compound; }
+
+ std::string name;
+ size_t offset;
+ union
+ {
+ hid_t numeric_type_id;
+ size_t char_array_size;
+ const Compound_Map* compound_map_ptr;
+ };
+
+private:
+ enum member_type
+ {
+ numeric,
+ char_array,
+ string,
+ compound
+ };
+ member_type type;
+}; // Compound_Member_Description
+
+} // namespace detail
+
+/// A map of struct fields to tags that is used to read compound datatypes
+class Compound_Map
+{
+public:
+ Compound_Map() = default;
+ Compound_Map(const Compound_Map&) = delete;
+ Compound_Map(Compound_Map&&) = default;
+ Compound_Map& operator = (const Compound_Map&) = delete;
+ Compound_Map& operator = (Compound_Map&&) = default;
+ ~Compound_Map() = default;
+
+ template < typename T, typename U >
+ void add_member(const std::string& name, U T::* mem_ptr)
+ {
+ static_assert(std::is_integral< U >::value
+ or std::is_floating_point< U >::value
+ or std::is_same< typename std::remove_extent< U >::type, char >::value
+ or std::is_same< U, std::string >::value,
+ "add_member(name, mem_ptr) overload expects numerical or string types only ");
+ if (std::is_integral< U >::value or std::is_floating_point< U >::value)
+ {
+ _members.emplace_back(name, detail::offset_of(mem_ptr), detail::get_mem_type< U >::id());
+ }
+ else if (std::is_same< typename std::remove_extent< U >::type, char >::value)
+ {
+ _members.emplace_back(name, detail::offset_of(mem_ptr), sizeof(U));
+ }
+ else if (std::is_same< U, std::string >::value)
+ {
+ _members.emplace_back(name, detail::offset_of(mem_ptr));
+ }
+ }
+
+ template < typename T, typename U >
+ void add_member(const std::string& name, U T::* mem_ptr, const Compound_Map* compound_map_ptr)
+ {
+ assert(false); // not currently implemented
+ static_assert(std::is_class< U >::value,
+ "add_member(name, mem_ptr, compound_map_ptr) overload expects class types only ");
+ _members.emplace_back(name, detail::offset_of(mem_ptr), compound_map_ptr);
+ }
+
+ const std::vector< detail::Compound_Member_Description >& members() const { return _members; }
+
+private:
+ std::vector< detail::Compound_Member_Description > _members;
+}; // Compound_Map
+
+namespace detail
+{
+
+// TempSpec: reading numerics
+template < typename Out_Data_Type, typename Out_Data_Storage >
+struct Extent_Atomic_Reader
+{
+ void operator () (const std::string& loc_full_name, Out_Data_Storage& dest,
+ const Compound_Map*, hid_t obj_id, hid_t,
+ const std::string&, std::function< hid_t(hid_t) >,
+ const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn)
+ {
+ hid_t mem_type_id = get_mem_type< Out_Data_Type >::id();
+ assert(mem_type_id != -1);
+ int status = read_fcn(obj_id, mem_type_id, static_cast< void* >(dest.data()));
+ if (status < 0) throw Exception(loc_full_name + ": error in " + read_fcn_name);
+ }
+}; // struct Extent_Atomic_Reader
+
+// TempSpec: for reading strings
+template < typename Out_Data_Storage >
+struct Extent_Atomic_Reader< std::string, Out_Data_Storage >
+{
+ void operator () (const std::string& loc_full_name, Out_Data_Storage& dest,
+ const Compound_Map*, hid_t obj_id, hid_t obj_space_id,
+ const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn,
+ const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn)
+ {
+ int status;
+ int file_type_id = get_type_fcn(obj_id);
+ if (file_type_id < 0) throw Exception(loc_full_name + ": error in " + get_type_fcn_name);
+ int is_vlen_str = H5Tis_variable_str(file_type_id);
+ if (is_vlen_str < 0) throw Exception(loc_full_name + ": error in H5Tis_variable_str");
+ hid_t mem_type_id = H5Tcopy(H5T_C_S1);
+ if (mem_type_id < 0) throw Exception(loc_full_name + ": error in H5Tcopy");
+ if (is_vlen_str) // stored as variable-length string
+ {
+ // compute mem_type
+ status = H5Tset_size(mem_type_id, H5T_VARIABLE);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Tset_size(variable)");
+ // prepare buffer to receive data
+ std::vector< char* > char_p_buff(dest.size(), nullptr);
+ // perform the read
+ status = read_fcn(obj_id, mem_type_id, static_cast< void* >(char_p_buff.data()));
+ if (status < 0) throw Exception(loc_full_name + ": error in " + read_fcn_name);
+ // transfer strings to destination
+ for (size_t i = 0; i < dest.size(); ++i)
+ {
+ if (not char_p_buff[i]) throw Exception(loc_full_name + ": " + read_fcn_name + " did not fill buffer");
+ dest[i] = char_p_buff[i];
+ }
+ // reclaim memory allocated by libhdf5
+ status = H5Dvlen_reclaim(mem_type_id, obj_space_id, H5P_DEFAULT, char_p_buff.data());
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Dvlen_reclaim");
+ }
+ else // stored as fixed-length string
+ {
+ // compute mem_type
+ size_t sz = H5Tget_size(file_type_id);
+ if (sz == 0) throw Exception(loc_full_name + ": H5Tget_size returned 0; is this an error?!");
+ status = H5Tset_size(mem_type_id, sz + 1);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Tset_size(fixed)");
+ // prepare buffer to receieve data
+ std::vector< char > char_buff(dest.size() * (sz + 1));
+ // perform the read
+ status = read_fcn(obj_id, mem_type_id, static_cast< void* >(char_buff.data()));
+ if (status < 0) throw Exception(loc_full_name + ": error in " + read_fcn_name);
+ // transfer strings to destination
+ for (size_t i = 0; i < dest.size(); ++i)
+ {
+ dest[i] = std::string(&char_buff[i * (sz + 1)], sz);
+ }
+ }
+ status = H5Tclose(mem_type_id);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(mem_type_id)");
+ status = H5Tclose(file_type_id);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(file_type_id)");
+ }
+}; // struct Extent_Atomic_Reader< std::string >
+
+template < typename Out_Data_Type, typename Out_Data_Storage >
+struct Extent_Compound_Reader
+{
+ void operator () (const std::string& loc_full_name, Out_Data_Storage& dest,
+ const Compound_Map* compound_map_ptr, hid_t obj_id, hid_t,
+ const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn,
+ const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn)
+ {
+ int status;
+ assert(compound_map_ptr);
+ hid_t file_type_id = get_type_fcn(obj_id);
+ if (file_type_id < 0) throw Exception(loc_full_name + ": error in " + get_type_fcn_name);
+ H5T_class_t file_type_class = H5Tget_class(file_type_id);
+ if (file_type_class == H5T_NO_CLASS) throw Exception(loc_full_name + ": error in H5Tget_class(file_type)");
+ if (file_type_class != H5T_COMPOUND) throw Exception(loc_full_name + ": expected H5T_COMPOUND datatype");
+
+ // pass 1
+ // read numeric and char_array members only
+ hid_t mem_type_id = H5Tcreate(H5T_COMPOUND, sizeof(Out_Data_Type));
+ std::vector< hid_t > mem_stype_id_v;
+ for (const auto& e : compound_map_ptr->members())
+ {
+ assert(not e.is_compound()); // not implemented yet
+ if (e.is_string()) continue;
+ int file_stype_idx = H5Tget_member_index(file_type_id, e.name.c_str());
+ if (file_stype_idx < 0) throw Exception(loc_full_name + ": missing member \"" + e.name + "\"");
+ hid_t file_stype_id = H5Tget_member_type(file_type_id, file_stype_idx);
+ if (file_stype_id < 0) throw Exception(loc_full_name + ": error in H5Tget_member_type");
+ H5T_class_t file_stype_class = H5Tget_class(file_stype_id);
+ if (file_stype_class == H5T_NO_CLASS) throw Exception(loc_full_name + ": error in H5Tget_class(file_stype)");
+ if (e.is_numeric())
+ {
+ if (file_stype_class != H5T_INTEGER and file_stype_class != H5T_FLOAT)
+ throw Exception(loc_full_name + ": member \"" + e.name + "\" is numeric, but file_stype is not numeric");
+ status = H5Tinsert(mem_type_id, e.name.c_str(), e.offset, e.numeric_type_id);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Tinsert(\"" + e.name + "\")");
+ }
+ if (e.is_char_array())
+ {
+ if (file_stype_class != H5T_STRING)
+ throw Exception(loc_full_name + ": member \"" + e.name + "\" is char_array, but file_stype is not H5T_STRING");
+ status = H5Tis_variable_str(file_stype_id);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Tis_variable_str(\"" + e.name + "\")");
+ if (status) throw Exception(loc_full_name + ": member \"" + e.name + "\" is a char_array, but file_stype is a variable len string");
+ //size_t file_stype_size = H5Tget_size(file_stype_id);
+ //if (file_stype_size == 0) throw Exception(loc_full_name + ": H5Tget_size(\"" + e.name + "\") returned 0");
+ hid_t mem_stype_id = H5Tcopy(H5T_C_S1);
+ if (mem_stype_id < 0) throw Exception(loc_full_name + ": member \"" + e.name + "\": error in H5Tcopy");
+ status = H5Tset_size(mem_stype_id, e.char_array_size);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Tset_size(\"" + e.name + "\")");
+ status = H5Tinsert(mem_type_id, e.name.c_str(), e.offset, mem_stype_id);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Tinsert(\"" + e.name + "\")");
+ mem_stype_id_v.push_back(mem_stype_id);
+ }
+ status = H5Tclose(file_stype_id);
+ if (status < 0) throw Exception(loc_full_name + ": member \"" + e.name + "\": error in H5Tclose(file_stype)");
+ }
+ // perform the actual read
+ status = read_fcn(obj_id, mem_type_id, static_cast< void* >(dest.data()));
+ if (status < 0) throw Exception(loc_full_name + ": pass 1: error in " + read_fcn_name);
+ // release the memory types
+ for (const auto& mem_stype_id : mem_stype_id_v)
+ {
+ status = H5Tclose(mem_stype_id);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(mem_stype)");
+ }
+ mem_stype_id_v.clear();
+ status = H5Tclose(mem_type_id);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(mem_type)");
+
+ // pass 2
+ // read strings
+ for (const auto& e : compound_map_ptr->members())
+ {
+ assert(not e.is_compound()); // not implemented yet
+ if (e.is_numeric() or e.is_char_array()) continue;
+ //TODO
+ assert(false);
+ }
+
+ status = H5Tclose(file_type_id);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Tclose(file_type_id)");
+ }
+}; //struct Extent_Compound_Reader
+
+// TempSpec: read extent of atomic types
+template < typename Out_Data_Type, typename Out_Data_Storage, bool = true >
+struct Extent_Reader_as_atomic
+ : Extent_Atomic_Reader< Out_Data_Type, Out_Data_Storage >
+{};
+
+// TempSpec: read extent of compound types
+template < typename Out_Data_Type, typename Out_Data_Storage >
+struct Extent_Reader_as_atomic< Out_Data_Type, Out_Data_Storage, false >
+ : Extent_Compound_Reader< Out_Data_Type, Out_Data_Storage >
+{};
+
+// branch on atomic/compound destination
+template < typename Out_Data_Type, typename Out_Data_Storage >
+struct Extent_Reader
+ : public Extent_Reader_as_atomic< Out_Data_Type, Out_Data_Storage, read_as_atomic< Out_Data_Type >::value >
+{};
+
+template < typename, typename, bool >
+struct Object_Reader_impl;
+
+// TempSpec: reading scalars
+template < typename Out_Data_Type >
+struct Object_Reader_impl< Out_Data_Type, Out_Data_Type, true >
+{
+ void operator () (const std::string& loc_full_name, Out_Data_Type& dest,
+ const Compound_Map* compound_map_ptr, hid_t obj_id, hid_t obj_space_id,
+ const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn,
+ const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn)
+ {
+ H5S_class_t obj_class_t = H5Sget_simple_extent_type(obj_space_id);
+ if (obj_class_t == H5S_NO_CLASS) throw Exception(loc_full_name + ": error in H5Sget_simple_extent_type");
+ if (obj_class_t != H5S_SCALAR)
+ throw Exception(loc_full_name + ": reading as scalar, but dataspace not H5S_SCALAR");
+ std::vector< Out_Data_Type > tmp(1);
+ Extent_Reader< Out_Data_Type, std::vector< Out_Data_Type > >()(
+ loc_full_name, tmp, compound_map_ptr, obj_id, obj_space_id,
+ get_type_fcn_name, get_type_fcn,
+ read_fcn_name, read_fcn);
+ dest = std::move(tmp[0]);
+ }
+};
+
+// TempSpec: reading vectors
+template < typename Out_Data_Type, typename Out_Data_Storage >
+struct Object_Reader_impl< Out_Data_Type, Out_Data_Storage, false >
+{
+ void operator () (const std::string& loc_full_name, Out_Data_Storage& dest,
+ const Compound_Map* compound_map_ptr, hid_t obj_id, hid_t obj_space_id,
+ const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn,
+ const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn)
+ {
+ H5S_class_t obj_class_t = H5Sget_simple_extent_type(obj_space_id);
+ if (obj_class_t == H5S_NO_CLASS) throw Exception(loc_full_name + ": error in H5Sget_simple_extent_type");
+ if (obj_class_t != H5S_SIMPLE)
+ throw Exception(loc_full_name + ": reading as vector, but dataspace not H5S_SIMPLE");
+ int status = H5Sget_simple_extent_dims(obj_space_id, nullptr, nullptr);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Sget_simple_extent_dims");
+ if (status != 1) throw Exception(loc_full_name + ": expected extent of dimension 1");
+ hsize_t sz;
+ H5Sget_simple_extent_dims(obj_space_id, &sz, nullptr);
+ dest.clear();
+ dest.resize(sz);
+ Extent_Reader< Out_Data_Type, Out_Data_Storage >()(
+ loc_full_name, dest, compound_map_ptr, obj_id, obj_space_id,
+ get_type_fcn_name, get_type_fcn,
+ read_fcn_name, read_fcn);
+ }
+};
+
+// TempMetaFunc: split scalar & vector reading branches
+template < typename Out_Data_Type, typename Out_Data_Storage >
+struct Object_Reader
+ : public Object_Reader_impl< Out_Data_Type, Out_Data_Storage, std::is_same< Out_Data_Type, Out_Data_Storage >::value > {};
+
+// open object and object space, then delegate
+template < typename Out_Data_Type, typename Out_Data_Storage >
+void read_obj_helper(const std::string& loc_full_name, Out_Data_Storage& dest, const Compound_Map* compound_map_ptr,
+ const std::string& open_fcn_name, std::function< hid_t(void) > open_fcn,
+ const std::string& close_fcn_name, std::function< herr_t(hid_t) > close_fcn,
+ const std::string& get_space_fcn_name, std::function< hid_t(hid_t) > get_space_fcn,
+ const std::string& get_type_fcn_name, std::function< hid_t(hid_t) > get_type_fcn,
+ const std::string& read_fcn_name, std::function< herr_t(hid_t, hid_t, void*) > read_fcn)
+{
+ int status;
+ // open object
+ hid_t obj_id = open_fcn();
+ if (obj_id < 0) throw Exception(loc_full_name + ": error in " + open_fcn_name);
+ // open object space, check reading ode matches storage mode (scalar/vector)
+ hid_t obj_space_id = get_space_fcn(obj_id);
+ if (obj_space_id < 0) throw Exception(loc_full_name + ": error in " + get_space_fcn_name);
+ // read object
+ Object_Reader< Out_Data_Type, Out_Data_Storage >()(
+ loc_full_name, dest, compound_map_ptr, obj_id, obj_space_id,
+ get_type_fcn_name, get_type_fcn,
+ read_fcn_name, read_fcn);
+ // close object space & object
+ status = H5Sclose(obj_space_id);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Sclose");
+ status = close_fcn(obj_id);
+ if (status < 0) throw Exception(loc_full_name + ": error in " + close_fcn_name);
+}
+
+// determine if address is attribute or dataset, then delegate
+template < typename Out_Data_Type, typename Out_Data_Storage >
+void read_addr(hid_t root_id, const std::string& loc_path, const std::string& loc_name,
+ Out_Data_Storage& dest, const Compound_Map* compound_map_ptr)
+{
+ assert(root_id > 0);
+ std::string loc_full_name = loc_path + loc_name;
+ // determine if object is an attribute; otherwise, assume it's a dataset
+ int status;
+ status = H5Aexists_by_name(root_id, loc_path.c_str(), loc_name.c_str(), H5P_DEFAULT);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Aexists_by_name");
+ bool is_attr = status > 0;
+ if (is_attr)
+ {
+ read_obj_helper< Out_Data_Type, Out_Data_Storage >(
+ loc_full_name, dest, compound_map_ptr,
+ "H5Aopen_by_name",
+ [&root_id, &loc_path, &loc_name] ()
+ {
+ return H5Aopen_by_name(root_id, loc_path.c_str(), loc_name.c_str(), H5P_DEFAULT, H5P_DEFAULT);
+ },
+ "H5Aclose", &H5Aclose,
+ "H5Aget_space", &H5Aget_space,
+ "H5Aget_type", &H5Aget_type,
+ "H5Aread",
+ [] (hid_t id, hid_t mem_type_id, void* dest_p)
+ {
+ return H5Aread(id, mem_type_id, dest_p);
+ });
+ }
+ else
+ {
+ read_obj_helper< Out_Data_Type, Out_Data_Storage >(
+ loc_full_name, dest, compound_map_ptr,
+ "H5Dopen",
+ [&root_id, &loc_full_name] ()
+ {
+ return H5Dopen(root_id, loc_full_name.c_str(), H5P_DEFAULT);
+ },
+ "H5Dclose", &H5Dclose,
+ "H5Dget_space", &H5Dget_space,
+ "H5Dget_type", &H5Dget_type,
+ "H5Dread",
+ [] (hid_t id, hid_t mem_type_id, void* dest_p)
+ {
+ return H5Dread(id, mem_type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, dest_p);
+ });
+ }
+} // read_addr
+
+// TempSpec: for atomic types
+template < typename Out_Data_Type, bool = true >
+struct Reader_as_atomic
+{
+ template < typename Out_Data_Storage >
+ void operator () (hid_t root_id, const std::string& loc_path, const std::string& loc_name,
+ Out_Data_Storage& dest)
+ {
+ static_assert(can_read< Out_Data_Type >::value,
+ "Reader_impl<Out_Data_Type,true>: expected a readable destination");
+ static_assert(read_as_atomic< Out_Data_Type >::value,
+ "Reader_impl<Out_Data_Type,true>: expected a type readable as atomic");
+ read_addr< Out_Data_Type, Out_Data_Storage >(root_id, loc_path, loc_name, dest, nullptr);
+ }
+};
+
+// TempSpec: for compound types
+template < typename Out_Data_Type >
+struct Reader_as_atomic< Out_Data_Type, false >
+{
+ template < typename Out_Data_Storage >
+ void operator () (hid_t root_id, const std::string& loc_path, const std::string& loc_name,
+ Out_Data_Storage& dest, const Compound_Map* compound_map_ptr)
+ {
+ static_assert(can_read< Out_Data_Type >::value,
+ "Reader_impl<Out_Data_Type,false>: expected a readable destination");
+ static_assert(not read_as_atomic< Out_Data_Type >::value,
+ "Reader_impl<Out_Data_Type,false>: expected a type readable as compound");
+ read_addr< Out_Data_Type, Out_Data_Storage >(root_id, loc_path, loc_name, dest, compound_map_ptr);
+ }
+};
+
+template < typename Out_Data_Type >
+struct Reader : public Reader_as_atomic< Out_Data_Type, read_as_atomic< Out_Data_Type >::value >
+{};
+
+} // namespace detail
+
+/// An HDF5 file reader
+class File_Reader
+{
+public:
+ File_Reader() : _file_id(0) {}
+ File_Reader(const std::string& file_name) : _file_id(0) { open(file_name); }
+ File_Reader(const File_Reader&) = delete;
+ File_Reader& operator = (const File_Reader&) = delete;
+ ~File_Reader() { if (is_open()) close(); }
+
+ bool is_open() const { return _file_id > 0; }
+ const std::string& file_name() const { return _file_name; }
+
+ void open(const std::string& file_name)
+ {
+ assert(not is_open());
+ _file_name = file_name;
+ _file_id = H5Fopen(file_name.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+ if (not is_open()) throw Exception(_file_name + ": error in H5Fopen");
+ }
+ void close()
+ {
+ assert(is_open());
+ assert(H5Fget_obj_count(_file_id, H5F_OBJ_ALL | H5F_OBJ_LOCAL) == 1);
+ int status = H5Fclose(_file_id);
+ if (status < 0) throw Exception(_file_name + ": error in H5Fclose");
+ _file_id = 0;
+ _file_name.clear();
+ }
+
+ /// Determine if address is an attribute or dataset
+ bool exists(const std::string& loc_full_name) const
+ {
+ assert(is_open());
+ assert(not loc_full_name.empty() and loc_full_name[0] == '/');
+ std::string loc_path;
+ std::string loc_name;
+ std::tie(loc_path, loc_name) = split_full_name(loc_full_name);
+ int status;
+ // check all path elements exist, except for what is to the right of the last '/'
+ size_t pos = 0;
+ while (true)
+ {
+ ++pos;
+ pos = loc_full_name.find('/', pos);
+ if (pos == std::string::npos) break;
+ std::string tmp = loc_full_name.substr(0, pos);
+ status = H5Lexists(_file_id, tmp.c_str(), H5P_DEFAULT);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Lexists");
+ if (not status) return false;
+ status = H5Oexists_by_name(_file_id, tmp.c_str(), H5P_DEFAULT);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Oexists_by_name");
+ if (not status) return false;
+ }
+ status = H5Aexists_by_name(_file_id, loc_path.c_str(), loc_name.c_str(), H5P_DEFAULT);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Aexists_by_name");
+ if (status) return true;
+ // not an attribute: try to open as a dataset
+ hid_t ds_id = H5Dopen(_file_id, loc_full_name.c_str(), H5P_DEFAULT);
+ if (ds_id < 0) return false;
+ status = H5Dclose(ds_id);
+ if (status < 0) throw Exception(loc_full_name + ": error in H5Dclose");
+ return true;
+ }
+ /// Read attribute or dataset at address
+ template < typename Out_Data_Type, typename ...Args >
+ void read(const std::string& loc_full_name, Args&& ...args) const
+ {
+ assert(is_open());
+ assert(not loc_full_name.empty() and loc_full_name[0] == '/');
+ std::string loc_path;
+ std::string loc_name;
+ std::tie(loc_path, loc_name) = split_full_name(loc_full_name);
+ detail::Reader< Out_Data_Type >()(_file_id, loc_path, loc_name, std::forward< Args >(args)...);
+ }
+
+private:
+ std::string _file_name;
+ hid_t _file_id;
+
+ /// Split a full name into path and name
+ static std::pair< std::string, std::string > split_full_name(const std::string& full_name)
+ {
+ auto last_slash_pos = full_name.find_last_of('/');
+ std::string path = last_slash_pos != std::string::npos? full_name.substr(0, last_slash_pos + 1) : std::string();
+ std::string name = last_slash_pos != std::string::npos? full_name.substr(last_slash_pos + 1) : full_name;
+ return std::make_pair(path, name);
+ } // split_full_name
+}; // class File_Reader
+
+} // namespace hdf5_tools
+
+#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fast5.git
More information about the debian-med-commit
mailing list