[med-svn] [fast5] 01/03: New upstream version 0.6.2

Steffen Möller moeller at moszumanska.debian.org
Fri Sep 15 12:18:30 UTC 2017


This is an automated email from the git hooks/post-receive script.

moeller pushed a commit to branch master
in repository fast5.

commit 1a33a450a6cf3f504cc66a0a1fc6c8956df3cfa1
Author: Steffen Moeller <moeller at debian.org>
Date:   Fri Sep 15 13:45:50 2017 +0200

    New upstream version 0.6.2
---
 .travis.Dockerfile.in             |   17 +-
 .travis.yml                       |    2 +-
 .version_files                    |    2 +-
 README.org                        |   91 +-
 VERSION                           |    2 +-
 python/.gitignore                 |    1 +
 python/Makefile                   |   20 +-
 python/bin/f5ls                   |  249 ++++
 python/bin/f5pack                 |  240 ++++
 python/fast5/.version.py.in       |    1 -
 python/fast5/__init__.py          |   10 -
 python/fast5/fast5.pyx            |  523 +++++++
 python/fast5/source/fast5.cpp     |  182 ---
 python/fast5/version.py           |    1 -
 python/setup.py                   |   86 +-
 src/.fast5_version.hpp.in         |   16 +
 src/.gitignore                    |    1 +
 src/Bit_Packer.hpp                |  152 ++
 src/File_Packer.hpp               |  982 +++++++++++++
 src/Huffman_Packer.hpp            |  357 +++++
 src/Makefile                      |   42 +-
 src/cwmap.fast5_ed_len_1.inl      |  103 ++
 src/cwmap.fast5_ed_skip_1.inl     |    4 +
 src/cwmap.fast5_ev_move_1.inl     |    6 +
 src/cwmap.fast5_ev_rel_skip_1.inl |    4 +
 src/cwmap.fast5_fq_bp_1.inl       |    7 +
 src/cwmap.fast5_fq_qv_1.inl       |   35 +
 src/cwmap.fast5_rw_1.inl          |  204 +++
 src/f5-mod.cpp                    |   14 +-
 src/f5dump.cpp                    |   96 +-
 src/f5ls-full.cpp                 |   35 +-
 src/f5ls.cpp                      |   17 +-
 src/f5pack.cpp                    |  185 +++
 src/fast5.hpp                     | 2788 ++++++++++++++++++++++++++++---------
 src/fast5_version.hpp             |   16 +
 src/hdf5-mod.cpp                  |   12 +
 src/hdf5_tools.hpp                |  316 ++++-
 src/huffman-decode.cpp            |   55 +
 src/huffman-encode.cpp            |   44 +
 src/hufftk                        |  171 +++
 src/logger.hpp                    |  378 +++++
 src/tmp.cpp                       |    7 +
 42 files changed, 6340 insertions(+), 1134 deletions(-)

diff --git a/.travis.Dockerfile.in b/.travis.Dockerfile.in
index 18e40bb..d90ecb5 100644
--- a/.travis.Dockerfile.in
+++ b/.travis.Dockerfile.in
@@ -11,21 +11,22 @@ RUN apt-get update && \
     apt-get install -y \
         build-essential \
         libhdf5-dev \
-        libboost-python-dev \
-        python2.7-minimal \
-        python-setuptools \
-        python-virtualenv
+        libpython2.7-dev \
+        python2.7-minimal
+RUN curl https://bootstrap.pypa.io/get-pip.py | python - && \
+    pip install \
+        cython \
+        setuptools \
+        virtualenv
 
 # expose prerequisites settings
 ENV HDF5_INCLUDE_DIR=/usr/include/hdf5/serial
 ENV HDF5_LIB_DIR=/usr/lib/x86_64-linux-gnu/hdf5/serial
-ENV BOOST_INCLUDE_DIR=/usr/include
-ENV BOOST_LIB_DIR=/usr/lib/x86_64-linux-gnu
 
 # if necessary, specify compiler
 #RUN apt-get install -y g++-4.9 g++-5 g++-6
-#ENV CC=gcc-4.9
-#ENV CXX=g++-4.9
+#ENV CC=gcc-6
+#ENV CXX=g++-6
 
 # use host id
 RUN groupadd --gid ${GROUP_ID} ${GROUP_NAME}
diff --git a/.travis.yml b/.travis.yml
index 2d95ffb..b1bd9f1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,7 +12,7 @@ before_install:
 
 install:
     - docker run --rm -v $PWD:/data fast5 make -C src -e
-    - docker run --rm -v $PWD:/data fast5 bash -c 'virtualenv build-venv && source build-venv/bin/activate && make -C python -e develop'
+    - docker run --rm -v $PWD:/data fast5 bash -c 'virtualenv build-venv --system-site-packages && source build-venv/bin/activate && make -C python -e develop'
 
 script:
     - docker run --rm -v $PWD:/data fast5 bash -c 'src/hdf5-mod -f file.000.fast5 && src/f5-mod file.000.fast5 && src/f5ls file.000.fast5 && src/f5ls-full file.000.fast5'
diff --git a/.version_files b/.version_files
index c822d11..3491213 100644
--- a/.version_files
+++ b/.version_files
@@ -1,2 +1,2 @@
 VERSION
-python/fast5/version.py
+src/fast5_version.hpp
diff --git a/README.org b/README.org
index 27a8903..bd589ba 100644
--- a/README.org
+++ b/README.org
@@ -4,91 +4,38 @@
 
 [[http://travis-ci.org/mateidavid/fast5][http://travis-ci.org/mateidavid/fast5.svg?branch=master]] [[https://tldrlegal.com/license/mit-license][http://img.shields.io/:license-mit-blue.svg]]
 
-A lightweight C++11 library to read raw signal data from Oxford Nanopore's Fast5 files.
+A lightweight C++ library for accessing Oxford Nanopore Technologies sequencing data.
 
-*** C++
+*** Installation
 
-**** Installation
+**** Core C++ Library
 
-This is a header-only library. You only need to copy [[file:src/fast5.hpp][src/fast5.hpp]] and [[file:src/hdf5_tools.hpp][src/hdf5_tools.hpp]] into your C++ project.
+The core library is written in header-only C++11, and it enables read-write access to fast5 files from C++ code.
 
-**** Usage
+The core library requires no installation, other than setting the compiler's include path to find [[file:src/fast5.hpp][fast5.hpp]]. See [[file:src/f5ls-full.cpp][f5ls-full.cpp]] for an example.
 
-See [[file:src/f5ls.cpp][src/f5ls.cpp]] for an example.
+The core library is built on top of the HDF5 C API, so the compiler must also be able to find the HDF5 headers and libraries. See the project's Travis CI [[file:.travis.Dockerfile.in][Dockerfile]] for an example of how to install prerequisites on Debian Jessie.
 
-*** Python Wrapper
+**** Python Wrapper
 
-An optional python wrapper for this library is available through Boost.Python. The wrapper currently implements only read-only access.
+The Python wrapper for the core library enables read-only access to fast5 files from Python code. The wrapper also adds several Python scripts:
 
-**** Installation
+- [[file:python/bin/f5ls][f5ls]] :: Summarize contents of fast5 files.
+- [[file:python/bin/f5pack][f5pack]] :: Pack and unpack fast5 files. For a detailed description of this tool, see our [[http://simpsonlab.github.io/2017/02/27/packing_fast5/][blog post]].
 
-#+BEGIN_EXAMPLE
-cd python
-HDF5_DIR=/usr/local BOOST_DIR=/usr/local make develop-user
-#+END_EXAMPLE
-
-Notes:
-
-- HDF5 and Boost.Python must be available, and their locations can be passed on to the Python setup process using the environment variables =HDF5_DIR= and =BOOST_DIR=. Alternatively, the respective include directories, library directories, and library names may be specified explicitly with: =HDF5_INCLUDE_DIR=, =HDF5_LIB_DIR=, =HDF5_LIB=, =BOOST_INCLUDE_DIR=, =BOOST_LIB_DIR=, =BOOST_PYTHON_LIB=. For details, see [[file:python/setup.py][python/setup.py]] and [[file:.travis.yml][.travis.yml]].
-
-- To install =fast5= as a package in a virtualenv, use the target =develop=. To install as a user package, use the target =develop-user=. For details, see [[file:python/Makefile][python/Makefile]].
-
-**** Usage
+The Python wrapper also depends on Cython. To build the Python wrapper:
 
 #+BEGIN_EXAMPLE
-import fast5
-f = fast5.File("file.000.fast5")
-print(f.file_version())
-print(f.have_eventdetection_events())
+# set paths to HDF5
+export HDF5_INCLUDE_DIR=/path/to/hdf5.h
+export HDF5_LIB_DIR=/path/to/libhdf5.so
+
+# either
+make -C python develop         # to install in a virtualenv
+# or
+make -C python develop-user    # to install in user mode
 #+END_EXAMPLE
 
-*** f5dump
-
-The program =f5dump= can be used to list and extract some of the contents of =fast5= files, including: raw signals, event-detection events, basecall events, and basecall fastq.
-
-**** Installation
-
-In addition to this =fast5= repository, you will need HDF5 (headers and libraries), as well the the header-only libraries [[https://github.com/mateidavid/tclap.git][TCLAP]] and [[https://github.com/mateidavid/hpptools.git][HPPTOOLS]]. To build =f5dump=, run =make f5dump [VAR1=VALUE1] ...=, where =VAR=-s are used to instruct the [[file:src/Makefile][Makefile]] where to find various dependencies.
-
-**** Usage
-
-In each run, =f5dump= requires exactly one command among: =--ls/--id/--rw/--ed/--ev/--fq=. If no command is given, =--ls= is assumed. It also requires exactly one =fast5= file to inspect.
-
-- In =--ls= mode, =f5dump= lists some of the contents of the file. Sample output:
-
-  #+BEGIN_EXAMPLE
-rw      Read_1019
-ed      000     Read_1019
-ed      001     Read_1019
-bc2d    2D_000  2       1       1       1D_000
-bc1d    1D_000  0       1       1       001
-bc1d    1D_000  1       1       1       001
-#+END_EXAMPLE
-
-  Explanations:
-
-  - =rw=: the file contains raw samples from one read, =Read_1019=.
-
-  - =ed=: the file contains 2 event-detection groups, =000= and =001=, both run on raw samples from =Read_1019=.
-
-  - =bc2d=: the file contains 1 basecall group =2D_000= with 2D data (=2=); this group has both fastq data and events (=1 1=); its corresponding 1D basecall group is =1D_000=.
-
-  - =bc1d=: the file contains 1 basecall group =1D_000= with 1D data for each strand (=0= and =1=); each contains fastq data and events (=1 1=); its corresponding event-detection group is =001=.
-
-  Notes:
-
-  - The group names are suffixes understood by the =fast5= library. E.g., the basecall group =RNN_1D_000= would correspond to the HDF5 group =/Analyses/Basecall_RNN_1D_000=.
-
-  - Not all the links between groups are always available. Notably, some =fast5= files are missing the link between a 1D basecall group and its original event-detection group.
-
-- In =--id= mode, =f5dump= dumps =channel_id= and =tracking_id= metadata.
-
-- In =--rw/--ed/--ev/--fq= mode, =f5dump= dumps: raw signal data/event-detection events/basecall events/basecall fastq data.
-
-- Optional selector flags =--gr/--st/--rn= can be used to specify a group name, strand (=0/1/2=), or read name. Not all combinations make sense: e.g, =--st= is ignored for event-detection data.
-
-- Optional output flags =--time-int/--curr-int/--rw-time= can modify the output: convert times into integers, dump raw signal currents in internal integer encoding, and add time stamps to raw signals.
-
 *** License
 
 [[file:LICENSE][MIT License]].
diff --git a/VERSION b/VERSION
index 416bfb0..b616048 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-0.5.9
+0.6.2
diff --git a/python/.gitignore b/python/.gitignore
index 44dafa0..f227ead 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -1,3 +1,4 @@
+fast5.cpp
 build/
 dist/
 *.egg-info/
diff --git a/python/Makefile b/python/Makefile
index 8c4047e..e3fc9b8 100755
--- a/python/Makefile
+++ b/python/Makefile
@@ -1,3 +1,10 @@
+#
+# Part of: https://github.com/mateidavid/fast5
+#
+# (c) 2017: Matei David, Ontario Institute for Cancer Research
+# MIT License
+#
+
 .SUFFIXES:
 MAKEFLAGS += -r
 SHELL := /bin/bash
@@ -16,16 +23,21 @@ help: ## This help.
 
 clean: ## Remove build products
 	${PYTHON} setup.py clean
-	rm -rf fast5.egg-info build dist
-	find fast5/ \( -name '*.pyc' -o -name '*.so' \) -delete
+	rm -rf build dist fast5.egg-info fast5*.so
 
 check_virtualenv:
 	@[ "$$VIRTUAL_ENV" ] || { echo "not in a virtualenv" >&2; exit 1; }
 
-develop: check_virtualenv clean ## Install in develop mode to current virtualenv
+install: check_virtualenv ## Install to current virtualenv
+	${PYTHON} setup.py install
+
+install-user: ## Install to current user
+	${PYTHON} setup.py install --user
+
+develop: check_virtualenv ## Install in develop mode to current virtualenv
 	${PYTHON} setup.py develop
 
-develop-user: clean ## Install in develop mode to current user
+develop-user: ## Install in develop mode to current user
 	${PYTHON} setup.py develop --user
 
 develop-uninstall: check_virtualenv clean ## Uninstall from current virtualenv
diff --git a/python/bin/f5ls b/python/bin/f5ls
new file mode 100755
index 0000000..14e78fc
--- /dev/null
+++ b/python/bin/f5ls
@@ -0,0 +1,249 @@
+#!/usr/bin/env python
+
+#
+# Part of: https://github.com/mateidavid/fast5
+#
+# (c) 2017: Matei David, Ontario Institute for Cancer Research
+# MIT License
+#
+
+import argparse
+import datetime
+import dateutil.parser
+import logging
+import math
+import os
+import sys
+
+import fast5
+
+import signal
+signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+
+def add_fast5(fn, rel_dn, args):  # accept one fast5 file; rel_dn is only logged, args is unused
+    logger.info("adding fast5 fn=" + fn + " rel_dn=" + rel_dn)
+    return [fn]  # single-element list, so callers can accumulate results with +=
+
+def add_dir(dn, args):
+    l = list()
+    logger.info("processing dir dn=" + dn)
+    for t in os.walk(dn):
+        rel_dn = os.path.relpath(t[0], dn)
+        for rel_fn in t[2]:
+            fn = os.path.join(t[0], rel_fn)
+            if fast5.File.is_valid_file(fn):
+                l += add_fast5(fn, rel_dn, args)
+        if not args.recurse:
+            break
+    return l
+
+def add_fofn(fn, args):
+    l = list()
+    logger.info("processing fofn fn=" + fn)
+    if fn != "-":
+        f = open(fn)
+    else:
+        f = sys.stdin
+    for p in f:
+        p = p.strip()
+        if fast5.File.is_valid_file(p):
+            l += add_fast5(p, "", args)
+        else:
+            logger.warning("fofn line not a fast5 file: " + p)
+    if fn != "-":
+        f.close()
+    return l
+
+def add_paths(pl, args):
+    l = list()
+    if len(pl) == 0:
+        pl.append("-")
+    for p in pl:
+        if os.path.isdir(p):
+            l += add_dir(p, args)
+        elif fast5.File.is_valid_file(p):
+            l += add_fast5(p, "", args)
+        else:
+            l += add_fofn(p, args)
+    return l
+
+def stat_file(ifn, args):
+    d = dict()
+    try:
+        f = fast5.File(ifn)
+        # cid params
+        d["cid"] = f.get_channel_id_params()
+        d["tid"] = f.get_tracking_id_params()
+        # raw samples
+        d["rs_rn_l"] = f.get_raw_samples_read_name_list()
+        d["rs"] = dict()
+        for rn in d["rs_rn_l"]:
+            d["rs"][rn] = dict()
+            d["rs"][rn]["params"] = f.get_raw_samples_params(rn)
+            d["rs"][rn]["packed"] = not f.have_raw_samples_unpack(rn)
+        # basecall groups
+        d["bc_gr_l"] = f.get_basecall_group_list()
+        d["bc"] = dict()
+        d["bc_desc"] = dict()
+        d["bc_summary"] = dict()
+        for gr in d["bc_gr_l"]:
+            d["bc"][gr] = dict()
+            d["bc"][gr]["desc"] = f.get_basecall_group_description(gr)
+            d["bc"][gr]["summary"] = f.get_basecall_summary(gr)
+            d["bc"][gr]["start"] = dict()
+            d["bc"][gr]["length"] = dict()
+            d["bc"][gr]["count"] = dict()
+            d["bc"][gr]["packed_fastq"] = dict()
+            d["bc"][gr]["packed_events"] = dict()
+            for st in [0, 1, 2]:
+                d["bc"][gr]["packed_fastq"][st] = not f.have_basecall_fastq_unpack(st, gr)
+                if st < 2:
+                    d["bc"][gr]["packed_events"][st] = not f.have_basecall_events_unpack(st, gr)
+                    if d["bc"][gr]["desc"]["have_events"][st]:
+                        ev_params = f.get_basecall_events_params(st, gr)
+                        d["bc"][gr]["start"][st] = ev_params["start_time"]
+                        d["bc"][gr]["length"][st] = ev_params["duration"]
+                        if d["bc"][gr]["start"][st] < 1e-3:
+                            d["bc"][gr]["start"][st] = float('nan')
+                        if d["bc"][gr]["length"][st] < 1e-3:
+                            d["bc"][gr]["length"][st] = float('nan')
+                        if False:
+                            e = f.get_basecall_events(st, gr)
+                            d["bc"][gr]["start"][st] = e[0]["start"]
+                            d["bc"][gr]["length"][st] = e[-1]["start"] + e[-1]["length"] - e[0]["start"]
+                            d["bc"][gr]["count"][st] = len(e)
+                else:
+                    d["bc"][gr]["packed_alignment"] = not f.have_basecall_alignment_unpack(gr)
+        # eventdetection groups
+        d["ed_gr_l"] = f.get_eventdetection_group_list()
+        d["ed"] = dict()
+        for gr in d["ed_gr_l"]:
+            d["ed"][gr] = dict()
+            d["ed"][gr]["rn_l"] = f.get_eventdetection_read_name_list(gr)
+            d["ed"][gr]["rn"] = dict()
+            for rn in d["ed"][gr]["rn_l"]:
+                d["ed"][gr]["rn"][rn] = dict()
+                d["ed"][gr]["rn"][rn]["packed"] = not f.have_eventdetection_events_unpack(gr, rn)
+    except RuntimeError as e:
+        d = dict()
+    return d
+
+def as_time(v, r):  # format v/r (taken as seconds) as H:MM:SS.mmm; a nan v is returned as 'nan'
+    if math.isnan(v):
+        return 'nan'
+    x = float(v)/r
+    m, s = divmod(x, 60)
+    h, m = divmod(m, 60)
+    return "%d:%02d:%02d.%03d" % (h, m, s, (x * 1000) % 1000)  # %d truncates each float field
+
+def print_path(p, v, args):  # print one record: path elements p, then delim[0], then value v
+    if type(v) == list:  # list values are joined with delim[1], like the path elements
+        print(args.delim[1].join(str(e) for e in p) + args.delim[0] + args.delim[1].join(str(e) for e in v))
+    else:
+        print(args.delim[1].join(str(e) for e in p) + args.delim[0] + str(v))
+
+def list_file(ifn, include_fn, args):
+    d = stat_file(ifn, args)
+    if include_fn:
+        print_path(["file"], ifn, args)
+    if "cid" not in d:
+        return
+    # tid
+    for k in ["device_id", "asic_id", "flow_cell_id", "exp_script_purpose"]:
+        if k not in d["tid"]:
+            continue
+        print_path(["tid", k], d["tid"][k], args)
+    if "exp_start_time" in d["tid"]:
+        if 'T' in d["tid"]["exp_start_time"]:
+            exp_start_time = dateutil.parser.parse(d["tid"]["exp_start_time"])
+        else:
+            exp_start_time = datetime.datetime.fromtimestamp(int(d["tid"]["exp_start_time"]))
+        print_path(["tid", "exp_start_date"], exp_start_time.date().isoformat(), args)
+        print_path(["tid", "exp_start_time"], exp_start_time.time().isoformat(), args)
+    # cid
+    for k in ["channel_number", "sampling_rate"]:
+        if k not in d["cid"]:
+            continue
+        print_path(["cid", k], d["cid"][k], args)
+    sampling_rate = d["cid"]["sampling_rate"]
+    # rs
+    for rn in d["rs_rn_l"]:
+        print_path(["rs", rn, "packed"], int(d["rs"][rn]["packed"]), args)
+        print_path(["rs", rn, "read_number"], d["rs"][rn]["params"]["read_number"], args)
+        print_path(["rs", rn, "read_id"], d["rs"][rn]["params"]["read_id"], args)
+        print_path(["rs", rn, "start"], as_time(d["rs"][rn]["params"]["start_time"], sampling_rate), args)
+        print_path(["rs", rn, "length"], as_time(d["rs"][rn]["params"]["duration"], sampling_rate), args)
+    # bc
+    for gr in d["bc_gr_l"]:
+        print_path(["bc", gr, "id"], d["bc"][gr]["desc"]["name"] + ":" + d["bc"][gr]["desc"]["version"], args)
+        for st in [0, 1, 2]:
+            if not d["bc"][gr]["desc"]["have_subgroup"][st]:
+                continue
+            # fastq
+            fq_len = 0
+            print_path(["bc", gr, st, "fastq", "packed"], int(d["bc"][gr]["packed_fastq"][st]), args)
+            if d["bc"][gr]["desc"]["have_fastq"][st]:
+                for k in ["sequence_length", "mean_qscore"]:
+                    fk = ["basecall_1d_template", "basecall_1d_complement", "basecall_2d"][st] + "/" + k
+                    if fk not in d["bc"][gr]["summary"]:
+                        continue
+                    print_path(["bc", gr, st, "fastq", k], d["bc"][gr]["summary"][fk], args)
+                    if k == "sequence_length":
+                        fq_len = d["bc"][gr]["summary"][fk]
+            if st < 2:
+                # events
+                print_path(["bc", gr, st, "events", "packed"], int(d["bc"][gr]["packed_events"][st]), args)
+                if d["bc"][gr]["desc"]["have_events"][st]:
+                    for k in ["start", "length"]:
+                        print_path(["bc", gr, st, "events", k], as_time(float(d["bc"][gr][k][st]), 1.0), args)
+                    if st in d["bc"][gr]["count"]:
+                        print_path(["bc", gr, st, "events", "count"], d["bc"][gr]["count"][st], args)
+                    print_path(["bc", gr, st, "bps"], "%.2f" % (float(fq_len) / d["bc"][gr]["length"][st]), args)
+                # model
+                print_path(["bc", gr, st, "model"], int(d["bc"][gr]["desc"]["have_model"][st]), args)
+            else:
+                print_path(["bc", gr, st, "alignment", "packed"], int(d["bc"][gr]["packed_alignment"]), args)
+        if d["bc"][gr]["desc"]["have_subgroup"][2]:
+            print_path(["bc", gr, "bc_1d_gr"], d["bc"][gr]["desc"]["bc_1d_gr"], args)
+        if d["bc"][gr]["desc"]["have_subgroup"][0] or d["bc"][gr]["desc"]["have_subgroup"][1]:
+            print_path(["bc", gr, "ed_gr"], d["bc"][gr]["desc"]["ed_gr"], args)
+    # ed
+    for gr in d["ed_gr_l"]:
+        for rn in d["ed"][gr]["rn_l"]:
+            print_path(["ed", gr, rn, "packed"], int(d["ed"][gr]["rn"][rn]["packed"]), args)
+
+
+if __name__ == "__main__":
+    description = """
+    Summarize contents of ONT fast5 files.
+    """
+    parser = argparse.ArgumentParser(description=description, epilog="")
+    parser.add_argument("--log-level", default="warning",
+                        help="log level")
+    #
+    parser.add_argument("--delim", default="\t/",
+                        help="Delimiters list; first char used between path and value, second char used between path elements.")
+    parser.add_argument("-R", "--recurse", action="store_true",
+                        help="Recurse in input directories.")
+    #
+    parser.add_argument("inputs", nargs="*", default=[], action="append",
+                        help="Input directories, fast5 files, or files of fast5 file names.")
+    args = parser.parse_args()
+
+    numeric_log_level = getattr(logging, args.log_level.upper(), None)
+    if not isinstance(numeric_log_level, int):
+        raise ValueError("Invalid log level: '%s'" % args.log_level)
+    logging.basicConfig(level=numeric_log_level,
+                        format="%(asctime)s %(name)s.%(levelname)s %(message)s",
+                        datefmt="%Y/%m/%d %H:%M:%S")
+    logger = logging.getLogger(os.path.basename(__file__))
+    fast5.Logger.set_levels_from_options([args.log_level.lower()])
+    # fix delim
+    args.delim = list(args.delim)
+    while len(args.delim) < 2:
+        args.delim.append("")
+    logger.debug("args: " + str(args))
+
+    fl = add_paths(args.inputs[0], args)
+    for ifn in fl:
+        list_file(ifn, len(fl) > 1, args)
diff --git a/python/bin/f5pack b/python/bin/f5pack
new file mode 100755
index 0000000..e31bb90
--- /dev/null
+++ b/python/bin/f5pack
@@ -0,0 +1,240 @@
+#!/usr/bin/env python
+
+#
+# Part of: https://github.com/mateidavid/fast5
+#
+# (c) 2017: Matei David, Ontario Institute for Cancer Research
+# MIT License
+#
+
+import argparse
+import logging
+import os
+import sys
+
+import fast5
+
+import signal
+signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+
+policy_d = {
+    "drop": 0,
+    "pack": 1,
+    "unpack": 2,
+    "copy": 3,
+}
+
+def add_fast5(fn, rel_dn, args):  # pair one input fast5 file with its output path
+    logger.info("adding fast5 fn=" + fn + " rel_dn=" + rel_dn)
+    return [(fn, os.path.normpath(os.path.join(args.output, rel_dn, os.path.basename(fn))))]  # output: args.output/rel_dn/<basename of fn>
+
+def add_dir(dn, args):
+    l = list()
+    logger.info("processing dir dn=" + dn)
+    for t in os.walk(dn):
+        rel_dn = os.path.relpath(t[0], dn)
+        for rel_fn in t[2]:
+            fn = os.path.join(t[0], rel_fn)
+            if fast5.File.is_valid_file(fn):
+                l += add_fast5(fn, rel_dn, args)
+        if not args.recurse:
+            break
+    return l
+
+def add_fofn(fn, args):
+    l = list()
+    logger.info("processing fofn fn=" + fn)
+    if fn != "-":
+        f = open(fn)
+    else:
+        f = sys.stdin
+    for p in f:
+        p = p.strip()
+        if fast5.File.is_valid_file(p):
+            l += add_fast5(p, "", args)
+        else:
+            logger.warning("fofn line not a fast5 file: " + p)
+    if fn != "-":
+        f.close()
+    return l
+
+def add_paths(pl, args):
+    l = list()
+    if len(pl) == 0:
+        pl.append("-")
+    for p in pl:
+        if os.path.isdir(p):
+            l += add_dir(p, args)
+        elif fast5.File.is_valid_file(p):
+            l += add_fast5(p, "", args)
+        else:
+            l += add_fofn(p, args)
+    return l
+
+if __name__ == "__main__":
+    description = """
+    Pack and unpack ONT fast5 files.
+    """
+    parser = argparse.ArgumentParser(description=description, epilog="")
+    parser.add_argument("--log", default="warning",
+                        help="log level")
+    #
+    parser.add_argument("--pack", action="store_true",
+                        help="Pack data (default).")
+    parser.add_argument("--unpack", action="store_true",
+                        help="Unpack data.")
+    parser.add_argument("--archive", action="store_true",
+                        help="Pack raw samples data, drop rest.")
+    parser.add_argument("--fastq", action="store_true",
+                        help="Pack fastq data, drop rest.")
+    #
+    parser.add_argument("--rs", choices=["drop", "pack", "unpack", "copy"],
+                        help="Policy for raw samples.")
+    parser.add_argument("--ed", choices=["drop", "pack", "unpack", "copy"],
+                        help="Policy for eventdetection events.")
+    parser.add_argument("--fq", choices=["drop", "pack", "unpack", "copy"],
+                        help="Policy for fastq.")
+    parser.add_argument("--ev", choices=["drop", "pack", "unpack", "copy"],
+                        help="Policy for basecall events.")
+    parser.add_argument("--al", choices=["drop", "pack", "unpack", "copy"],
+                        help="Policy for basecall alignment.")
+    #
+    parser.add_argument("--force", action="store_true",
+                        help="Overwrite existing destination files.")
+    parser.add_argument("--qv-bits", type=int,
+                        help="QV bits to keep.")
+    parser.add_argument("--p-model-state-bits", type=int,
+                        help="p_model_state bits to keep.")
+    parser.add_argument("-R", "--recurse", action="store_true",
+                        help="Recurse in input directories.")
+    parser.add_argument("-o", "--output", required=True,
+                        help="Output directory.")
+    #
+    parser.add_argument("inputs", nargs="*", default=[], action="append",
+                        help="Input directories, fast5 files, or files of fast5 file names. For input directories, the subdirectory hierarchy (if traversed with --recurse) is recreated in the output directory.")
+    args = parser.parse_args()
+
+    numeric_log_level = getattr(logging, args.log.upper(), None)
+    if not isinstance(numeric_log_level, int):
+        raise ValueError("Invalid log level: '%s'" % args.log)
+    logging.basicConfig(level=numeric_log_level,
+                        format="%(asctime)s %(name)s.%(levelname)s %(message)s",
+                        datefmt="%Y/%m/%d %H:%M:%S")
+    logger = logging.getLogger(os.path.basename(__file__))
+    fast5.Logger.set_levels_from_options([args.log.lower()])
+    logger.debug("args: " + str(args))
+
+    if args.pack + args.unpack + args.archive + args.fastq > 1:
+        sys.exit("At most one of --pack/--unpack/--archive/--fastq may be specified")
+    if (not args.pack and
+        not args.unpack and
+        not args.archive and
+        not args.fastq and
+        args.rs is None and
+        args.ed is None and
+        args.fq is None and
+        args.ev is None and
+        args.al is None):
+        args.pack = True
+    if args.pack:
+        if args.rs is None: args.rs = "pack"
+        if args.ed is None: args.ed = "pack"
+        if args.fq is None: args.fq = "pack"
+        if args.ev is None: args.ev = "pack"
+        if args.al is None: args.al = "pack"
+    if args.unpack:
+        if args.rs is None: args.rs = "unpack"
+        if args.ed is None: args.ed = "unpack"
+        if args.fq is None: args.fq = "unpack"
+        if args.ev is None: args.ev = "unpack"
+        if args.al is None: args.al = "unpack"
+    if args.archive:
+        if args.rs is None: args.rs = "pack"
+        if args.ed is None: args.ed = "drop"
+        if args.fq is None: args.fq = "drop"
+        if args.ev is None: args.ev = "drop"
+        if args.al is None: args.al = "drop"
+    if args.fastq:
+        if args.rs is None: args.rs = "drop"
+        if args.ed is None: args.ed = "drop"
+        if args.fq is None: args.fq = "pack"
+        if args.ev is None: args.ev = "drop"
+        if args.al is None: args.al = "drop"
+    if args.rs is None: args.rs = "drop"
+    if args.ed is None: args.ed = "drop"
+    if args.fq is None: args.fq = "drop"
+    if args.ev is None: args.ev = "drop"
+    if args.al is None: args.al = "drop"
+    logger.info("rs: " + args.rs)
+    logger.info("ed: " + args.ed)
+    logger.info("fq: " + args.fq)
+    logger.info("ev: " + args.ev)
+    logger.info("al: " + args.al)
+    fp = fast5.File_Packer(
+        policy_d[args.rs],
+        policy_d[args.ed],
+        policy_d[args.fq],
+        policy_d[args.ev],
+        policy_d[args.al],
+    )
+    if args.force: fp.set_force(True)
+    if args.qv_bits: fp.set_qv_bits(args.qv_bits)
+    if args.p_model_state_bits: fp.set_p_model_state_bits(args.p_model_state_bits)
+    fl = add_paths(args.inputs[0], args)
+    errored_files_cnt = 0
+    input_bytes = 0
+    output_bytes = 0
+    for t in fl:
+        ifn = t[0]
+        ofn = t[1]
+        odn = os.path.dirname(t[1])
+        if not os.path.isdir(odn):
+            os.makedirs(odn)
+        logger.info("packing ifn=" + ifn + " ofn=" + ofn)
+        try:
+            fp.run(ifn, ofn)
+        except RuntimeError as e:
+            logger.warning("error packing " + ifn + ": " + str(e))
+            os.remove(ofn)
+            errored_files_cnt += 1
+            continue
+        input_bytes += os.stat(ifn).st_size
+        output_bytes += os.stat(ofn).st_size
+
+    cnt = fp.get_counts()
+    cnt_total_bits = dict()
+    output_ds_bytes = 0
+    print("bp_seq_count\t%d" % cnt["bp_seq_count"])
+    if cnt["bp_seq_count"] == 0:
+        cnt["bp_seq_count"] = float('nan')
+    for cl in [["rs_count", "rs_bits"],
+               ["ed_count", "ed_skip_bits", "ed_len_bits"],
+               ["fq_count", "fq_bp_bits", "fq_qv_bits"],
+               ["ev_count", "ev_rel_skip_bits", "ev_skip_bits", "ev_len_bits", "ev_move_bits", "ev_p_model_state_bits"],
+               ["al_count", "al_template_step_bits", "al_complement_step_bits", "al_move_bits"]]:
+        cnt_total_bits[cl[0]] = 0
+        if cnt[cl[0]] == 0:
+            continue
+        print(cl[0] + "\t%d" % cnt[cl[0]])
+        for c in cl[1:]:
+            cnt_total_bits[cl[0]] += cnt[c]
+            if cnt[c] == 0:
+                continue
+            print((c + "\t%d\t%.2f\t%.2f") % (cnt[c], float(cnt[c]) / cnt[cl[0]], float(cnt[c])/cnt["bp_seq_count"]))
+        output_ds_bytes += cnt_total_bits[cl[0]] / 8
+        print(cl[0].split('_')[0] + "_total_bits\t%d\t%.2f\t%.2f" % (cnt_total_bits[cl[0]], float(cnt_total_bits[cl[0]])/cnt[cl[0]], float(cnt_total_bits[cl[0]])/cnt["bp_seq_count"]))
+
+    if cnt["rs_total_duration"] > .001 and cnt["rs_called_duration"] > .001:
+        print("rs_total_duration\t%.2f" % cnt["rs_total_duration"])
+        print("rs_called_duration\t%.2f" % cnt["rs_called_duration"])
+        print("rs_frac_called\t%.2f" % (cnt["rs_called_duration"] / cnt["rs_total_duration"]))
+        print("bp_per_sec\t%.2f" % (float(cnt["bp_seq_count"]) / cnt["rs_called_duration"]))
+    print("input_bytes\t%d" % input_bytes)
+    print("output_bytes\t%d" % output_bytes)
+    print("output_overhead_bytes\t%d" % (output_bytes - output_ds_bytes))
+
+    print("processed_files\t%d" % len(fl))
+    if errored_files_cnt > 0:
+        print("errored_files\t%d" % errored_files_cnt)
+
+    sys.exit(errored_files_cnt > 0)
diff --git a/python/fast5/.version.py.in b/python/fast5/.version.py.in
deleted file mode 100644
index d8ed4d2..0000000
--- a/python/fast5/.version.py.in
+++ /dev/null
@@ -1 +0,0 @@
-__version__ = '${VERSION}'
diff --git a/python/fast5/__init__.py b/python/fast5/__init__.py
deleted file mode 100755
index 14e4b5d..0000000
--- a/python/fast5/__init__.py
+++ /dev/null
@@ -1,10 +0,0 @@
-"""
-fast5.__init__.py
-(c) 2016: Matei David, Ontario Institute for Cancer Research
-MIT License
-"""
-
-from .version import __version__
-from fast5 import *
-
-__version_info__ = tuple([int(num) for num in __version__.split('.')]) 
diff --git a/python/fast5/fast5.pyx b/python/fast5/fast5.pyx
new file mode 100644
index 0000000..aa64764
--- /dev/null
+++ b/python/fast5/fast5.pyx
@@ -0,0 +1,523 @@
+#
+# Part of: https://github.com/mateidavid/fast5
+#
+# (c) 2017: Matei David, Ontario Institute for Cancer Research
+# MIT License
+#
+
+from cython.operator cimport dereference as deref
+
+from libc.stdint cimport int16_t
+from libcpp cimport bool
+from libcpp.map cimport map as cmap
+from libcpp.memory cimport unique_ptr
+from libcpp.string cimport string
+from libcpp.vector cimport vector
+
+cdef extern from "fast5.hpp" namespace "fast5":
+
+    cdef string cpp_version "fast5::version"
+
+    cppclass Cpp_Logger "logger::Logger":
+        @staticmethod
+        void set_levels_from_options(vector[string]) except +
+
+    ctypedef cmap[string, string] Attr_Map
+
+    struct Channel_Id_Params:
+        string channel_number
+        double digitisation
+        double offset
+        double range
+        double sampling_rate
+
+    ctypedef Attr_Map Tracking_Id_Params
+
+    ctypedef Attr_Map Sequences_Params
+
+    struct Raw_Samples_Params:
+        string read_id
+        long long read_number
+        long long start_mux
+        long long start_time
+        long long duration
+
+    ctypedef float Raw_Sample
+
+    ctypedef int16_t Raw_Int_Sample
+
+    struct EventDetection_Events_Params:
+        string read_id
+        long long read_number
+        long long scaling_used
+        long long start_mux
+        long long start_time
+        long long duration
+        double median_before
+        unsigned abasic_found
+
+    struct EventDetection_Event:
+        double mean
+        double stdv
+        long long start
+        long long length
+
+    struct Basecall_Model_Params:
+        double scale
+        double shift
+        double drift
+        double var
+        double scale_sd
+        double var_sd
+
+    struct Basecall_Model_State:
+        double level_mean
+        double level_stdv
+        double sd_mean
+        double sd_stdv
+        #char kmer[8]
+
+    struct Basecall_Events_Params:
+        double start_time
+        double duration
+
+    struct Basecall_Event:
+        double mean
+        double stdv
+        double start
+        double length
+        double p_model_state
+        long long move
+        #char model_state[8]
+
+    struct Basecall_Alignment_Entry:
+        long long template_index
+        long long complement_index
+        #char kmer[8]
+
+    struct Basecall_Group_Description:
+        string name
+        string version
+        string ed_gr
+        string bc_1d_gr
+        bool have_subgroup[3]
+        bool have_fastq[3]
+        bool have_events[3]
+        bool have_model[2]
+        bool have_alignment
+
+    cppclass Cpp_File "fast5::File":
+        Cpp_File() except +
+        Cpp_File(string) except +
+        Cpp_File(string, bool) except +
+
+        bool is_open()
+        bool is_rw()
+        string file_name()
+        void open(string) except +
+        void open(string, bool) except +
+        void create(string) except +
+        void create(string, bool) except +
+        void close() except +
+        @staticmethod
+        bool is_valid_file(string)
+
+        string file_version() except +
+
+        bool have_channel_id_params()
+        Channel_Id_Params get_channel_id_params()
+        bool have_sampling_rate()
+        double get_sampling_rate()
+
+        bool have_tracking_id_params()
+        Tracking_Id_Params get_tracking_id_params() except +
+
+        bool have_sequences_params()
+        Sequences_Params get_sequences_params() except +
+
+        vector[string] get_raw_samples_read_name_list()
+        bool have_raw_samples()
+        bool have_raw_samples(string)
+        bool have_raw_samples_unpack(string) except +
+        bool have_raw_samples_pack(string) except +
+        Raw_Samples_Params get_raw_samples_params() except +
+        Raw_Samples_Params get_raw_samples_params(string) except +
+        vector[Raw_Int_Sample] get_raw_int_samples() except +
+        vector[Raw_Int_Sample] get_raw_int_samples(string) except +
+        vector[Raw_Sample] get_raw_samples() except +
+        vector[Raw_Sample] get_raw_samples(string) except +
+
+        vector[string] get_eventdetection_group_list()
+        bool have_eventdetection_group()
+        bool have_eventdetection_group(string)
+        vector[string] get_eventdetection_read_name_list()
+        vector[string] get_eventdetection_read_name_list(string)
+        bool have_eventdetection_events()
+        bool have_eventdetection_events(string)
+        bool have_eventdetection_events(string, string)
+        bool have_eventdetection_events_unpack(string, string) except +
+        bool have_eventdetection_events_pack(string, string) except +
+        Attr_Map get_eventdetection_params() except +
+        Attr_Map get_eventdetection_params(string) except +
+        EventDetection_Events_Params get_eventdetection_events_params() except +
+        EventDetection_Events_Params get_eventdetection_events_params(string) except +
+        EventDetection_Events_Params get_eventdetection_events_params(string, string) except +
+        vector[EventDetection_Event] get_eventdetection_events() except +
+        vector[EventDetection_Event] get_eventdetection_events(string) except +
+        vector[EventDetection_Event] get_eventdetection_events(string, string) except +
+
+        vector[string] get_basecall_group_list()
+        bool have_basecall_group()
+        bool have_basecall_group(string)
+        vector[string] get_basecall_strand_group_list(unsigned)
+        bool have_basecall_strand_group(unsigned)
+        bool have_basecall_strand_group(unsigned, string)
+        Basecall_Group_Description get_basecall_group_description(string) except +
+        string get_basecall_1d_group(string)
+        string get_basecall_eventdetection_group(string)
+        Attr_Map get_basecall_params(string) except +
+        bool have_basecall_log(string)
+        string get_basecall_log(string) except +
+        Attr_Map get_basecall_config(string) except +
+        Attr_Map get_basecall_summary(string) except +
+
+        bool have_basecall_fastq(unsigned)
+        bool have_basecall_fastq(unsigned, string)
+        bool have_basecall_fastq_unpack(unsigned, string) except +
+        bool have_basecall_fastq_pack(unsigned, string) except +
+        string get_basecall_fastq(unsigned) except +
+        string get_basecall_fastq(unsigned, string) except +
+        bool have_basecall_seq(unsigned)
+        bool have_basecall_seq(unsigned, string)
+        string get_basecall_seq(unsigned) except +
+        string get_basecall_seq(unsigned, string) except +
+
+        bool have_basecall_model(unsigned)
+        bool have_basecall_model(unsigned, string)
+        string get_basecall_model_file(unsigned) except +
+        string get_basecall_model_file(unsigned, string) except +
+        Basecall_Model_Params get_basecall_model_params(unsigned) except +
+        Basecall_Model_Params get_basecall_model_params(unsigned, string) except +
+        vector[Basecall_Model_State] get_basecall_model(unsigned) except +
+        vector[Basecall_Model_State] get_basecall_model(unsigned, string) except +
+
+        bool have_basecall_events(unsigned)
+        bool have_basecall_events(unsigned, string)
+        bool have_basecall_events_unpack(unsigned, string) except +
+        bool have_basecall_events_pack(unsigned, string) except +
+        Basecall_Events_Params get_basecall_events_params(unsigned) except +
+        Basecall_Events_Params get_basecall_events_params(unsigned, string) except +
+        vector[Basecall_Event] get_basecall_events(unsigned) except +
+        vector[Basecall_Event] get_basecall_events(unsigned, string) except +
+
+        bool have_basecall_alignment()
+        bool have_basecall_alignment(string)
+        bool have_basecall_alignment_unpack(string) except +
+        bool have_basecall_alignment_pack(string) except +
+        vector[Basecall_Alignment_Entry] get_basecall_alignment() except +
+        vector[Basecall_Alignment_Entry] get_basecall_alignment(string) except +
+
+cdef extern from "File_Packer.hpp" namespace "fast5":
+
+    struct Counts "fast5::File_Packer::Counts":
+        size_t rs_count
+        size_t rs_bits
+        size_t ed_count
+        size_t ed_skip_bits
+        size_t ed_len_bits
+        size_t fq_count
+        size_t bp_seq_count
+        size_t fq_bp_bits
+        size_t fq_qv_bits
+        size_t ev_count
+        size_t ev_rel_skip_bits
+        size_t ev_skip_bits
+        size_t ev_len_bits
+        size_t ev_move_bits
+        size_t ev_p_model_state_bits
+        size_t al_count
+        size_t al_template_step_bits
+        size_t al_complement_step_bits
+        size_t al_move_bits
+        double rs_total_duration
+        double rs_called_duration
+
+    cppclass Cpp_File_Packer "fast5::File_Packer":
+
+        Cpp_File_Packer()
+        Cpp_File_Packer(int)
+        Cpp_File_Packer(int, int, int, int, int)
+
+        void set_check(bool)
+        void set_force(bool)
+        void set_qv_bits(unsigned)
+        void set_p_model_state_bits(unsigned)
+
+        void run(string, string) except +
+        void reset_counts()
+        Counts get_counts()
+
+__version__ = cpp_version
+
+cdef class Logger:
+    @staticmethod
+    def set_levels_from_options(s):
+        Cpp_Logger.set_levels_from_options(s)
+
+cdef class File:
+    cdef unique_ptr[Cpp_File] thisptr
+
+    def __init__(self, name=None, rw=None):
+        if name is None:
+            self.thisptr.reset(new Cpp_File())
+        elif rw is None:
+            self.thisptr.reset(new Cpp_File(name))
+        else:
+            self.thisptr.reset(new Cpp_File(name, rw))
+
+    def is_open(self):
+        return deref(self.thisptr).is_open()
+    def is_rw(self):
+        return deref(self.thisptr).is_rw()
+    def file_name(self):
+        return deref(self.thisptr).file_name()
+    def open(self, file_name, rw=None):
+        if rw is None:
+            return deref(self.thisptr).open(file_name)
+        else:
+            return deref(self.thisptr).open(file_name, rw)
+    def create(self, file_name, trunc=None):  # dispatch to the matching Cpp_File.create overload
+        if trunc is None:
+            return deref(self.thisptr).create(file_name)
+        else:
+            return deref(self.thisptr).create(file_name, trunc)
+    def close(self):
+        return deref(self.thisptr).close()
+    @staticmethod
+    def is_valid_file(s):
+        return Cpp_File.is_valid_file(s)
+
+    def file_version(self):
+        return deref(self.thisptr).file_version()
+
+    def have_channel_id_params(self):
+        return deref(self.thisptr).have_channel_id_params()
+    def get_channel_id_params(self):
+        return deref(self.thisptr).get_channel_id_params()
+
+    def have_tracking_id_params(self):
+        return deref(self.thisptr).have_tracking_id_params()
+    def get_tracking_id_params(self):
+        return deref(self.thisptr).get_tracking_id_params()
+
+    def have_sequences_params(self):
+        return deref(self.thisptr).have_sequences_params()
+    def get_sequences_params(self):
+        return deref(self.thisptr).get_sequences_params()
+
+    def get_raw_samples_read_name_list(self):
+        return deref(self.thisptr).get_raw_samples_read_name_list()
+    def have_raw_samples(self, rn=None):
+        if rn is None:
+            return deref(self.thisptr).have_raw_samples()
+        else:
+            return deref(self.thisptr).have_raw_samples(rn)
+    def have_raw_samples_unpack(self, rn):
+        return deref(self.thisptr).have_raw_samples_unpack(rn)
+    def have_raw_samples_pack(self, rn):
+        return deref(self.thisptr).have_raw_samples_pack(rn)
+    def get_raw_samples_params(self, rn=None):
+        if rn is None:
+            return deref(self.thisptr).get_raw_samples_params()
+        else:
+            return deref(self.thisptr).get_raw_samples_params(rn)
+    def get_raw_int_samples(self, rn=None):
+        if rn is None:
+            return deref(self.thisptr).get_raw_int_samples()
+        else:
+            return deref(self.thisptr).get_raw_int_samples(rn)
+    def get_raw_samples(self, rn=None):
+        if rn is None:
+            return deref(self.thisptr).get_raw_samples()
+        else:
+            return deref(self.thisptr).get_raw_samples(rn)
+
+    def get_eventdetection_group_list(self):
+        return deref(self.thisptr).get_eventdetection_group_list()
+    def have_eventdetection_group(self, gr=None):
+        if gr is None:
+            return deref(self.thisptr).have_eventdetection_group()
+        else:
+            return deref(self.thisptr).have_eventdetection_group(gr)
+    def get_eventdetection_params(self, gr=None):
+        if gr is None:
+            return deref(self.thisptr).get_eventdetection_params()
+        else:
+            return deref(self.thisptr).get_eventdetection_params(gr)
+    def get_eventdetection_read_name_list(self, gr=None):
+        if gr is None:
+            return deref(self.thisptr).get_eventdetection_read_name_list()
+        else:
+            return deref(self.thisptr).get_eventdetection_read_name_list(gr)
+    def have_eventdetection_events(self, gr=None, rn=None):
+        if gr is None:
+            return deref(self.thisptr).have_eventdetection_events()
+        elif rn is None:
+            return deref(self.thisptr).have_eventdetection_events(gr)
+        else:
+            return deref(self.thisptr).have_eventdetection_events(gr, rn)
+    def have_eventdetection_events_unpack(self, gr, rn):
+        return deref(self.thisptr).have_eventdetection_events_unpack(gr, rn)
+    def have_eventdetection_events_pack(self, gr, rn):
+        return deref(self.thisptr).have_eventdetection_events_pack(gr, rn)
+    def get_eventdetection_events_params(self, gr=None, rn=None):
+        if gr is None:
+            return deref(self.thisptr).get_eventdetection_events_params()
+        elif rn is None:
+            return deref(self.thisptr).get_eventdetection_events_params(gr)
+        else:
+            return deref(self.thisptr).get_eventdetection_events_params(gr, rn)
+    def get_eventdetection_events(self, gr=None, rn=None):
+        if gr is None:
+            return deref(self.thisptr).get_eventdetection_events()
+        elif rn is None:
+            return deref(self.thisptr).get_eventdetection_events(gr)
+        else:
+            return deref(self.thisptr).get_eventdetection_events(gr, rn)
+
+    def get_basecall_group_list(self):
+        return deref(self.thisptr).get_basecall_group_list()
+    def get_basecall_strand_group_list(self, st):
+        return deref(self.thisptr).get_basecall_strand_group_list(st)
+    def have_basecall_group(self, gr=None):
+        if gr is None:
+            return deref(self.thisptr).have_basecall_group()
+        else:
+            return deref(self.thisptr).have_basecall_group(gr)
+    def have_basecall_strand_group(self, st, gr=None):
+        if gr is None:
+            return deref(self.thisptr).have_basecall_strand_group(st)
+        else:
+            return deref(self.thisptr).have_basecall_strand_group(st, gr)
+    def get_basecall_group_description(self, gr):
+        return deref(self.thisptr).get_basecall_group_description(gr)
+    def get_basecall_1d_group(self, gr):
+        return deref(self.thisptr).get_basecall_1d_group(gr)
+    def get_basecall_eventdetection_group(self, gr):
+        return deref(self.thisptr).get_basecall_eventdetection_group(gr)
+    def get_basecall_params(self, gr):
+        return deref(self.thisptr).get_basecall_params(gr)
+    def get_basecall_log(self, gr):
+        return deref(self.thisptr).get_basecall_log(gr)
+    def get_basecall_config(self, gr):
+        return deref(self.thisptr).get_basecall_config(gr)
+    def get_basecall_summary(self, gr):
+        return deref(self.thisptr).get_basecall_summary(gr)
+
+    def have_basecall_fastq(self, st, gr=None):
+        if gr is None:
+            return deref(self.thisptr).have_basecall_fastq(st)
+        else:
+            return deref(self.thisptr).have_basecall_fastq(st, gr)
+    def have_basecall_fastq_unpack(self, st, gr):
+        return deref(self.thisptr).have_basecall_fastq_unpack(st, gr)
+    def have_basecall_fastq_pack(self, st, gr):
+        return deref(self.thisptr).have_basecall_fastq_pack(st, gr)
+    def get_basecall_fastq(self, st, gr=None):
+        if gr is None:
+            return deref(self.thisptr).get_basecall_fastq(st)
+        else:
+            return deref(self.thisptr).get_basecall_fastq(st, gr)
+    def have_basecall_seq(self, st, gr=None):
+        if gr is None:
+            return deref(self.thisptr).have_basecall_seq(st)
+        else:
+            return deref(self.thisptr).have_basecall_seq(st, gr)
+    def get_basecall_seq(self, st, gr=None):
+        if gr is None:
+            return deref(self.thisptr).get_basecall_seq(st)
+        else:
+            return deref(self.thisptr).get_basecall_seq(st, gr)
+
+    def have_basecall_model(self, st, gr=None):
+        if gr is None:
+            return deref(self.thisptr).have_basecall_model(st)
+        else:
+            return deref(self.thisptr).have_basecall_model(st, gr)
+    def get_basecall_model_file(self, st, gr=None):
+        if gr is None:
+            return deref(self.thisptr).get_basecall_model_file(st)
+        else:
+            return deref(self.thisptr).get_basecall_model_file(st, gr)
+    def get_basecall_model_params(self, st, gr=None):
+        if gr is None:
+            return deref(self.thisptr).get_basecall_model_params(st)
+        else:
+            return deref(self.thisptr).get_basecall_model_params(st, gr)
+    def get_basecall_model(self, st, gr=None):
+        if gr is None:
+            return deref(self.thisptr).get_basecall_model(st)
+        else:
+            return deref(self.thisptr).get_basecall_model(st, gr)
+
+    def have_basecall_events(self, st, gr=None):
+        if gr is None:
+            return deref(self.thisptr).have_basecall_events(st)
+        else:
+            return deref(self.thisptr).have_basecall_events(st, gr)
+    def have_basecall_events_unpack(self, st, gr):
+        return deref(self.thisptr).have_basecall_events_unpack(st, gr)
+    def have_basecall_events_pack(self, st, gr):
+        return deref(self.thisptr).have_basecall_events_pack(st, gr)
+    def get_basecall_events_params(self, st, gr=None):
+        if gr is None:
+            return deref(self.thisptr).get_basecall_events_params(st)
+        else:
+            return deref(self.thisptr).get_basecall_events_params(st, gr)
+    def get_basecall_events(self, st, gr=None):
+        if gr is None:
+            return deref(self.thisptr).get_basecall_events(st)
+        else:
+            return deref(self.thisptr).get_basecall_events(st, gr)
+
+    def have_basecall_alignment(self, gr=None):
+        if gr is None:
+            return deref(self.thisptr).have_basecall_alignment()
+        else:
+            return deref(self.thisptr).have_basecall_alignment(gr)
+    def have_basecall_alignment_unpack(self, gr):
+        return deref(self.thisptr).have_basecall_alignment_unpack(gr)
+    def have_basecall_alignment_pack(self, gr):
+        return deref(self.thisptr).have_basecall_alignment_pack(gr)
+    def get_basecall_alignment(self, gr=None):
+        if gr is None:
+            return deref(self.thisptr).get_basecall_alignment()
+        else:
+            return deref(self.thisptr).get_basecall_alignment(gr)
+
+cdef class File_Packer:
+    cdef unique_ptr[Cpp_File_Packer] thisptr
+
+    def __init__(self, a1=None, a2=None, a3=None, a4=None, a5=None):
+        if a1 is None:
+            self.thisptr.reset(new Cpp_File_Packer())
+        elif a2 is None:
+            self.thisptr.reset(new Cpp_File_Packer(a1))
+        else:
+            self.thisptr.reset(new Cpp_File_Packer(a1, a2, a3, a4, a5))
+
+    def set_check(self, _check):
+        deref(self.thisptr).set_check(_check)
+    def set_force(self, _force):
+        deref(self.thisptr).set_force(_force)
+    def set_qv_bits(self, _qv_bits):
+        deref(self.thisptr).set_qv_bits(_qv_bits)
+    def set_p_model_state_bits(self, _p_model_state_bits):
+        deref(self.thisptr).set_p_model_state_bits(_p_model_state_bits)
+
+    def run(self, ifn, ofn):
+        deref(self.thisptr).run(ifn, ofn)
+    def reset_counts(self):
+        deref(self.thisptr).reset_counts()
+    def get_counts(self):
+        return deref(self.thisptr).get_counts()
diff --git a/python/fast5/source/fast5.cpp b/python/fast5/source/fast5.cpp
deleted file mode 100644
index 2e51dd8..0000000
--- a/python/fast5/source/fast5.cpp
+++ /dev/null
@@ -1,182 +0,0 @@
-#include <boost/python.hpp>
-#include <boost/python/suite/indexing/map_indexing_suite.hpp>
-#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
-#include <boost/python/overloads.hpp>
-
-#include "fast5.hpp"
-
-namespace bp = boost::python;
-
-// member functions with default arguments
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_raw_samples_params_overloads, get_raw_samples_params, 0, 1)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_raw_samples_overloads, get_raw_samples, 0, 1)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_eventdetection_read_name_list_overloads, get_eventdetection_read_name_list, 0, 1)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_eventdetection_events_overloads, have_eventdetection_events, 0, 1)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_eventdetection_params_overloads, get_eventdetection_params, 0, 1)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_eventdetection_event_params_overloads, get_eventdetection_event_params, 0, 2)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_eventdetection_events_overloads, get_eventdetection_events, 0, 2)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_fastq_overlords, have_basecall_fastq, 1, 2)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_fastq_overlords, get_basecall_fastq, 1, 2)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_seq_overlords, have_basecall_seq, 1, 2)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_seq_overlords, get_basecall_seq, 1, 2)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_model_overlords, have_basecall_model, 1, 2)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_model_file_overlords, get_basecall_model_file, 1, 2)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_model_params_overlords, get_basecall_model_params, 1, 2)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_model_overlords, get_basecall_model, 1, 2)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_events_overlords, have_basecall_events, 1, 2)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_events_overlords, get_basecall_events, 1, 2)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(have_basecall_event_alignment_overlords, have_basecall_event_alignment, 0, 1)
-BOOST_PYTHON_MEMBER_FUNCTION_OVERLOADS(get_basecall_event_alignment_overlords, get_basecall_event_alignment, 0, 1)
-
-BOOST_PYTHON_MODULE(fast5)
-{
-    bp::class_<fast5::Channel_Id_Parameters>("Channel_Id_Parameters")
-        .def_readwrite("channel_number", &fast5::Channel_Id_Parameters::channel_number)
-        .def_readwrite("digitisation", &fast5::Channel_Id_Parameters::digitisation)
-        .def_readwrite("offset", &fast5::Channel_Id_Parameters::offset)
-        .def_readwrite("range", &fast5::Channel_Id_Parameters::range)
-        .def_readwrite("sampling_rate", &fast5::Channel_Id_Parameters::sampling_rate)
-        ;
-    bp::class_<fast5::Raw_Samples_Parameters>("Raw_Samples_Parameters")
-        .def_readwrite("read_id", &fast5::Raw_Samples_Parameters::read_id)
-        .def_readwrite("read_number", &fast5::Raw_Samples_Parameters::read_number)
-        .def_readwrite("start_mux", &fast5::Raw_Samples_Parameters::start_mux)
-        .def_readwrite("start_time", &fast5::Raw_Samples_Parameters::start_time)
-        .def_readwrite("duration", &fast5::Raw_Samples_Parameters::duration)
-        ;;
-    bp::class_<fast5::EventDetection_Event_Parameters>("EventDetection_Event_Parameters")
-        .def_readwrite("read_id", &fast5::EventDetection_Event_Parameters::read_id)
-        .def_readwrite("read_number", &fast5::EventDetection_Event_Parameters::read_number)
-        .def_readwrite("scaling_used", &fast5::EventDetection_Event_Parameters::scaling_used)
-        .def_readwrite("start_mux", &fast5::EventDetection_Event_Parameters::start_mux)
-        .def_readwrite("start_time", &fast5::EventDetection_Event_Parameters::start_time)
-        .def_readwrite("duration", &fast5::EventDetection_Event_Parameters::duration)
-        .def_readwrite("median_before", &fast5::EventDetection_Event_Parameters::median_before)
-        .def_readwrite("abasic_found", &fast5::EventDetection_Event_Parameters::abasic_found)
-        ;
-    bp::class_<fast5::EventDetection_Event_Entry>("EventDetection_Event_Entry")
-        .def_readwrite("mean", &fast5::EventDetection_Event_Entry::mean)
-        .def_readwrite("stdv", &fast5::EventDetection_Event_Entry::stdv)
-        .def_readwrite("start", &fast5::EventDetection_Event_Entry::start)
-        .def_readwrite("length", &fast5::EventDetection_Event_Entry::length)
-        ;
-    bp::class_<fast5::Model_Entry>("Model_Entry")
-        .def_readwrite("variant", &fast5::Model_Entry::variant)
-        .def_readwrite("level_mean", &fast5::Model_Entry::level_mean)
-        .def_readwrite("level_stdv", &fast5::Model_Entry::level_stdv)
-        .def_readwrite("sd_mean", &fast5::Model_Entry::sd_mean)
-        .def_readwrite("sd_stdv", &fast5::Model_Entry::sd_stdv)
-        .def_readwrite("weight", &fast5::Model_Entry::weight)
-        .def_readwrite("kmer", &fast5::Model_Entry::kmer)
-        ;
-    bp::class_<fast5::Model_Parameters>("Model_Parameters")
-        .def_readwrite("scale", &fast5::Model_Parameters::scale)
-        .def_readwrite("shift", &fast5::Model_Parameters::shift)
-        .def_readwrite("drift", &fast5::Model_Parameters::drift)
-        .def_readwrite("var", &fast5::Model_Parameters::var)
-        .def_readwrite("scale_sd", &fast5::Model_Parameters::scale_sd)
-        .def_readwrite("var_sd", &fast5::Model_Parameters::var_sd)
-        ;
-    bp::class_<fast5::Event_Entry>("Event_Entry")
-        .def_readwrite("mean", &fast5::Event_Entry::mean)
-        .def_readwrite("stdv", &fast5::Event_Entry::stdv)
-        .def_readwrite("start", &fast5::Event_Entry::start)
-        .def_readwrite("length", &fast5::Event_Entry::length)
-        .def_readwrite("p_model_state", &fast5::Event_Entry::p_model_state)
-        .def_readwrite("p_mp_state", &fast5::Event_Entry::p_mp_state)
-        .def_readwrite("p_A", &fast5::Event_Entry::p_A)
-        .def_readwrite("p_C", &fast5::Event_Entry::p_C)
-        .def_readwrite("p_G", &fast5::Event_Entry::p_G)
-        .def_readwrite("p_T", &fast5::Event_Entry::p_T)
-        .def_readwrite("move", &fast5::Event_Entry::move)
-        .def_readwrite("model_state", &fast5::Event_Entry::model_state)
-        .def_readwrite("mp_state", &fast5::Event_Entry::mp_state)
-        ;;
-    bp::class_<fast5::Event_Alignment_Entry>("Event_Alignment_Entry")
-        .def_readwrite("template_index", &fast5::Event_Alignment_Entry::template_index)
-        .def_readwrite("complement_index", &fast5::Event_Alignment_Entry::complement_index)
-        .def("get_kmer", &fast5::Event_Alignment_Entry::get_kmer)
-        ;;
-
-    bp::class_<std::map<std::string, std::string>>("Map_Str_Str")
-        .def(bp::map_indexing_suite<std::map<std::string, std::string>>())
-        ;
-    bp::class_<std::vector<std::string>>("Vec_Str")
-        .def(bp::vector_indexing_suite<std::vector<std::string>>())
-        ;
-    bp::class_<std::vector<fast5::Raw_Samples_Entry>>("Vec_Raw_Samples_Entry")
-        .def(bp::vector_indexing_suite<std::vector<fast5::Raw_Samples_Entry>>())
-        ;
-    bp::class_<std::vector<fast5::EventDetection_Event_Entry>>("Vec_EventDetection_Event_Entry")
-        .def(bp::vector_indexing_suite<std::vector<fast5::EventDetection_Event_Entry>>())
-        ;
-    bp::class_<std::vector<fast5::Model_Entry>>("Vec_Model_Entry")
-        .def(bp::vector_indexing_suite<std::vector<fast5::Model_Entry>>())
-        ;
-    bp::class_<std::vector<fast5::Event_Entry>>("Vec_Event_Entry")
-        .def(bp::vector_indexing_suite<std::vector<fast5::Event_Entry>>())
-        ;
-    bp::class_<std::vector<fast5::Event_Alignment_Entry>>("Vec_Event_Alignment_Entry")
-        .def(bp::vector_indexing_suite<std::vector<fast5::Event_Alignment_Entry>>())
-        ;
-
-    bp::class_<fast5::File, boost::noncopyable>("File")
-        .def(bp::init<std::string, bp::optional<bool>>())
-        .def("is_open", &fast5::File::is_open)
-        .def("is_rw", &fast5::File::is_rw)
-        .def("file_name", &fast5::File::file_name, bp::return_value_policy<bp::copy_const_reference>())
-        .def("open", &fast5::File::open)
-        .def("create", &fast5::File::create)
-        .def("close", &fast5::File::close)
-        .def("is_valid_file", &hdf5_tools::File::is_valid_file).staticmethod("is_valid_file")
-        .def("get_object_count", &hdf5_tools::File::get_object_count).staticmethod("get_object_count")
-        //
-        .def("file_version", &fast5::File::file_version)
-        //
-        .def("have_channel_id_params", &fast5::File::have_channel_id_params)
-        .def("get_channel_id_params", &fast5::File::get_channel_id_params)
-        //
-        .def("have_sampling_rate", &fast5::File::have_sampling_rate)
-        .def("get_sampling_rate", &fast5::File::get_sampling_rate)
-        //
-        .def("have_tracking_id_params", &fast5::File::have_tracking_id_params)
-        .def("get_tracking_id_params", &fast5::File::get_tracking_id_params)
-        //
-        .def("have_sequences_params", &fast5::File::have_sequences_params)
-        .def("get_sequences_params", &fast5::File::get_sequences_params)
-        //
-        .def("get_raw_samples_read_name_list", &fast5::File::get_raw_samples_read_name_list, bp::return_value_policy<bp::copy_const_reference>())
-        .def("have_raw_samples", &fast5::File::have_raw_samples)
-        .def("get_raw_samples_params", &fast5::File::get_raw_samples_params, get_raw_samples_params_overloads())
-        .def("get_raw_samples", &fast5::File::get_raw_samples, get_raw_samples_overloads())
-        //
-        .def("get_eventdetection_group_list", &fast5::File::get_eventdetection_group_list, bp::return_value_policy<bp::copy_const_reference>())
-        .def("have_eventdetection_groups", &fast5::File::have_eventdetection_groups)
-        .def("get_eventdetection_read_name_list", &fast5::File::get_eventdetection_read_name_list, get_eventdetection_read_name_list_overloads())
-        .def("have_eventdetection_events", &fast5::File::have_eventdetection_events, have_eventdetection_events_overloads())
-        .def("get_eventdetection_params", &fast5::File::get_eventdetection_params, get_eventdetection_params_overloads())
-        .def("get_eventdetection_event_params", &fast5::File::get_eventdetection_event_params, get_eventdetection_event_params_overloads())
-        .def("get_eventdetection_events", &fast5::File::get_eventdetection_events, get_eventdetection_events_overloads())
-        //
-        .def("get_basecall_group_list", &fast5::File::get_basecall_group_list, bp::return_value_policy<bp::copy_const_reference>())
-        .def("have_basecall_groups", &fast5::File::have_basecall_groups)
-        .def("get_basecall_strand_group_list", &fast5::File::get_basecall_strand_group_list, bp::return_value_policy<bp::copy_const_reference>())
-        .def("have_basecall_strand_groups", &fast5::File::have_basecall_strand_groups)
-        .def("have_basecall_log", &fast5::File::have_basecall_log)
-        .def("get_basecall_log", &fast5::File::get_basecall_log)
-        .def("have_basecall_fastq", &fast5::File::have_basecall_fastq, have_basecall_fastq_overlords())
-        .def("get_basecall_fastq", &fast5::File::get_basecall_fastq, get_basecall_fastq_overlords())
-        .def("add_basecall_fastq", &fast5::File::add_basecall_fastq)
-        .def("have_basecall_seq", &fast5::File::have_basecall_seq, have_basecall_seq_overlords())
-        .def("get_basecall_seq", &fast5::File::get_basecall_seq, get_basecall_seq_overlords())
-        .def("add_basecall_seq", &fast5::File::add_basecall_seq)
-        .def("have_basecall_model", &fast5::File::have_basecall_model, have_basecall_model_overlords())
-        .def("get_basecall_model_file", &fast5::File::get_basecall_model_file, get_basecall_model_file_overlords())
-        .def("get_basecall_model_params", &fast5::File::get_basecall_model_params, get_basecall_model_params_overlords())
-        .def("get_basecall_model", &fast5::File::get_basecall_model, get_basecall_model_overlords())
-        .def("have_basecall_events", &fast5::File::have_basecall_events, have_basecall_events_overlords())
-        .def("get_basecall_events", &fast5::File::get_basecall_events, get_basecall_events_overlords())
-        .def("have_basecall_event_alignment", &fast5::File::have_basecall_event_alignment, have_basecall_event_alignment_overlords())
-        .def("get_basecall_event_alignment", &fast5::File::get_basecall_event_alignment, get_basecall_event_alignment_overlords())
-        ;
-}
diff --git a/python/fast5/version.py b/python/fast5/version.py
deleted file mode 100644
index eaddd12..0000000
--- a/python/fast5/version.py
+++ /dev/null
@@ -1 +0,0 @@
-__version__ = '0.5.9'
diff --git a/python/setup.py b/python/setup.py
old mode 100644
new mode 100755
index 487dede..f969514
--- a/python/setup.py
+++ b/python/setup.py
@@ -1,16 +1,18 @@
-"""
-fast5.setup.py
-(c) 2016: Matei David, Ontario Institute for Cancer Research
-MIT License
-"""
+#!/usr/bin/env python
+
+#
+# Part of: https://github.com/mateidavid/fast5
+#
+# (c) 2017: Matei David, Ontario Institute for Cancer Research
+# MIT License
+#
 
 import os
-import re
-import pkg_resources
 import sys
+
 from setuptools import setup, Extension
 
-exec(open('fast5/version.py').read())
+use_cython = True #os.environ.get('USE_CYTHON', '') != ''
 
 # check HDF5 include and lib dirs
 hdf5_dir = os.environ.get('HDF5_DIR', '/usr')
@@ -23,18 +25,9 @@ if (not os.path.isfile(os.path.join(hdf5_lib_dir, 'lib' + hdf5_lib + '.so'))
     and not os.path.isfile(os.path.join(hdf5_lib_dir, 'lib' + hdf5_lib + '.a'))):
     sys.exit(hdf5_lib_dir + ': could not find HDF5 library file; use HDF5_DIR or HDF5_LIB_DIR/HDF5_LIB')
 
-# check Boost.Python include and lib dirs
-boost_dir = os.environ.get('BOOST_DIR', '/usr')
-boost_include_dir = os.environ.get('BOOST_INCLUDE_DIR', os.path.join(boost_dir, 'include'))
-boost_lib_dir = os.environ.get('BOOST_LIB_DIR', os.path.join(boost_dir, 'lib'))
-boost_python_lib = os.environ.get('BOOST_PYTHON_LIB', 'boost_python')
-if not os.path.isfile(os.path.join(boost_include_dir, 'boost', 'python.hpp')):
-    sys.exit(boost_include_dir + ': could not find Boost Python header files; use BOOST_DIR or BOOST_INCLUDE_DIR')
-if (not os.path.isfile(os.path.join(boost_lib_dir, 'lib' + boost_python_lib + '.so'))
-    and not os.path.isfile(os.path.join(boost_lib_dir, 'lib' + boost_python_lib + '.a'))):
-    sys.exit(boost_lib_dir + ': could not find Boost Python library file; use BOOST_DIR or BOOST_LIB_DIR/BOOST_PYTHON_LIB')
-
-fast5_dir = os.environ.get('FAST5_DIR', os.path.join('..', 'src'))
+fast5_dir = os.environ.get('FAST5_DIR', '..')
+fast5_src_dir = os.path.join(fast5_dir, 'src')
+fast5_version = open(os.path.join(fast5_dir, 'VERSION')).readline().strip()
 
 extra_compile_args = [
     '-std=c++11',
@@ -42,57 +35,46 @@ extra_compile_args = [
 ]
 # don't indiscriminately add /usr/include to work around bug:
 # https://lists.fedoraproject.org/archives/list/devel@lists.fedoraproject.org/thread/Q5SWCUUMWQ4EMS7CU2CBOZHV3WZYOOTT/
-for d in [hdf5_include_dir, boost_include_dir]:
+for d in [hdf5_include_dir]:
     if d != '/usr/include':
         extra_compile_args += ['-isystem', d]
-
 #extra_compile_args += ['-O0', '-g3', '-ggdb', '-fno-eliminate-unused-debug-types', '-v']
+
 extra_link_args = []
 #extra_link_args += ['-v']
 
+#if sys.platform == 'darwin':
+#    extra_compile_args.append('-mmacosx-version-min=10.7')
+
 extensions = [
     Extension(
-        'fast5.fast5',
-        include_dirs=[
-            fast5_dir,
-        ],
-        sources=[
-            os.path.join('fast5', 'source', 'fast5.cpp'),
-        ],
-        depends=[
-            os.path.join(fast5_dir, fn)
-            for fn in ['fast5.hpp', 'hdf5_tools.hpp']
-        ],
+        'fast5',
+        language='c++',
+        sources=['fast5/fast5.' + ['cpp', 'pyx'][use_cython]],
+        include_dirs=[fast5_src_dir],
+        library_dirs=[hdf5_lib_dir],
+        runtime_library_dirs=[hdf5_lib_dir],
+        libraries=[hdf5_lib],
         extra_compile_args=extra_compile_args,
         extra_link_args=extra_link_args,
-        library_dirs=[
-            hdf5_lib_dir,
-            boost_lib_dir,
-        ],
-        runtime_library_dirs=[
-            hdf5_lib_dir,
-            boost_lib_dir,
-        ],
-        libraries=[
-            hdf5_lib,
-            boost_python_lib,
-        ],
     ),
 ]
 
+if use_cython:
+    from Cython.Build import cythonize
+    extensions = cythonize(extensions)
+
 setup(
     name='fast5',
     description='Fast5 file interface.',
-    version=__version__,
-    #long_description=open('README').read(),
+    version=fast5_version,
     author='Matei David, Ontario Institute for Cancer Research',
     author_email='matei.david at oicr.on.ca',
     license='MIT',
     url='https://github.com/mateidavid/fast5',
-    packages=['fast5'],
-    exclude_package_data={
-        '': ['*.c', '*.cpp', '*.h', '*.hpp'],
-    },
     ext_modules=extensions,
-    scripts=[],
+    scripts=[
+        os.path.join('bin', 'f5ls'),
+        os.path.join('bin', 'f5pack'),
+    ],
 )
diff --git a/src/.fast5_version.hpp.in b/src/.fast5_version.hpp.in
new file mode 100644
index 0000000..1d485dc
--- /dev/null
+++ b/src/.fast5_version.hpp.in
@@ -0,0 +1,16 @@
+#ifndef __FAST5_VERSION_HPP
+#define __FAST5_VERSION_HPP
+
+namespace fast5
+{
+
+namespace // unnamed: every translation unit that includes this header gets its own copy of the constant
+{
+
+static char const * const version = "${VERSION}"; // ${VERSION} is a template placeholder; presumably substituted at build time -- confirm against src/Makefile
+
+} // unnamed namespace
+
+} // namespace fast5
+
+#endif
diff --git a/src/.gitignore b/src/.gitignore
index 70906a8..1126707 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -3,3 +3,4 @@ f5ls-full
 hdf5-mod
 f5-mod
 f5dump
+f5pack
diff --git a/src/Bit_Packer.hpp b/src/Bit_Packer.hpp
new file mode 100644
index 0000000..eb92f49
--- /dev/null
+++ b/src/Bit_Packer.hpp
@@ -0,0 +1,152 @@
+//
+// Part of: https://github.com/mateidavid/fast5
+//
+// Copyright (c) 2015-2017 Matei David, Ontario Institute for Cancer Research
+// MIT License
+//
+
+#ifndef __BIT_PACKER_HPP
+#define __BIT_PACKER_HPP
+
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <map>
+#include <limits>
+#include <stdexcept>
+#include <cassert>
+
+#include "logger.hpp"
+
+namespace fast5
+{
+
+class Bit_Packer // fixed-width bit packing: keeps the low num_bits of every integer, least significant bits first
+{
+public:
+    typedef std::vector< std::uint8_t > Code_Type; // packed byte stream
+    typedef std::map< std::string, std::string > Code_Params_Type; // textual parameters ("packer", "num_bits", "size")
+
+    template < typename Int_Type > // encode(): pack v using num_bits per value; returns (bytes, params needed by decode())
+    std::pair< Code_Type, Code_Params_Type >
+    encode(std::vector< Int_Type > const & v, unsigned num_bits) const
+    {
+        Code_Type res;
+        Code_Params_Type res_params;
+        res_params["packer"] = "bit_packer";
+        num_bits = std::min(num_bits, (unsigned)sizeof(Int_Type) * 8); // never more bits than Int_Type holds
+        std::ostringstream oss;
+        oss << num_bits;
+        res_params["num_bits"] = oss.str();
+        oss.str("");
+        oss << v.size();
+        res_params["size"] = oss.str();
+        long long unsigned buff = 0; // pending bits, oldest at bit 0
+        unsigned buff_len = 0; // number of valid bits in buff
+        auto val_mask = (num_bits < 64? (1llu << num_bits) - 1 : ~0llu); // shifting a 64-bit value by 64 is undefined
+        for (size_t i = 0; i < v.size(); ++i) // size_t matches v.size(); an unsigned index could wrap
+        {
+            // flush out buff
+            while (buff_len >= 8)
+            {
+                res.push_back(buff & 0xFF);
+                buff >>= 8;
+                buff_len -= 8;
+            }
+            assert(buff_len < 8);
+            long long unsigned x = v[i];
+            if (buff_len + num_bits <= 64)
+            {
+                buff |= (x & val_mask) << buff_len;
+                buff_len += num_bits;
+            }
+            else
+            {
+                assert(num_bits > 56); // value does not fit next to the leftover bits: emit its low byte first
+                buff |= (x & 0xFF) << buff_len;
+                res.push_back(buff & 0xFF);
+                buff >>= 8;
+                x >>= 8;
+                buff |= (x & (val_mask >> 8)) << buff_len;
+                buff_len += num_bits - 8;
+            }
+        }
+        while (buff_len >= 8) // write out the remaining whole bytes
+        {
+            res.push_back(buff & 0xFF);
+            buff >>= 8;
+            buff_len -= 8;
+        }
+        if (buff_len > 0) // final partial byte; its unused high bits are zero
+        {
+            res.push_back(buff & 0xFF);
+        }
+        return std::make_pair(std::move(res), std::move(res_params));
+    } // encode()
+
+    template < typename Int_Type > // decode(): inverse of encode(); reads "num_bits" and "size" from v_params (at() throws if absent)
+    std::vector< Int_Type >
+    decode(Code_Type const & v, Code_Params_Type const & v_params) const
+    {
+        std::vector< Int_Type > res;
+        unsigned num_bits;
+        size_t sz;
+        std::istringstream(v_params.at("num_bits")) >> num_bits;
+        std::istringstream(v_params.at("size")) >> sz;
+        if (num_bits > 64 or v.size() != (sz * num_bits) / 8 + ((sz * num_bits) % 8 > 0? 1 : 0)) // also reject widths the 64-bit buffer cannot hold
+        {
+            LOG_THROW
+                << "incorrect size: v_size=" << v.size() << " size=" << sz << " num_bits=" << num_bits;
+        }
+        long long unsigned buff = 0; // bits read from v but not yet consumed, oldest at bit 0
+        unsigned buff_len = 0; // number of valid bits in buff
+        size_t j = 0; // index of the next unread byte of v
+        auto val_mask = (num_bits < 64? (1llu << num_bits) - 1 : ~0llu); // shifting a 64-bit value by 64 is undefined
+        for (size_t i = 0; i < sz; ++i) // size_t matches sz; an unsigned index could wrap
+        {
+            while (j < v.size() and buff_len <= 64 - 8) // refill while a whole byte still fits
+            {
+                buff |= ((long long unsigned)v.at(j) << buff_len);
+                ++j;
+                buff_len += 8;
+            }
+            long long unsigned x;
+            if (buff_len >= num_bits)
+            {
+                x = buff & val_mask;
+                buff = (num_bits < 64? buff >> num_bits : 0); // shifting by 64 is undefined; the buffer is simply emptied
+                buff_len -= num_bits;
+            }
+            else
+            {
+                // 56 < buff_len < num_bits
+                x = buff & 0xFF;
+                buff >>= 8;
+                buff_len -= 8;
+                buff |= ((long long unsigned)v.at(j) << buff_len); // widen first: the promoted int cannot be shifted by 49..55 bits
+                ++j;
+                buff_len += 8;
+                x |= ((buff & (val_mask >> 8)) << 8);
+                buff >>= (num_bits - 8);
+                buff_len -= num_bits - 8;
+            }
+            res.push_back(x);
+        }
+        return res;
+    } // decode()
+
+    //
+    // static packer access
+    //
+    static Bit_Packer const &
+    get_packer()
+    {
+        static Bit_Packer _packer; // single shared instance, created on first use
+        return _packer;
+    }
+}; // class Bit_Packer
+
+} // namespace fast5
+
+#endif
diff --git a/src/File_Packer.hpp b/src/File_Packer.hpp
new file mode 100644
index 0000000..990cc38
--- /dev/null
+++ b/src/File_Packer.hpp
@@ -0,0 +1,982 @@
+//
+// Part of: https://github.com/mateidavid/fast5
+//
+// Copyright (c) 2015-2017 Matei David, Ontario Institute for Cancer Research
+// MIT License
+//
+
+#ifndef __FILE_PACKER_HPP
+#define __FILE_PACKER_HPP
+
+#include <cmath>
+#include <set>
+#include <string>
+#include "fast5.hpp"
+#include "logger.hpp"
+
+#define STATIC_MEMBER_WRAPPER(_type, _id, _init) \
+    static _type & _id() { static _type _ ## _id = _init; return _ ## _id; } // defines static accessor _id() returning a reference to a function-local static initialised with _init
+
+namespace fast5
+{
+
+class File_Packer
+{
+public:
+    struct Counts // running totals gathered while packing; run() merges one file's totals at a time with operator+=
+    {
+        // raw samples: number of samples packed and bits used by the packed signal
+        size_t rs_count;
+        size_t rs_bits;
+        // eventdetection events: number of events packed and bits used by the packed skip/len streams
+        size_t ed_count;
+        size_t ed_skip_bits;
+        size_t ed_len_bits;
+        // basecall fastq: bases packed, length of the strand-0 basecall sequence, bits used for bases and qualities
+        size_t fq_count;
+        size_t bp_seq_count;
+        size_t fq_bp_bits;
+        size_t fq_qv_bits;
+        // basecall events (presumably filled by pack_ev -- confirm there)
+        size_t ev_count;
+        size_t ev_rel_skip_bits;
+        size_t ev_skip_bits;
+        size_t ev_len_bits;
+        size_t ev_move_bits;
+        size_t ev_p_model_state_bits;
+        // basecall alignments (presumably filled by pack_al -- confirm there)
+        size_t al_count;
+        size_t al_template_step_bits;
+        size_t al_complement_step_bits;
+        size_t al_move_bits;
+        // durations: total raw-sample duration, and the time span covered by basecall events
+        double rs_total_duration;
+        double rs_called_duration;
+
+        Counts() : // every counter starts at zero
+            //
+            rs_count(0),
+            rs_bits(0),
+            //
+            ed_count(0),
+            ed_skip_bits(0),
+            ed_len_bits(0),
+            //
+            fq_count(0),
+            bp_seq_count(0),
+            fq_bp_bits(0),
+            fq_qv_bits(0),
+            //
+            ev_count(0),
+            ev_rel_skip_bits(0),
+            ev_skip_bits(0),
+            ev_len_bits(0),
+            ev_move_bits(0),
+            ev_p_model_state_bits(0),
+            //
+            al_count(0),
+            al_template_step_bits(0),
+            al_complement_step_bits(0),
+            al_move_bits(0),
+            //
+            rs_total_duration(0.0),
+            rs_called_duration(0.0)
+        {}
+        Counts & operator += (Counts const & other) // member-wise sum; returns *this
+        {
+            //
+            rs_count += other.rs_count;
+            rs_bits += other.rs_bits;
+            //
+            ed_count += other.ed_count;
+            ed_skip_bits += other.ed_skip_bits;
+            ed_len_bits += other.ed_len_bits;
+            //
+            fq_count += other.fq_count;
+            bp_seq_count += other.bp_seq_count;
+            fq_bp_bits += other.fq_bp_bits;
+            fq_qv_bits += other.fq_qv_bits;
+            //
+            ev_count += other.ev_count;
+            ev_rel_skip_bits += other.ev_rel_skip_bits;
+            ev_skip_bits += other.ev_skip_bits;
+            ev_len_bits += other.ev_len_bits;
+            ev_move_bits += other.ev_move_bits;
+            ev_p_model_state_bits += other.ev_p_model_state_bits;
+            //
+            al_count += other.al_count;
+            al_template_step_bits += other.al_template_step_bits;
+            al_complement_step_bits += other.al_complement_step_bits;
+            al_move_bits += other.al_move_bits;
+            //
+            rs_total_duration += other.rs_total_duration;
+            rs_called_duration += other.rs_called_duration;
+            return *this;
+        }
+    };
+
+    File_Packer() : // default: policy 1 (pack) for every data type
+        File_Packer(1)
+    {}
+
+    File_Packer(int _policy) : // same policy for all five data types
+        File_Packer(_policy, _policy, _policy, _policy, _policy)
+    {}
+
+    File_Packer(int _rw_policy, int _ed_policy, int _fq_policy, int _ev_policy, int _al_policy) : // one policy per data type; run() treats 1 as pack, 2 as unpack, 3 as copy
+        rw_policy(_rw_policy),
+        ed_policy(_ed_policy),
+        fq_policy(_fq_policy),
+        ev_policy(_ev_policy),
+        al_policy(_al_policy),
+        check(true), // verify packed data by reading it back
+        force(false), // passed to File::create() in run()
+        qv_bits(max_qv_bits()),
+        p_model_state_bits(default_p_model_state_bits())
+    {}
+
+    void set_check(bool _check) { check = _check; } // enable/disable the read-back verification done by the pack_* steps
+    void set_force(bool _force) { force = _force; } // value handed to File::create() when the output file is created
+    void set_qv_bits(unsigned _qv_bits) { qv_bits = _qv_bits; } // stored as given; no range check is done here
+    void set_p_model_state_bits(unsigned _p_model_state_bits) { p_model_state_bits = _p_model_state_bits; } // stored as given; no range check is done here
+
+    STATIC_MEMBER_WRAPPER(unsigned const, max_qv_bits, 5) // max_qv_bits(): 5
+    STATIC_MEMBER_WRAPPER(unsigned const, max_qv_mask, ((unsigned)1 << max_qv_bits()) - 1) // max_qv_mask(): the low max_qv_bits() bits set (31)
+    STATIC_MEMBER_WRAPPER(unsigned const, default_p_model_state_bits, 2) // default_p_model_state_bits(): 2
+
+    void // run(): convert input file ifn into new file ofn, handling each data type according to its policy
+    run(std::string const & ifn, std::string const & ofn) const
+    {
+        File src_f;
+        File dst_f;
+        Counts cnt; // statistics for this file only; merged into counts after the try block completes
+        try
+        {
+            // open files
+            src_f.open(ifn);
+            dst_f.create(ofn, force);
+            assert(src_f.is_open());
+            assert(dst_f.is_open());
+            assert(dst_f.is_rw());
+            // copy attributes under / and /UniqueGlobalKey
+            copy_attributes(src_f, dst_f, "", false);
+            copy_attributes(src_f, dst_f, "/UniqueGlobalKey", true);
+            std::set< std::string > bc_gr_s; // basecall group names: filled by the fq step, passed to the ev/al steps, then to copy_basecall_params()
+            // process raw samples (policy 1=pack, 2=unpack, 3=copy; any other value leaves them out)
+            if (rw_policy == 1)
+            {
+                pack_rw(src_f, dst_f, cnt);
+            }
+            else if (rw_policy == 2)
+            {
+                unpack_rw(src_f, dst_f);
+            }
+            else if (rw_policy == 3)
+            {
+                copy_rw(src_f, dst_f);
+            }
+            // process eventdetection events (same policy values)
+            if (ed_policy == 1)
+            {
+                pack_ed(src_f, dst_f, cnt);
+            }
+            else if (ed_policy == 2)
+            {
+                unpack_ed(src_f, dst_f);
+            }
+            else if (ed_policy == 3)
+            {
+                copy_ed(src_f, dst_f);
+            }
+            // process basecall fastq (same policy values)
+            if (fq_policy == 1)
+            {
+                pack_fq(src_f, dst_f, bc_gr_s, cnt);
+            }
+            else if (fq_policy == 2)
+            {
+                unpack_fq(src_f, dst_f, bc_gr_s);
+            }
+            else if (fq_policy == 3)
+            {
+                copy_fq(src_f, dst_f, bc_gr_s);
+            }
+            // process basecall events (same policy values)
+            if (ev_policy == 1)
+            {
+                pack_ev(src_f, dst_f, bc_gr_s, cnt);
+            }
+            else if (ev_policy == 2)
+            {
+                unpack_ev(src_f, dst_f, bc_gr_s);
+            }
+            else if (ev_policy == 3)
+            {
+                copy_ev(src_f, dst_f, bc_gr_s);
+            }
+            // process basecall alignments (same policy values)
+            if (al_policy == 1)
+            {
+                pack_al(src_f, dst_f, bc_gr_s, cnt);
+            }
+            else if (al_policy == 2)
+            {
+                unpack_al(src_f, dst_f, bc_gr_s);
+            }
+            else if (al_policy == 3)
+            {
+                copy_al(src_f, dst_f, bc_gr_s);
+            }
+            // copy basecall params
+            copy_basecall_params(src_f, dst_f, bc_gr_s);
+            // close files
+            src_f.close();
+            dst_f.close();
+        }
+        catch (hdf5_tools::Exception & e) // rethrown as std::runtime_error, prefixed with the input file name
+        {
+            std::ostringstream oss;
+            oss << ifn << ": HDF5 error: " << e.what();
+            throw std::runtime_error(oss.str());
+        }
+        counts += cnt; // not reached when an exception leaves the try block; counts is mutable, hence allowed in a const method
+    } // run()
+
+    void reset_counts() const // zero the accumulated statistics (counts is mutable, so this works on a const packer)
+    {
+        counts = Counts();
+    }
+
+    Counts const & get_counts() const // statistics accumulated by run() since construction or the last reset_counts()
+    {
+        return counts;
+    }
+private:
+    int rw_policy;
+    int ed_policy;
+    int fq_policy;
+    int ev_policy;
+    int al_policy;
+    bool check;
+    bool force;
+    unsigned qv_bits;
+    unsigned p_model_state_bits;
+    mutable Counts counts;
+
+    void // pack_rw(): write every raw-samples read of src_f to dst_f in packed form, updating cnt for reads packed here
+    pack_rw(File const & src_f, File & dst_f, Counts & cnt) const
+    {
+        auto rn_l = src_f.get_raw_samples_read_name_list();
+        for (auto const & rn : rn_l)
+        {
+            if (src_f.have_raw_samples_pack(rn)) // already packed in the source: transfer the pack as is (not counted in cnt)
+            {
+                auto rs_pack = src_f.get_raw_samples_pack(rn);
+                dst_f.add_raw_samples(rn, rs_pack);
+            }
+            else if (src_f.have_raw_samples_unpack(rn))
+            {
+                auto rsi_ds = src_f.get_raw_int_samples_dataset(rn);
+                auto & rsi = rsi_ds.first; // the samples
+                auto & rs_params = rsi_ds.second; // their parameters
+                auto rs_pack = src_f.pack_rw(rsi_ds);
+                dst_f.add_raw_samples(rn, rs_pack);
+                if (check) // read back what was just written and require an exact match
+                {
+                    auto rsi_ds_unpack = dst_f.get_raw_int_samples_dataset(rn);
+                    auto & rsi_unpack = rsi_ds_unpack.first;
+                    auto & rs_params_unpack = rsi_ds_unpack.second;
+                    if (not (rs_params_unpack == rs_params))
+                    {
+                        LOG_THROW
+                            << "check failed: rs_params_unpack!=rs_params";
+                    }
+                    if (rsi_unpack.size() != rsi.size())
+                    {
+                        LOG_THROW
+                            << "check failed: rs_unpack.size=" << rsi_unpack.size()
+                            << " rs_orig.size=" << rsi.size();
+
+                    }
+                    for (unsigned i = 0; i < rsi_unpack.size(); ++i)
+                    {
+                        if (rsi_unpack[i] != rsi[i])
+                        {
+                            LOG_THROW
+                                << "check failed: i=" << i
+                                << " rs_unpack=" << rsi_unpack[i]
+                                << " rs_orig=" << rsi[i];
+                        }
+                    }
+                }
+                cnt.rs_count += rsi.size();
+                cnt.rs_bits += rs_pack.signal.size() * sizeof(rs_pack.signal[0]) * 8;
+                if (cnt.rs_total_duration == 0.0) // only set while still zero, so later reads do not overwrite it
+                {
+                    auto cid_params = src_f.get_channel_id_params();
+                    cnt.rs_total_duration = src_f.time_to_float(rs_params.duration, cid_params);
+                }
+                LOG(info)
+                    << "rn=" << rn
+                    << " rs_size=" << rsi.size()
+                    << " signal_bits=" << rs_pack.signal_params.at("avg_bits")
+                    << std::endl;
+            }
+        }
+    } // pack_rw()
+
+    void // unpack_rw(): write every raw-samples read of src_f to dst_f as an integer samples dataset
+    unpack_rw(File const & src_f, File & dst_f) const
+    {
+        auto rn_l = src_f.get_raw_samples_read_name_list();
+        for (auto const & rn : rn_l)
+        {
+            auto rsi_ds = src_f.get_raw_int_samples_dataset(rn); // samples plus their parameters
+            dst_f.add_raw_samples_dataset(rn, rsi_ds);
+        }
+    } // unpack_rw()
+
+    void // copy_rw(): transfer every raw-samples read in the form it has in src_f (unpacked checked first, then packed)
+    copy_rw(File const & src_f, File & dst_f) const
+    {
+        auto rn_l = src_f.get_raw_samples_read_name_list();
+        for (auto const & rn : rn_l)
+        {
+            if (src_f.have_raw_samples_unpack(rn))
+            {
+                auto rsi_ds = src_f.get_raw_int_samples_dataset(rn);
+                dst_f.add_raw_samples_dataset(rn, rsi_ds);
+            }
+            else if (src_f.have_raw_samples_pack(rn))
+            {
+                auto rs_pack = src_f.get_raw_samples_pack(rn);
+                dst_f.add_raw_samples(rn, rs_pack);
+            }
+        }
+    } // copy_rw()
+
+    void // pack_ed(): write all eventdetection events of src_f to dst_f in packed form, updating cnt for events packed here
+    pack_ed(File const & src_f, File & dst_f, Counts & cnt) const
+    {
+        auto gr_l = src_f.get_eventdetection_group_list();
+        for (auto const & gr : gr_l)
+        {
+            auto rn_l = src_f.get_eventdetection_read_name_list(gr);
+            for (auto const & rn : rn_l)
+            {
+                auto ed_params = src_f.get_eventdetection_params(gr);
+                dst_f.add_eventdetection_params(gr, ed_params);
+                if (src_f.have_eventdetection_events_pack(gr, rn)) // already packed in the source: transfer the pack as is (not counted in cnt)
+                {
+                    auto ede_pack = src_f.get_eventdetection_events_pack(gr, rn);
+                    dst_f.add_eventdetection_events(gr, rn, ede_pack);
+                }
+                else if (src_f.have_eventdetection_events(gr, rn))
+                {
+                    auto ede_ds = src_f.get_eventdetection_events_dataset(gr, rn);
+                    auto & ede = ede_ds.first; // the events
+                    auto & ede_params = ede_ds.second; // their parameters
+                    auto ede_pack = src_f.pack_ed(ede_ds);
+                    dst_f.add_eventdetection_events(gr, rn, ede_pack);
+                    if (check) // read back what was just written and compare with the original
+                    {
+                        decltype(ede_ds) ede_ds_unpack;
+                        try
+                        {
+                            ede_ds_unpack = dst_f.get_eventdetection_events_dataset(gr, rn);
+                        }
+                        catch (std::logic_error const & e) // report unpacking failures as a failed check
+                        {
+                            LOG_THROW
+                                << "check failed: " << e.what();
+                        }
+                        auto & ede_unpack = ede_ds_unpack.first;
+                        auto & ede_params_unpack = ede_ds_unpack.second;
+                        if (not (ede_params_unpack == ede_params))
+                        {
+                            LOG_THROW
+                                << "check failed: ede_params_unpack!=ede_params";
+                        }
+                        if (ede_unpack.size() != ede.size())
+                        {
+                            LOG_THROW
+                                << "check failed: gr=" << gr
+                                << " ede_unpack.size=" << ede_unpack.size()
+                                << " ede_orig.size=" << ede.size();
+                        }
+                        for (size_t i = 0; i + 1 < ede_unpack.size(); ++i) // skip last event
+                        {
+                            LOG(debug1)
+                                << "gr=" << gr
+                                << " i=" << i
+                                << " ede_unpack=(" << ede_unpack[i].start
+                                << "," << ede_unpack[i].length
+                                << "," << ede_unpack[i].mean
+                                << "," << ede_unpack[i].stdv
+                                << ") ed_orig=(" << ede[i].start
+                                << "," << ede[i].length
+                                << "," << ede[i].mean
+                                << "," << ede[i].stdv
+                                << ")" << std::endl;
+                            if (ede_unpack[i].start != ede[i].start
+                                or ede_unpack[i].length != ede[i].length
+                                or std::abs(ede_unpack[i].mean - ede[i].mean) > .1 // std::abs: unqualified abs may pick int abs(int) and truncate
+                                or std::abs(ede_unpack[i].stdv - ede[i].stdv) > .1) // start/length must match exactly, mean/stdv within 0.1
+                            {
+                                LOG_THROW
+                                    << "check failed: gr=" << gr
+                                    << " i=" << i
+                                    << " ede_unpack=(" << ede_unpack[i].start
+                                    << "," << ede_unpack[i].length
+                                    << "," << ede_unpack[i].mean
+                                    << "," << ede_unpack[i].stdv
+                                    << ") ed_orig=(" << ede[i].start
+                                    << "," << ede[i].length
+                                    << "," << ede[i].mean
+                                    << "," << ede[i].stdv
+                                    << ")";
+                            }
+                        }
+                    } // if check
+                    cnt.ed_count += ede.size();
+                    cnt.ed_skip_bits += ede_pack.skip.size() * sizeof(ede_pack.skip[0]) * 8;
+                    cnt.ed_len_bits += ede_pack.len.size() * sizeof(ede_pack.len[0]) * 8;
+                    LOG(info)
+                        << "gr=" << gr
+                        << " rn=" << rn
+                        << " ed_size=" << ede.size()
+                        << " skip_bits=" << ede_pack.skip_params.at("avg_bits")
+                        << " len_bits=" << ede_pack.len_params.at("avg_bits")
+                        << std::endl;
+                }
+            } // for rn
+        } // for gr
+    } // pack_ed()
+
+    void // unpack_ed(): write all eventdetection params and events of src_f to dst_f, events as plain datasets
+    unpack_ed(File const & src_f, File & dst_f) const
+    {
+        auto gr_l = src_f.get_eventdetection_group_list();
+        for (auto const & gr : gr_l)
+        {
+            auto rn_l = src_f.get_eventdetection_read_name_list(gr);
+            for (auto const & rn : rn_l)
+            {
+                auto ed_params = src_f.get_eventdetection_params(gr); // group-level params, re-added for every read of the group
+                dst_f.add_eventdetection_params(gr, ed_params);
+                auto ede_ds = src_f.get_eventdetection_events_dataset(gr, rn);
+                dst_f.add_eventdetection_events_dataset(gr, rn, ede_ds);
+            }
+        }
+    } // unpack_ed()
+
+    void // copy_ed(): transfer eventdetection params and events in the form they have in src_f (unpacked checked first, then packed)
+    copy_ed(File const & src_f, File & dst_f) const
+    {
+        auto gr_l = src_f.get_eventdetection_group_list();
+        for (auto const & gr : gr_l)
+        {
+            auto rn_l = src_f.get_eventdetection_read_name_list(gr);
+            for (auto const & rn : rn_l)
+            {
+                auto ed_params = src_f.get_eventdetection_params(gr); // group-level params, re-added for every read of the group
+                dst_f.add_eventdetection_params(gr, ed_params);
+                if (src_f.have_eventdetection_events_unpack(gr, rn))
+                {
+                    auto ede_ds = src_f.get_eventdetection_events_dataset(gr, rn);
+                    dst_f.add_eventdetection_events_dataset(gr, rn, ede_ds);
+                }
+                else if (src_f.have_eventdetection_events_pack(gr, rn))
+                {
+                    auto ede_pack = src_f.get_eventdetection_events_pack(gr, rn);
+                    dst_f.add_eventdetection_events(gr, rn, ede_pack);
+                }
+            }
+        }
+    } // copy_ed()
+
+    void // pack_fq(): write all basecall fastq of src_f to dst_f in packed form; records visited groups in bc_gr_s and statistics in cnt
+    pack_fq(File const & src_f, File & dst_f, std::set< std::string > & bc_gr_s, Counts & cnt) const
+    {
+        bool compute_bp_seq_count = false; // set once at least one fastq is packed here
+        for (unsigned st = 0; st < 3; ++st) // three strand indices: 0, 1, 2
+        {
+            auto gr_l = src_f.get_basecall_strand_group_list(st);
+            for (auto const & gr : gr_l)
+            {
+                if (src_f.have_basecall_fastq_pack(st, gr)) // already packed in the source: transfer the pack as is (not counted in cnt)
+                {
+                    bc_gr_s.insert(gr);
+                    auto fq_pack = src_f.get_basecall_fastq_pack(st, gr);
+                    dst_f.add_basecall_fastq(st, gr, fq_pack);
+                }
+                else if (src_f.have_basecall_fastq_unpack(st, gr))
+                {
+                    compute_bp_seq_count = true;
+                    bc_gr_s.insert(gr);
+                    auto fq = src_f.get_basecall_fastq(st, gr);
+                    auto fqa = src_f.split_fq(fq); // indexed below as [0]=name, [1]=bases, [3]=qualities
+                    auto fq_pack = src_f.pack_fq(fq, qv_bits);
+                    dst_f.add_basecall_fastq(st, gr, fq_pack);
+                    if (check) // read back what was just written and compare with the original
+                    {
+                        auto fq_unpack = dst_f.get_basecall_fastq(st, gr);
+                        auto fqa_unpack = src_f.split_fq(fq_unpack);
+                        if (fqa_unpack[0] != fqa[0])
+                        {
+                            LOG_THROW
+                                << "check failed: st=" << st
+                                << " gr=" << gr
+                                << " fq_unpack_name=" << fqa_unpack[0]
+                                << " fq_orig_name=" << fqa[0];
+                        }
+                        if (fqa_unpack[1] != fqa[1])
+                        {
+                            LOG_THROW
+                                << "check failed: st=" << st
+                                << " gr=" << gr
+                                << " fq_unpack_bp=" << fqa_unpack[1]
+                                << " fq_orig_bp=" << fqa[1];
+                        }
+                        if (fqa_unpack[3].size() != fqa[3].size())
+                        {
+                            LOG_THROW
+                                << "check failed: st=" << st
+                                << " gr=" << gr
+                                << " fq_unpack_qv_size=" << fqa_unpack[3].size()
+                                << " fq_orig_qv_size=" << fqa[3].size();
+                        }
+                        auto qv_mask = max_qv_mask() & (max_qv_mask() << (max_qv_bits() - std::min<unsigned>(qv_bits, max_qv_bits()))); // clamp: qv_bits above max_qv_bits would make the unsigned shift count wrap
+                        for (size_t i = 0; i < fqa_unpack[3].size(); ++i) // qualities are compared only on the qv_bits high bits that packing keeps
+                        {
+                            if ((std::min<unsigned>(fqa_unpack[3][i] - 33, max_qv_mask()) & qv_mask) !=
+                                (std::min<unsigned>(fqa[3][i] - 33, max_qv_mask()) & qv_mask))
+                            {
+                                LOG_THROW
+                                    << "check failed: st=" << st
+                                    << " gr=" << gr
+                                    << " i=" << i
+                                    << " fq_unpack_qv=" << fqa_unpack[3][i]
+                                    << " fq_orig_qv=" << fqa[3][i];
+                            }
+                        }
+                    }
+                    cnt.fq_count += fqa[1].size();
+                    cnt.fq_bp_bits += fq_pack.bp.size() * sizeof(fq_pack.bp[0]) * 8;
+                    cnt.fq_qv_bits += fq_pack.qv.size() * sizeof(fq_pack.qv[0]) * 8;
+                    LOG(info)
+                        << "gr=" << gr
+                        << " st=" << st
+                        << " bp_size=" << fqa[1].size()
+                        << " fq_bp_bits=" << fq_pack.bp_params.at("avg_bits")
+                        << " fq_qv_bits=" << fq_pack.qv_params.at("avg_bits")
+                        << std::endl;
+                }
+            }
+        }
+        if (compute_bp_seq_count) // record the strand-0 sequence length and the time span of its events
+        {
+            std::string sq;
+            auto gr_l = src_f.get_basecall_group_list();
+            for (auto const & gr : gr_l) // first group that has both a strand-0 sequence and strand-0 events
+            {
+                if (src_f.have_basecall_seq(0, gr) and src_f.have_basecall_events(0, gr))
+                {
+                    sq = src_f.get_basecall_seq(0, gr);
+                    auto bce = src_f.get_basecall_events(0, gr);
+                    if (not bce.empty()) cnt.rs_called_duration = bce.back().start + bce.back().length - bce.front().start; // back()/front() on an empty event table would be undefined
+                    break;
+                }
+            }
+            if (sq.empty() and src_f.have_basecall_seq(0)) // fall back to the strand-0 sequence of the default group
+            {
+                sq = src_f.get_basecall_seq(0);
+            }
+            cnt.bp_seq_count += sq.size();
+        }
+    } // pack_fq()
+
+    // For every strand index 0..2 and every basecall group src_f lists for
+    // that strand: when src_f reports a fastq entry, record the group name in
+    // bc_gr_s, read the entry with get_basecall_fastq() and add it to dst_f.
+    void
+    unpack_fq(File const & src_f, File & dst_f, std::set< std::string > & bc_gr_s) const
+    {
+        for (unsigned strand = 0; strand < 3; ++strand)
+        {
+            auto groups = src_f.get_basecall_strand_group_list(strand);
+            for (auto const & group : groups)
+            {
+                if (not src_f.have_basecall_fastq(strand, group))
+                {
+                    continue;
+                }
+                bc_gr_s.insert(group);
+                auto fastq = src_f.get_basecall_fastq(strand, group);
+                dst_f.add_basecall_fastq(strand, group, fastq);
+            }
+        }
+    } // unpack_fq()
+
+    // Copy fastq entries from src_f to dst_f in whichever form src_f holds
+    // them. For each strand index 0..2 and each group listed for it, the
+    // unpacked form is tried first (have_basecall_fastq_unpack); only if that
+    // is absent is the packed form (have_basecall_fastq_pack) considered.
+    // Every group that contributes an entry is recorded in bc_gr_s.
+    void
+    copy_fq(File const & src_f, File & dst_f, std::set< std::string > & bc_gr_s) const
+    {
+        for (unsigned strand = 0; strand < 3; ++strand)
+        {
+            auto groups = src_f.get_basecall_strand_group_list(strand);
+            for (auto const & group : groups)
+            {
+                if (src_f.have_basecall_fastq_unpack(strand, group))
+                {
+                    bc_gr_s.insert(group);
+                    auto fastq = src_f.get_basecall_fastq(strand, group);
+                    dst_f.add_basecall_fastq(strand, group, fastq);
+                    continue;
+                }
+                if (src_f.have_basecall_fastq_pack(strand, group))
+                {
+                    bc_gr_s.insert(group);
+                    auto fastq_packed = src_f.get_basecall_fastq_pack(strand, group);
+                    dst_f.add_basecall_fastq(strand, group, fastq_packed);
+                }
+            }
+        }
+    } // copy_fq()
+
+    // Pack basecall events for strands 0 and 1.
+    //
+    // For every group listed for a strand:
+    //  - if src_f already holds packed events, they are copied to dst_f as-is;
+    //  - else if src_f holds unpacked events, they are packed with
+    //    src_f.pack_ev() and the result is added to dst_f. Groups whose
+    //    description name is not "metrichor" are dropped with a warning.
+    //
+    // Every group that is written to dst_f is recorded in bc_gr_s, and the
+    // event count and packed sizes (in bits) are accumulated into cnt.
+    //
+    // Throws (via LOG_THROW) when the fastq needed for packing is missing, or,
+    // with `check` enabled, when the events read back from dst_f differ from
+    // the originals beyond the tolerances used below.
+    void
+    pack_ev(File const & src_f, File & dst_f, std::set< std::string > & bc_gr_s, Counts & cnt) const
+    {
+        for (unsigned st = 0; st < 2; ++st)
+        {
+            auto gr_l = src_f.get_basecall_strand_group_list(st);
+            for (auto const & gr : gr_l)
+            {
+                if (src_f.have_basecall_events_pack(st, gr))
+                {
+                    bc_gr_s.insert(gr);
+                    auto ev_pack = src_f.get_basecall_events_pack(st, gr);
+                    dst_f.add_basecall_events(st, gr, ev_pack);
+                }
+                else if (src_f.have_basecall_events_unpack(st, gr))
+                {
+                    // bc group description
+                    auto bc_params = src_f.get_basecall_params(gr);
+                    auto bc_desc = src_f.get_basecall_group_description(gr);
+                    if (bc_desc.name != "metrichor")
+                    {
+                        LOG(warning)
+                            << "dropping basecall events group written by "
+                            << bc_desc.name << ":" << bc_desc.version
+                            << ": st=" << st << " gr=" << gr << "\n";
+                        continue;
+                    }
+                    bc_gr_s.insert(gr);
+                    auto ev_ds = src_f.get_basecall_events_dataset(st, gr);
+                    auto & ev = ev_ds.first;
+                    auto & ev_params = ev_ds.second;
+                    // sampling rate
+                    auto cid_params = src_f.get_channel_id_params();
+                    // basecall fq
+                    if (not src_f.have_basecall_fastq(st, gr))
+                    {
+                        LOG_THROW
+                            << "missing fastq required to pack basecall events: st=" << st << " gr=" << gr;
+                    }
+                    auto sq = src_f.get_basecall_seq(st, gr);
+                    // ed group
+                    auto ed_gr = src_f.get_basecall_eventdetection_group(gr);
+                    std::vector< EventDetection_Event > ed;
+                    if (not ed_gr.empty())
+                    {
+                        ed = src_f.get_eventdetection_events(ed_gr);
+                    }
+                    // try to find median_sd_temp
+                    auto median_sd_temp = src_f.get_basecall_median_sd_temp(gr);
+                    auto ev_pack = src_f.pack_ev(ev_ds, bc_desc, sq, ed, ed_gr,
+                                                 cid_params, median_sd_temp, p_model_state_bits);
+                    dst_f.add_basecall_events(st, gr, ev_pack);
+                    if (check)
+                    {
+                        // round-trip check: read the events back from dst_f and
+                        // compare them with the originals
+                        decltype(ev_ds) ev_ds_unpack;
+                        try
+                        {
+                            ev_ds_unpack = dst_f.get_basecall_events_dataset(st, gr);
+                        }
+                        catch (std::logic_error const & e)
+                        {
+                            LOG_THROW
+                                << "check failed: " << e.what();
+                        }
+                        auto & ev_unpack = ev_ds_unpack.first;
+                        auto & ev_params_unpack = ev_ds_unpack.second;
+                        if (not (ev_params_unpack == ev_params))
+                        {
+                            LOG_THROW
+                                << "check failed: ev_params_unpack!=ev_params";
+                        }
+                        if (ev_unpack.size() != ev.size())
+                        {
+                            LOG_THROW
+                                << "check failed: st=" << st
+                                << " gr=" << gr
+                                << " ev_unpack.size=" << ev_unpack.size()
+                                << " ev_orig.size=" << ev.size();
+                        }
+                        for (unsigned i = 0; i < ev_unpack.size(); ++i)
+                        {
+                            // std::abs is used explicitly: an unqualified abs() may
+                            // resolve to the C int overload, which truncates
+                            // sub-unit differences to 0 and defeats the check.
+                            if (std::abs(ev_unpack[i].start - ev[i].start) > 1e-3
+                                or std::abs(ev_unpack[i].length - ev[i].length) > 1e-3
+                                or std::abs(ev_unpack[i].mean - ev[i].mean) > 1e-1
+                                // workaround: allow for unexpected stdv when expected value is small
+                                //or std::abs(ev_unpack[i].stdv - ev[i].stdv) > 1e-1
+                                or (std::abs(ev_unpack[i].stdv - ev[i].stdv) > 1e-1
+                                    and ev_unpack[i].stdv != ev_pack.median_sd_temp)
+                                // workaround: allow for invalid moves:
+                                //or ev_unpack[i].move != ev[i].move
+                                or ev_unpack[i].model_state != ev[i].model_state)
+                            {
+                                LOG_THROW
+                                    << "check failed: st=" << st
+                                    << " gr=" << gr
+                                    << " i=" << i
+                                    << " ev_unpack=(" << ev_unpack[i].start
+                                    << "," << ev_unpack[i].length
+                                    << "," << ev_unpack[i].mean
+                                    << "," << ev_unpack[i].stdv
+                                    << "," << ev_unpack[i].move
+                                    << "," << ev_unpack[i].get_model_state()
+                                    << ") ev_orig=(" << ev[i].start
+                                    << "," << ev[i].length
+                                    << "," << ev[i].mean
+                                    << "," << ev[i].stdv
+                                    << "," << ev[i].move
+                                    << "," << ev[i].get_model_state()
+                                    << ")";
+                            }
+                            // stdv differs but the unpacked value equals
+                            // median_sd_temp: tolerated, reported as a warning only
+                            if (std::abs(ev_unpack[i].stdv - ev[i].stdv) > 1e-1
+                                and ev_unpack[i].stdv == ev_pack.median_sd_temp)
+                            {
+                                LOG(warning)
+                                    << "unexpected stdv: st=" << st
+                                    << " gr=" << gr
+                                    << " i=" << i
+                                    << " ev_unpack=(" << ev_unpack[i].start
+                                    << "," << ev_unpack[i].length
+                                    << "," << ev_unpack[i].mean
+                                    << "," << ev_unpack[i].stdv
+                                    << "," << ev_unpack[i].move
+                                    << "," << ev_unpack[i].get_model_state()
+                                    << ") ev_orig=(" << ev[i].start
+                                    << "," << ev[i].length
+                                    << "," << ev[i].mean
+                                    << "," << ev[i].stdv
+                                    << "," << ev[i].move
+                                    << "," << ev[i].get_model_state()
+                                    << ")\n";
+                            }
+                        }
+                    }
+                    // statistics: element count and packed payload sizes in bits
+                    cnt.ev_count += ev.size();
+                    cnt.ev_rel_skip_bits += ev_pack.rel_skip.size() * sizeof(ev_pack.rel_skip[0]) * 8;
+                    cnt.ev_skip_bits += ev_pack.skip.size() * sizeof(ev_pack.skip[0]) * 8;
+                    cnt.ev_len_bits += ev_pack.len.size() * sizeof(ev_pack.len[0]) * 8;
+                    cnt.ev_move_bits += ev_pack.move.size() * sizeof(ev_pack.move[0]) * 8;
+                    cnt.ev_p_model_state_bits += ev_pack.p_model_state.size() * sizeof(ev_pack.p_model_state[0]) * 8;
+                    std::ostringstream oss;
+                    if (not ev_pack.rel_skip.empty())
+                    {
+                        oss
+                            << "rel_skip_bits=" << ev_pack.rel_skip_params.at("avg_bits");
+                    }
+                    else
+                    {
+                        oss
+                            << "skip_bits=" << ev_pack.skip_params.at("avg_bits")
+                            << " len_bits=" << ev_pack.len_params.at("avg_bits");
+                    }
+                    LOG(info)
+                        << "gr=" << gr
+                        << " st=" << st
+                        << " ev_size=" << ev.size()
+                        << " " << oss.str()
+                        << " move_bits=" << ev_pack.move_params.at("avg_bits")
+                        << " p_model_state_bits=" << ev_pack.p_model_state_params.at("num_bits")
+                        << std::endl;
+                }
+            }
+        }
+    } // pack_ev()
+
+    // For strands 0 and 1 and every group src_f lists for them: when src_f
+    // reports basecall events, record the group in bc_gr_s, read the events
+    // dataset and add it to dst_f with add_basecall_events_dataset().
+    void
+    unpack_ev(File const & src_f, File & dst_f, std::set< std::string > & bc_gr_s) const
+    {
+        for (unsigned strand = 0; strand < 2; ++strand)
+        {
+            auto groups = src_f.get_basecall_strand_group_list(strand);
+            for (auto const & group : groups)
+            {
+                if (not src_f.have_basecall_events(strand, group))
+                {
+                    continue;
+                }
+                bc_gr_s.insert(group);
+                auto events_ds = src_f.get_basecall_events_dataset(strand, group);
+                dst_f.add_basecall_events_dataset(strand, group, events_ds);
+            }
+        }
+    } // unpack_ev()
+
+    // Copy basecall events for strands 0 and 1 from src_f to dst_f in the
+    // form src_f holds them: the unpacked dataset is preferred; the packed
+    // form is used only when no unpacked dataset is present. Every group that
+    // contributes events is recorded in bc_gr_s.
+    void
+    copy_ev(File const & src_f, File & dst_f, std::set< std::string > & bc_gr_s) const
+    {
+        for (unsigned strand = 0; strand < 2; ++strand)
+        {
+            auto groups = src_f.get_basecall_strand_group_list(strand);
+            for (auto const & group : groups)
+            {
+                if (src_f.have_basecall_events_unpack(strand, group))
+                {
+                    bc_gr_s.insert(group);
+                    auto events_ds = src_f.get_basecall_events_dataset(strand, group);
+                    dst_f.add_basecall_events_dataset(strand, group, events_ds);
+                    continue;
+                }
+                if (src_f.have_basecall_events_pack(strand, group))
+                {
+                    bc_gr_s.insert(group);
+                    auto events_packed = src_f.get_basecall_events_pack(strand, group);
+                    dst_f.add_basecall_events(strand, group, events_packed);
+                }
+            }
+        }
+    } // copy_ev()
+
+    // Pack basecall alignments of the groups listed for strand index 2.
+    //
+    // A group that already holds a packed alignment is copied through. A
+    // group holding an unpacked alignment is packed with src_f.pack_al() and
+    // written to dst_f, unless its description name is not "metrichor", in
+    // which case it is dropped with a warning. Written groups are recorded in
+    // bc_gr_s; element counts and packed sizes (in bits) accumulate in cnt.
+    // With `check` set, the alignment is read back from dst_f and compared
+    // element by element; any difference throws via LOG_THROW.
+    void
+    pack_al(File const & src_f, File & dst_f, std::set< std::string > & bc_gr_s, Counts & cnt) const
+    {
+        auto groups = src_f.get_basecall_strand_group_list(2);
+        for (auto const & group : groups)
+        {
+            if (src_f.have_basecall_alignment_pack(group))
+            {
+                // already packed: pass through
+                bc_gr_s.insert(group);
+                auto packed = src_f.get_basecall_alignment_pack(group);
+                dst_f.add_basecall_alignment(group, packed);
+                continue;
+            }
+            if (not src_f.have_basecall_alignment_unpack(group))
+            {
+                continue;
+            }
+            // basecall group description
+            auto group_params = src_f.get_basecall_params(group);
+            auto group_desc = src_f.get_basecall_group_description(group);
+            if (group_desc.name != "metrichor")
+            {
+                LOG(warning)
+                    << "dropping basecall alignment written by "
+                    << group_desc.name << ":" << group_desc.version
+                    << ": gr=" << group << "\n";
+                continue;
+            }
+            bc_gr_s.insert(group);
+            auto alignment = src_f.get_basecall_alignment(group);
+            // the basecall sequence is needed by pack_al()
+            if (not src_f.have_basecall_seq(2, group))
+            {
+                LOG_THROW
+                    << "missing fastq required to pack basecall alignment: gr=" << group;
+            }
+            auto bases = src_f.get_basecall_seq(2, group);
+            auto packed = src_f.pack_al(alignment, bases);
+            dst_f.add_basecall_alignment(group, packed);
+            if (check)
+            {
+                auto round_trip = dst_f.get_basecall_alignment(group);
+                if (round_trip.size() != alignment.size())
+                {
+                    LOG_THROW
+                        << "check failed: gr=" << group
+                        << " al_unpack.size=" << round_trip.size()
+                        << " al_orig.size=" << alignment.size();
+                }
+                for (unsigned k = 0; k < alignment.size(); ++k)
+                {
+                    auto const & got = round_trip[k];
+                    auto const & want = alignment[k];
+                    if (got.template_index != want.template_index
+                        or got.complement_index != want.complement_index
+                        or got.get_kmer() != want.get_kmer())
+                    {
+                        LOG_THROW
+                            << "check failed: gr=" << group
+                            << " i=" << k
+                            << " al_unpack=(" << got.template_index
+                            << "," << got.complement_index
+                            << "," << got.get_kmer()
+                            << ") al_orig=(" << want.template_index
+                            << "," << want.complement_index
+                            << "," << want.get_kmer()
+                            << ")";
+                    }
+                }
+            }
+            // statistics
+            cnt.al_count += alignment.size();
+            cnt.al_template_step_bits += packed.template_step.size() * sizeof(packed.template_step[0]) * 8;
+            cnt.al_complement_step_bits += packed.complement_step.size() * sizeof(packed.complement_step[0]) * 8;
+            cnt.al_move_bits += packed.move.size() * sizeof(packed.move[0]) * 8;
+            LOG(info)
+                << "gr=" << group
+                << " al_size=" << alignment.size()
+                << " template_step_bits=" << packed.template_step_params.at("num_bits")
+                << " complement_step_bits=" << packed.complement_step_params.at("num_bits")
+                << " move_bits=" << packed.move_params.at("avg_bits")
+                << std::endl;
+        }
+    } // pack_al()
+
+    // For every group listed for strand index 2: when src_f reports a
+    // basecall alignment, record the group in bc_gr_s, read the alignment with
+    // get_basecall_alignment() and add it to dst_f.
+    void
+    unpack_al(File const & src_f, File & dst_f, std::set< std::string > & bc_gr_s) const
+    {
+        auto groups = src_f.get_basecall_strand_group_list(2);
+        for (auto const & group : groups)
+        {
+            if (not src_f.have_basecall_alignment(group))
+            {
+                continue;
+            }
+            bc_gr_s.insert(group);
+            auto alignment = src_f.get_basecall_alignment(group);
+            dst_f.add_basecall_alignment(group, alignment);
+        }
+    } // unpack_al()
+
+    // Copy basecall alignments (groups listed for strand index 2) from src_f
+    // to dst_f in the form src_f holds them: the unpacked alignment is
+    // preferred, the packed one is used only when no unpacked alignment is
+    // present. Every group that contributes is recorded in bc_gr_s.
+    void
+    copy_al(File const & src_f, File & dst_f, std::set< std::string > & bc_gr_s) const
+    {
+        auto groups = src_f.get_basecall_strand_group_list(2);
+        for (auto const & group : groups)
+        {
+            if (src_f.have_basecall_alignment_unpack(group))
+            {
+                bc_gr_s.insert(group);
+                auto alignment = src_f.get_basecall_alignment(group);
+                dst_f.add_basecall_alignment(group, alignment);
+                continue;
+            }
+            if (src_f.have_basecall_alignment_pack(group))
+            {
+                bc_gr_s.insert(group);
+                auto alignment_packed = src_f.get_basecall_alignment_pack(group);
+                dst_f.add_basecall_alignment(group, alignment_packed);
+            }
+        }
+    } // copy_al()
+
+    // For each group name in bc_gr_s, read the basecall params from src_f and
+    // add them to dst_f under the same group name.
+    void
+    copy_basecall_params(File const & src_f, File & dst_f, std::set< std::string > const & bc_gr_s) const
+    {
+        for (auto it = bc_gr_s.begin(); it != bc_gr_s.end(); ++it)
+        {
+            auto const & group = *it;
+            auto params = src_f.get_basecall_params(group);
+            dst_f.add_basecall_params(group, params);
+        }
+    } // copy_basecall_params()
+
+    // Thin forwarder to File::Base::copy_attributes(): passes src_f, dst_f,
+    // the path p and the recurse flag (default false) through unchanged.
+    void
+    copy_attributes(File const & src_f, File const & dst_f, std::string const & p, bool recurse = false) const
+    {
+        File::Base::copy_attributes(src_f, dst_f, p, recurse);
+    } // copy_attributes()
+}; // class File_Packer
+
+} // namespace fast5
+
+#endif
diff --git a/src/Huffman_Packer.hpp b/src/Huffman_Packer.hpp
new file mode 100644
index 0000000..f17ac4a
--- /dev/null
+++ b/src/Huffman_Packer.hpp
@@ -0,0 +1,357 @@
+//
+// Part of: https://github.com/mateidavid/fast5
+//
+// Copyright (c) 2015-2017 Matei David, Ontario Institute for Cancer Research
+// MIT License
+//
+
+#ifndef __HUFFMAN_PACKER_HPP
+#define __HUFFMAN_PACKER_HPP
+
+#include <bitset>
+#include <cassert>
+#include <cstdint>
+#include <deque>
+#include <iomanip>
+#include <iostream>
+#include <iterator>
+#include <limits>
+#include <map>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "logger.hpp"
+
+namespace fast5
+{
+
+class Huffman_Packer
+{
+public:
+    // Encoded byte stream produced by encode() and consumed by decode().
+    typedef std::vector< std::uint8_t > Code_Type;
+    // String key/value parameters that accompany an encoded stream.
+    typedef std::map< std::string, std::string > Code_Params_Type;
+
+    // Default-constructible (empty codeword map); movable but not copyable.
+    Huffman_Packer() = default;
+    Huffman_Packer(Huffman_Packer const &) = delete;
+    Huffman_Packer(Huffman_Packer &&) = default;
+    Huffman_Packer & operator = (Huffman_Packer const &) = delete;
+    Huffman_Packer & operator = (Huffman_Packer &&) = default;
+    // Construct from a stream of (value, codeword) token pairs;
+    // forwards to load_codeword_map().
+    Huffman_Packer(std::istream & is, std::string const & cwm_name)
+    {
+        load_codeword_map(is, cwm_name);
+    }
+    // Construct from a vector of alternating value and codeword strings;
+    // forwards to the iterator overload of load_codeword_map().
+    Huffman_Packer(std::vector< std::string > const & v, std::string const & cwm_name)
+    {
+        load_codeword_map(v.begin(), v.end(), cwm_name);
+    }
+    // Construct from an iterator range of alternating value and codeword
+    // strings; forwards to the iterator overload of load_codeword_map().
+    template < typename Iterator >
+    Huffman_Packer(Iterator it_begin, Iterator it_end, std::string const & cwm_name)
+    {
+        load_codeword_map(it_begin, it_end, cwm_name);
+    }
+
+    // Set the map name to cwm_name, then read whitespace-separated
+    // (value, codeword) token pairs from `is` until extraction fails,
+    // registering each pair with add_codeword().
+    void load_codeword_map(std::istream & is, std::string const & cwm_name)
+    {
+        _cwm_name = cwm_name;
+        for (std::string value_tok, codeword_tok; is >> value_tok >> codeword_tok; )
+        {
+            add_codeword(value_tok, codeword_tok);
+        }
+    }
+    // Set the map name to cwm_name, then consume the range [it_begin, it_end)
+    // two elements at a time as (value, codeword) string pairs, registering
+    // each pair with add_codeword(). A trailing element without a partner is
+    // ignored. Only forward-iterator operations are used, so the range need
+    // not be random-access.
+    template < typename Iterator >
+    void load_codeword_map(Iterator it_begin, Iterator it_end, std::string const & cwm_name)
+    {
+        _cwm_name = cwm_name;
+        auto it = it_begin;
+        while (it != it_end)
+        {
+            auto it_cw = std::next(it);
+            if (it_cw == it_end)
+            {
+                // unpaired trailing element
+                break;
+            }
+            add_codeword(*it, *it_cw);
+            it = std::next(it_cw);
+        }
+    }
+
+    // Encode the integer vector v into a byte stream.
+    //
+    // Stream layout, as produced by the loop below: an absolute value is
+    // written as sizeof(Int_Type) bytes, least-significant byte first; it is
+    // followed by codewords for the subsequent values (or, with encode_diff,
+    // for their differences to the previous value). When a value has no
+    // codeword, or the input ends, the break codeword is written, the stream
+    // is padded to a byte boundary, and the next value (if any) is written in
+    // absolute form again.
+    //
+    // Returns the encoded bytes together with the parameter map: the id()
+    // entries plus "code_diff", "size" (element count) and "avg_bits"
+    // (encoded bits per element, two decimals; "0.00" for empty input).
+    //
+    // Throws std::out_of_range (from std::map::at) if the break codeword is
+    // needed but missing from the codeword map.
+    template < typename Int_Type >
+    std::pair< Code_Type, Code_Params_Type >
+    encode(std::vector< Int_Type > const & v, bool encode_diff = false) const
+    {
+        Code_Type res;
+        Code_Params_Type res_params = id();
+        res_params["code_diff"] = encode_diff? "1" : "0";
+        std::ostringstream oss;
+        oss << v.size();
+        res_params["size"] = oss.str();
+        std::uint64_t buff = 0;
+        std::uint8_t buff_len = 0;
+        bool reset = true;
+        Int_Type last = 0;
+        std::size_t i = 0;
+        long long int val = 0;
+        long long int x = 0;
+        while (true)
+        {
+            assert(buff_len <= 64);
+            // flush buffer
+            while (buff_len >= 8)
+            {
+                res.push_back(buff & 0xFF);
+                buff >>= 8;
+                buff_len -= 8;
+            }
+            assert(buff_len < 8);
+            if (reset)
+            {
+                assert(buff_len == 0);
+                if (i == v.size()) break;
+                //LOG(debug) << "absolute value val=" << v[i] << std::endl;
+                for (unsigned j = 0; j < sizeof(Int_Type); ++j)
+                {
+                    std::uint8_t y = (v[i] >> (8 * j)) & 0xFF;
+                    //LOG(debug) << "byte " << j << ": " << std::bitset<8>(y) << std::endl;
+                    res.push_back(y);
+                }
+                reset = false;
+                last = v[i];
+                ++i;
+            }
+            else // not reset
+            {
+                if (i < v.size())
+                {
+                    val = v[i];
+                    x = encode_diff? val - last : val;
+                    // no codeword for x: emit a break and restart with an absolute value
+                    reset = _cwm.count(x) == 0;
+                    //LOG(debug) << "relative value: val=" << v[i] << " last=" << last << " x=" << x << " reset=" << reset << std::endl;
+                }
+                else
+                {
+                    // end of input: terminate the run with a break codeword
+                    reset = true;
+                    //LOG(debug) << "end: reset=1" << std::endl;
+                }
+                auto p = (not reset? _cwm.at(x) : _cwm.at(break_cw()));
+                // buff_len < 8 here and codewords are at most 57 bits, so this fits in 64 bits
+                buff |= (p.first << buff_len);
+                buff_len += p.second;
+                if (not reset)
+                {
+                    last = v[i];
+                    ++i;
+                }
+                else if ((buff_len % 8) > 0) // and reset
+                {
+                    // pad to a byte boundary after a break codeword
+                    buff_len += 8 - (buff_len % 8);
+                }
+            }
+        }
+        // average encoded bits per element; avoid 0/0 (NaN) on empty input
+        double const avg_bits = v.empty()
+            ? 0.0
+            : static_cast< double >(res.size() * 8) / v.size();
+        oss.str("");
+        oss << std::fixed << std::setprecision(2) << avg_bits;
+        res_params["avg_bits"] = oss.str();
+        return std::make_pair(std::move(res), std::move(res_params));
+    }
+
+    // Decode a byte stream produced by encode() back into a vector of
+    // Int_Type.
+    //
+    // v_params must carry the same "packer", "format_version" and
+    // "codeword_map_name" as this coder (see check_params()), plus
+    // "code_diff", which selects whether codeword values are differences to
+    // the previous value.
+    //
+    // Throws (via LOG_THROW) on an id mismatch, on truncated input, when no
+    // codeword matches the buffered bits, or when a decoded value does not
+    // fit in Int_Type; throws std::out_of_range if "code_diff" is missing.
+    template < typename Int_Type >
+    std::vector< Int_Type >
+    decode(Code_Type const & v, Code_Params_Type const & v_params) const
+    {
+        check_params(v_params);
+        bool decode_diff = v_params.at("code_diff") == "1";
+        std::vector< Int_Type > res;
+        std::uint64_t buff = 0;
+        std::uint8_t buff_len = 0;
+        bool reset = true;
+        Int_Type last = 0;
+        std::size_t i = 0;
+        while (i < v.size() or buff_len > 0)
+        {
+            assert(buff_len <= 64);
+            // fill buffer
+            while (i < v.size() and buff_len <= 56)
+            {
+                std::uint64_t y = v[i];
+                buff |= (y << buff_len);
+                buff_len += 8;
+                ++i;
+            }
+            assert(buff_len <= 64);
+            if (reset)
+            {
+                assert((buff_len % 8) == 0);
+                // input-dependent condition: must hold in release builds too,
+                // otherwise the unsigned bit counter below would wrap around
+                if (static_cast< std::size_t >(buff_len / 8) < sizeof(Int_Type))
+                {
+                    LOG_THROW
+                        << "truncated input: incomplete absolute value";
+                }
+                //LOG(debug) << "absolute value" << std::endl;
+                Int_Type x = 0;
+                for (unsigned j = 0; j < sizeof(Int_Type); ++j)
+                {
+                    std::uint64_t y = (buff & 0xFF);
+                    //LOG(debug) << "byte " << j << ": " << std::bitset<8>(y) << std::endl;
+                    x |= (y << (8 * j));
+                    buff >>= 8;
+                    buff_len -= 8;
+                }
+                //LOG(debug) << "got: val=" << x << std::endl;
+                res.push_back(x);
+                last = x;
+                reset = false;
+            }
+            else // not reset
+            {
+                //LOG(debug) << "reading relative value" << std::endl;
+                // TODO: faster decoding
+                // currently, try all codewords one by one
+                auto it = _cwm.begin();
+                while (it != _cwm.end())
+                {
+                    // only accept codewords that fit in the bits actually buffered;
+                    // bits beyond buff_len are zero filler, not data
+                    if (it->second.second <= buff_len
+                        and (buff & ((1llu << it->second.second) - 1)) == it->second.first)
+                    {
+                        break;
+                    }
+                    ++it;
+                }
+                if (it == _cwm.end())
+                {
+                    LOG_THROW
+                        << "codeword not found: buff=" << std::bitset<64>(buff);
+                }
+                auto x = it->first;
+                auto p = it->second;
+                assert(buff_len >= p.second);
+                buff >>= p.second;
+                buff_len -= p.second;
+                if (x != break_cw())
+                {
+                    //LOG(debug) << "got: x=" << x << " last=" << last << " val=" << x + last << " cw_len=" << (int)p.second << std::endl;
+                    if (decode_diff) x += last;
+                    if (sizeof(Int_Type) < 8
+                        and (x < (long long)std::numeric_limits< Int_Type >::min()
+                             or x > (long long)std::numeric_limits< Int_Type >::max()))
+                    {
+                        LOG_THROW
+                            << "overflow";
+                    }
+                    res.push_back(x);
+                    last = x;
+                }
+                else
+                {
+                    //LOG(debug) << "got: break cw_len=" << (int)p.second << std::endl;
+                    // break codeword: drop the padding bits up to the byte
+                    // boundary and read the next value in absolute form
+                    reset = true;
+                    if ((buff_len % 8) > 0)
+                    {
+                        buff >>= (buff_len % 8);
+                        buff_len -= (buff_len % 8);
+                    }
+                }
+            }
+        }
+        return res;
+    }
+
+    //
+    // static coder access
+    //
+    // Return the statically registered coder named cwm_name. The registry is
+    // populated by static_init() on first use; an unknown name raises via
+    // LOG_THROW.
+    static Huffman_Packer const &
+    get_coder(std::string const & cwm_name)
+    {
+        static_init();
+        auto & registry = cwm_m();
+        auto const pos = registry.find(cwm_name);
+        if (pos == registry.end())
+        {
+            LOG_THROW
+                << "missing codeword map: " + cwm_name;
+        }
+        return pos->second;
+    }
+
+private:
+    std::map< long long int, std::pair< std::uint64_t, std::uint8_t > > _cwm;
+    std::string _cwm_name;
+    // Key under which the break codeword is stored in the codeword map: the
+    // smallest value representable by long long int.
+    static long long int break_cw()
+    {
+        return std::numeric_limits< long long int >::min();
+    }
+    // Parameters identifying this coder: packer name, format version and
+    // the name of the loaded codeword map.
+    Code_Params_Type id() const
+    {
+        return Code_Params_Type{
+            { "packer", "huffman_packer" },
+            { "format_version", "2" },
+            { "codeword_map_name", _cwm_name }
+        };
+    }
+    // Verify that params names the same packer, format version and codeword
+    // map as this coder; raises via LOG_THROW on the first mismatch. A key
+    // missing from params makes std::map::at throw std::out_of_range.
+    void check_params(Code_Params_Type const & params) const
+    {
+        auto const expected = id();
+        for (auto const key : { "packer", "format_version", "codeword_map_name" })
+        {
+            if (params.at(key) != expected.at(key))
+            {
+                LOG_THROW
+                    << "decode id mismatch";
+            }
+        }
+    }
+    // Register one codeword.
+    //
+    // v_s is the value as decimal text, or "." for the break codeword.
+    // cw_s is the codeword as a string of '0'/'1' characters; cw_s[0] ends up
+    // in the least-significant bit of the stored code, which is the bit
+    // encode() writes first.
+    //
+    // Raises via LOG_THROW when the value is not an integer, or the codeword
+    // is empty, longer than 57 bits, or contains a character other than
+    // '0'/'1'. An empty codeword is rejected because it would match any
+    // input in decode() without consuming bits.
+    void add_codeword(std::string const & v_s, std::string const & cw_s)
+    {
+        long long int v = break_cw();
+        if (v_s != ".")
+        {
+            std::istringstream iss(v_s);
+            // require a full integer parse with no trailing characters
+            if (not (iss >> v)
+                or iss.peek() != std::istringstream::traits_type::eof())
+            {
+                LOG_THROW
+                    << "invalid codeword value: " + v_s + " " + cw_s;
+            }
+        }
+        if (cw_s.empty())
+        {
+            LOG_THROW
+                << "empty codeword: " + v_s;
+        }
+        if (cw_s.size() > 57)
+        {
+            LOG_THROW
+                << "codeword too long: " + v_s + " " + cw_s;
+        }
+        std::uint64_t cw = 0;
+        std::uint8_t cw_l = cw_s.size();
+        for (auto it = cw_s.rbegin(); it != cw_s.rend(); ++it)
+        {
+            if (*it != '0' and *it != '1')
+            {
+                LOG_THROW
+                    << "invalid codeword: " + v_s + " " + cw_s;
+            }
+            cw <<= 1;
+            cw |= (*it == '1');
+        }
+        _cwm[v] = std::make_pair(cw, cw_l);
+    }
+
+    // Registry of statically available coders, keyed by codeword map name.
+    // Held as a function-local static so it is constructed on first access.
+    static std::map< std::string, Huffman_Packer > & cwm_m()
+    {
+        static std::map< std::string, Huffman_Packer > _cwm_m;
+        return _cwm_m;
+    }
+    // Populate the coder registry from the built-in codeword tables; does
+    // nothing after the first completed call.
+    //
+    // Each included .inl file supplies one braced list of strings: the first
+    // element is the codeword map name, the remaining elements are passed to
+    // the iterator constructor as alternating value and codeword strings.
+    // NOTE(review): the `inited` guard is a plain static bool with no
+    // synchronization -- confirm the first call cannot happen concurrently.
+    static void static_init()
+    {
+        static bool inited = false;
+        if (inited) return;
+        std::deque< std::deque< std::string > > dd;
+        dd.push_back(
+#include "cwmap.fast5_rw_1.inl"
+            );
+        dd.push_back(
+#include "cwmap.fast5_ed_skip_1.inl"
+            );
+        dd.push_back(
+#include "cwmap.fast5_ed_len_1.inl"
+            );
+        dd.push_back(
+#include "cwmap.fast5_fq_bp_1.inl"
+            );
+        dd.push_back(
+#include "cwmap.fast5_fq_qv_1.inl"
+            );
+        dd.push_back(
+#include "cwmap.fast5_ev_rel_skip_1.inl"
+            );
+        dd.push_back(
+#include "cwmap.fast5_ev_move_1.inl"
+            );
+        cwm_m().clear();
+        for (auto & d : dd)
+        {
+            // first element names the table; the rest are its codeword pairs
+            auto cwm_name = d.front();
+            Huffman_Packer hc(d.begin() + 1, d.end(), cwm_name);
+            cwm_m()[cwm_name] = std::move(hc);
+        }
+        inited = true;
+    } // static_init()
+}; // class Huffman_Packer
+
+} // namespace fast5
+
+#endif
diff --git a/src/Makefile b/src/Makefile
index 03b93d6..0d64a1e 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,19 +1,35 @@
+#
+# Part of: https://github.com/mateidavid/fast5
+#
+# (c) 2017: Matei David, Ontario Institute for Cancer Research
+# MIT License
+#
+
 .SUFFIXES:
 MAKEFLAGS += -r
 SHELL := /bin/bash
 .DELETE_ON_ERROR:
 .PHONY: all help list clean check_hdf5
 
-HDF5_DIR = /usr/local
-HDF5_INCLUDE_DIR = ${HDF5_DIR}/include
-HDF5_LIB_DIR = ${HDF5_DIR}/lib
-HDF5_LIB = hdf5
-TCLAP_DIR = tclap
-HPPTOOLS_DIR = hpptools
+HDF5_DIR ?= /usr/local
+HDF5_INCLUDE_DIR ?= ${HDF5_DIR}/include
+HDF5_LIB_DIR ?= ${HDF5_DIR}/lib
+HDF5_LIB ?= hdf5
+TCLAP_DIR ?= tclap
+HPPTOOLS_DIR ?= hpptools
 
 TARGETS = f5ls f5ls-full hdf5-mod f5-mod
+EXTRA_TARGETS = f5dump f5pack
+HPP_FILES := fast5.hpp hdf5_tools.hpp Huffman_Packer.hpp Bit_Packer.hpp
+
+CXXFLAGS := -std=c++11 -O0 -g3 -ggdb -fno-eliminate-unused-debug-types -Wall -Wextra -Wpedantic
+CPPFLAGS := -isystem ${HDF5_INCLUDE_DIR}
+EXTRA_CPPFLAGS := -isystem ${TCLAP_DIR}/include -I ${HPPTOOLS_DIR}/include
+LDFLAGS := -L${HDF5_LIB_DIR} -Wl,--rpath=${HDF5_LIB_DIR} -l${HDF5_LIB} -lpthread -lz -ldl
+
+default: ${TARGETS}
 
-all: ${TARGETS}
+all: default ${EXTRA_TARGETS}
 
 print-%:
 	@echo '$*=$($*)'
@@ -25,7 +41,7 @@ list:
 	@echo "TARGETS=${TARGETS}"
 
 clean:
-	rm -rf ${TARGETS}
+	rm -rf ${TARGETS} ${EXTRA_TARGETS}
 
 check_hdf5:
 	@[ -f "${HDF5_INCLUDE_DIR}/H5pubconf.h" ] || { echo "HDF5 headers not found" >&2; exit 1; }
@@ -37,9 +53,11 @@ check_tclap:
 check_hpptools:
 	@[ -f "${HPPTOOLS_DIR}/include/alg.hpp" ] || { echo "HPPTOOLS not found; get it from https://github.com/mateidavid/hpptools.git" >&2; exit 1; }
 
-%: %.cpp fast5.hpp hdf5_tools.hpp | check_hdf5
-	${CXX} -std=c++11 -O0 -g3 -ggdb -fno-eliminate-unused-debug-types -Wall -Wextra -Wpedantic -isystem ${HDF5_INCLUDE_DIR} -o $@ $< -L${HDF5_LIB_DIR} -Wl,--rpath=${HDF5_LIB_DIR} -l${HDF5_LIB} -lpthread -lz -ldl
+%: %.cpp ${HPP_FILES} | check_hdf5
+	${CXX} ${CXXFLAGS} ${CPPFLAGS} -o $@ $< ${LDFLAGS}
 
-f5dump: f5dump.cpp fast5.hpp hdf5_tools.hpp | check_hdf5 check_tclap check_hpptools
-	${CXX} -std=c++11 -O0 -g3 -ggdb -fno-eliminate-unused-debug-types -Wall -Wextra -Wpedantic -isystem ${HDF5_INCLUDE_DIR} -isystem ${TCLAP_DIR}/include -I ${HPPTOOLS_DIR}/include -o $@ $< -L${HDF5_LIB_DIR} -Wl,--rpath=${HDF5_LIB_DIR} -l${HDF5_LIB} -lpthread -lz -ldl
+f5dump: f5dump.cpp ${HPP_FILES} | check_hdf5 check_tclap check_hpptools
+	${CXX} ${CXXFLAGS} ${CPPFLAGS} ${EXTRA_CPPFLAGS} -o $@ $< ${LDFLAGS}
 
+f5pack: f5pack.cpp ${HPP_FILES} File_Packer.hpp | check_hdf5 check_tclap check_hpptools
+	${CXX} ${CXXFLAGS} ${CPPFLAGS} ${EXTRA_CPPFLAGS} -o $@ $< ${LDFLAGS}
diff --git a/src/cwmap.fast5_ed_len_1.inl b/src/cwmap.fast5_ed_len_1.inl
new file mode 100644
index 0000000..98952ef
--- /dev/null
+++ b/src/cwmap.fast5_ed_len_1.inl
@@ -0,0 +1,103 @@
+{ "fast5_ed_len_1",
+"8", "001",
+"7", "010",
+"6", "100",
+"5", "111",
+"12", "0001",
+"11", "1010",
+"10", "1100",
+"9", "1101",
+"16", "00001",
+"4", "01101",
+"15", "01110",
+"14", "10110",
+"13", "10111",
+"19", "000001",
+"18", "011001",
+"17", "011111",
+"22", "0110000",
+"21", "0111100",
+"20", "0111101",
+"25", "00000010",
+"24", "01100010",
+"23", "01100011",
+"28", "000000001",
+"27", "000000110",
+"26", "000000111",
+"31", "0000000000",
+"30", "0000000100",
+"29", "0000000111",
+"32", "00000001010",
+"34", "000000000100",
+"35", "000000000110",
+"33", "000000011011",
+"41", "0000000001011",
+"40", "0000000001110",
+"36", "0000000101100",
+"37", "0000000110100",
+"38", "00000000010101",
+"42", "00000000011110",
+"39", "00000001101011",
+"99", "000000000101000",
+"100", "000000000101001",
+"43", "000000000111110",
+"44", "000000000111111",
+"46", "000000010110100",
+"53", "000000010110101",
+".", "0000000101101100",
+"1", "0000000101101101",
+"2", "0000000101101110",
+"3", "0000000101101111",
+"45", "0000000101110000",
+"47", "0000000101110001",
+"48", "0000000101110010",
+"49", "0000000101110011",
+"50", "0000000101110100",
+"51", "0000000101110101",
+"52", "0000000101110110",
+"54", "0000000101110111",
+"55", "0000000101111000",
+"56", "0000000101111001",
+"57", "0000000101111010",
+"58", "0000000101111011",
+"59", "0000000101111100",
+"60", "0000000101111101",
+"61", "0000000101111110",
+"62", "0000000101111111",
+"63", "0000000110000000",
+"64", "0000000110000001",
+"65", "0000000110000010",
+"66", "0000000110000011",
+"67", "0000000110000100",
+"68", "0000000110000101",
+"69", "0000000110000110",
+"70", "0000000110000111",
+"71", "0000000110001000",
+"72", "0000000110001001",
+"73", "0000000110001010",
+"74", "0000000110001011",
+"75", "0000000110001100",
+"76", "0000000110001101",
+"77", "0000000110001110",
+"78", "0000000110001111",
+"79", "0000000110010000",
+"80", "0000000110010001",
+"81", "0000000110010010",
+"82", "0000000110010011",
+"83", "0000000110010100",
+"84", "0000000110010101",
+"85", "0000000110010110",
+"86", "0000000110010111",
+"87", "0000000110011000",
+"88", "0000000110011001",
+"89", "0000000110011010",
+"90", "0000000110011011",
+"91", "0000000110011100",
+"92", "0000000110011101",
+"93", "0000000110011110",
+"94", "0000000110011111",
+"95", "0000000110101000",
+"96", "0000000110101001",
+"97", "0000000110101010",
+"98", "0000000110101011",
+}
diff --git a/src/cwmap.fast5_ed_skip_1.inl b/src/cwmap.fast5_ed_skip_1.inl
new file mode 100644
index 0000000..849f803
--- /dev/null
+++ b/src/cwmap.fast5_ed_skip_1.inl
@@ -0,0 +1,4 @@
+{ "fast5_ed_skip_1",
+"0", "0",
+".", "1",
+}
diff --git a/src/cwmap.fast5_ev_move_1.inl b/src/cwmap.fast5_ev_move_1.inl
new file mode 100644
index 0000000..1a695fc
--- /dev/null
+++ b/src/cwmap.fast5_ev_move_1.inl
@@ -0,0 +1,6 @@
+{ "fast5_ev_move_1",
+"0", "0",
+"1", "10",
+"2", "110",
+".", "111",
+}
diff --git a/src/cwmap.fast5_ev_rel_skip_1.inl b/src/cwmap.fast5_ev_rel_skip_1.inl
new file mode 100644
index 0000000..d315aca
--- /dev/null
+++ b/src/cwmap.fast5_ev_rel_skip_1.inl
@@ -0,0 +1,4 @@
+{ "fast5_ev_rel_skip_1",
+"0", "0",
+".", "1",
+}
diff --git a/src/cwmap.fast5_fq_bp_1.inl b/src/cwmap.fast5_fq_bp_1.inl
new file mode 100644
index 0000000..a42e4e2
--- /dev/null
+++ b/src/cwmap.fast5_fq_bp_1.inl
@@ -0,0 +1,7 @@
+{ "fast5_fq_bp_1",
+"65", "00",
+"67", "01",
+"71", "10",
+"84", "110",
+".", "111",
+}
diff --git a/src/cwmap.fast5_fq_qv_1.inl b/src/cwmap.fast5_fq_qv_1.inl
new file mode 100644
index 0000000..986f778
--- /dev/null
+++ b/src/cwmap.fast5_fq_qv_1.inl
@@ -0,0 +1,35 @@
+{ "fast5_fq_qv_1",
+"10", "10",
+"16", "00000",
+"17", "00001",
+"18", "00010",
+"19", "00011",
+"20", "00100",
+"21", "00101",
+"22", "00110",
+"23", "00111",
+"24", "01000",
+"25", "01001",
+"26", "01010",
+"27", "01011",
+"28", "01100",
+"29", "01101",
+"30", "01110",
+"31", "01111",
+".", "110000",
+"0", "110001",
+"1", "110010",
+"2", "110011",
+"3", "110100",
+"4", "110101",
+"5", "110110",
+"6", "110111",
+"7", "111000",
+"8", "111001",
+"9", "111010",
+"11", "111011",
+"12", "111100",
+"13", "111101",
+"14", "111110",
+"15", "111111",
+}
diff --git a/src/cwmap.fast5_rw_1.inl b/src/cwmap.fast5_rw_1.inl
new file mode 100644
index 0000000..23ad6e5
--- /dev/null
+++ b/src/cwmap.fast5_rw_1.inl
@@ -0,0 +1,204 @@
+{ "fast5_rw_1",
+"8", "00001",
+"-7", "00011",
+"7", "00100",
+"-6", "00110",
+"6", "01000",
+"-5", "01001",
+"5", "01010",
+"-4", "01100",
+"4", "01101",
+"-3", "01110",
+"3", "01111",
+"-2", "10001",
+"2", "10010",
+"-1", "10011",
+"0", "10100",
+"1", "10101",
+"-16", "000001",
+"16", "000100",
+"-15", "001011",
+"15", "001110",
+"-14", "010111",
+"14", "100000",
+"-13", "101101",
+"13", "101111",
+"-12", "110010",
+"12", "110011",
+"-11", "110101",
+"11", "110111",
+"-10", "111001",
+"10", "111010",
+"-9", "111100",
+"9", "111110",
+"-8", "111111",
+"22", "0001010",
+"-22", "0001011",
+"21", "0011111",
+"-21", "0101100",
+"-20", "1000011",
+"20", "1011000",
+".", "1011101",
+"19", "1100001",
+"-19", "1100010",
+"-18", "1101001",
+"18", "1101101",
+"-17", "1110110",
+"17", "1111010",
+"-29", "00000001",
+"28", "00000010",
+"-28", "00101010",
+"27", "00111101",
+"-27", "01011010",
+"26", "10110010",
+"-26", "10110011",
+"25", "11000001",
+"-25", "11000111",
+"24", "11011001",
+"-24", "11100001",
+"-23", "11101111",
+"23", "11110110",
+"36", "000000000",
+"35", "000000111",
+"-36", "001010001",
+"34", "001010011",
+"-35", "001111000",
+"-34", "010110110",
+"33", "100001000",
+"-33", "101110010",
+"32", "101110011",
+"-32", "110100000",
+"31", "110100010",
+"-31", "110110000",
+"30", "111000100",
+"-30", "111000111",
+"29", "111011101",
+"-49", "0000000011",
+"45", "0010100000",
+"-48", "0010100001",
+"-46", "0010101100",
+"44", "0010101110",
+"43", "0010101111",
+"-47", "0011110010",
+"42", "1000010010",
+"-45", "1000010011",
+"-44", "1000010111",
+"-43", "1011100001",
+"41", "1011100011",
+"-42", "1100000010",
+"-41", "1100011000",
+"40", "1100011010",
+"39", "1101000011",
+"-40", "1101100010",
+"38", "1110000001",
+"-39", "1110001010",
+"37", "1110001100",
+"-38", "1110111001",
+"-37", "1111011111",
+"-70", "00000000100",
+"61", "00000011000",
+"-65", "00000011011",
+"60", "00101001001",
+"-67", "00101001011",
+"58", "00101011010",
+"-66", "00101011011",
+"-64", "00111100110",
+"59", "00111100111",
+"-62", "01011011100",
+"57", "01011011101",
+"56", "01011011110",
+"-63", "01011011111",
+"-60", "10000101010",
+"-61", "10111000000",
+"55", "10111000100",
+"54", "10111000101",
+"52", "11000000000",
+"53", "11000000001",
+"-57", "11000000110",
+"-58", "11000000111",
+"-59", "11000110011",
+"51", "11000110110",
+"-56", "11010000101",
+"-55", "11010001110",
+"49", "11011000110",
+"-54", "11011000111",
+"50", "11100000001",
+"-52", "11100000101",
+"-51", "11100010111",
+"-53", "11101110000",
+"48", "11101110001",
+"46", "11110111001",
+"47", "11110111011",
+"-50", "11110111100",
+"-90", "000000001010",
+"-88", "000000001011",
+"84", "000000110010",
+"-89", "000000110011",
+"82", "000000110100",
+"81", "000000110101",
+"83", "001010010000",
+"-87", "001010010001",
+"80", "001010010100",
+"79", "001010010101",
+"-84", "100001010000",
+"76", "100001010001",
+"-85", "100001010010",
+"75", "100001010011",
+"74", "100001010110",
+"-86", "100001011000",
+"-82", "100001011001",
+"77", "100001011010",
+"-83", "100001011011",
+"78", "101110000010",
+"-81", "101110000011",
+"-80", "110000000100",
+"73", "110000000110",
+"-78", "110000000111",
+"72", "110001100100",
+"69", "110001101110",
+"-77", "110001101111",
+"-79", "110100001000",
+"71", "110100011000",
+"68", "110100011001",
+"-76", "110100011010",
+"-72", "110100011111",
+"70", "111000000000",
+"-74", "111000001001",
+"66", "111000001100",
+"-75", "111000001110",
+"-73", "111000001111",
+"67", "111000101101",
+"65", "111000110100",
+"63", "111000110110",
+"-71", "111101110000",
+"62", "111101110001",
+"-69", "111101110100",
+"-68", "111101110101",
+"64", "111101111010",
+"-98", "1000010101110",
+"97", "1000010101111",
+"95", "1100000001010",
+"-100", "1100000001011",
+"100", "1100011001010",
+"99", "1100011001011",
+"98", "1101000010010",
+"-99", "1101000010011",
+"-95", "1101000110110",
+"-94", "1101000110111",
+"94", "1101000111100",
+"92", "1101000111101",
+"-97", "1110000000010",
+"90", "1110000000011",
+"96", "1110000010000",
+"86", "1110000010001",
+"93", "1110000011010",
+"-96", "1110000011011",
+"91", "1110001011000",
+"-92", "1110001011001",
+"87", "1110001101010",
+"-93", "1110001101011",
+"88", "1110001101110",
+"89", "1110001101111",
+"-91", "1111011110110",
+"85", "1111011110111",
+}
diff --git a/src/f5-mod.cpp b/src/f5-mod.cpp
index 278ba57..5d510ae 100644
--- a/src/f5-mod.cpp
+++ b/src/f5-mod.cpp
@@ -1,3 +1,10 @@
+//
+// Part of: https://github.com/mateidavid/fast5
+//
+// Copyright (c) 2015-2017 Matei David, Ontario Institute for Cancer Research
+// MIT License
+//
+
 #include <cassert>
 #include <iostream>
 #include <string>
@@ -62,18 +69,17 @@ int main(int argc, char* argv[])
             //
             // add basecall events
             //
-            vector< fast5::Event_Entry > ev(3, {55.0, 1.0, 0.05, 0.01, .5, .5, .7, .1, .1, .1, 0,
-                        array< char, 8 >{"ACGTA"}, array< char, 8 >{"CGTAC"}});
+            vector< fast5::Basecall_Event > ev(3, {55.0, 1.0, 0.05, 0.01, .5, 0, array< char, 8 >{"ACGTA"}});
             f.add_basecall_events(0, test_bc_grp_prefix + test_bc_grp_suffix, ev);
             //
             // add basecall pore model
             //
-            vector< fast5::Model_Entry > mod(3, {0, 56.0, 1.0, 42.0, 1.0, 5.0, array< char, 8 >{"ACGTA"}});
+            vector< fast5::Basecall_Model_State > mod(3, {56.0, 1.0, 42.0, 1.0, array< char, 8 >{"ACGTA"}});
             f.add_basecall_model(0, test_bc_grp_prefix + test_bc_grp_suffix, mod);
             //
             // add basecall pore model params
             //
-            fast5::Model_Parameters params{1.0, 0.0, 0.0, 1.0, .9, .9};
+            fast5::Basecall_Model_Params params{1.0, 0.0, 0.0, 1.0, .9, .9};
             f.add_basecall_model_params(0, test_bc_grp_prefix + test_bc_grp_suffix, params);
             //
             // add basecall model file
diff --git a/src/f5dump.cpp b/src/f5dump.cpp
index 1dd8984..3d18351 100644
--- a/src/f5dump.cpp
+++ b/src/f5dump.cpp
@@ -1,3 +1,10 @@
+//
+// Part of: https://github.com/mateidavid/fast5
+//
+// Copyright (c) 2015-2017 Matei David, Ontario Institute for Cancer Research
+// MIT License
+//
+
 #include <cassert>
 #include <iostream>
 #include <iomanip>
@@ -10,7 +17,6 @@
 
 using namespace std;
 
-
 namespace opts
 {
     using namespace TCLAP;
@@ -26,8 +32,9 @@ namespace opts
     ValueArg< unsigned > st("", "st", "Strand.", false, 0, "0|1|2", cmd_parser);
     ValueArg< string > gr("", "gr", "Group name suffix.", false, "", "000|RNN_001|...", cmd_parser);
     //
-    SwitchArg fq("", "fq", "Dump basecall fastq data.", cmd_parser);
+    SwitchArg al("", "al", "Dump basecall 2d alignment data.", cmd_parser);
     SwitchArg ev("", "ev", "Dump basecall event data.", cmd_parser);
+    SwitchArg fq("", "fq", "Dump basecall fastq data.", cmd_parser);
     SwitchArg ed("", "ed", "Dump event detection data.", cmd_parser);
     SwitchArg rw("", "rw", "Dump raw samples data.", cmd_parser);
     SwitchArg id("", "id", "Dump channel/tracking id data.", cmd_parser);
@@ -44,11 +51,6 @@ void print_map(ostream& os, const map< U, V >& m, const string& prefix)
     }
 }
 
-unsigned time_int(double f, fast5::Channel_Id_Parameters const & channel_id_params)
-{
-    return f * channel_id_params.sampling_rate;
-}
-
 void real_main()
 {
     fast5::File f;
@@ -56,7 +58,7 @@ void real_main()
     {
         // open file
         f.open(opts::input_fn);
-        auto channel_id_params = f.get_channel_id_params();
+        auto cid_params = f.get_channel_id_params();
         //
         // list
         //
@@ -90,7 +92,7 @@ void real_main()
                         int have_events = (st == 2
                                            ? f.have_basecall_events(0, gr) and f.have_basecall_events(1, gr)
                                            : f.have_basecall_events(st, gr));
-                        string link = (st == 2? f.get_basecall_group_1d(gr) : f.get_basecall_eventdetection_group(gr));
+                        string link = (st == 2? f.get_basecall_1d_group(gr) : f.get_basecall_eventdetection_group(gr));
                         cout
                             << (st == 2? "bc2d" : "bc1d") << "\t"
                             << gr << "\t"
@@ -109,11 +111,11 @@ void real_main()
         if (opts::id)
         {
             cout
-                << "channel_id/channel_number=" << channel_id_params.channel_number << endl
-                << "channel_id/digitisation=" << channel_id_params.digitisation << endl
-                << "channel_id/offset=" << channel_id_params.offset << endl
-                << "channel_id/range=" << channel_id_params.range << endl
-                << "channel_id/sampling_rate=" << channel_id_params.sampling_rate << endl
+                << "channel_id/channel_number=" << cid_params.channel_number << endl
+                << "channel_id/digitisation=" << cid_params.digitisation << endl
+                << "channel_id/offset=" << cid_params.offset << endl
+                << "channel_id/range=" << cid_params.range << endl
+                << "channel_id/sampling_rate=" << cid_params.sampling_rate << endl
                 ;
             if (f.have_tracking_id_params())
             {
@@ -153,7 +155,7 @@ void real_main()
             }
             else
             {
-                auto rs_int = f.get_raw_samples_int(opts::rn);
+                auto rs_int = f.get_raw_int_samples(opts::rn);
                 if (opts::rw_time)
                 {
                     cout << "start\t";
@@ -174,7 +176,7 @@ void real_main()
         //
         if (opts::ed and f.have_eventdetection_events(opts::gr, opts::rn))
         {
-            auto ede_params = f.get_eventdetection_event_params(opts::gr, opts::rn);
+            auto ede_params = f.get_eventdetection_events_params(opts::gr, opts::rn);
             cout
                 << "#read_id=" << ede_params.read_id << endl
                 << "#read_number=" << ede_params.read_number << endl
@@ -186,7 +188,7 @@ void real_main()
             auto ede = f.get_eventdetection_events(opts::gr, opts::rn);
             cout
                 << "start\tlength\tmean\tstdv" << endl
-                << alg::os_join(ede, "\n", [] (fast5::EventDetection_Event_Entry const & e) {
+                << alg::os_join(ede, "\n", [] (fast5::EventDetection_Event const & e) {
                         ostringstream oss;
                         oss.precision(opts::float_prec);
                         oss << e.start << "\t" << e.length << "\t" << e.mean << "\t" << e.stdv;
@@ -195,13 +197,36 @@ void real_main()
                 << endl;
         } // if opts::ed
         //
+        // basecall fastq
+        //
+        if (opts::fq and f.have_basecall_fastq(opts::st, opts::gr))
+        {
+            auto fq = f.get_basecall_fastq(opts::st, opts::gr);
+            cout << fq;
+            if (fq.size() > 0 and fq[fq.size() - 1] != '\n') cout << endl;
+        } // if opts::fq
+        //
         // basecall events
         //
         if (opts::ev and f.have_basecall_events(opts::st, opts::gr))
         {
+            auto bce_params = f.get_basecall_events_params(opts::st, opts::gr);
+            if (not opts::time_int)
+            {
+                cout
+                    << "#start_time=" << bce_params.start_time << endl
+                    << "#duration=" << bce_params.duration << endl;
+            }
+            else
+            {
+                cout
+                    << "#start_time=" << f.time_to_int(bce_params.start_time, cid_params) << endl
+                    << "#duration=" << f.time_to_int(bce_params.duration, cid_params) << endl;
+            }
             auto bce = f.get_basecall_events(opts::st, opts::gr);
             cout
-                << alg::os_join(bce, "\n", [&channel_id_params] (fast5::Event_Entry const & e) {
+                << "start\tlength\tmean\tstdv\tstate\tmove\tp_model_state" << endl
+                << alg::os_join(bce, "\n", [&] (fast5::Basecall_Event const & e) {
                         ostringstream oss;
                         oss.precision(opts::float_prec);
                         if (not opts::time_int)
@@ -211,26 +236,33 @@ void real_main()
                         else
                         {
                             oss
-                                << time_int(e.start, channel_id_params) << "\t"
-                                << time_int(e.length, channel_id_params) << "\t";
+                                << f.time_to_int(e.start, cid_params) << "\t"
+                                << f.time_to_int(e.length, cid_params) << "\t";
                         }
                         oss
                             << e.mean << "\t"
                             << e.stdv << "\t"
-                            << string(e.model_state.begin(), e.model_state.end()).data() << "\t"
-                            << e.move;
+                            << e.get_model_state() << "\t"
+                            << e.move << "\t"
+                            << e.p_model_state;
                         return oss.str();
                     })
                 << endl;
         } // if opts::ev
-        //
-        // basecall fastq
-        //
-        if (opts::fq and f.have_basecall_fastq(opts::st, opts::gr))
+        if (opts::al and f.have_basecall_alignment(opts::gr))
         {
-            auto fq = f.get_basecall_fastq(opts::st, opts::gr);
-            cout << fq << endl;
-        } // if opts::fq
+            auto aln = f.get_basecall_alignment(opts::gr);
+            cout
+                << "template\tcomplement\tkmer" << endl
+                << alg::os_join(aln, "\n", [&] (fast5::Basecall_Alignment_Entry const & a) {
+                        ostringstream oss;
+                        oss << a.template_index << "\t"
+                            << a.complement_index << "\t"
+                            << a.get_kmer();
+                        return oss.str();
+                    })
+                << endl;
+        } // if opts::al
     }
     catch (hdf5_tools::Exception& e)
     {
@@ -247,13 +279,13 @@ int main(int argc, char * argv[])
     //    << "program: " << opts::cmd_parser.getProgramName() << endl
     //    << "version: " << opts::cmd_parser.getVersion() << endl
     //    << "args: " << opts::cmd_parser.getOrigArgv() << endl;
-    if (opts::ls + opts::id + opts::rw + opts::ed + opts::ev + opts::fq == 0)
+    if (opts::ls + opts::id + opts::rw + opts::ed + opts::fq + opts::ev + opts::al == 0)
     {
         opts::ls.set(true);
     }
-    else if (opts::ls + opts::id + opts::rw + opts::ed + opts::ev + opts::fq > 1)
+    else if (opts::ls + opts::id + opts::rw + opts::ed + opts::fq + opts::ev + opts::al > 1)
     {
-        cerr << "at most one of --ls/--id/--rw/--ed/--ev/--fq must be given" << endl;
+        cerr << "at most one of --ls/--id/--rw/--ed/--fq/--ev/--al must be given" << endl;
         exit(EXIT_FAILURE);
     }
     cout.precision(opts::float_prec);
diff --git a/src/f5ls-full.cpp b/src/f5ls-full.cpp
index d29000a..48dd47a 100644
--- a/src/f5ls-full.cpp
+++ b/src/f5ls-full.cpp
@@ -1,3 +1,10 @@
+//
+// Part of: https://github.com/mateidavid/fast5
+//
+// Copyright (c) 2015-2017 Matei David, Ontario Institute for Cancer Research
+// MIT License
+//
+
 #include <cassert>
 #include <iostream>
 #include <string>
@@ -121,9 +128,9 @@ int main(int argc, char* argv[])
             //
             bool have_eventdetection_events = f.have_eventdetection_events();
             cout << "have_eventdetection_events=" << have_eventdetection_events << endl;
-            bool have_eventdetection_groups = f.have_eventdetection_groups();
-            cout << "have_eventdetection_groups=" << have_eventdetection_groups << endl;
-            if (have_eventdetection_groups)
+            bool have_eventdetection_group = f.have_eventdetection_group();
+            cout << "have_eventdetection_group=" << have_eventdetection_group << endl;
+            if (have_eventdetection_group)
             {
                 auto ed_gr_list = f.get_eventdetection_group_list();
                 cout << "eventdetection_group_list=";
@@ -143,7 +150,7 @@ int main(int argc, char* argv[])
                     {
                         std::ostringstream tmp;
                         tmp << "eventdetection/" << ed_gr << "/" << rn;
-                        auto ed_ev_params = f.get_eventdetection_event_params(ed_gr, rn);
+                        auto ed_ev_params = f.get_eventdetection_events_params(ed_gr, rn);
                         auto ed_ev = f.get_eventdetection_events(ed_gr, rn);
                         cout << tmp.str() << "/abasic_found=" << ed_ev_params.abasic_found << endl
                              << tmp.str() << "/duration=" << ed_ev_params.duration << endl
@@ -168,9 +175,9 @@ int main(int argc, char* argv[])
             //
             // inspect basecall groups
             //
-            bool have_basecall_groups = f.have_basecall_groups();
-            cout << "have_basecall_groups=" << have_basecall_groups << endl;
-            if (have_basecall_groups)
+            bool have_basecall_group = f.have_basecall_group();
+            cout << "have_basecall_group=" << have_basecall_group << endl;
+            if (have_basecall_group)
             {
                 auto bc_gr_list = f.get_basecall_group_list();
                 cout << "basecall_group_list=";
@@ -192,9 +199,13 @@ int main(int argc, char* argv[])
                     print_map(cout, bc_params, tmp.str());
                     // check if basecall log exists
                     cout << "basecall/" << bc_gr << "/have_log=" << f.have_basecall_log(bc_gr) << endl;
+                    // check if eventdetection link exists
+                    auto bc_ed_gr = f.get_basecall_eventdetection_group(bc_gr);
+                    cout << "basecall/" << bc_gr << "/eventdetection_group=" << bc_ed_gr << endl;
                 }
                 for (unsigned st = 0; st < 3; ++st)
                 {
+                    auto gr_l = f.get_basecall_strand_group_list(st);
                     bool have_seq = f.have_basecall_seq(st);
                     cout << "basecall(" << st << ")/have_seq=" << have_seq << endl;
                     if (have_seq)
@@ -243,12 +254,12 @@ int main(int argc, char* argv[])
                     }
                     if (st == 2)
                     {
-                        bool have_event_alignment = f.have_basecall_event_alignment();
-                        cout << "basecall(2)/have_event_alignment=" << have_event_alignment << endl;
-                        if (have_event_alignment)
+                        bool have_alignment = f.have_basecall_alignment();
+                        cout << "basecall(2)/have_alignment=" << have_alignment << endl;
+                        if (have_alignment)
                         {
-                            auto al = f.get_basecall_event_alignment();
-                            cout << "basecall(2)/event_alignment/size=" << al.size() << endl;
+                            auto al = f.get_basecall_alignment();
+                            cout << "basecall(2)/alignment/size=" << al.size() << endl;
                             for (const auto& e : al)
                             {
                                 cout << "  (template_index=" << e.template_index
diff --git a/src/f5ls.cpp b/src/f5ls.cpp
index af655dd..c485ab7 100644
--- a/src/f5ls.cpp
+++ b/src/f5ls.cpp
@@ -1,3 +1,10 @@
+//
+// Part of: https://github.com/mateidavid/fast5
+//
+// Copyright (c) 2015-2017 Matei David, Ontario Institute for Cancer Research
+// MIT License
+//
+
 #include <cassert>
 #include <iostream>
 #include <string>
@@ -111,7 +118,7 @@ int main(int argc, char* argv[])
             {
                 auto ed_params = f.get_eventdetection_params();
                 print_map(cout, ed_params, "eventdetection/");
-                auto ed_ev_params = f.get_eventdetection_event_params();
+                auto ed_ev_params = f.get_eventdetection_events_params();
                 auto ed_ev = f.get_eventdetection_events();
                 cout << "eventdetection/events/abasic_found=" << ed_ev_params.abasic_found << endl
                      << "eventdetection/events/duration=" << ed_ev_params.duration << endl
@@ -173,11 +180,11 @@ int main(int argc, char* argv[])
                          << ", p_model_state=" << e.p_model_state
                          << ", move=" << e.move << ")" << endl;
                 }
-                // basecall event alignment
-                if (st == 2 and f.have_basecall_event_alignment())
+                // basecall alignment
+                if (st == 2 and f.have_basecall_alignment())
                 {
-                    auto al = f.get_basecall_event_alignment();
-                    cout << "basecall(2)/event_alignment/size=" << al.size() << endl;
+                    auto al = f.get_basecall_alignment();
+                    cout << "basecall(2)/alignment/size=" << al.size() << endl;
                     const auto& e = al.front();
                     cout << "  (template_index=" << e.template_index
                          << ", complement_index=" << e.complement_index
diff --git a/src/f5pack.cpp b/src/f5pack.cpp
new file mode 100644
index 0000000..9afd30f
--- /dev/null
+++ b/src/f5pack.cpp
@@ -0,0 +1,185 @@
+//
+// Part of: https://github.com/mateidavid/fast5
+//
+// Copyright (c) 2015-2017 Matei David, Ontario Institute for Cancer Research
+// MIT License
+//
+
+#include <cassert>
+#include <iostream>
+#include <iomanip>
+#include <string>
+
+#include <tclap/CmdLine.h>
+#include "logger.hpp"
+
+#include "fast5.hpp"
+#include "File_Packer.hpp"
+
+using namespace std;
+
+namespace opts // command-line options for f5pack; every argument below registers itself with cmd_parser
+{
+    using namespace TCLAP;
+    string description = "Pack an ONT fast5 file.";
+    CmdLine cmd_parser(description);
+    // logging controls
+    MultiArg< string > log_level("", "log", "Log level. (default: info)", false, "string", cmd_parser);
+    MultiSwitchArg extra_verbosity("v", "", "Increase verbosity", cmd_parser);
+    // action for basecall alignment data (al); main rejects more than one of these
+    SwitchArg al_drop("", "al-drop", "Drop basecall alignment data.", cmd_parser);
+    SwitchArg al_copy("", "al-copy", "Copy basecall alignment data.", cmd_parser);
+    SwitchArg al_unpack("", "al-unpack", "Unpack basecall alignment data.", cmd_parser);
+    SwitchArg al_pack("", "al-pack", "Pack basecall alignment data.", cmd_parser);
+    // action for basecall event data (ev); main rejects more than one of these
+    SwitchArg ev_drop("", "ev-drop", "Drop basecall event data.", cmd_parser);
+    SwitchArg ev_copy("", "ev-copy", "Copy basecall event data.", cmd_parser);
+    SwitchArg ev_unpack("", "ev-unpack", "Unpack basecall event data.", cmd_parser);
+    SwitchArg ev_pack("", "ev-pack", "Pack basecall event data.", cmd_parser);
+    // action for basecall fastq data (fq); main rejects more than one of these
+    SwitchArg fq_drop("", "fq-drop", "Drop basecall fastq data.", cmd_parser);
+    SwitchArg fq_copy("", "fq-copy", "Copy basecall fastq data.", cmd_parser);
+    SwitchArg fq_unpack("", "fq-unpack", "Unpack basecall fastq data.", cmd_parser);
+    SwitchArg fq_pack("", "fq-pack", "Pack basecall fastq data.", cmd_parser);
+    // action for event detection data (ed); main rejects more than one of these
+    SwitchArg ed_drop("", "ed-drop", "Drop event detection data.", cmd_parser);
+    SwitchArg ed_copy("", "ed-copy", "Copy event detection data.", cmd_parser);
+    SwitchArg ed_unpack("", "ed-unpack", "Unpack event detection data.", cmd_parser);
+    SwitchArg ed_pack("", "ed-pack", "Pack event detection data.", cmd_parser);
+    // action for raw samples data (rw); main rejects more than one of these
+    SwitchArg rw_drop("", "rw-drop", "Drop raw samples data.", cmd_parser);
+    SwitchArg rw_copy("", "rw-copy", "Copy raw samples data.", cmd_parser);
+    SwitchArg rw_unpack("", "rw-unpack", "Unpack raw samples data.", cmd_parser);
+    SwitchArg rw_pack("", "rw-pack", "Pack raw samples data.", cmd_parser);
+    // packing parameters (defaults come from File_Packer), plus check and overwrite switches
+    ValueArg< unsigned > p_model_state_bits("", "p-model-state-bits", "P_Model_State bits to keep.", false, fast5::File_Packer::default_p_model_state_bits(), "int", cmd_parser);
+    ValueArg< unsigned > qv_bits("", "qv-bits", "QV bits to keep.", false, fast5::File_Packer::max_qv_bits(), "int", cmd_parser);
+    SwitchArg no_check("n", "no-check", "Don't check packing.", cmd_parser);
+    SwitchArg force("f", "force", "Overwrite output file if it exists.", cmd_parser);
+    // whole-file presets; main expands them into the per-category switches above
+    SwitchArg fastq("", "fastq", "Pack fastq data, drop rest.", cmd_parser);
+    SwitchArg archive("", "archive", "Pack raw samples data, drop rest.", cmd_parser);
+    SwitchArg unpack("u", "unpack", "Unpack files.", cmd_parser);
+    SwitchArg pack("p", "pack", "Pack files (default, if no other pack/unpack/copy options).", cmd_parser);
+    // positional arguments: input file first, then output file (both required)
+    UnlabeledValueArg< string > input_fn("input", "Input fast5 file.", true, "", "file", cmd_parser);
+    UnlabeledValueArg< string > output_fn("output", "Output fast5 file.", true, "", "file", cmd_parser);
+} // opts
+
+
+int main(int argc, char * argv[]) // f5pack entry point: parse options, resolve one action per data category, run File_Packer, print its counters
+{
+    opts::cmd_parser.parse(argc, argv);
+    // set log levels: the default is info plus the number of -v switches; --log values go to set_levels_from_options
+    auto default_level = (int)logger::level::info + opts::extra_verbosity.getValue();
+    logger::Logger::set_default_level(default_level);
+    logger::Logger::set_levels_from_options(opts::log_level, &clog);
+    // log the program name, version and original command line at info level
+    LOG(info) << "program: " << opts::cmd_parser.getProgramName() << endl;
+    LOG(info) << "version: " << opts::cmd_parser.getVersion() << endl;
+    LOG(info) << "args: " << opts::cmd_parser.getOrigArgv() << endl;
+    // validate: among the whole-file presets, and within each data category, at most one switch may be set
+    if (opts::pack + opts::unpack + opts::archive + opts::fastq > 1)
+    {
+        LOG_EXIT << "at most one of --pack/--unpack/--archive/--fastq may be given" << endl;
+    }
+    if (opts::rw_pack + opts::rw_unpack + opts::rw_copy + opts::rw_drop > 1)
+    {
+        LOG_EXIT << "at most one of --rw-pack/--rw-unpack/--rw-copy/--rw-drop may be given" << endl;
+    }
+    if (opts::ed_pack + opts::ed_unpack + opts::ed_copy + opts::ed_drop > 1)
+    {
+        LOG_EXIT << "at most one of --ed-pack/--ed-unpack/--ed-copy/--ed-drop may be given" << endl;
+    }
+    if (opts::fq_pack + opts::fq_unpack + opts::fq_copy + opts::fq_drop > 1)
+    {
+        LOG_EXIT << "at most one of --fq-pack/--fq-unpack/--fq-copy/--fq-drop may be given" << endl;
+    }
+    if (opts::ev_pack + opts::ev_unpack + opts::ev_copy + opts::ev_drop > 1)
+    {
+        LOG_EXIT << "at most one of --ev-pack/--ev-unpack/--ev-copy/--ev-drop may be given" << endl;
+    }
+    if (opts::al_pack + opts::al_unpack + opts::al_copy + opts::al_drop > 1)
+    {
+        LOG_EXIT << "at most one of --al-pack/--al-unpack/--al-copy/--al-drop may be given" << endl;
+    }
+    if (opts::pack + opts::unpack + opts::archive + opts::fastq
+        + opts::rw_pack + opts::rw_unpack + opts::rw_copy + opts::rw_drop
+        + opts::ed_pack + opts::ed_unpack + opts::ed_copy + opts::ed_drop
+        + opts::fq_pack + opts::fq_unpack + opts::fq_copy + opts::fq_drop
+        + opts::ev_pack + opts::ev_unpack + opts::ev_copy + opts::ev_drop
+        + opts::al_pack + opts::al_unpack + opts::al_copy + opts::al_drop
+        == 0) // no preset and no per-category switch was given: fall back to --pack
+    {
+        opts::pack.set(true);
+    }
+    if (opts::pack) // --pack turns on the pack switch of all five categories
+    {
+        opts::rw_pack.set(true);
+        opts::ed_pack.set(true);
+        opts::fq_pack.set(true);
+        opts::ev_pack.set(true);
+        opts::al_pack.set(true);
+    }
+    else if (opts::unpack) // --unpack turns on the unpack switch of all five categories
+    {
+        opts::rw_unpack.set(true);
+        opts::ed_unpack.set(true);
+        opts::fq_unpack.set(true);
+        opts::ev_unpack.set(true);
+        opts::al_unpack.set(true);
+    }
+    if (opts::archive) // --archive only turns on raw samples packing
+    {
+        opts::rw_pack.set(true);
+    }
+    if (opts::fastq) // --fastq only turns on fastq packing
+    {
+        opts::fq_pack.set(true);
+    }
+    if (opts::rw_pack + opts::rw_unpack + opts::rw_copy + opts::rw_drop == 0) opts::rw_drop.set(true); // here and below: a category with no switch set is dropped
+    if (opts::ed_pack + opts::ed_unpack + opts::ed_copy + opts::ed_drop == 0) opts::ed_drop.set(true);
+    if (opts::fq_pack + opts::fq_unpack + opts::fq_copy + opts::fq_drop == 0) opts::fq_drop.set(true);
+    if (opts::ev_pack + opts::ev_unpack + opts::ev_copy + opts::ev_drop == 0) opts::ev_drop.set(true);
+    if (opts::al_pack + opts::al_unpack + opts::al_copy + opts::al_drop == 0) opts::al_drop.set(true);
+    LOG(info) << "rw: " << (opts::rw_pack? "pack" : opts::rw_unpack? "unpack" : opts::rw_copy? "copy" : "drop") << endl;
+    LOG(info) << "ed: " << (opts::ed_pack? "pack" : opts::ed_unpack? "unpack" : opts::ed_copy? "copy" : "drop") << endl;
+    LOG(info) << "fq: " << (opts::fq_pack? "pack" : opts::fq_unpack? "unpack" : opts::fq_copy? "copy" : "drop") << endl;
+    LOG(info) << "ev: " << (opts::ev_pack? "pack" : opts::ev_unpack? "unpack" : opts::ev_copy? "copy" : "drop") << endl;
+    LOG(info) << "al: " << (opts::al_pack? "pack" : opts::al_unpack? "unpack" : opts::al_copy? "copy" : "drop") << endl;
+    LOG(info) << "check: " << (not opts::no_check? "yes" : "no") << endl;
+    // set File_Packer options; each policy int encodes the switch tested first-to-last: 1 = pack, 2 = unpack, 3 = copy, 0 = none of these (drop)
+    int rw_policy = (opts::rw_pack? 1 : opts::rw_unpack? 2 : opts::rw_copy? 3 : 0);
+    int ed_policy = (opts::ed_pack? 1 : opts::ed_unpack? 2 : opts::ed_copy? 3 : 0);
+    int fq_policy = (opts::fq_pack? 1 : opts::fq_unpack? 2 : opts::fq_copy? 3 : 0);
+    int ev_policy = (opts::ev_pack? 1 : opts::ev_unpack? 2 : opts::ev_copy? 3 : 0);
+    int al_policy = (opts::al_pack? 1 : opts::al_unpack? 2 : opts::al_copy? 3 : 0);
+    fast5::File_Packer fp(rw_policy, ed_policy, fq_policy, ev_policy, al_policy);
+    fp.set_check(not opts::no_check);
+    fp.set_force(opts::force);
+    fp.set_qv_bits(opts::qv_bits);
+    fp.set_p_model_state_bits(opts::p_model_state_bits);
+    fp.run(opts::input_fn, opts::output_fn);
+    auto cnt = fp.get_counts(); // counters printed below to stdout, one "name<TAB>value" line each, fixed notation with 2 decimals
+    cout
+        << std::fixed << std::setprecision(2)
+        << "bp_seq_count\t" << cnt.bp_seq_count << "\n"
+        << "rs_count\t" << cnt.rs_count << "\n"
+        << "rs_bits\t" << (double)cnt.rs_bits/cnt.rs_count << "\n" // NOTE(review): every *_bits line is a total divided by its count; with a zero count the quotient is presumably non-finite -- confirm
+        << "ed_count\t" << cnt.ed_count << "\n"
+        << "ed_skip_bits\t" << (double)cnt.ed_skip_bits/cnt.ed_count << "\n"
+        << "ed_len_bits\t" << (double)cnt.ed_len_bits/cnt.ed_count << "\n"
+        << "fq_count\t" << cnt.fq_count << "\n"
+        << "fq_bp_bits\t" << (double)cnt.fq_bp_bits/cnt.fq_count << "\n"
+        << "fq_qv_bits\t" << (double)cnt.fq_qv_bits/cnt.fq_count << "\n"
+        << "ev_count\t" << cnt.ev_count << "\n"
+        << "ev_rel_skip_bits\t" << (double)cnt.ev_rel_skip_bits/cnt.ev_count << "\n"
+        << "ev_skip_bits\t" << (double)cnt.ev_skip_bits/cnt.ev_count << "\n"
+        << "ev_len_bits\t" << (double)cnt.ev_len_bits/cnt.ev_count << "\n"
+        << "ev_move_bits\t" << (double)cnt.ev_move_bits/cnt.ev_count << "\n"
+        << "ev_p_model_state_bits\t" << (double)cnt.ev_p_model_state_bits/cnt.ev_count << "\n"
+        << "al_count\t" << cnt.al_count << "\n"
+        << "al_template_step_bits\t" << (double)cnt.al_template_step_bits/cnt.al_count << "\n"
+        << "al_complement_step_bits\t" << (double)cnt.al_complement_step_bits/cnt.al_count << "\n"
+        << "al_move_bits\t" << (double)cnt.al_move_bits/cnt.al_count << "\n";
+}
diff --git a/src/fast5.hpp b/src/fast5.hpp
index 14ae3cd..db6eb1d 100644
--- a/src/fast5.hpp
+++ b/src/fast5.hpp
@@ -1,3 +1,10 @@
+//
+// Part of: https://github.com/mateidavid/fast5
+//
+// Copyright (c) 2015-2017 Matei David, Ontario Institute for Cancer Research
+// MIT License
+//
+
 #ifndef __FAST5_HPP
 #define __FAST5_HPP
 
@@ -13,13 +20,19 @@
 #include <array>
 #include <set>
 #include <map>
+#include <stdexcept>
 
+#include "logger.hpp"
+#include "fast5_version.hpp"
 #include "hdf5_tools.hpp"
+#include "Huffman_Packer.hpp"
+#include "Bit_Packer.hpp"
+
 #define MAX_K_LEN 8
 
 namespace
 {
-    inline static std::string array_to_string(const std::array< char, MAX_K_LEN >& a)
+    inline static std::string array_to_string(std::array< char, MAX_K_LEN > const & a) // Returns the chars of a up to (not including) the first '\0'; all MAX_K_LEN chars if there is none.
     {
         return std::string(a.begin(), std::find(a.begin(), a.end(), '\0'));
     }
@@ -28,47 +41,147 @@ namespace
 namespace fast5
 {
 
-struct Channel_Id_Parameters
+typedef hdf5_tools::File::Attr_Map Attr_Map;
+
+struct Channel_Id_Params // Channel settings; read()/write() map each field to the path p + "/<field name>".
 {
     std::string channel_number;
     double digitisation;
     double offset;
     double range;
     double sampling_rate;
-}; // struct Channel_Id_Parameters
+    Channel_Id_Params() // Default state: empty channel_number and 0.0 for every numeric field.
+        : channel_number(""),
+          digitisation(0.0),
+          offset(0.0),
+          range(0.0),
+          sampling_rate(0.0) {}
+    void read(hdf5_tools::File const & f, std::string const & p) // Load all five fields from f, using p as the path prefix.
+    {
+        f.read(p + "/channel_number", channel_number);
+        f.read(p + "/digitisation", digitisation);
+        f.read(p + "/offset", offset);
+        f.read(p + "/range", range);
+        f.read(p + "/sampling_rate", sampling_rate);
+    }
+    void write(hdf5_tools::File const & f, std::string const & p) const // Store all five fields in f as attributes under the prefix p.
+    {
+        f.write_attribute(p + "/channel_number", channel_number);
+        f.write_attribute(p + "/digitisation", digitisation);
+        f.write_attribute(p + "/offset", offset);
+        f.write_attribute(p + "/range", range);
+        f.write_attribute(p + "/sampling_rate", sampling_rate);
+    }
+}; // struct Channel_Id_Params
 
-typedef std::map< std::string, std::string > Tracking_Id_Parameters;
+typedef Attr_Map Tracking_Id_Params;
 
-typedef std::map< std::string, std::string > Sequences_Parameters;
+typedef Attr_Map Sequences_Params;
 
-typedef float Raw_Samples_Entry;
-typedef int16_t Raw_Samples_Int_Entry;
+typedef float Raw_Sample;
+typedef int16_t Raw_Int_Sample;
 
-struct Raw_Samples_Parameters
+struct Raw_Samples_Params // Per-read raw-sample settings; no constructor, so fields are unset until read() or assignment.
 {
     std::string read_id;
     long long read_number;
     long long start_mux;
     long long start_time;
     long long duration;
-}; // struct Raw_Samples_Parameters
+    friend bool operator == (Raw_Samples_Params const & lhs, Raw_Samples_Params const & rhs) // True when all five fields compare equal.
+    {
+        return (lhs.read_id == rhs.read_id
+                and lhs.read_number == rhs.read_number
+                and lhs.start_mux == rhs.start_mux
+                and lhs.start_time == rhs.start_time
+                and lhs.duration == rhs.duration);
+    }
+    void read(hdf5_tools::File const & f, std::string const & p) // Load all five fields from f, using p as the path prefix.
+    {
+        f.read(p + "/read_id", read_id);
+        f.read(p + "/read_number", read_number);
+        f.read(p + "/start_mux", start_mux);
+        f.read(p + "/start_time", start_time);
+        f.read(p + "/duration", duration);
+    }
+    void write(hdf5_tools::File const & f, std::string const & p) const // Store all five fields in f as attributes under the prefix p.
+    {
+        f.write_attribute(p + "/read_id", read_id);
+        f.write_attribute(p + "/read_number", read_number);
+        f.write_attribute(p + "/start_mux", start_mux);
+        f.write_attribute(p + "/start_time", start_time);
+        f.write_attribute(p + "/duration", duration);
+    }
+}; // struct Raw_Samples_Params
+
+typedef std::pair< std::vector< Raw_Int_Sample >, Raw_Samples_Params > Raw_Int_Samples_Dataset;
+typedef std::pair< std::vector< Raw_Sample >, Raw_Samples_Params > Raw_Samples_Dataset;
+
+struct Raw_Samples_Pack // Packed form of a raw-samples dataset: an encoded signal, its attribute map, and the dataset params.
+{
+    Huffman_Packer::Code_Type signal; // Stored as the dataset p + "/Signal".
+    Attr_Map signal_params; // Attribute map attached to p + "/Signal".
+    // Settings of the dataset the signal was packed from.
+    Raw_Samples_Params params; // Stored under p + "/params".
+    //
+    void read(hdf5_tools::File const & f, std::string const & p) // Load signal, its attribute map, then params from under p.
+    {
+        f.read(p + "/Signal", signal);
+        signal_params = f.get_attr_map(p + "/Signal");
+        params.read(f, p + "/params");
+    }
+    void write(hdf5_tools::File const & f, std::string const & p) const // Mirror of read(): dataset first, then its attributes, then params.
+    {
+        f.write_dataset(p + "/Signal", signal);
+        f.add_attr_map(p + "/Signal", signal_params);
+        params.write(f, p + "/params");
+    }
+}; // struct Raw_Samples_Pack
 
-struct EventDetection_Event_Entry
+struct EventDetection_Event // One event record with fields mean, stdv, start and length.
 {
     double mean;
     double stdv;
     long long start;
     long long length;
-    friend bool operator == (const EventDetection_Event_Entry& lhs, const EventDetection_Event_Entry& rhs)
+    friend bool operator == (EventDetection_Event const & lhs, EventDetection_Event const & rhs) // Exact field-by-field equality.
     {
         return lhs.mean == rhs.mean
             and lhs.stdv == rhs.stdv
             and lhs.start == rhs.start
             and lhs.length == rhs.length;
     }
+    static hdf5_tools::Compound_Map const & compound_map() // Name-to-member map; members are registered on the first call and the same map is returned afterwards.
+    {
+        static hdf5_tools::Compound_Map m;
+        static bool inited = false; // Guards the one-time registration below; NOTE(review): no synchronization is visible here.
+        if (not inited)
+        {
+            m.add_member("mean", &EventDetection_Event::mean);
+            m.add_member("start", &EventDetection_Event::start);
+            m.add_member("length", &EventDetection_Event::length);
+            m.add_member("stdv", &EventDetection_Event::stdv);
+            inited = true;
+        }
+        return m;
+    }
+    static hdf5_tools::Compound_Map const & alt_compound_map() // Same as compound_map(), except the stdv member is registered under the name "variance".
+    {
+        static hdf5_tools::Compound_Map m;
+        static bool inited = false;
+        if (not inited)
+        {
+            m.add_member("mean", &EventDetection_Event::mean);
+            m.add_member("start", &EventDetection_Event::start);
+            m.add_member("length", &EventDetection_Event::length);
+            m.add_member("variance", &EventDetection_Event::stdv); // NOTE(review): the value lands in stdv with no conversion here; confirm how callers treat variance vs stdv.
+            inited = true;
+        }
+        return m;
+    }
 }; // struct EventDetection_Event
 
-struct EventDetection_Event_Parameters
+struct EventDetection_Events_Params // Settings of an events dataset; read_id, median_before and abasic_found are optional in the file.
 {
     std::string read_id;
     long long read_number;
@@ -78,42 +191,136 @@ struct EventDetection_Event_Parameters
     long long duration;
     double median_before;
     unsigned abasic_found;
-}; // struct EventDetection_Event_Parameters
+    friend bool operator == (EventDetection_Events_Params const & lhs, EventDetection_Events_Params const & rhs) // Field-by-field equality with special handling of NaN median_before.
+    {
+        return (lhs.read_id == rhs.read_id
+                and lhs.read_number == rhs.read_number
+                and lhs.scaling_used == rhs.scaling_used
+                and lhs.start_mux == rhs.start_mux
+                and lhs.start_time == rhs.start_time
+                and lhs.duration == rhs.duration
+                and ((std::isnan(lhs.median_before) and std::isnan(rhs.median_before)) // Two NaN values count as equal, since NaN == NaN is false.
+                     or lhs.median_before == rhs.median_before)
+                and lhs.abasic_found == rhs.abasic_found);
+    }
+    void read(hdf5_tools::File const & f, std::string const & p) // Load required fields unconditionally and optional ones only if listed at p.
+    {
+        auto a_v = f.get_attr_list(p);
+        std::set< std::string > a_s(a_v.begin(), a_v.end()); // Set of names present at p, used for the optional-field checks below.
+        f.read(p + "/read_number", read_number);
+        f.read(p + "/scaling_used", scaling_used);
+        f.read(p + "/start_mux", start_mux);
+        f.read(p + "/start_time", start_time);
+        f.read(p + "/duration", duration);
+        // optional fields
+        if (a_s.count("read_id")) // When absent, read_id keeps whatever value it already had.
+        {
+            f.read(p + "/read_id", read_id);
+        }
+        if (a_s.count("median_before"))
+        {
+            f.read(p + "/median_before", median_before);
+        }
+        else
+        {
+            median_before = std::nan(""); // NaN marks "not present"; write() skips it.
+        }
+        if (a_s.count("abasic_found"))
+        {
+            f.read(p + "/abasic_found", abasic_found);
+        }
+        else
+        {
+            abasic_found = 2; // 2 marks "not present"; write() only stores values below 2.
+        }
+    }
+    void write(hdf5_tools::File const & f, std::string const & p) const // Store required fields always; optional ones only when they hold a real value.
+    {
+        f.write_attribute(p + "/read_number", read_number);
+        f.write_attribute(p + "/scaling_used", scaling_used);
+        f.write_attribute(p + "/start_mux", start_mux);
+        f.write_attribute(p + "/start_time", start_time);
+        f.write_attribute(p + "/duration", duration);
+        if (not read_id.empty()) f.write_attribute(p + "/read_id", read_id); // Skipped for an empty read_id.
+        if (not std::isnan(median_before)) f.write_attribute(p + "/median_before", median_before); // Skipped for the NaN sentinel.
+        if (abasic_found < 2) f.write_attribute(p + "/abasic_found", abasic_found); // Skipped for the sentinel value 2 (or anything larger).
+    }
+}; // struct EventDetection_Events_Params
+
+typedef std::pair< std::vector< EventDetection_Event >, EventDetection_Events_Params > EventDetection_Events_Dataset;
+
+struct EventDetection_Events_Pack // Packed form of an events dataset: two encoded streams with their attribute maps, plus the dataset params.
+{
+    Huffman_Packer::Code_Type skip; // Stored as the dataset p + "/Skip".
+    Attr_Map skip_params; // Attribute map attached to p + "/Skip".
+    Huffman_Packer::Code_Type len; // Stored as the dataset p + "/Len".
+    Attr_Map len_params; // Attribute map attached to p + "/Len".
+    //
+    EventDetection_Events_Params params; // Stored under p + "/params".
+    //
+    void read(hdf5_tools::File const & f, std::string const & p) // Load both streams, their attribute maps, then params from under p.
+    {
+        f.read(p + "/Skip", skip);
+        skip_params = f.get_attr_map(p + "/Skip");
+        f.read(p + "/Len", len);
+        len_params = f.get_attr_map(p + "/Len");
+        params.read(f, p + "/params");
+    }
+    void write(hdf5_tools::File const & f, std::string const & p) const // Mirror of read(): each dataset is written before its attribute map.
+    {
+        f.write_dataset(p + "/Skip", skip);
+        f.add_attr_map(p + "/Skip", skip_params);
+        f.write_dataset(p + "/Len", len);
+        f.add_attr_map(p + "/Len", len_params);
+        params.write(f, p + "/params");
+    }
+}; // struct EventDetection_Events_Pack
 
 //
 // This struct represents the expected signal measured
 // given the kmer sequence that is in the pore when the
 // the observations are made. A pore model consists
 // of 1024 of these entries (one per 5-mer) and global
-// shift/scaling parameters.
+// shift/scaling params.
 //
-struct Model_Entry
+struct Basecall_Model_State // One model entry: four level/sd statistics plus the kmer they belong to.
 {
-    long long variant;
     double level_mean;
     double level_stdv;
     double sd_mean;
     double sd_stdv;
-    double weight;
     std::array< char, MAX_K_LEN > kmer;
     std::string get_kmer() const { return array_to_string(kmer); }
-    friend bool operator == (const Model_Entry& lhs, const Model_Entry& rhs)
-    {
-        return lhs.variant == rhs.variant
-            and lhs.level_mean == rhs.level_mean
-            and lhs.level_stdv == rhs.level_stdv
-            and lhs.sd_mean == rhs.sd_mean
-            and lhs.sd_stdv == rhs.sd_stdv
-            and lhs.weight == rhs.weight
-            and lhs.kmer == rhs.kmer;
+    friend bool operator == (Basecall_Model_State const & lhs, Basecall_Model_State const & rhs) // Exact equality over the five remaining fields.
+    {
+        return (lhs.level_mean == rhs.level_mean
+                and lhs.level_stdv == rhs.level_stdv
+                and lhs.sd_mean == rhs.sd_mean
+                and lhs.sd_stdv == rhs.sd_stdv
+                and lhs.kmer == rhs.kmer); // Compares the whole fixed-size array, including chars after any '\0'.
     }
-}; // struct Model_Entry
+    static hdf5_tools::Compound_Map const & compound_map() // Name-to-member map; members are registered on the first call and the same map is returned afterwards.
+    {
+        static hdf5_tools::Compound_Map m;
+        static bool inited = false; // Guards the one-time registration below.
+        if (not inited)
+        {
+            m.add_member("level_mean", &Basecall_Model_State::level_mean);
+            m.add_member("level_stdv", &Basecall_Model_State::level_stdv);
+            m.add_member("sd_mean", &Basecall_Model_State::sd_mean);
+            m.add_member("sd_stdv", &Basecall_Model_State::sd_stdv);
+            m.add_member("kmer", &Basecall_Model_State::kmer);
+            inited = true;
+        }
+        return m;
+    }
+}; // struct Basecall_Model_State
 
 //
 // This struct represents the global transformations
-// that must be applied to each Model_Entry
+// that must be applied to each Basecall_Model_State
 //
-struct Model_Parameters
+struct Basecall_Model_Params // Six model-wide values; read()/write() map each to the path p + "/<field name>".
 {
     double scale;
     double shift;
@@ -121,66 +328,303 @@ struct Model_Parameters
     double var;
     double scale_sd;
     double var_sd;
-}; // struct Model_Parameters
+    void read(hdf5_tools::File const & f, std::string const & p) // Load all six values from f, using p as the path prefix.
+    {
+        f.read(p + "/scale", scale);
+        f.read(p + "/shift", shift);
+        f.read(p + "/drift", drift);
+        f.read(p + "/var", var);
+        f.read(p + "/scale_sd", scale_sd);
+        f.read(p + "/var_sd", var_sd);
+    }
+    void write(hdf5_tools::File const & f, std::string const & p) const // Store all six values in f as attributes under the prefix p.
+    {
+        f.write_attribute(p + "/scale", scale);
+        f.write_attribute(p + "/shift", shift);
+        f.write_attribute(p + "/drift", drift);
+        f.write_attribute(p + "/var", var);
+        f.write_attribute(p + "/scale_sd", scale_sd);
+        f.write_attribute(p + "/var_sd", var_sd);
+    }
+}; // struct Basecall_Model_Params
+
+struct Basecall_Fastq_Pack // Packed form of a fastq record: two encoded streams with attribute maps, plus read_name and qv_bits.
+{
+    Huffman_Packer::Code_Type bp; // Stored as the dataset p + "/BP".
+    Attr_Map bp_params; // Attribute map attached to p + "/BP".
+    Huffman_Packer::Code_Type qv; // Stored as the dataset p + "/QV".
+    Attr_Map qv_params; // Attribute map attached to p + "/QV".
+    std::string read_name; // Written as the attribute p + "/read_name".
+    std::uint8_t qv_bits; // Written as the attribute p + "/qv_bits".
+    //
+    void read(hdf5_tools::File const & f, std::string const & p) // Load both streams with their attribute maps, then read_name and qv_bits.
+    {
+        f.read(p + "/BP", bp);
+        bp_params = f.get_attr_map(p + "/BP");
+        f.read(p + "/QV", qv);
+        qv_params = f.get_attr_map(p + "/QV");
+        f.read(p + "/read_name", read_name);
+        f.read(p + "/qv_bits", qv_bits);
+    }
+    void write(hdf5_tools::File const & f, std::string const & p) const // Mirror of read(): datasets and their maps first, then the two attributes.
+    {
+        f.write_dataset(p + "/BP", bp);
+        f.add_attr_map(p + "/BP", bp_params);
+        f.write_dataset(p + "/QV", qv);
+        f.add_attr_map(p + "/QV", qv_params);
+        f.write_attribute(p + "/read_name", read_name);
+        f.write_attribute(p + "/qv_bits", qv_bits);
+    }
+}; // struct Basecall_Fastq_Pack
 
 //
 // This struct represents an observed event.
 // The members of the struct are the same as
 // the fields encoded in the FAST5 file.
 //
-struct Event_Entry
+struct Basecall_Event // One basecalled event; the mp_state and per-base probability fields of the old struct are dropped.
 {
     double mean;
     double stdv;
     double start;
     double length;
     double p_model_state;
-    double p_mp_state;
-    double p_A;
-    double p_C;
-    double p_G;
-    double p_T;
     long long move;
     std::array< char, MAX_K_LEN > model_state;
-    std::array< char, MAX_K_LEN > mp_state;
     std::string get_model_state() const { return array_to_string(model_state); }
-    std::string get_mp_state() const { return array_to_string(mp_state); }
-    friend bool operator == (const Event_Entry& lhs, const Event_Entry& rhs)
+    friend bool operator == (Basecall_Event const & lhs, Basecall_Event const & rhs) // Exact equality over the seven remaining fields.
     {
-        return lhs.mean == rhs.mean
-            and lhs.stdv == rhs.stdv
-            and lhs.start == rhs.start
-            and lhs.length == rhs.length
-            and lhs.p_model_state == rhs.p_model_state
-            and lhs.p_mp_state == rhs.p_mp_state
-            and lhs.p_A == rhs.p_A
-            and lhs.p_C == rhs.p_C
-            and lhs.p_G == rhs.p_G
-            and lhs.p_T == rhs.p_T
-            and lhs.move == rhs.move
-            and lhs.model_state == rhs.model_state
-            and lhs.mp_state == rhs.mp_state;
-    }
-}; // struct Event_Entry
+        return (lhs.mean == rhs.mean
+                and lhs.stdv == rhs.stdv
+                and lhs.start == rhs.start
+                and lhs.length == rhs.length
+                and lhs.p_model_state == rhs.p_model_state
+                and lhs.move == rhs.move
+                and lhs.model_state == rhs.model_state); // Compares the whole fixed-size array, including chars after any '\0'.
+    }
+    static hdf5_tools::Compound_Map const & compound_map() // Name-to-member map; members are registered on the first call and the same map is returned afterwards.
+    {
+        static hdf5_tools::Compound_Map m;
+        static bool inited = false; // Guards the one-time registration below.
+        if (not inited)
+        {
+            m.add_member("mean", &Basecall_Event::mean);
+            m.add_member("stdv", &Basecall_Event::stdv);
+            m.add_member("start", &Basecall_Event::start);
+            m.add_member("length", &Basecall_Event::length);
+            m.add_member("p_model_state", &Basecall_Event::p_model_state);
+            m.add_member("move", &Basecall_Event::move);
+            m.add_member("model_state", &Basecall_Event::model_state);
+            inited = true;
+        }
+        return m;
+    }
+}; // struct Basecall_Event
+
+struct Basecall_Events_Params // start_time and duration of a basecall events dataset; both are optional in the file.
+{
+    double start_time;
+    double duration;
+    friend bool operator == (Basecall_Events_Params const & lhs, Basecall_Events_Params const & rhs) // Exact equality of both fields.
+    {
+        return (lhs.start_time == rhs.start_time
+                and lhs.duration == rhs.duration);
+    }
+    void read(hdf5_tools::File const & f, std::string const & p) // Load each field if its attribute exists under p; otherwise set it to 0.0.
+    {
+        if (f.attribute_exists(p + "/start_time"))
+        {
+            f.read(p + "/start_time", start_time);
+        }
+        else
+        {
+            start_time = 0.0; // 0.0 stands for "absent"; write() omits non-positive values.
+        }
+        if (f.attribute_exists(p + "/duration"))
+        {
+            f.read(p + "/duration", duration);
+        }
+        else
+        {
+            duration = 0.0; // 0.0 stands for "absent"; write() omits non-positive values.
+        }
+    }
+    void write(hdf5_tools::File const & f, std::string const & p) const // Store each field as an attribute under p, but only when it is strictly positive.
+    {
+        if (start_time > 0.0) f.write_attribute(p + "/start_time", start_time);
+        if (duration > 0.0) f.write_attribute(p + "/duration", duration);
+    }
+}; // struct Basecall_Events_Params
+
+typedef std::pair< std::vector< Basecall_Event >, Basecall_Events_Params > Basecall_Events_Dataset;
+
+struct Basecall_Events_Pack // Packed form of a basecall events dataset: encoded streams, their attribute maps, scalar attributes and params.
+{
+    Huffman_Packer::Code_Type rel_skip; // Dataset p + "/Rel_Skip"; when present it is used instead of Skip and Len.
+    Attr_Map rel_skip_params;
+    Huffman_Packer::Code_Type skip; // Dataset p + "/Skip"; only read/written when Rel_Skip is not used.
+    Attr_Map skip_params;
+    Huffman_Packer::Code_Type len; // Dataset p + "/Len"; only read/written when Rel_Skip is not used.
+    Attr_Map len_params;
+    Huffman_Packer::Code_Type move; // Dataset p + "/Move"; always read/written.
+    Attr_Map move_params;
+    Bit_Packer::Code_Type p_model_state; // Dataset p + "/P_Model_State"; always read/written.
+    Attr_Map p_model_state_params;
+    // Scalar values, each written as an attribute named after the field.
+    std::string name;
+    std::string version;
+    std::string ed_gr;
+    long long start_time;
+    unsigned state_size;
+    double median_sd_temp;
+    unsigned p_model_state_bits;
+    //
+    Basecall_Events_Params params; // Stored under p + "/params".
+    //
+    void read(hdf5_tools::File const & f, std::string const & p) // Load the pack from under p, choosing Rel_Skip or Skip+Len by what exists in f.
+    {
+        if (f.dataset_exists(p + "/Rel_Skip")) // In this branch skip/len are left untouched.
+        {
+            f.read(p + "/Rel_Skip", rel_skip);
+            rel_skip_params = f.get_attr_map(p + "/Rel_Skip");
+        }
+        else // In this branch rel_skip is left untouched.
+        {
+            f.read(p + "/Skip", skip);
+            skip_params = f.get_attr_map(p + "/Skip");
+            f.read(p + "/Len", len);
+            len_params = f.get_attr_map(p + "/Len");
+        }
+        f.read(p + "/Move", move);
+        move_params = f.get_attr_map(p + "/Move");
+        f.read(p + "/P_Model_State", p_model_state);
+        p_model_state_params = f.get_attr_map(p + "/P_Model_State");
+        f.read(p + "/name", name);
+        f.read(p + "/version", version);
+        f.read(p + "/ed_gr", ed_gr);
+        f.read(p + "/start_time", start_time);
+        f.read(p + "/state_size", state_size);
+        f.read(p + "/median_sd_temp", median_sd_temp);
+        f.read(p + "/p_model_state_bits", p_model_state_bits);
+        params.read(f, p + "/params");
+    }
+    void write(hdf5_tools::File const & f, std::string const & p) const // Store the pack under p, choosing Rel_Skip or Skip+Len by whether rel_skip holds data.
+    {
+        if (not rel_skip.empty()) // A non-empty rel_skip selects the Rel_Skip layout.
+        {
+            f.write_dataset(p + "/Rel_Skip", rel_skip);
+            f.add_attr_map(p + "/Rel_Skip", rel_skip_params);
+        }
+        else
+        {
+            f.write_dataset(p + "/Skip", skip);
+            f.add_attr_map(p + "/Skip", skip_params);
+            f.write_dataset(p + "/Len", len);
+            f.add_attr_map(p + "/Len", len_params);
+        }
+        f.write_dataset(p + "/Move", move);
+        f.add_attr_map(p + "/Move", move_params);
+        f.write_dataset(p + "/P_Model_State", p_model_state);
+        f.add_attr_map(p + "/P_Model_State", p_model_state_params);
+        f.write_attribute(p + "/name", name);
+        f.write_attribute(p + "/version", version);
+        f.write_attribute(p + "/ed_gr", ed_gr);
+        f.write_attribute(p + "/start_time", start_time);
+        f.write_attribute(p + "/state_size", state_size);
+        f.write_attribute(p + "/median_sd_temp", median_sd_temp);
+        f.write_attribute(p + "/p_model_state_bits", p_model_state_bits);
+        params.write(f, p + "/params");
+    }
+}; // struct Basecall_Events_Pack
 
 //
 // This struct represents a template-to-complement
 // match that is emitted by ONT's 2D basecaller
 //
-struct Event_Alignment_Entry
+struct Basecall_Alignment_Entry // One alignment row: a template index, a complement index and a kmer.
 {
     long long template_index;
     long long complement_index;
     std::array< char, MAX_K_LEN > kmer;
     std::string get_kmer() const { return array_to_string(kmer); }
-    friend bool operator == (const Event_Alignment_Entry& lhs, const Event_Alignment_Entry& rhs)
+    friend bool operator == (Basecall_Alignment_Entry const & lhs, Basecall_Alignment_Entry const & rhs) // Exact field-by-field equality.
     {
         return lhs.template_index == rhs.template_index
             and lhs.complement_index == rhs.complement_index
             and lhs.kmer == rhs.kmer;
     }
-}; // struct Event_Alignment_Entry
+    static hdf5_tools::Compound_Map const & compound_map() // Name-to-member map; members are registered on the first call and the same map is returned afterwards.
+    {
+        static hdf5_tools::Compound_Map m;
+        static bool inited = false; // Guards the one-time registration below.
+        if (not inited)
+        {
+            m.add_member("template", &Basecall_Alignment_Entry::template_index); // Registered name is "template", not the C++ field name.
+            m.add_member("complement", &Basecall_Alignment_Entry::complement_index); // Registered name is "complement", not the C++ field name.
+            m.add_member("kmer", &Basecall_Alignment_Entry::kmer);
+            inited = true;
+        }
+        return m;
+    }
+}; // struct Basecall_Alignment_Entry
 
+struct Basecall_Alignment_Pack // Packed form of an alignment: three encoded streams with their params, plus three scalar attributes.
+{
+    Bit_Packer::Code_Type template_step; // Dataset p + "/Template_Step".
+    Bit_Packer::Code_Params_Type template_step_params; // Filled from the attribute map of p + "/Template_Step".
+    Bit_Packer::Code_Type complement_step; // Dataset p + "/Complement_Step".
+    Bit_Packer::Code_Params_Type complement_step_params; // Filled from the attribute map of p + "/Complement_Step".
+    Huffman_Packer::Code_Type move; // Dataset p + "/Move".
+    Huffman_Packer::Code_Params_Type move_params; // Filled from the attribute map of p + "/Move".
+    unsigned template_index_start; // Written as the attribute p + "/template_index_start".
+    unsigned complement_index_start; // Written as the attribute p + "/complement_index_start".
+    unsigned kmer_size; // Written as the attribute p + "/kmer_size".
+    //
+    void read(hdf5_tools::File const & f, std::string const & p) // Load the three streams with their params, then the three scalars, from under p.
+    {
+        f.read(p + "/Template_Step", template_step);
+        template_step_params = f.get_attr_map(p + "/Template_Step");
+        f.read(p + "/Complement_Step", complement_step);
+        complement_step_params = f.get_attr_map(p + "/Complement_Step");
+        f.read(p + "/Move", move);
+        move_params = f.get_attr_map(p + "/Move");
+        f.read(p + "/template_index_start", template_index_start);
+        f.read(p + "/complement_index_start", complement_index_start);
+        f.read(p + "/kmer_size", kmer_size);
+    }
+    void write(hdf5_tools::File const & f, std::string const & p) const // Mirror of read(): each dataset is written before its attribute map, scalars last.
+    {
+        f.write_dataset(p + "/Template_Step", template_step);
+        f.add_attr_map(p + "/Template_Step", template_step_params);
+        f.write_dataset(p + "/Complement_Step", complement_step);
+        f.add_attr_map(p + "/Complement_Step", complement_step_params);
+        f.write_dataset(p + "/Move", move);
+        f.add_attr_map(p + "/Move", move_params);
+        f.write_attribute(p + "/template_index_start", template_index_start);
+        f.write_attribute(p + "/complement_index_start", complement_index_start);
+        f.write_attribute(p + "/kmer_size", kmer_size);
+    }
+}; // struct Basecall_Alignment_Pack
+
+struct Basecall_Group_Description // Summary of one basecall group: identifying strings plus presence flags, all flags false by default.
+{
+    std::string name;
+    std::string version;
+    std::string ed_gr;
+    std::string bc_1d_gr;
+    bool have_subgroup[3]; // NOTE(review): index meaning is not shown here; presumably one slot per strand — confirm.
+    bool have_fastq[3];
+    bool have_events[3];
+    bool have_model[2]; // Only two slots, unlike the three-slot arrays above.
+    bool have_alignment;
+    Basecall_Group_Description() : // Clears every flag; the strings are default-constructed (empty).
+        have_subgroup{false, false, false},
+        have_fastq{false, false, false},
+        have_events{false, false, false},
+        have_model{false, false},
+        have_alignment{false}
+    {}
+}; // struct Basecall_Group_Description
 
 class File
     : private hdf5_tools::File
@@ -188,654 +632,731 @@ class File
 private:
     typedef hdf5_tools::File Base;
 public:
-    //using Base::is_open;
-    //using Base::is_rw;
-    //using Base::file_name;
-    //using Base::create;
-    //using Base::close;
-    using Base::get_object_count;
-    using Base::is_valid_file;
-    //using Base::write;
-
+    //
+    // Constructors
+    //
     File() = default;
-    File(const std::string& file_name, bool rw = false) { open(file_name, rw); }
+    File(std::string const & file_name, bool rw = false) { open(file_name, rw); }
 
-    bool is_open() const { return static_cast< const Base* >(this)->is_open(); }
-    bool is_rw() const { return static_cast< const Base* >(this)->is_rw(); }
-    const std::string& file_name() const { return static_cast< const Base* >(this)->file_name(); }
-    void create(const std::string& file_name, bool truncate = false) { static_cast< Base* >(this)->create(file_name, truncate); }
-    void close() { static_cast< Base* >(this)->close(); }
+    //
+    // Base methods
+    //
+    using Base::is_open;
+    using Base::is_rw;
+    using Base::file_name;
+    using Base::create;
+    using Base::close;
+    using Base::get_object_count;
+    using Base::is_valid_file;
 
-    void open(const std::string& file_name, bool rw = false)
+    //
+    // Base method wrappers
+    //
+    void // Opens file_name through Base::open(), then calls reload().
+    open(std::string const & file_name, bool rw = false) // rw is forwarded to Base::open() unchanged.
     {
         Base::open(file_name, rw);
-        if (is_open())
-        {
-            // detect raw samples read name
-            detect_raw_samples_read_name_list();
-            // detect eventdetection groups
-            detect_eventdetection_group_list();
-            // detect basecall groups
-            detect_basecall_group_list();
-        }
+        reload(); // NOTE(review): reload() is not shown here; presumably it replaces the removed detect_* calls — confirm it copes with a failed open.
     }
 
-    /**
-     * Extract "/file_version" attribute. This must exist.
-     */
-    std::string file_version() const
+    //
+    // Access /file_version
+    //
+    std::string // Returns the value read from file_version_path().
+    file_version() const // No existence check is made before the read.
     {
         std::string res;
-        assert(Base::exists(file_version_path()));
         Base::read(file_version_path(), res);
         return res;
     }
 
-    /**
-     * Check if "/UniqueGlobalKey/channel_id" attributes exist.
-     */
-    bool have_channel_id_params() const
-    {
-        return Base::group_exists(channel_id_path());
-    }
-    /**
-     * Extract "/UniqueGlobalKey/channel_id" attributes.
-     */
-    Channel_Id_Parameters get_channel_id_params() const
-    {
-        Channel_Id_Parameters res;
-        Base::read(channel_id_path() + "/channel_number", res.channel_number);
-        Base::read(channel_id_path() + "/digitisation", res.digitisation);
-        Base::read(channel_id_path() + "/offset", res.offset);
-        Base::read(channel_id_path() + "/range", res.range);
-        Base::read(channel_id_path() + "/sampling_rate", res.sampling_rate);
-        return res;
+    //
+    // Access /UniqueGlobalKey/channel_id
+    //
+    bool // True when the cached channel-id params hold a positive sampling rate.
+    have_channel_id_params() const
+    {
+        return _channel_id_params.sampling_rate > 0.0; // A default-constructed Channel_Id_Params has sampling_rate 0.0, so it reports false.
     }
-    /**
-     * Check if sampling rate exists.
-     */
-    bool have_sampling_rate() const
+    Channel_Id_Params // Returns a copy of the cached channel-id params.
+    get_channel_id_params() const
     {
-        return have_channel_id_params();
+        return _channel_id_params; // No file access happens here.
     }
-    /**
-     * Get sampling rate.
-     */
-    double get_sampling_rate() const
+    void // Replaces the cached channel-id params and writes them to the file.
+    add_channel_id_params(Channel_Id_Params const & channel_id_params)
     {
-        auto channel_id_params = get_channel_id_params();
-        return channel_id_params.sampling_rate;
+        _channel_id_params = channel_id_params; // Update the cache first.
+        _channel_id_params.write(*this, channel_id_path()); // Then store the cached copy under channel_id_path().
     }
+    bool // Same answer as have_channel_id_params().
+    have_sampling_rate() const { return have_channel_id_params(); }
+    double // Returns sampling_rate from the cached channel-id params, with no presence check.
+    get_sampling_rate() const { return _channel_id_params.sampling_rate; }
 
-    /**
-     * Check if "/UniqueGlobalKey/tracking_id" attributes exist.
-     */
-    bool have_tracking_id_params() const
+    //
+    // Access /UniqueGlobalKey/tracking_id
+    //
+    bool // True when the group at tracking_id_path() exists in the file.
+    have_tracking_id_params() const
     {
         return Base::group_exists(tracking_id_path());
     }
-    /**
-     * Extract "/UniqueGlobalKey/tracking_id" attributes.
-     */
-    Tracking_Id_Parameters get_tracking_id_params() const
+    Tracking_Id_Params // Returns the attribute map found at tracking_id_path().
+    get_tracking_id_params() const
     {
         return get_attr_map(tracking_id_path());
     }
+    void // Adds the given attribute map at tracking_id_path().
+    add_tracking_id_params(Tracking_Id_Params const & tracking_id_params) const // const: no member of this object is assigned here.
+    {
+        add_attr_map(tracking_id_path(), tracking_id_params);
+    }
 
-    /**
-     * Check if sequences attributes exists.
-     */
-    bool have_sequences_params() const
+    //
+    // Access /Sequences
+    //
+    bool // True when the group at sequences_path() exists in the file.
+    have_sequences_params() const
     {
         return Base::group_exists(sequences_path());
     }
-    /**
-     * Get sequences attributes.
-     */
-    Sequences_Parameters get_sequences_params() const
+    Sequences_Params // Returns the attribute map found at sequences_path().
+    get_sequences_params() const
     {
         return get_attr_map(sequences_path());
     }
+    void // Adds the given attribute map at sequences_path().
+    add_sequences_params(Sequences_Params const & sequences_params) const // const: no member of this object is assigned here.
+    {
+        add_attr_map(sequences_path(), sequences_params);
+    }
 
-    /**
-     * Get list of raw samples read names.
-     */
-    const std::vector< std::string >& get_raw_samples_read_name_list() const
+    //
+    // Access Raw Samples
+    //
+    std::vector< std::string > const & // Reference to the cached list of raw-samples read names; valid only while this object lives.
+    get_raw_samples_read_name_list() const
     {
-        return _raw_samples_read_name_list;
+        return _raw_samples_read_names; // No file access happens here.
     }
-    /**
-     * Check if raw samples exist.
-     * If _rn non-empty, check if raw samples exist for given read.
-     */
-    bool have_raw_samples(const std::string& _rn = std::string()) const
+    bool // With an empty rn: true if any read name is cached. Otherwise: true if rn is in the cached list.
+    have_raw_samples(std::string const & rn = std::string()) const // Unlike the removed code, this no longer requires channel-id params.
     {
-        if (not have_channel_id_params())
-        {
-            return false;
-        }
-        auto rn_l = get_raw_samples_read_name_list();
-        if (_rn.empty())
+        auto && rn_l = get_raw_samples_read_name_list(); // Binds to the cached list by reference; no copy is made.
+        return (rn.empty()
+                ? not rn_l.empty()
+                : std::find(rn_l.begin(), rn_l.end(), rn) != rn_l.end()); // Linear search for an exact name match.
+    }
+    bool // True when a dataset exists at raw_samples_path(rn).
+    have_raw_samples_unpack(std::string const & rn) const // rn has no default here; it is passed to raw_samples_path() as given.
+    {
+        return Base::dataset_exists(raw_samples_path(rn));
+    }
+    bool // True when a group exists at raw_samples_pack_path(rn).
+    have_raw_samples_pack(std::string const & rn) const // rn has no default here; it is passed to raw_samples_pack_path() as given.
+    {
+        return Base::group_exists(raw_samples_pack_path(rn));
+    }
+    Raw_Samples_Params // Reads the raw-samples params for a read from the unpacked location if present, else from the packed one.
+    get_raw_samples_params(std::string const & rn = std::string()) const
+    {
+        Raw_Samples_Params res;
+        auto && _rn = fill_raw_samples_read_name(rn); // NOTE(review): helper not shown; presumably supplies a default read name when rn is empty — confirm.
+        if (have_raw_samples_unpack(_rn)) // The unpacked dataset takes priority.
         {
-            return not rn_l.empty();
+            res.read(*this, raw_samples_params_path(_rn));
         }
         else
         {
-            std::set< std::string > rn_d(rn_l.begin(), rn_l.end());
-            return rn_d.count(_rn) > 0;
+            res.read(*this, raw_samples_params_pack_path(_rn)); // Taken without first checking that a pack exists.
         }
+        return res;
     }
-    /**
-     * Get raw samples attributes for given read name (default: first read name).
-     */
-    Raw_Samples_Parameters get_raw_samples_params(const std::string& _rn = std::string()) const
+    void // Writes params as attributes under raw_samples_params_path(rn).
+    add_raw_samples_params(std::string const & rn, Raw_Samples_Params const & params) const // rn is used as given; an empty name is not replaced by a default here.
     {
-        Raw_Samples_Parameters res;
-        const std::string& rn = not _rn.empty()? _rn : get_raw_samples_read_name_list().front();
         std::string p = raw_samples_params_path(rn);
-        Base::read(p + "/read_id", res.read_id);
-        Base::read(p + "/read_number", res.read_number);
-        Base::read(p + "/start_mux", res.start_mux);
-        Base::read(p + "/start_time", res.start_time);
-        Base::read(p + "/duration", res.duration);
-        return res;
+        params.write(*this, p); // Delegates the per-field writes to Raw_Samples_Params::write().
     }
-    /**
-     * Get raw samples for given read name as ints (default: first read name).
-     */
-    std::vector< Raw_Samples_Int_Entry > get_raw_samples_int(const std::string& _rn = std::string()) const
+    std::vector< Raw_Int_Sample >
+    get_raw_int_samples(std::string const & rn = std::string()) const
     {
-        // get raw samples
-        std::vector< Raw_Samples_Int_Entry > res;
-        const std::string& rn = not _rn.empty()? _rn : get_raw_samples_read_name_list().front();
-        Base::read(raw_samples_path(rn), res);
+        std::vector< Raw_Int_Sample > res;
+        auto && _rn = fill_raw_samples_read_name(rn);
+        if (have_raw_samples_unpack(_rn))
+        {
+            Base::read(raw_samples_path(_rn), res);
+        }
+        else if (have_raw_samples_pack(_rn))
+        {
+            auto rs_pack = get_raw_samples_pack(_rn);
+            res = unpack_rw(rs_pack).first;
+        }
         return res;
     }
-    /**
-     * Get raw samples for given read name (default: first read name).
-     */
-    std::vector< Raw_Samples_Entry > get_raw_samples(const std::string& _rn = std::string()) const
+    void
+    add_raw_samples(std::string const & rn, std::vector< Raw_Int_Sample > const & rsi)
+    {
+        Base::write_dataset(raw_samples_path(rn), rsi);
+        reload();
+    }
+    std::vector< Raw_Sample >
+    get_raw_samples(std::string const & rn = std::string()) const
     {
         // get raw samples
-        auto raw_samples_int = get_raw_samples_int(_rn);
-        // get scaling parameters
-        auto channel_id_params = get_channel_id_params();
+        auto rsi = get_raw_int_samples(rn);
         // decode levels
-        std::vector< Raw_Samples_Entry > res;
-        res.reserve(raw_samples_int.size());
-        for (auto int_level : raw_samples_int)
+        std::vector< Raw_Sample > res;
+        res.reserve(rsi.size());
+        for (auto int_level : rsi)
         {
-            res.push_back((static_cast< float >(int_level) + channel_id_params.offset)
-                          * channel_id_params.range / channel_id_params.digitisation);
+            res.push_back(raw_sample_to_float(int_level, _channel_id_params));
         }
         return res;
     }
 
-    /**
-     * Get list of EventDetection groups.
-     */
-    const std::vector< std::string >& get_eventdetection_group_list() const
+    //
+    // Access EventDetection groups
+    //
+    std::vector< std::string > const &
+    get_eventdetection_group_list() const
     {
-        return _eventdetection_group_list;
+        return _eventdetection_groups;
     }
-    /**
-     * Check if any EventDetection groups exist.
-     */
-    bool have_eventdetection_groups() const
+    bool
+    have_eventdetection_group(std::string const & gr = std::string()) const
     {
-        return not get_eventdetection_group_list().empty();
+        return (gr.empty()
+                ? not _eventdetection_groups.empty()
+                : _eventdetection_read_names.count(gr));
     }
-    /**
-     * Get list of reads for given EventDetection group (default: first EventDetection group).
-     */
-    std::vector< std::string > get_eventdetection_read_name_list(const std::string& _ed_gr = std::string()) const
+    std::vector< std::string > const &
+    get_eventdetection_read_name_list(std::string const & gr = std::string()) const
     {
-        const std::string& ed_gr = not _ed_gr.empty()? _ed_gr : get_eventdetection_group_list().front();
-        return detect_eventdetection_read_name_list(ed_gr);
+        static const std::vector< std::string > _empty;
+        auto && _gr = fill_eventdetection_group(gr);
+        return (_eventdetection_read_names.count(_gr)
+                ? _eventdetection_read_names.at(_gr)
+                : _empty);
     }
-    /**
-     * Check if EventDetection events exist.
-     * If _ed_gr given: check if events exist for given group; else: check first EventDetection group.
-     * If _rn given: check if events exist for given group and read name.
-     */
-    bool have_eventdetection_events(
-        const std::string& _ed_gr = std::string(),
-        const std::string& _rn = std::string()) const
+    Attr_Map
+    get_eventdetection_params(std::string const & gr = std::string()) const
     {
-        std::string ed_gr;
-        if (_ed_gr.empty())
-        {
-            auto ed_gr_l = get_eventdetection_group_list();
-            if (ed_gr_l.empty()) return false;
-            ed_gr = ed_gr_l.front();
-        }
-        else
-        {
-            ed_gr = _ed_gr;
-        }
-        auto rn_l = get_eventdetection_read_name_list(ed_gr);
-        if (_rn.empty())
+        auto && _gr = fill_eventdetection_group(gr);
+        return get_attr_map(eventdetection_group_path(_gr));
+    }
+    void
+    add_eventdetection_params(std::string const & gr, Attr_Map const & am) const
+    {
+        add_attr_map(eventdetection_group_path(gr), am);
+    }
+
+    //
+    // Access EventDetection events
+    //
+    bool
+    have_eventdetection_events(
+        std::string const & gr = std::string(), std::string const & rn = std::string()) const
+    {
+        auto && _gr = fill_eventdetection_group(gr);
+        auto && _rn = fill_eventdetection_read_name(_gr, rn);
+        return (_eventdetection_read_names.count(_gr)
+                and std::find(
+                    _eventdetection_read_names.at(_gr).begin(),
+                    _eventdetection_read_names.at(_gr).end(),
+                    _rn)
+                != _eventdetection_read_names.at(_gr).end());
+    }
+    bool
+    have_eventdetection_events_unpack(std::string const & gr, std::string const & rn) const
+    {
+        return Base::dataset_exists(eventdetection_events_path(gr, rn));
+    }
+    bool
+    have_eventdetection_events_pack(std::string const & gr, std::string const & rn) const
+    {
+        return Base::group_exists(eventdetection_events_pack_path(gr, rn));
+    }
+    EventDetection_Events_Params
+    get_eventdetection_events_params(
+        std::string const & gr = std::string(), std::string const & rn = std::string()) const
+    {
+        EventDetection_Events_Params res;
+        auto && _gr = fill_eventdetection_group(gr);
+        auto && _rn = fill_eventdetection_read_name(_gr, rn);
+        if (have_eventdetection_events_unpack(_gr, _rn))
         {
-            return not rn_l.empty();
+            res.read(*this, eventdetection_events_params_path(_gr, _rn));
         }
-        else
+        else if (have_eventdetection_events_pack(_gr, _rn))
         {
-            std::set< std::string > rn_d(rn_l.begin(), rn_l.end());
-            return rn_d.count(_rn) > 0;
+            res.read(*this, eventdetection_events_params_pack_path(_gr, _rn));
         }
+        return res;
     }
-    /**
-     * Get EventDetection params for given EventDetection group (default: first EventDetection group).
-     */
-    std::map< std::string, std::string > get_eventdetection_params(const std::string& _ed_gr = std::string()) const
+    void
+    add_eventdetection_events_params(
+        std::string const & gr, std::string const & rn,
+        EventDetection_Events_Params const & ede_params) const
     {
-        const std::string& ed_gr = not _ed_gr.empty()? _ed_gr : get_eventdetection_group_list().front();
-        return get_attr_map(eventdetection_params_path(ed_gr));
+        auto p = eventdetection_events_params_path(gr, rn);
+        ede_params.write(*this, p);
     }
-    /**
-     * Get EventDetection event params for given EventDetection group, and given read name
-     * (default: first EventDetection group, and first read name in it).
-     */
-    EventDetection_Event_Parameters get_eventdetection_event_params(
-        const std::string& _ed_gr = std::string(), const std::string& _rn = std::string()) const
+    std::vector< EventDetection_Event >
+    get_eventdetection_events(
+        std::string const & gr = std::string(), std::string const & rn = std::string()) const
     {
-        EventDetection_Event_Parameters res;
-        const std::string& ed_gr = not _ed_gr.empty()? _ed_gr : get_eventdetection_group_list().front();
-        const std::string rn = not _rn.empty()? _rn : get_eventdetection_read_name_list(ed_gr).front();
-        auto p = eventdetection_event_params_path(ed_gr, rn);
-        auto a_v = Base::get_attr_list(p);
-        std::set< std::string > a_s(a_v.begin(), a_v.end());
-        Base::read(p + "/read_number", res.read_number);
-        Base::read(p + "/scaling_used", res.scaling_used);
-        Base::read(p + "/start_mux", res.start_mux);
-        Base::read(p + "/start_time", res.start_time);
-        Base::read(p + "/duration", res.duration);
-        // optional fields
-        if (a_s.count("read_id"))
+        std::vector< EventDetection_Event > ede;
+        auto && _gr = fill_eventdetection_group(gr);
+        auto && _rn = fill_eventdetection_read_name(_gr, rn);
+        if (have_eventdetection_events_unpack(_gr, _rn))
         {
-            Base::read(p + "/read_id", res.read_id);
-        }
-        if (a_s.count("median_before"))
-        {
-            Base::read(p + "/median_before", res.median_before);
-        }
-        else
-        {
-            res.median_before = -1;
-        }
-        if (a_s.count("abasic_found"))
-        {
-            Base::read(p + "/abasic_found", res.abasic_found);
+            auto p = eventdetection_events_path(_gr, _rn);
+            // accept either stdv or variance
+            auto meml = get_struct_members(p);
+            std::set< std::string > mems(meml.begin(), meml.end());
+            if (mems.count("stdv"))
+            {
+                Base::read(p, ede, EventDetection_Event::compound_map());
+            }
+            else if (mems.count("variance"))
+            {
+                Base::read(p, ede, EventDetection_Event::alt_compound_map());
+                for (auto & e : ede)
+                {
+                    e.stdv = std::sqrt(e.stdv);
+                }
+            }
+            else
+            {
+                LOG_THROW
+                    << "neither stdv nor variance found for ed_gr=" << gr;
+            }
         }
-        else
+        else if (have_eventdetection_events_pack(_gr, _rn))
         {
-            res.abasic_found = 0;
+            auto ede_pack = get_eventdetection_events_pack(_gr, _rn);
+            if (not have_raw_samples(_rn))
+            {
+                LOG_THROW_(std::logic_error)
+                    << "missing raw samples required to unpack eventdetection events: gr=" << _gr
+                    << " rn=" << _rn;
+            }
+            auto rs_ds = get_raw_samples_dataset(_rn);
+            ede = unpack_ed(ede_pack, rs_ds).first;
         }
-        return res;
+        return ede;
+    } // get_eventdetection_events()
+    void
+    add_eventdetection_events(
+        std::string const & gr, std::string const & rn,
+        std::vector< EventDetection_Event > const & ede)
+    {
+        Base::write_dataset(eventdetection_events_path(gr, rn), ede, EventDetection_Event::compound_map());
+        reload();
     }
-    /**
-     * Get EventDetection events for given EventDetection group, and given read name.
-     */
-    std::vector< EventDetection_Event_Entry > get_eventdetection_events(
-        const std::string& _ed_gr = std::string(), const std::string& _rn = std::string()) const
+
+    //
+    // Access Basecall groups
+    //
+    std::vector< std::string > const &
+    get_basecall_group_list() const
     {
-        std::vector< EventDetection_Event_Entry > res;
-        const std::string& ed_gr = not _ed_gr.empty()? _ed_gr : get_eventdetection_group_list().front();
-        const std::string rn = not _rn.empty()? _rn : get_eventdetection_read_name_list(ed_gr).front();
-        auto p = eventdetection_events_path(ed_gr, rn);
-        auto struct_member_names = Base::get_struct_members(p);
-        assert(struct_member_names.size() >= 4);
-        bool have_stdv = false;
-        bool have_variance = false;
-        for (const auto& s : struct_member_names)
-        {
-            if (s == "stdv") have_stdv = true;
-            else if (s == "variance") have_variance = true;
-        }
-        hdf5_tools::Compound_Map m;
-        m.add_member("mean", &EventDetection_Event_Entry::mean);
-        m.add_member("start", &EventDetection_Event_Entry::start);
-        m.add_member("length", &EventDetection_Event_Entry::length);
-        if (have_stdv)
+        return _basecall_groups;
+    }
+    bool
+    have_basecall_group(std::string const & gr = std::string()) const
+    {
+        auto && gr_l = get_basecall_group_list();
+        return (gr.empty()
+                ? not gr_l.empty()
+                : std::find(gr_l.begin(), gr_l.end(), gr) != gr_l.end());
+    }
+    std::vector< std::string > const &
+    get_basecall_strand_group_list(unsigned st) const
+    {
+        return _basecall_strand_groups.at(st);
+    }
+    bool
+    have_basecall_strand_group(unsigned st, std::string const & gr = std::string()) const
+    {
+        auto && gr_l = get_basecall_strand_group_list(st);
+        if (gr.empty())
         {
-            m.add_member("stdv", &EventDetection_Event_Entry::stdv);
+            return not gr_l.empty();
         }
-        else if (have_variance)
+        if (not _basecall_group_descriptions.count(gr))
         {
-            m.add_member("variance", &EventDetection_Event_Entry::stdv);
+            return false;
         }
         else
         {
-            // must have stdv or variance
-            abort();
-        }
-        Base::read(p, res, m);
-        if (not have_stdv)
-        {
-            // have read variances
-            for (auto& e : res)
-            {
-                e.stdv = std::sqrt(e.stdv);
-            }
+            return _basecall_group_descriptions.at(gr).have_subgroup[st];
         }
-        return res;
-    } // get_eventdetection_events()
-
-    /**
-     * Get list of all Basecall groups.
-     */
-    const std::vector< std::string >& get_basecall_group_list() const
+    }
+    Basecall_Group_Description const &
+    get_basecall_group_description(std::string const & gr) const
     {
-        return _basecall_group_list;
+        return _basecall_group_descriptions.at(gr);
     }
-    /**
-     * Check if any Basecall groups exist.
-     */
-    bool have_basecall_groups() const
+    std::string const &
+    get_basecall_1d_group(std::string const & gr) const
     {
-        return not get_basecall_group_list().empty();
+        static std::string const empty;
+        return (_basecall_group_descriptions.count(gr)
+                ? _basecall_group_descriptions.at(gr).bc_1d_gr
+                : empty);
     }
-    /**
-     * Get list of Basecall groups for given strand.
-     */
-    const std::vector< std::string >& get_basecall_strand_group_list(unsigned st) const
+    std::string const &
+    get_basecall_eventdetection_group(std::string const & gr) const
     {
-        return _basecall_strand_group_list[st];
+        static std::string const empty;
+        return (_basecall_group_descriptions.count(gr)
+                ? _basecall_group_descriptions.at(gr).ed_gr
+                : empty);
     }
-    /**
-     * Check if any Basecall groups exist for given strand.
-     */
-    bool have_basecall_strand_groups(unsigned st) const
+
+    //
+    // Access Basecall group params
+    //
+    Attr_Map
+    get_basecall_params(std::string const & gr) const
     {
-        return not get_basecall_strand_group_list(st).empty();
+        return get_attr_map(basecall_group_path(gr));
     }
-    /**
-     * Get Basecall group params for given Basecall group.
-     */
-    std::map< std::string, std::string > get_basecall_params(const std::string& bc_gr) const
+    void
+    add_basecall_params(std::string const & gr, Attr_Map const & am) const
     {
-        return get_attr_map(basecall_root_path() + "/" + basecall_group_prefix() + bc_gr);
+        add_attr_map(basecall_group_path(gr), am);
     }
-    /**
-     * Check if Basecall log exists for given Basecall group.
-     */
-    bool have_basecall_log(const std::string& bc_gr) const
+    //
+    // Access Basecall group log
+    //
+    bool
+    have_basecall_log(std::string const & gr) const
     {
-        std::string path = basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/Log";
-        return Base::exists(path);
+        return Base::exists(basecall_log_path(gr));
     }
-    /**
-     * Get Basecall log for given Basecall group.
-     */
-    std::string get_basecall_log(const std::string& bc_gr) const
+    std::string
+    get_basecall_log(std::string const & gr) const
     {
         std::string res;
-        std::string path = basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/Log";
-        Base::read(path, res);
+        Base::read(basecall_log_path(gr), res);
         return res;
     }
-    /**
-     * Check if Basecall fastq exists for given Basecall group and given strand.
-     */
-    bool have_basecall_fastq(unsigned st, const std::string& _bc_gr = std::string()) const
+    Attr_Map
+    get_basecall_config(std::string const & gr) const
     {
-        if (_bc_gr.empty() and get_basecall_strand_group_list(st).empty()) return false;
-        const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front();
-        return Base::dataset_exists(basecall_fastq_path(bc_gr, st));
+        Attr_Map res;
+        if (Base::group_exists(basecall_config_path(gr)))
+        {
+            res = get_attr_map(basecall_config_path(gr), true);
+        }
+        return res;
     }
-    /**
-     * Get Basecall fastq for given Basecall group and given strand.
-     */
-    std::string get_basecall_fastq(unsigned st, const std::string& _bc_gr = std::string()) const
+    Attr_Map
+    get_basecall_summary(std::string const & gr) const
+    {
+        Attr_Map res;
+        if (Base::group_exists(basecall_summary_path(gr)))
+        {
+            res = get_attr_map(basecall_summary_path(gr), true);
+        }
+        return res;
+    }
+
+    //
+    // Access Basecall fastq
+    //
+    bool
+    have_basecall_fastq(unsigned st, std::string const & gr = std::string()) const
+    {
+        auto && _gr = fill_basecall_group(st, gr);
+        return (_basecall_group_descriptions.count(_gr)
+                and _basecall_group_descriptions.at(_gr).have_fastq[st]);
+    }
+    bool
+    have_basecall_fastq_unpack(unsigned st, std::string const & gr) const
+    {
+        return Base::dataset_exists(basecall_fastq_path(gr, st));
+    }
+    bool
+    have_basecall_fastq_pack(unsigned st, std::string const & gr) const
+    {
+        return Base::group_exists(basecall_fastq_pack_path(gr, st));
+    }
+    std::string
+    get_basecall_fastq(unsigned st, std::string const & gr = std::string()) const
     {
         std::string res;
-        const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front();
-        Base::read(basecall_fastq_path(bc_gr, st), res);
+        auto && _gr = fill_basecall_group(st, gr);
+        if (have_basecall_fastq_unpack(st, _gr))
+        {
+            Base::read(basecall_fastq_path(_gr, st), res);
+        }
+        else if (have_basecall_fastq_pack(st, _gr))
+        {
+            auto fq_pack = get_basecall_fastq_pack(st, _gr);
+            res = unpack_fq(fq_pack);
+        }
         return res;
     }
-    /**
-     * Add Basecall fastq
-     */
-    void add_basecall_fastq(unsigned st, const std::string& bc_gr, const std::string& fq) const
+    void
+    add_basecall_fastq(unsigned st, std::string const & gr, std::string const & fq)
     {
-        Base::write(basecall_fastq_path(bc_gr, st), true, fq);
+        Base::write(basecall_fastq_path(gr, st), true, fq);
+        reload();
     }
-    /**
-     * Check if Basecall seq exists for given Basecall group and given strand.
-     */
-    bool have_basecall_seq(unsigned st, const std::string& _bc_gr = std::string()) const
+    bool
+    have_basecall_seq(unsigned st, std::string const & _gr = std::string()) const
     {
-        return have_basecall_fastq(st, _bc_gr);
+        return have_basecall_fastq(st, _gr);
     }
-    /**
-     * Get Basecall sequence for given Basecall group and given strand.
-     */
-    std::string get_basecall_seq(unsigned st, const std::string& _bc_gr = std::string()) const
+    std::string
+    get_basecall_seq(unsigned st, std::string const & _gr = std::string()) const
     {
-        return fq2seq(get_basecall_fastq(st, _bc_gr));
+        return fq2seq(get_basecall_fastq(st, _gr));
     }
-    /**
-     * Add Basecall seq
-     */
-    void add_basecall_seq(unsigned st, const std::string& bc_gr,
-                          const std::string& name, const std::string& seq, int default_qual = 33) const
+    void
+    add_basecall_seq(unsigned st, std::string const & gr,
+                     std::string const & name, std::string const & seq, int default_qual = 33)
     {
         std::ostringstream oss;
-        oss << '@' << name << std::endl
-            << seq << std::endl
-            << '+' << std::endl
-            << std::string(seq.size(), static_cast< char >(default_qual));
-        add_basecall_fastq(st, bc_gr, oss.str());
-    }
-    /**
-     * Check if Basecall model exist for given Basecall group and given strand.
-     */
-    bool have_basecall_model(unsigned st, const std::string& _bc_gr = std::string()) const
-    {
-        if (_bc_gr.empty() and get_basecall_strand_group_list(st).empty()) return false;
-        const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front();
-        auto bc_gr_1d = get_basecall_group_1d(bc_gr);
-        return Base::dataset_exists(basecall_model_path(bc_gr_1d, st));
-    }
-    /**
-     * Get Basecall model file name for given Basecall group and given strand.
-     */
-    std::string get_basecall_model_file(unsigned st, const std::string& _bc_gr = std::string()) const
+        oss << "@" << name << "\n"
+            << seq << "\n"
+            << "+\n"
+            << std::string(seq.size(), (char)default_qual);
+        add_basecall_fastq(st, gr, oss.str());
+        reload();
+    }
+
+    //
+    // Access Basecall model
+    //
+    bool
+    have_basecall_model(unsigned st, std::string const & gr = std::string()) const
+    {
+        auto && gr_1d = fill_basecall_1d_group(st, gr);
+        return (_basecall_group_descriptions.count(gr_1d)
+                and _basecall_group_descriptions.at(gr_1d).have_model[st]);
+    }
+    std::string
+    get_basecall_model_file(unsigned st, std::string const & gr = std::string()) const
     {
         std::string res;
-        const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front();
-        auto bc_gr_1d = get_basecall_group_1d(bc_gr);
-        assert(Base::exists(basecall_model_file_path(bc_gr_1d, st)));
-        Base::read(basecall_model_file_path(bc_gr_1d, st), res);
+        auto && gr_1d = fill_basecall_1d_group(st, gr);
+        Base::read(basecall_model_file_path(gr_1d, st), res);
         return res;
     }
-    void add_basecall_model_file(unsigned st, const std::string& bc_gr, const std::string& file_name) const
-    {
-        auto bc_gr_1d = get_basecall_group_1d(bc_gr);
-        std::string path = basecall_model_file_path(bc_gr_1d, st);
-        Base::write(path, false, file_name);
-    }
-    /**
-     * Get Basecall model parameters for given Basecall group and given strand.
-     */
-    Model_Parameters get_basecall_model_params(unsigned st, const std::string& _bc_gr = std::string()) const
-    {
-        Model_Parameters res;
-        const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front();
-        auto bc_gr_1d = get_basecall_group_1d(bc_gr);
-        std::string path = basecall_model_path(bc_gr_1d, st);
-        Base::read(path + "/scale", res.scale);
-        Base::read(path + "/shift", res.shift);
-        Base::read(path + "/drift", res.drift);
-        Base::read(path + "/var", res.var);
-        Base::read(path + "/scale_sd", res.scale_sd);
-        Base::read(path + "/var_sd", res.var_sd);
-        return res;
+    void
+    add_basecall_model_file(unsigned st, std::string const & gr, std::string const & file_name) const
+    {
+        Base::write_attribute(basecall_model_file_path(gr, st), file_name);
     }
-    template < typename T >
-    void add_basecall_model_params(unsigned st, const std::string& bc_gr, const T& params) const
-    {
-        auto bc_gr_1d = get_basecall_group_1d(bc_gr);
-        std::string path = basecall_model_path(bc_gr_1d, st);
-        Base::write(path + "/scale", false, params.scale);
-        Base::write(path + "/shift", false, params.shift);
-        Base::write(path + "/drift", false, params.drift);
-        Base::write(path + "/var", false, params.var);
-        Base::write(path + "/scale_sd", false, params.scale_sd);
-        Base::write(path + "/var_sd", false, params.var_sd);
-    }
-    /**
-     * Get Basecall model for given Basecall group and given strand.
-     */
-    std::vector< Model_Entry > get_basecall_model(unsigned st, const std::string& _bc_gr = std::string()) const
-    {
-        std::vector< Model_Entry > res;
-        const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front();
-        hdf5_tools::Compound_Map m;
-        m.add_member("kmer", &Model_Entry::kmer);
-        m.add_member("level_mean", &Model_Entry::level_mean);
-        m.add_member("level_stdv", &Model_Entry::level_stdv);
-        m.add_member("sd_mean", &Model_Entry::sd_mean);
-        m.add_member("sd_stdv", &Model_Entry::sd_stdv);
-        auto bc_gr_1d = get_basecall_group_1d(bc_gr);
-        Base::read(basecall_model_path(bc_gr_1d, st), res, m);
-        return res;
+    Basecall_Model_Params
+    get_basecall_model_params(unsigned st, std::string const & gr = std::string()) const
+    {
+        Basecall_Model_Params params;
+        auto && gr_1d = fill_basecall_1d_group(st, gr);
+        std::string path = basecall_model_path(gr_1d, st);
+        params.read(*this, path);
+        return params;
     }
-    /**
-     * Add Basecall model
-     */
-    template < typename T >
-    void add_basecall_model(unsigned st, const std::string& bc_gr, const std::vector< T >& m) const
-    {
-        hdf5_tools::Compound_Map cm;
-        cm.add_member("kmer", &T::kmer);
-        cm.add_member("level_mean", &T::level_mean);
-        cm.add_member("level_stdv", &T::level_stdv);
-        cm.add_member("sd_mean", &T::sd_mean);
-        cm.add_member("sd_stdv", &T::sd_stdv);
-        auto bc_gr_1d = get_basecall_group_1d(bc_gr);
-        Base::write(basecall_model_path(bc_gr_1d, st), true, m, cm);
-    }
-    /**
-     * Check if Basecall events exist for given Basecall group and given strand.
-     */
-    bool have_basecall_events(unsigned st, const std::string& _bc_gr = std::string()) const
-    {
-        if (_bc_gr.empty() and get_basecall_strand_group_list(st).empty()) return false;
-        const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front();
-        auto bc_gr_1d = get_basecall_group_1d(bc_gr);
-        return Base::dataset_exists(basecall_events_path(bc_gr_1d, st));
-    }
-    /**
-     * Get Basecall events for given Basecall group and given strand.
-     */
-    std::vector< Event_Entry > get_basecall_events(unsigned st, const std::string& _bc_gr = std::string()) const
-    {
-        std::vector< Event_Entry > res;
-        const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(st).front();
-        hdf5_tools::Compound_Map m;
-        m.add_member("mean", &Event_Entry::mean);
-        m.add_member("start", &Event_Entry::start);
-        m.add_member("stdv", &Event_Entry::stdv);
-        m.add_member("length", &Event_Entry::length);
-        m.add_member("p_model_state", &Event_Entry::p_model_state);
-        m.add_member("model_state", &Event_Entry::model_state);
-        m.add_member("move", &Event_Entry::move);
-        auto bc_gr_1d = get_basecall_group_1d(bc_gr);
-        Base::read(basecall_events_path(bc_gr_1d, st), res, m);
-        return res;
+    void
+    add_basecall_model_params(unsigned st, std::string const & gr, Basecall_Model_Params const & params) const
+    {
+        std::string path = basecall_model_path(gr, st);
+        params.write(*this, path);
+    }
+    std::vector< Basecall_Model_State >
+    get_basecall_model(unsigned st, std::string const & gr = std::string()) const
+    {
+        std::vector< Basecall_Model_State > mod;
+        auto && gr_1d = fill_basecall_1d_group(st, gr);
+        Base::read(basecall_model_path(gr_1d, st), mod, Basecall_Model_State::compound_map());
+        return mod;
     }
-    /**
-     * Add Basecall events
-     */
     template < typename T >
-    void add_basecall_events(unsigned st, const std::string& bc_gr, const std::vector< T >& ev) const
-    {
-        hdf5_tools::Compound_Map cm;
-        cm.add_member("mean", &T::mean);
-        cm.add_member("start", &T::start);
-        cm.add_member("stdv", &T::stdv);
-        cm.add_member("length", &T::length);
-        cm.add_member("p_model_state", &T::p_model_state);
-        cm.add_member("model_state", &T::model_state);
-        cm.add_member("move", &T::move);
-        auto bc_gr_1d = get_basecall_group_1d(bc_gr);
-        Base::write(basecall_events_path(bc_gr_1d, st), true, ev, cm);
-    }
-    /**
-     * Check if Basecall event alignment exist for given Basecall group.
-     */
-    bool have_basecall_event_alignment(const std::string& _bc_gr = std::string()) const
-    {
-        if (_bc_gr.empty() and get_basecall_strand_group_list(2).empty()) return false;
-        const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(2).front();
-        return Base::dataset_exists(basecall_event_alignment_path(bc_gr));
-    }
-    /**
-     * Get Basecall events for given Basecall group.
-     */
-    std::vector< Event_Alignment_Entry > get_basecall_event_alignment(const std::string& _bc_gr = std::string()) const
-    {
-        std::vector< Event_Alignment_Entry > res;
-        const std::string& bc_gr = not _bc_gr.empty()? _bc_gr : get_basecall_strand_group_list(2).front();
-        hdf5_tools::Compound_Map m;
-        m.add_member("template", &Event_Alignment_Entry::template_index);
-        m.add_member("complement", &Event_Alignment_Entry::complement_index);
-        m.add_member("kmer", &Event_Alignment_Entry::kmer);
-        Base::read(basecall_event_alignment_path(bc_gr), res, m);
-        return res;
+    void add_basecall_model(unsigned st, std::string const & gr, std::vector< T > const & mod)
+    {
+        auto && gr_1d = get_basecall_1d_group(gr);
+        Base::write_dataset(basecall_model_path(gr_1d, st), mod, Basecall_Model_State::compound_map());
+        reload();
     }
 
-    /**
-     * Get basecall group holding 1d calls.
-     */
-    std::string get_basecall_group_1d(const std::string& bc_gr) const
+    //
+    // Access Basecall events
+    //
+    bool
+    have_basecall_events(unsigned st, std::string const & gr = std::string()) const
     {
-        std::string path = basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/basecall_1d";
-        if (Base::attribute_exists(path))
+        auto && gr_1d = fill_basecall_1d_group(st, gr);
+        return (_basecall_group_descriptions.count(gr_1d)
+                and _basecall_group_descriptions.at(gr_1d).have_events[st]);
+    }
+    bool
+    have_basecall_events_unpack(unsigned st, std::string const & gr) const
+    {
+        return Base::dataset_exists(basecall_events_path(gr, st));
+    }
+    bool
+    have_basecall_events_pack(unsigned st, std::string const & gr) const
+    {
+        return Base::group_exists(basecall_events_pack_path(gr, st));
+    }
+    Basecall_Events_Params
+    get_basecall_events_params(unsigned st, std::string const & gr = std::string()) const
+    {
+        Basecall_Events_Params bce_params;
+        auto && gr_1d = fill_basecall_1d_group(st, gr);
+        if (have_basecall_events_unpack(st, gr_1d))
         {
-            std::string tmp;
-            Base::read(path, tmp);
-            auto tmp1 = tmp.substr(0, 18);
-            auto tmp2 = tmp.substr(18);
-            if (tmp1 == "Analyses/Basecall_"
-                and Base::group_exists(basecall_root_path() + "/" + basecall_group_prefix() + tmp2))
-            {
-                return tmp2;
-            }
+            bce_params.read(*this, basecall_events_path(gr_1d, st));
+        }
+        else if (have_basecall_events_pack(st, gr_1d))
+        {
+            bce_params.read(*this, basecall_events_params_pack_path(gr_1d, st));
         }
-        return bc_gr;
+        return bce_params;
     }
-    /**
-     * Get EventDetection group for given Basecall group, if available.
-     */
-    std::string get_basecall_eventdetection_group(const std::string& bc_gr) const
+    void
+    add_basecall_events_params(unsigned st, std::string const & gr,
+                               Basecall_Events_Params const & bce_params) const
     {
-        std::string path = basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/event_detection";
-        if (Base::attribute_exists(path))
+        auto path = basecall_events_path(gr, st);
+        if (not Base::dataset_exists(path))
         {
-            std::string tmp;
-            Base::read(path, tmp);
-            auto pos = tmp.find(eventdetection_group_prefix());
-            if (pos != std::string::npos)
+            LOG_THROW
+                << "basecall events must be added before their params";
+        }
+        bce_params.write(*this, path);
+    }
+    std::vector< Basecall_Event >
+    get_basecall_events(unsigned st, std::string const & gr = std::string()) const
+    {
+        std::vector< Basecall_Event > res;
+        auto && gr_1d = fill_basecall_1d_group(st, gr);
+        if (have_basecall_events_unpack(st, gr_1d))
+        {
+            Base::read(basecall_events_path(gr_1d, st), res, Basecall_Event::compound_map());
+        }
+        else if (have_basecall_events_pack(st, gr_1d))
+        {
+            auto ev_pack = get_basecall_events_pack(st, gr_1d);
+            if (not have_basecall_seq(st, gr_1d))
+            {
+                LOG_THROW_(std::logic_error)
+                    << "missing fastq required to unpack basecall events: st=" << st
+                    << " gr=" << gr_1d;
+            }
+            auto sq = get_basecall_seq(st, gr_1d);
+            if (not ev_pack.ed_gr.empty())
             {
-                pos += eventdetection_group_prefix().size();
-                auto end_pos = tmp.find("/", pos);
-                if (end_pos == std::string::npos)
+                if (not have_eventdetection_events(ev_pack.ed_gr))
                 {
-                    end_pos = tmp.size();
+                    LOG_THROW_(std::logic_error)
+                        << "missing eventdetection events required to unpack basecall events: st=" << st
+                        << " gr=" << gr_1d
+                        << " ed_gr=" << ev_pack.ed_gr;
                 }
-                return tmp.substr(pos, end_pos - pos);
+                auto ed = get_eventdetection_events(ev_pack.ed_gr);
+                res = unpack_ev(ev_pack, sq, ed, _channel_id_params).first;
             }
+            else // ed_gr == "": packed relative to raw samples
+            {
+                if (not have_raw_samples())
+                {
+                    LOG_THROW_(std::logic_error)
+                        << "missing raw samples required to unpack basecall events: st=" << st
+                        << " gr=" << gr_1d;
+                }
+                auto rs_ds = get_raw_samples_dataset();
+                auto ed = unpack_implicit_ed(ev_pack, rs_ds);
+                res = unpack_ev(ev_pack, sq, ed, _channel_id_params).first;
+            }
+        }
+        return res;
+    }
+    template < typename T >
+    void
+    add_basecall_events(unsigned st, std::string const & gr, std::vector< T > const & ev)
+    {
+        Base::write_dataset(basecall_events_path(gr, st), ev, T::compound_map());
+        reload();
+    }
+
+    //
+    // Access Basecall alignment
+    //
+    bool
+    have_basecall_alignment(std::string const & gr = std::string()) const
+    {
+        auto && _gr = fill_basecall_group(2, gr);
+        return (_basecall_group_descriptions.count(_gr)
+                and _basecall_group_descriptions.at(_gr).have_alignment);
+    }
+    bool
+    have_basecall_alignment_unpack(std::string const & gr) const
+    {
+        return Base::dataset_exists(basecall_alignment_path(gr));
+    }
+    bool
+    have_basecall_alignment_pack(std::string const & gr) const
+    {
+        return Base::group_exists(basecall_alignment_pack_path(gr));
+    }
+    std::vector< Basecall_Alignment_Entry >
+    get_basecall_alignment(std::string const & gr = std::string()) const
+    {
+        std::vector< Basecall_Alignment_Entry > al;
+        auto && _gr = fill_basecall_group(2, gr);
+        if (have_basecall_alignment_unpack(_gr))
+        {
+            Base::read(basecall_alignment_path(_gr), al, Basecall_Alignment_Entry::compound_map());
+        }
+        else if (have_basecall_alignment_pack(_gr)
+                 and have_basecall_seq(2, _gr))
+        {
+            auto al_pack = get_basecall_alignment_pack(_gr);
+            auto seq = get_basecall_seq(2, _gr);
+            al = unpack_al(al_pack, seq);
         }
-        return std::string();
+        return al;
+    }
+    void
+    add_basecall_alignment(std::string const & gr, std::vector< Basecall_Alignment_Entry > const & al)
+    {
+        Base::write_dataset(basecall_alignment_path(gr), al, Basecall_Alignment_Entry::compound_map());
+        reload();
     }
 
-    static std::string fq2seq(const std::string& fq)
+    //
+    // Static helpers
+    //
+    static inline long long
+    time_to_int(double tf, Channel_Id_Params const & cid_params)
+    {
+        return tf * cid_params.sampling_rate;
+    }
+    static inline double
+    time_to_float(long long ti, Channel_Id_Params const & cid_params)
+    {
+        return ((long double)ti + .5) / cid_params.sampling_rate;
+    }
+    static inline float
+    raw_sample_to_float(int si, Channel_Id_Params const & cid_params)
+    {
+        return ((float)si + cid_params.offset)
+            * cid_params.range / cid_params.digitisation;
+    }
+    static std::string
+    fq2seq(std::string const & fq)
     {
         return split_fq(fq)[1];
     }
-    static std::array< std::string, 4 > split_fq(const std::string& fq)
+    static std::array< std::string, 4 >
+    split_fq(std::string const & fq)
     {
         std::array< std::string, 4 > res = {{"", "", "", ""}};
         size_t i = 0;
@@ -861,193 +1382,1108 @@ public:
     }
 
 private:
-    void detect_raw_samples_read_name_list()
+    friend struct File_Packer;
+
+    //
+    // Cached file data
+    //
+    Channel_Id_Params _channel_id_params;
+    std::vector< std::string > _raw_samples_read_names;
+    std::vector< std::string > _eventdetection_groups;
+    std::map< std::string, std::vector< std::string > > _eventdetection_read_names;
+    std::vector< std::string > _basecall_groups;
+    std::map< std::string, Basecall_Group_Description > _basecall_group_descriptions;
+    std::array< std::vector< std::string >, 3 > _basecall_strand_groups;
+
+    //
+    // Cache updaters
+    //
+    void
+    reload()
+    {
+        load_channel_id_params();
+        load_raw_samples_read_names();
+        load_eventdetection_groups();
+        load_basecall_groups();
+    }
+    void
+    load_channel_id_params()
+    {
+        if (not Base::group_exists(channel_id_path())) return;
+        _channel_id_params.read(*this, channel_id_path());
+    }
+    void
+    load_raw_samples_read_names()
     {
+        _raw_samples_read_names.clear();
         if (not Base::group_exists(raw_samples_root_path())) return;
-        auto rn_list = Base::list_group(raw_samples_root_path());
-        for (const auto& rn : rn_list)
+        auto rn_l = Base::list_group(raw_samples_root_path());
+        for (auto const & rn : rn_l)
         {
-            if (not Base::dataset_exists(raw_samples_path(rn))) continue;
-            _raw_samples_read_name_list.push_back(rn);
+            if (have_raw_samples_unpack(rn)
+                or have_raw_samples_pack(rn))
+            {
+                _raw_samples_read_names.push_back(rn);
+            }
         }
     }
-
-    void detect_eventdetection_group_list()
+    void
+    load_eventdetection_groups()
     {
+        _eventdetection_groups.clear();
+        _eventdetection_read_names.clear();
         if (not Base::group_exists(eventdetection_root_path())) return;
-        auto g_list = Base::list_group(eventdetection_root_path());
-        for (const auto& g : g_list)
+        auto ed_gr_prefix = eventdetection_group_prefix();
+        auto gr_l = Base::list_group(eventdetection_root_path());
+        for (auto const & g : gr_l)
         {
-            if (g.size() <= eventdetection_group_prefix().size()) continue;
-            auto p = std::mismatch(eventdetection_group_prefix().begin(),
-                                   eventdetection_group_prefix().end(),
-                                   g.begin());
-            if (p.first != eventdetection_group_prefix().end()) continue;
-            _eventdetection_group_list.emplace_back(p.second, g.end());
+            if (g.substr(0, ed_gr_prefix.size()) != ed_gr_prefix) continue;
+            std::string gr = g.substr(ed_gr_prefix.size());
+            _eventdetection_groups.push_back(gr);
+            _eventdetection_read_names[gr] = detect_eventdetection_read_names(gr);
         }
     }
-
-    std::vector< std::string > detect_eventdetection_read_name_list(const std::string& ed_gr) const
+    std::vector< std::string >
+    detect_eventdetection_read_names(std::string const & gr) const
     {
         std::vector< std::string > res;
-        std::string p = eventdetection_root_path() + "/" + eventdetection_group_prefix() + ed_gr + "/Reads";
+        std::string p = eventdetection_root_path() + "/" + eventdetection_group_prefix() + gr + "/Reads";
         if (not Base::group_exists(p)) return res;
-        auto rn_list = Base::list_group(p);
-        for (const auto& rn : rn_list)
+        auto rn_l = Base::list_group(p);
+        for (auto const & rn : rn_l)
         {
-            if (not Base::dataset_exists(p + "/" + rn + "/Events")) continue;
-            res.push_back(rn);
+            if (have_eventdetection_events_unpack(gr, rn)
+                or have_eventdetection_events_pack(gr, rn))
+            {
+                res.push_back(rn);
+            }
         }
         return res;
     }
-
-    void detect_basecall_group_list()
+    void
+    load_basecall_groups()
     {
+        _basecall_groups.clear();
+        _basecall_group_descriptions.clear();
+        std::for_each(
+            _basecall_strand_groups.begin(), _basecall_strand_groups.end(),
+            [] (decltype(_basecall_strand_groups)::value_type & v) {
+                v.clear();
+            });
         if (not Base::group_exists(basecall_root_path())) return;
-        auto g_list = Base::list_group(basecall_root_path());
-        for (const auto& g : g_list)
-        {
-            if (g.size() <= basecall_group_prefix().size()) continue;
-            auto p = std::mismatch(basecall_group_prefix().begin(),
-                                   basecall_group_prefix().end(),
-                                   g.begin());
-            if (p.first != basecall_group_prefix().end()) continue;
-            _basecall_group_list.emplace_back(p.second, g.end());
+        auto bc_gr_prefix = basecall_group_prefix();
+        auto gr_l = Base::list_group(basecall_root_path());
+        for (auto const & g : gr_l)
+        {
+            if (g.substr(0, bc_gr_prefix.size()) != bc_gr_prefix) continue;
+            // found basecall group
+            std::string gr = g.substr(bc_gr_prefix.size());
+            _basecall_groups.push_back(gr);
+            // name and version
+            _basecall_group_descriptions[gr] = detect_basecall_group_id(gr);
+            auto & bc_desc = _basecall_group_descriptions.at(gr);
+            // subgroups
             for (unsigned st = 0; st < 3; ++st)
             {
-                if (Base::group_exists(basecall_root_path() + "/" + g + "/" + basecall_strand_subgroup(st)))
+                bc_desc.have_subgroup[st] =
+                    Base::group_exists(basecall_strand_group_path(gr, st));
+                if (bc_desc.have_subgroup[st])
                 {
-                    _basecall_strand_group_list[st].emplace_back(p.second, g.end());
+                    _basecall_strand_groups[st].push_back(gr);
+                    // fastq
+                    bc_desc.have_fastq[st] =
+                        have_basecall_fastq_unpack(st, gr) or
+                        have_basecall_fastq_pack(st, gr);
+                    // events
+                    bc_desc.have_events[st] =
+                        have_basecall_events_unpack(st, gr) or
+                        have_basecall_events_pack(st, gr);
+                    if (st == 0)
+                    {
+                        // ed_gr
+                        bc_desc.ed_gr = detect_basecall_eventdetection_group(gr);
+                    }
+                    if (st == 2)
+                    {
+                        // alignment
+                        bc_desc.have_alignment =
+                            have_basecall_alignment_unpack(gr)
+                            or have_basecall_alignment_pack(gr);
+                    }
                 }
             }
+            // bc_1d_gr
+            if (bc_desc.have_subgroup[0] or bc_desc.have_subgroup[1])
+            {
+                bc_desc.bc_1d_gr = gr;
+            }
+            else if (bc_desc.have_subgroup[2])
+            {
+                bc_desc.bc_1d_gr = detect_basecall_1d_group(gr);
+            }
+            // model
+            for (unsigned st = 0; st < 2; ++st)
+            {
+                bc_desc.have_model[st] =
+                    not bc_desc.bc_1d_gr.empty()
+                    and Base::dataset_exists(basecall_model_path(bc_desc.bc_1d_gr, st));
+            }
         }
     }
-
-    std::map< std::string, std::string > get_attr_map(const std::string& path) const
+    Basecall_Group_Description
+    detect_basecall_group_id(std::string const & gr) const
     {
-        std::map< std::string, std::string > res;
-        auto a_list = Base::get_attr_list(path);
-        for (const auto& a : a_list)
+        Basecall_Group_Description res;
+        res.name = "?";
+        res.version = "?";
+        auto am = get_basecall_params(gr);
+        if (am.count("name"))
+        {
+            if (am.at("name") == "ONT Sequencing Workflow")
+            {
+                res.name = "metrichor";
+                res.version = (am.count("chimaera version")? am.at("chimaera version") : "?") + "+" +
+                    (am.count("dragonet version")? am.at("dragonet version") : "?");
+            }
+            else if (am.at("name") == "MinKNOW-Live-Basecalling")
+            {
+                res.name = "minknow";
+                res.version = (am.count("version")? am.at("version") : "?");
+            }
+            else if (am.at("name") == "ONT Albacore Sequencing Software")
+            {
+                res.name = "albacore";
+                res.version = (am.count("version")? am.at("version") : "?");
+            }
+        }
+        return res;
+    }
+    std::string
+    detect_basecall_1d_group(std::string const & gr) const
+    {
+        std::string path = basecall_group_path(gr) + "/basecall_1d";
+        if (Base::attribute_exists(path))
         {
             std::string tmp;
-            Base::read(path + "/" + a, tmp);
-            res[a] = tmp;
+            Base::read(path, tmp);
+            auto pref = basecall_root_path().substr(1) + "/" + basecall_group_prefix();
+            if (tmp.size() >= pref.size()
+                and tmp.substr(0, pref.size()) == pref)
+            {
+                auto gr_1d = tmp.substr(pref.size());
+                if (have_basecall_group(gr_1d))
+                {
+                    return gr_1d;
+                }
+            }
         }
+        return gr;
+    }
+    std::string
+    detect_basecall_eventdetection_group(std::string const & gr) const
+    {
+        auto bc_params = get_basecall_params(gr);
+        if (bc_params.count("event_detection"))
+        {
+            auto && tmp = bc_params.at("event_detection");
+            auto pref = eventdetection_root_path().substr(1) + "/" + eventdetection_group_prefix();
+            if (tmp.substr(0, pref.size()) == pref)
+            {
+                auto ed_gr = tmp.substr(pref.size());
+                if (have_eventdetection_group(ed_gr))
+                {
+                    return ed_gr;
+                }
+            }
+        }
+        if (have_basecall_events_pack(0, gr))
+        {
+            auto ev_pack = get_basecall_events_pack(0, gr);
+            auto ed_gr = ev_pack.ed_gr;
+            if (have_eventdetection_group(ed_gr))
+            {
+                return ed_gr;
+            }
+        }
+        return "";
+    }
+    double
+    get_basecall_median_sd_temp(std::string const & gr) const
+    {
+        std::string segmentation_link_path = basecall_group_path(gr) + "/segmentation";
+        if (not Base::attribute_exists(segmentation_link_path)) return 0.0;
+        std::string segmentation_path;
+        Base::read(segmentation_link_path, segmentation_path);
+        std::string median_sd_temp_path = "/" + segmentation_path + "/Summary/split_hairpin/median_sd_temp";
+        if (not Base::attribute_exists(median_sd_temp_path)) return 0.0;
+        double res;
+        Base::read(median_sd_temp_path, res);
         return res;
     }
 
-    // list of read names for which we have raw samples
-    std::vector< std::string > _raw_samples_read_name_list;
-
-    // list of EventDetection groups
-    std::vector< std::string > _eventdetection_group_list;
-
-    // list of Basecall groups
-    std::vector< std::string > _basecall_group_list;
-
-    // list of per-strand Basecall groups; 0/1/2 = template/complement/2d
-    std::array< std::vector< std::string >, 3 > _basecall_strand_group_list;
-
-    // static paths
-    static const std::string& file_version_path()
+    //
+    // Functions that fill in empty arguments with default values
+    //
+    std::string const &
+    fill_raw_samples_read_name(std::string const & rn) const
+    {
+        return (not rn.empty() or _raw_samples_read_names.empty()
+                ? rn
+                : _raw_samples_read_names.front());
+    }
+    std::string const &
+    fill_eventdetection_group(std::string const & gr) const
+    {
+        return (not gr.empty() or _eventdetection_groups.empty()
+                ? gr
+                : _eventdetection_groups.front());
+    }
+    std::string const &
+    fill_eventdetection_read_name(std::string const & gr, std::string const & rn) const
+    {
+        return (not rn.empty()
+                or _eventdetection_read_names.count(gr) == 0
+                or _eventdetection_read_names.at(gr).empty()
+                ? rn
+                : _eventdetection_read_names.at(gr).front());
+    }
+    std::string const &
+    fill_basecall_group(unsigned st, std::string const & gr) const
+    {
+        return (not gr.empty()
+                or _basecall_strand_groups.at(st).empty()
+                ? gr
+                : _basecall_strand_groups.at(st).front());
+    }
+    std::string const &
+    fill_basecall_1d_group(unsigned st, std::string const & gr) const
     {
-        static const std::string _file_version_path = "/file_version";
-        return _file_version_path;
+        auto && _gr = fill_basecall_group(st, gr);
+        return get_basecall_1d_group(_gr);
     }
 
-    static const std::string& channel_id_path()
+    //
+    // Packing interface
+    //
+    Raw_Samples_Pack
+    get_raw_samples_pack(std::string const & rn) const
+    {
+        Raw_Samples_Pack rs_pack;
+        auto path = raw_samples_pack_path(rn);
+        rs_pack.read(*this, path);
+        return rs_pack;
+    }
+    void
+    add_raw_samples(std::string const & rn, Raw_Samples_Pack const & rs_pack)
+    {
+        auto path = raw_samples_pack_path(rn);
+        rs_pack.write(*this, path);
+        reload();
+    }
+    Raw_Int_Samples_Dataset
+    get_raw_int_samples_dataset(std::string const & rn = std::string()) const
+    {
+        Raw_Int_Samples_Dataset res;
+        auto && _rn = fill_raw_samples_read_name(rn);
+        res.first = get_raw_int_samples(_rn);
+        res.second = get_raw_samples_params(_rn);
+        return res;
+    }
+    Raw_Samples_Dataset
+    get_raw_samples_dataset(std::string const & rn = std::string()) const
+    {
+        Raw_Samples_Dataset res;
+        auto && _rn = fill_raw_samples_read_name(rn);
+        res.first = get_raw_samples(_rn);
+        res.second = get_raw_samples_params(_rn);
+        return res;
+    }
+    void
+    add_raw_samples_dataset(std::string const & rn, Raw_Int_Samples_Dataset const & rsi_ds)
+    {
+        add_raw_samples(rn, rsi_ds.first);
+        add_raw_samples_params(rn, rsi_ds.second);
+    }
+    EventDetection_Events_Pack
+    get_eventdetection_events_pack(
+        std::string const & gr, std::string const & rn) const
+    {
+        EventDetection_Events_Pack ede_pack;
+        ede_pack.read(*this, eventdetection_events_pack_path(gr, rn));
+        return ede_pack;
+    }
+    void
+    add_eventdetection_events(
+        std::string const & gr, std::string const & rn,
+        EventDetection_Events_Pack const & ede_pack)
+    {
+        ede_pack.write(*this, eventdetection_events_pack_path(gr, rn));
+        reload();
+    }
+    EventDetection_Events_Dataset
+    get_eventdetection_events_dataset(
+        std::string const & gr, std::string const & rn) const
     {
-        static const std::string _channel_id_path = "/UniqueGlobalKey/channel_id";
-        return _channel_id_path;
+        EventDetection_Events_Dataset ede_ds;
+        ede_ds.first = get_eventdetection_events(gr, rn);
+        ede_ds.second = get_eventdetection_events_params(gr, rn);
+        return ede_ds;
     }
-    static const std::string& tracking_id_path()
+    void
+    add_eventdetection_events_dataset(
+        std::string const & gr, std::string const & rn,
+        EventDetection_Events_Dataset const & ede_ds)
     {
-        static const std::string _tracking_id_path = "/UniqueGlobalKey/tracking_id";
-        return _tracking_id_path;
+        add_eventdetection_events(gr, rn, ede_ds.first);
+        add_eventdetection_events_params(gr, rn, ede_ds.second);
     }
-    static const std::string& raw_samples_root_path()
+    //
+    Basecall_Fastq_Pack
+    get_basecall_fastq_pack(unsigned st, std::string const & gr) const
     {
-        static const std::string _raw_samples_root_path = "/Raw/Reads";
-        return _raw_samples_root_path;
+        Basecall_Fastq_Pack fq_pack;
+        auto p = basecall_fastq_pack_path(gr, st);
+        fq_pack.read(*this, p);
+        return fq_pack;        
     }
-    static std::string raw_samples_params_path(const std::string& rn)
+    void
+    add_basecall_fastq(unsigned st, std::string const & gr, Basecall_Fastq_Pack const & fq_pack)
+    {
+        auto p = basecall_fastq_pack_path(gr, st);
+        fq_pack.write(*this, p);
+        reload();
+    }
+    //
+    Basecall_Events_Pack
+    get_basecall_events_pack(unsigned st, std::string const & gr) const
+    {
+        auto p = basecall_events_pack_path(gr, st);
+        Basecall_Events_Pack ev_pack;
+        ev_pack.read(*this, p);
+        return ev_pack;
+    }
+    void
+    add_basecall_events(unsigned st, std::string const & gr, Basecall_Events_Pack const & ev_pack)
+    {
+        auto p = basecall_events_pack_path(gr, st);
+        ev_pack.write(*this, p);
+        reload();
+    }
+    Basecall_Events_Dataset
+    get_basecall_events_dataset(unsigned st, std::string const & gr) const
+    {
+        Basecall_Events_Dataset bce_ds;
+        bce_ds.first = get_basecall_events(st, gr);
+        bce_ds.second = get_basecall_events_params(st, gr);
+        return bce_ds;
+    }
+    void
+    add_basecall_events_dataset(unsigned st, std::string const & gr, Basecall_Events_Dataset const & bce_ds)
+    {
+        add_basecall_events(st, gr, bce_ds.first);
+        add_basecall_events_params(st, gr, bce_ds.second);
+    }
+    //
+    Basecall_Alignment_Pack
+    get_basecall_alignment_pack(std::string const & gr) const
+    {
+        Basecall_Alignment_Pack al_pack;
+        auto p = basecall_alignment_pack_path(gr);
+        al_pack.read(*this, p);
+        return al_pack;
+    }
+    void
+    add_basecall_alignment(std::string const & gr, Basecall_Alignment_Pack const & al_pack)
+    {
+        auto p = basecall_alignment_pack_path(gr);
+        al_pack.write(*this, p);
+        reload();
+    }
+
+    //
+    // Packers & Unpackers
+    //
+    static Raw_Samples_Pack
+    pack_rw(Raw_Int_Samples_Dataset const & rsi_ds)
+    {
+        Raw_Samples_Pack rsp;
+        rsp.params = rsi_ds.second;
+        std::tie(rsp.signal, rsp.signal_params) = rw_coder().encode(rsi_ds.first, true);
+        return rsp;
+    }
+    static Raw_Int_Samples_Dataset
+    unpack_rw(Raw_Samples_Pack const & rs_pack)
+    {
+        Raw_Int_Samples_Dataset rsi_ds;
+        rsi_ds.second = rs_pack.params;
+        rsi_ds.first = rw_coder().decode< Raw_Int_Sample >(rs_pack.signal, rs_pack.signal_params);
+        return rsi_ds;
+    }
+    static std::pair< std::vector< long long >, std::vector< long long > >
+    pack_event_start_length(
+        unsigned num_events,
+        std::function< long long(unsigned) > get_start,
+        std::function< long long(unsigned) > get_length,
+        long long start_time)
+    {
+        std::pair< std::vector< long long >, std::vector< long long > > res;
+        auto & skip = res.first;
+        auto & len = res.second;
+        for (unsigned i = 0; i < num_events; ++i)
+        {
+            auto si = get_start(i);
+            auto li = get_length(i);
+            skip.push_back(si - start_time);
+            len.push_back(li);
+            start_time = si + li;
+        }
+        return res;
+    }
+    static void
+    unpack_event_start_length(
+        std::vector< long long > const & skip,
+        std::vector< long long > const & len,
+        std::function< void(unsigned, long long) > set_start,
+        std::function< void(unsigned, long long) > set_length,
+        long long start_time)
+    {
+        for (unsigned i = 0; i < skip.size(); ++i)
+        {
+            auto si = start_time + skip[i];
+            auto li = len[i];
+            set_start(i, si);
+            set_length(i, li);
+            start_time = si + li;
+        }
+    }
+    static void
+    unpack_event_mean_stdv(
+        unsigned num_events,
+        std::function< long long(unsigned) > get_start,
+        std::function< long long(unsigned) > get_length,
+        std::function< void(unsigned, double) > set_mean,
+        std::function< void(unsigned, double) > set_stdv,
+        std::vector< Raw_Sample > const & rs,
+        long long rs_start_time,
+        int offset)
+    {
+        for (unsigned i = 0; i < num_events; ++i)
+        {
+            long long rs_start_idx = get_start(i) - rs_start_time + offset;
+            long long rs_end_idx = rs_start_idx + get_length(i);
+            if (i == 0 and rs_start_idx < 0) rs_start_idx = 0;
+            if (i == num_events - 1 and rs_end_idx > (long long)rs.size()) rs_end_idx = rs.size();
+            if (rs_start_idx < 0
+                or rs_end_idx <= rs_start_idx
+                or rs_end_idx > (long long)rs.size())
+            {
+                LOG_THROW
+                    << "bad index: rs_start_idx=" << rs_start_idx
+                    << " rs_end_idx=" << rs_end_idx
+                    << " i=" << i
+                    << " length(i)=" << get_length(i)
+                    << " rs_size=" << rs.size()
+                    << " offset=" << offset;
+            }
+            bool all_equal = true;
+            double s = 0.0;
+            double s2 = 0.0;
+            unsigned n = rs_end_idx - rs_start_idx;
+            for (unsigned j = 0; j < n; ++j)
+            {
+                double x = rs[rs_start_idx + j];
+                if (j > 0 and all_equal)
+                {
+                    all_equal = rs[rs_start_idx + j] == rs[rs_start_idx];
+                }
+                s += x;
+                s2 += x * x;
+            }
+            set_mean(i, s / n);
+            if (n > 1 and not all_equal)
+            {
+                double x = (s2 - s*s/n)/n;
+                set_stdv(i, x > 1e-3? std::sqrt(x) : 0);
+            }
+            else
+            {
+                set_stdv(i, 0);
+            }
+        }
+    }
+    static EventDetection_Events_Pack
+    pack_ed(EventDetection_Events_Dataset const & ede_ds)
+    {
+        EventDetection_Events_Pack ede_pack;
+        auto & ede = ede_ds.first;
+        auto & ede_params = ede_ds.second;
+        ede_pack.params = ede_params;
+        std::vector< long long > skip;
+        std::vector< long long > len;
+        std::tie(skip, len) = pack_event_start_length(
+            ede.size(),
+            [&] (unsigned i) { return ede.at(i).start; },
+            [&] (unsigned i) { return ede.at(i).length; },
+            ede_params.start_time);
+        std::tie(ede_pack.skip, ede_pack.skip_params) = ed_skip_coder().encode(skip, false);
+        std::tie(ede_pack.len, ede_pack.len_params) = ed_len_coder().encode(len, false);
+        return ede_pack;
+    }
+    //
+    // Rebuild an event-detection events dataset from its packed form.
+    //
+    // Start and length of each event are decoded from the skip/len streams of
+    // ede_pack; mean and stdv are then filled in by unpack_event_mean_stdv()
+    // from the raw samples in rs_ds. Throws (LOG_THROW) when the decoded skip
+    // and len streams differ in size.
+    //
+    static EventDetection_Events_Dataset
+    unpack_ed(EventDetection_Events_Pack const & ede_pack,
+              Raw_Samples_Dataset const & rs_ds)
+    {
+        EventDetection_Events_Dataset ds;
+        ds.second = ede_pack.params;
+        auto skip_v = ed_skip_coder().decode< long long >(ede_pack.skip, ede_pack.skip_params);
+        auto len_v = ed_len_coder().decode< long long >(ede_pack.len, ede_pack.len_params);
+        if (skip_v.size() != len_v.size())
+        {
+            LOG_THROW
+                << "wrong dataset size: skip_size=" << skip_v.size()
+                << " len_size=" << len_v.size();
+        }
+        auto & events = ds.first;
+        events.resize(skip_v.size());
+        // first pass: start & length
+        unpack_event_start_length(
+            skip_v,
+            len_v,
+            [&] (unsigned idx, long long val) { return events.at(idx).start = val; },
+            [&] (unsigned idx, long long val) { return events.at(idx).length = val; },
+            ede_pack.params.start_time);
+        // the offset is always 0 here, so the warning below never fires
+        int offset = 0;
+        static bool warned = false;
+        if (not warned and offset != 0)
+        {
+            LOG(warning) << "using workaround for old off-by-one ed events bug\n";
+            warned = true;
+        }
+        // second pass: mean & stdv, computed from the raw samples
+        unpack_event_mean_stdv(
+            events.size(),
+            [&] (unsigned idx) { return events.at(idx).start; },
+            [&] (unsigned idx) { return events.at(idx).length; },
+            [&] (unsigned idx, double val) { return events.at(idx).mean = val; },
+            [&] (unsigned idx, double val) { return events.at(idx).stdv = val; },
+            rs_ds.first,
+            rs_ds.second.start_time,
+            offset);
+        return ds;
+    }
+    //
+    // Pack a fastq record.
+    //
+    // The record is split with split_fq(): field 0 is kept as the read name,
+    // field 1 is encoded with fq_bp_coder() and field 3 with fq_qv_coder().
+    // Each quality character has 33 subtracted, is capped at 31 (5 bits), and
+    // only its qv_bits most significant bits (of those 5) are kept; qv_bits
+    // itself is capped at 5.
+    //
+    static Basecall_Fastq_Pack
+    pack_fq(std::string const & fq, unsigned qv_bits = 5)
+    {
+        static unsigned const max_qv_bits = 5;
+        static std::uint8_t const max_qv = ((std::uint8_t)1 << max_qv_bits) - 1;
+        auto fields = split_fq(fq);
+        qv_bits = std::min(qv_bits, max_qv_bits);
+        // mask that clears the (max_qv_bits - qv_bits) low bits of a quality value
+        std::uint8_t const keep_mask = max_qv & (max_qv << (max_qv_bits - qv_bits));
+        Basecall_Fastq_Pack packed;
+        packed.read_name = fields[0];
+        packed.qv_bits = qv_bits;
+        std::vector< std::int8_t > bases(fields[1].begin(), fields[1].end());
+        std::vector< std::uint8_t > quals;
+        for (auto ch : fields[3])
+        {
+            std::uint8_t q = (std::uint8_t)(ch - 33);
+            if (q > max_qv) q = max_qv;
+            quals.push_back(q & keep_mask);
+        }
+        std::tie(packed.bp, packed.bp_params) = fq_bp_coder().encode(bases, false);
+        std::tie(packed.qv, packed.qv_params) = fq_qv_coder().encode(quals, false);
+        return packed;
+    }
+    //
+    // Rebuild a fastq record from its packed form.
+    //
+    // Output layout: "@" + read name, newline, decoded bases, "\n+\n",
+    // decoded quality values each shifted up by 33, newline.
+    //
+    static std::string
+    unpack_fq(Basecall_Fastq_Pack const & fq_pack)
+    {
+        auto const bases = fq_bp_coder().decode< std::int8_t >(fq_pack.bp, fq_pack.bp_params);
+        auto const quals = fq_qv_coder().decode< std::uint8_t >(fq_pack.qv, fq_pack.qv_params);
+        std::string fq("@");
+        fq += fq_pack.read_name;
+        fq += "\n";
+        for (auto b : bases)
+        {
+            fq.push_back((char)b);
+        }
+        fq += "\n+\n";
+        for (auto q : quals)
+        {
+            fq.push_back((char)(33 + q));
+        }
+        fq += "\n";
+        return fq;
+    }
+    //
+    // Pack a basecall events dataset.
+    //
+    // Event timing is stored either relative to the event-detection events in
+    // ed (when ed_gr is non-empty), or as a standalone skip/len encoding (when
+    // ed_gr is empty). Moves are checked against the base sequence sq, and
+    // p_model_state is quantized to p_model_state_bits bits.
+    //
+    // Throws (LOG_THROW) if ev is empty, if an event has no matching ed event,
+    // if a state has an unexpected size, if a move is invalid and cannot be
+    // worked around, or if the states do not consume sq exactly.
+    //
+    static Basecall_Events_Pack
+    pack_ev(Basecall_Events_Dataset const & ev_ds,
+            Basecall_Group_Description const & bc_desc,
+            std::string const & sq,
+            std::vector< EventDetection_Event > const & ed,
+            std::string const & ed_gr,
+            Channel_Id_Params const & cid_params,
+            double median_sd_temp,
+            unsigned p_model_state_bits)
+    {
+        Basecall_Events_Pack ev_pack;
+        ev_pack.params = ev_ds.second;
+        auto & ev = ev_ds.first;
+        // the first event supplies start_time and state_size below
+        if (ev.empty())
+        {
+            LOG_THROW
+                << "no events";
+        }
+        ev_pack.name = bc_desc.name;
+        ev_pack.version = bc_desc.version;
+        ev_pack.ed_gr = ed_gr;
+        ev_pack.start_time = time_to_int(ev[0].start, cid_params);
+        ev_pack.state_size = ev[0].get_model_state().size();
+        ev_pack.median_sd_temp = median_sd_temp;
+        ev_pack.p_model_state_bits = p_model_state_bits;
+        std::vector< long long > rel_skip;
+        std::vector< long long > skip;
+        std::vector< long long > len;
+        std::vector< std::uint8_t > mv;
+        std::vector< std::uint16_t > p_model_state;
+        // first pack start/duration
+        if (not ed_gr.empty())
+        {
+            // pack relative to ed events
+            long long j = -1;
+            for (unsigned i = 0; i < ev.size(); ++i)
+            {
+                auto ti = time_to_int(ev[i].start, cid_params);
+                auto last_j = j++;
+                // advance to the first ed event that does not start before ti
+                while (j < (long long)ed.size() and ed[j].start < ti) ++j;
+                if (j == (long long)ed.size())
+                {
+                    LOG_THROW
+                        << "no matching ed event: i=" << i
+                        << " ev[i]=(" << ti
+                        << "," << time_to_int(ev[i].length, cid_params)
+                        << "," << ev[i].mean
+                        << "," << ev[i].stdv
+                        << ")";
+                }
+                // number of ed events skipped since the previous match
+                rel_skip.push_back(j - last_j - 1);
+            }
+            std::tie(ev_pack.rel_skip, ev_pack.rel_skip_params) = ev_rel_skip_coder().encode(rel_skip, false);
+        }
+        else
+        {
+            // pack start&length as for ed events
+            std::tie(skip, len) = pack_event_start_length(
+                ev.size(),
+                [&] (unsigned i) { return time_to_int(ev.at(i).start, cid_params); },
+                [&] (unsigned i) { return time_to_int(ev.at(i).length, cid_params); },
+                ev_pack.start_time);
+            std::tie(ev_pack.skip, ev_pack.skip_params) = ed_skip_coder().encode(skip, false);
+            std::tie(ev_pack.len, ev_pack.len_params) = ed_len_coder().encode(len, false);
+        }
+        // largest value representable with p_model_state_bits bits
+        unsigned const p_model_state_max = (1u << p_model_state_bits) - 1;
+        unsigned sq_pos = 0;
+        for (unsigned i = 0; i < ev.size(); ++i)
+        {
+            auto s = ev[i].get_model_state();
+            if (s.size() != ev_pack.state_size)
+            {
+                LOG_THROW
+                    << "unexpected state size: i=" << i
+                    << " s=" << s
+                    << " expected_size=" << ev_pack.state_size;
+            }
+            // check if move is valid
+            if (ev[i].move < 0 or ev[i].move > std::numeric_limits< uint8_t >::max())
+            {
+                LOG_THROW
+                    << "invalid move: i=" << i
+                    << " ev[i].move=" << ev[i].move;
+            }
+            int real_move = ev[i].move;
+            if (sq.substr(sq_pos + real_move, ev_pack.state_size) != s)
+            {
+                // move is not valid, compute alternative:
+                // allow move > state_size only if previous state is homopolymer
+                auto next_sq_pos = sq.find(s, sq_pos);
+                if (next_sq_pos != std::string::npos
+                    and (next_sq_pos <= sq_pos + ev_pack.state_size
+                         or sq.substr(sq_pos, ev_pack.state_size) == std::string(ev_pack.state_size, sq[sq_pos])))
+                {
+                    real_move = next_sq_pos - sq_pos;
+                }
+                else
+                {
+                    real_move = -1;
+                }
+                if (real_move >= 0)
+                {
+                    LOG(warning)
+                        << "using workaround for invalid move: i=" << i
+                        << " sq=" << sq.substr(sq_pos, 2 * ev_pack.state_size)
+                        << " move[i]=" << ev[i].move
+                        << " state[i]=" << s
+                        << " real_move=" << real_move << std::endl;
+                }
+                else
+                {
+                    LOG_THROW
+                        << "invalid move: i=" << i
+                        << " sq=" << sq.substr(sq_pos, 2 * ev_pack.state_size)
+                        << " move[i]=" << ev[i].move
+                        << " state[i]=" << s;
+                }
+            }
+            mv.push_back(real_move);
+            sq_pos += real_move;
+            // p_model_state: scale, then clamp in the wide type *before* narrowing
+            // to 16 bits, so that out-of-range values saturate instead of wrapping
+            double const p_scaled = ev[i].p_model_state * (1u << p_model_state_bits);
+            std::uint16_t p_model_state_val = 0;
+            if (p_scaled >= p_model_state_max)
+            {
+                p_model_state_val = p_model_state_max;
+            }
+            else if (p_scaled > 0)
+            {
+                p_model_state_val = static_cast< std::uint16_t >(p_scaled);
+            }
+            p_model_state.push_back(p_model_state_val);
+        }
+        // the last state must end exactly at the end of the base sequence
+        if (sq_pos + ev_pack.state_size != sq.size())
+        {
+            LOG_THROW
+                << "leftover base sequence: sq_size=" << sq.size()
+                << " sq_end_pos=" << sq_pos + ev_pack.state_size;
+        }
+        std::tie(ev_pack.move, ev_pack.move_params) = ev_move_coder().encode(mv, false);
+        std::tie(ev_pack.p_model_state, ev_pack.p_model_state_params) = bit_packer().encode(p_model_state, p_model_state_bits);
+        return ev_pack;
+    } // pack_ev()
+    //
+    // Rebuild the event-detection events stored implicitly inside a basecall
+    // events pack (its own skip/len streams).
+    //
+    // Start and length are decoded from ev_pack; mean and stdv are filled in by
+    // unpack_event_mean_stdv() from the raw samples in rs_ds. Throws (LOG_THROW)
+    // when the skip stream is empty or its size differs from the len stream.
+    //
+    static std::vector< EventDetection_Event >
+    unpack_implicit_ed(Basecall_Events_Pack const & ev_pack,
+                       Raw_Samples_Dataset const & rs_ds)
+    {
+        auto skip_v = ed_skip_coder().decode< long long >(ev_pack.skip, ev_pack.skip_params);
+        auto len_v = ed_len_coder().decode< long long >(ev_pack.len, ev_pack.len_params);
+        bool const sizes_ok = not skip_v.empty() and skip_v.size() == len_v.size();
+        if (not sizes_ok)
+        {
+            LOG_THROW
+                << "wrong dataset size: skip_size=" << skip_v.size()
+                << " len_size=" << len_v.size();
+        }
+        std::vector< EventDetection_Event > events;
+        events.resize(skip_v.size());
+        // first pass: start & length
+        unpack_event_start_length(
+            skip_v,
+            len_v,
+            [&] (unsigned idx, long long val) { return events.at(idx).start = val; },
+            [&] (unsigned idx, long long val) { return events.at(idx).length = val; },
+            ev_pack.start_time);
+        // the offset is always 0 here, so the warning below never fires
+        int offset = 0;
+        static bool warned = false;
+        if (not warned and offset != 0)
+        {
+            LOG(warning) << "using workaround for bug in "
+                         << ev_pack.name << ":" << ev_pack.version << "\n";
+            warned = true;
+        }
+        // second pass: mean & stdv, computed from the raw samples
+        unpack_event_mean_stdv(
+            events.size(),
+            [&] (unsigned idx) { return events.at(idx).start; },
+            [&] (unsigned idx) { return events.at(idx).length; },
+            [&] (unsigned idx, double val) { return events.at(idx).mean = val; },
+            [&] (unsigned idx, double val) { return events.at(idx).stdv = val; },
+            rs_ds.first,
+            rs_ds.second.start_time,
+            offset);
+        return events;
+    }
+    //
+    // Rebuild a basecall events dataset from its packed form.
+    //
+    // Timing, mean and stdv of event i are copied from the ed event selected by
+    // the rel_skip stream (consecutive ed events when that stream is empty).
+    // The model state of each event is reconstructed by walking the base
+    // sequence sq according to the decoded moves.
+    //
+    // Throws (LOG_THROW) if the decoded streams differ in size, if the state
+    // size exceeds MAX_K_LEN, if an event points outside ed, or if the moves
+    // run past the end of sq.
+    //
+    static Basecall_Events_Dataset
+    unpack_ev(Basecall_Events_Pack const & ev_pack,
+              std::string const & sq,
+              std::vector< EventDetection_Event > const & ed,
+              Channel_Id_Params const & cid_params)
+    {
+        Basecall_Events_Dataset ev_ds;
+        ev_ds.second = ev_pack.params;
+        auto & ev = ev_ds.first;
+        std::vector< long long > rel_skip;
+        if (not ev_pack.rel_skip.empty())
+        {
+            rel_skip = ev_rel_skip_coder().decode< long long >(ev_pack.rel_skip, ev_pack.rel_skip_params);
+        }
+        auto mv = ev_move_coder().decode< std::uint8_t >(ev_pack.move, ev_pack.move_params);
+        auto p_model_state = bit_packer().decode< std::uint16_t >(ev_pack.p_model_state, ev_pack.p_model_state_params);
+        if ((not rel_skip.empty() and rel_skip.size() != mv.size()) or p_model_state.size() != mv.size())
+        {
+            LOG_THROW
+                << "wrong dataset size: rel_skip_size=" << rel_skip.size()
+                << " mv_size=" << mv.size()
+                << " p_model_state_size=" << p_model_state.size();
+        }
+        // the state is copied into model_state below; a state longer than
+        // MAX_K_LEN would not fit (assumes model_state holds MAX_K_LEN chars,
+        // as the terminator logic below suggests)
+        if (ev_pack.state_size > MAX_K_LEN)
+        {
+            LOG_THROW
+                << "state size too large: state_size=" << ev_pack.state_size;
+        }
+        ev.resize(mv.size());
+        long long j = -1;
+        std::string s;
+        unsigned sq_pos = 0;
+        unsigned p_model_state_bits = 0;
+        std::istringstream(ev_pack.p_model_state_params.at("num_bits")) >> p_model_state_bits;
+        long long unsigned max_p_model_state_int = 1llu << p_model_state_bits;
+        for (unsigned i = 0; i < ev.size(); ++i)
+        {
+            j += (not rel_skip.empty()? rel_skip[i] : 0) + 1;
+            if (j < 0 or j >= (long long)ed.size())
+            {
+                LOG_THROW
+                    << "no matching ed event: i=" << i
+                    << " j=" << j
+                    << " ed_size=" << ed.size();
+            }
+            ev[i].start = time_to_float(ed[j].start, cid_params);
+            ev[i].length = time_to_float(ed[j].length, cid_params);
+            ev[i].mean = ed[j].mean;
+            ev[i].stdv = ed[j].stdv;
+            if (ev[i].stdv == 0.0) ev[i].stdv = ev_pack.median_sd_temp;
+            ev[i].move = mv[i];
+            if (i > 0)
+            {
+                // apply move: drop mv[i] bases from the front of the current state
+                if (mv[i] <= s.size())
+                {
+                    s = s.substr(mv[i]);
+                }
+                else
+                {
+                    // move longer than the state (pack_ev permits this after a
+                    // homopolymer state): the extra bases are skipped in sq
+                    sq_pos += mv[i] - s.size();
+                    s.clear();
+                }
+            }
+            // refill the state from the base sequence
+            while (s.size() < ev_pack.state_size)
+            {
+                if (sq_pos >= sq.size())
+                {
+                    LOG_THROW
+                        << "base sequence too short: i=" << i
+                        << " sq_size=" << sq.size();
+                }
+                s += sq[sq_pos++];
+            }
+            std::copy(s.begin(), s.end(), ev[i].model_state.begin());
+            if (ev_pack.state_size < MAX_K_LEN) ev[i].model_state[ev_pack.state_size] = 0;
+            ev[i].p_model_state = (double)p_model_state[i] / max_p_model_state_int;
+        }
+        return ev_ds;
+    } // unpack_ev()
+    //
+    // Pack a basecall alignment.
+    //
+    // For each strand (0: template, 1: complement) one bit per alignment entry
+    // records whether the entry has an index on that strand; present indexes
+    // must advance by +1 (template) or -1 (complement) from the first one seen.
+    // The kmer of each entry is located in sq at or after the previous kmer
+    // position and the distance is stored as its move.
+    //
+    // Throws (LOG_THROW) on an out-of-sequence index, a kmer missing from sq,
+    // a move larger than int8_t allows, or a strand without any index.
+    //
+    static Basecall_Alignment_Pack
+    pack_al(std::vector< Basecall_Alignment_Entry > const & al,
+            std::string const & sq)
+    {
+        std::array< std::vector< uint8_t > , 2 > step_v;
+        for (auto & v : step_v)
+        {
+            v.reserve(al.size());
+        }
+        std::vector< int8_t > mv;
+        mv.reserve(al.size());
+        std::array< int, 2 > first_index = {{ -1, -1 }};
+        std::array< int, 2 > expected_index = {{ -1, -1 }};
+        std::array< int, 2 > const delta = {{ 1, -1 }};
+        unsigned pos = 0;
+        for (unsigned i = 0; i < al.size(); ++i)
+        {
+            for (unsigned k = 0; k < 2; ++k)
+            {
+                auto idx = (k == 0? al[i].template_index : al[i].complement_index);
+                if (idx < 0)
+                {
+                    // no event on this strand for this entry
+                    step_v[k].push_back(0);
+                    continue;
+                }
+                if (first_index[k] < 0)
+                {
+                    first_index[k] = idx;
+                    expected_index[k] = idx;
+                }
+                if (idx != expected_index[k])
+                {
+                    LOG_THROW
+                        << "bad index: idx=" << idx
+                        << " next_index=" << expected_index[k];
+                }
+                step_v[k].push_back(1);
+                expected_index[k] += delta[k];
+            }
+            // compute move
+            auto kmer = al[i].get_kmer();
+            size_t next_pos = sq.find(kmer, pos);
+            if (next_pos == std::string::npos)
+            {
+                LOG_THROW
+                    << "missing kmer in 2d seq";
+            }
+            if (next_pos - pos > std::numeric_limits< int8_t >::max())
+            {
+                LOG_THROW
+                    << "bad move: next_pos=" << next_pos
+                    << " pos=" << pos;
+            }
+            mv.push_back(next_pos - pos);
+            pos = next_pos;
+        }
+        if (first_index[0] < 0)
+        {
+            LOG_THROW
+                << "no template events";
+        }
+        if (first_index[1] < 0)
+        {
+            LOG_THROW
+                << "no complement events";
+        }
+        Basecall_Alignment_Pack al_pack;
+        al_pack.template_index_start = first_index[0];
+        al_pack.complement_index_start = first_index[1];
+        al_pack.kmer_size = al[0].get_kmer().size();
+        std::tie(al_pack.template_step, al_pack.template_step_params) = bit_packer().encode(step_v[0], 1);
+        std::tie(al_pack.complement_step, al_pack.complement_step_params) = bit_packer().encode(step_v[1], 1);
+        std::tie(al_pack.move, al_pack.move_params) = ev_move_coder().encode(mv, false);
+        return al_pack;
+    } // pack_al()
+    //
+    // Rebuild a basecall alignment from its packed form.
+    //
+    // Template indexes count up and complement indexes count down from their
+    // stored start values, one step per entry whose step bit is set; entries
+    // with a clear bit get index -1. The kmer of each entry is read from sq at
+    // the position obtained by accumulating the decoded moves.
+    //
+    // Throws (LOG_THROW) if the decoded streams differ in size, if the kmer
+    // size exceeds MAX_K_LEN, or if a kmer would lie outside sq.
+    //
+    static std::vector< Basecall_Alignment_Entry >
+    unpack_al(Basecall_Alignment_Pack const & al_pack,
+              std::string const & sq)
+    {
+        std::vector< Basecall_Alignment_Entry > al;
+        std::array< std::vector< uint8_t >, 2 > step_v =
+            {{ bit_packer().decode< uint8_t >(al_pack.template_step, al_pack.template_step_params),
+               bit_packer().decode< uint8_t >(al_pack.complement_step, al_pack.complement_step_params) }};
+        auto mv = ev_move_coder().decode< int8_t >(al_pack.move, al_pack.move_params);
+        if (step_v[1].size() != step_v[0].size()
+            or mv.size() != step_v[0].size())
+        {
+            LOG_THROW
+                << "wrong dataset size: step_v[0]_size=" << step_v[0].size()
+                << " step_v[1]_size=" << step_v[1].size()
+                << " mv_size=" << mv.size();
+        }
+        // the kmer is copied into the entry below; a kmer longer than MAX_K_LEN
+        // would not fit (assumes kmer holds MAX_K_LEN chars, as the terminator
+        // logic below suggests)
+        if (al_pack.kmer_size > MAX_K_LEN)
+        {
+            LOG_THROW
+                << "kmer size too large: kmer_size=" << al_pack.kmer_size;
+        }
+        al.resize(step_v[0].size());
+        std::array< unsigned, 2 > crt_index = {{ al_pack.template_index_start, al_pack.complement_index_start }};
+        std::array< int, 2 > delta = {{ 1, -1 }};
+        int pos = 0;
+        auto set_idx = [&] (unsigned i, unsigned k, int val) {
+            if (k == 0)
+            {
+                al[i].template_index = val;
+            }
+            else
+            {
+                al[i].complement_index = val;
+            }
+        };
+        for (unsigned i = 0; i < step_v[0].size(); ++i)
+        {
+            for (unsigned k = 0; k < 2; ++k)
+            {
+                if (step_v[k][i] > 0)
+                {
+                    set_idx(i, k, crt_index[k]);
+                    crt_index[k] += delta[k];
+                }
+                else
+                {
+                    set_idx(i, k, -1);
+                }
+            }
+            // set kmer
+            pos += mv[i];
+            // reject positions that would read outside the base sequence
+            if (pos < 0 or static_cast< std::size_t >(pos) + al_pack.kmer_size > sq.size())
+            {
+                LOG_THROW
+                    << "kmer outside 2d seq: i=" << i
+                    << " pos=" << pos
+                    << " sq_size=" << sq.size();
+            }
+            std::copy(sq.begin() + pos, sq.begin() + pos + al_pack.kmer_size, al[i].kmer.begin());
+            if (al_pack.kmer_size < MAX_K_LEN) al[i].kmer[al_pack.kmer_size] = 0;
+        }
+        return al;
+    } // unpack_al()
+
+    //
+    // Fast5 internal paths
+    //
+    // fixed locations inside a fast5 file
+    static std::string file_version_path()
+    {
+        return std::string("/file_version");
+    }
+    static std::string channel_id_path()
+    {
+        return std::string("/UniqueGlobalKey/channel_id");
+    }
+    static std::string tracking_id_path()
+    {
+        return std::string("/UniqueGlobalKey/tracking_id");
+    }
+    static std::string sequences_path()
+    {
+        return std::string("/Sequences/Meta");
+    }
+    static std::string raw_samples_root_path()
+    {
+        return std::string("/Raw/Reads");
+    }
+    static std::string raw_samples_params_path(std::string const & rn)
     {
         return raw_samples_root_path() + "/" + rn;
     }
-    static std::string raw_samples_path(const std::string& rn)
+    static std::string raw_samples_path(std::string const & rn)
     {
         return raw_samples_root_path() + "/" + rn + "/Signal";
     }
-    static const std::string& sequences_path()
+    static std::string raw_samples_pack_path(std::string const & rn)
     {
-        static const std::string _sequences_path = "/Sequences/Meta";
-        return _sequences_path;
+        return raw_samples_path(rn) + "_Pack";
     }
-    static const std::string& eventdetection_root_path()
+    static std::string raw_samples_params_pack_path(std::string const & rn)
     {
-        static const std::string _eventdetection_root_path = "/Analyses";
-        return _eventdetection_root_path;
+        return raw_samples_pack_path(rn) + "/params";
     }
-    static const std::string& eventdetection_group_prefix()
+    static std::string eventdetection_root_path() { return "/Analyses"; }
+    static std::string eventdetection_group_prefix() { return "EventDetection_"; }
+    static std::string eventdetection_group_path(std::string const & gr)
     {
-        static const std::string _eventdetection_group_prefix = "EventDetection_";
-        return _eventdetection_group_prefix;
+        return eventdetection_root_path() + "/" + eventdetection_group_prefix() + gr;
     }
-    static std::string eventdetection_params_path(const std::string& ed_gr)
+    static std::string eventdetection_events_params_path(std::string const & gr, std::string const & rn)
     {
-        return eventdetection_root_path() + "/" + eventdetection_group_prefix() + ed_gr;
+        return eventdetection_group_path(gr) + "/Reads/" + rn;
     }
-    static std::string eventdetection_event_params_path(const std::string& ed_gr, const std::string& rn)
+    static std::string eventdetection_events_path(std::string const & gr, std::string const & rn)
     {
-        return eventdetection_root_path() + "/" + eventdetection_group_prefix() + ed_gr + "/Reads/" + rn;
+        return eventdetection_group_path(gr) + "/Reads/" + rn + "/Events";
     }
-    static std::string eventdetection_events_path(const std::string& ed_gr, const std::string& rn)
+    static std::string eventdetection_events_pack_path(std::string const & gr, std::string const & rn)
     {
-        return eventdetection_root_path() + "/" + eventdetection_group_prefix() + ed_gr + "/Reads/" + rn + "/Events";
+        return eventdetection_events_path(gr, rn) + "_Pack";
     }
-
-    static const std::string& basecall_root_path()
+    static std::string eventdetection_events_params_pack_path(std::string const & gr, std::string const & rn)
+    {
+        return eventdetection_events_pack_path(gr, rn) + "/params";
+    }
+    static std::string basecall_root_path() { return "/Analyses"; }
+    static std::string basecall_group_prefix() { return "Basecall_"; }
+    // Name of strand st: 0 -> "template", 1 -> "complement", 2 -> "2D".
+    // Any other value makes std::array::at() throw std::out_of_range.
+    static std::string strand_name(unsigned st)
+    {
+        static const std::array< std::string, 3 > names =
+            {{ std::string("template"), std::string("complement"), std::string("2D") }};
+        std::string const & name = names.at(st);
+        return name;
+    }
+    // Subgroup name for strand st: "BaseCalled_" followed by strand_name(st).
+    static std::string basecall_strand_subgroup(unsigned st)
+    {
+        std::string res("BaseCalled_");
+        res += strand_name(st);
+        return res;
+    }
+    // Path of basecall group gr: <basecall root>/<basecall group prefix><gr>.
+    static std::string basecall_group_path(std::string const & gr)
+    {
+        std::string res = basecall_root_path();
+        res += "/";
+        res += basecall_group_prefix();
+        res += gr;
+        return res;
+    }
+    // Path of the strand st subgroup inside basecall group gr.
+    static std::string basecall_strand_group_path(std::string const & gr, unsigned st)
+    {
+        std::string res = basecall_group_path(gr);
+        res += "/";
+        res += basecall_strand_subgroup(st);
+        return res;
+    }
+    // Path of the "Log" entry of basecall group gr.
+    static std::string basecall_log_path(std::string const & gr)
+    {
+        std::string res = basecall_group_path(gr);
+        res += "/Log";
+        return res;
+    }
+    // Path of the "Fastq" entry for strand st of basecall group gr.
+    static std::string basecall_fastq_path(std::string const & gr, unsigned st)
+    {
+        std::string res = basecall_strand_group_path(gr, st);
+        res += "/Fastq";
+        return res;
+    }
+    // Path of the packed fastq for strand st of basecall group gr:
+    // the fastq path with "_Pack" appended.
+    static std::string basecall_fastq_pack_path(std::string const & gr, unsigned st)
+    {
+        std::string res = basecall_fastq_path(gr, st);
+        res += "_Pack";
+        return res;
+    }
+    // Path of the "Model" entry for strand st of basecall group gr.
+    static std::string basecall_model_path(std::string const & gr, unsigned st)
+    {
+        std::string res = basecall_strand_group_path(gr, st);
+        res += "/Model";
+        return res;
+    }
+    static std::string basecall_model_file_path(std::string const & gr, unsigned st)
     {
-        static const std::string _basecall_root_path = "/Analyses";
-        return _basecall_root_path;
+        return basecall_group_path(gr) + "/Summary/basecall_1d_" + strand_name(st) + "/model_file";
     }
-    static const std::string& basecall_group_prefix()
+    static std::string basecall_events_path(std::string const & gr, unsigned st)
     {
-        static const std::string _basecall_group_prefix = "Basecall_";
-        return _basecall_group_prefix;
+        return basecall_strand_group_path(gr, st) + "/Events";
     }
-    static const std::string& basecall_strand_subgroup(unsigned st)
+    static std::string basecall_events_pack_path(std::string const & gr, unsigned st)
     {
-        static const std::array< std::string, 3 > _basecall_strand_subgroup =
-            {{ "BaseCalled_template", "BaseCalled_complement", "BaseCalled_2D" }};
-        return _basecall_strand_subgroup[st];
+        return basecall_events_path(gr, st) + "_Pack";
     }
-    static std::string basecall_fastq_path(const std::string& bc_gr, unsigned st)
+    static std::string basecall_events_params_pack_path(std::string const & gr, unsigned st)
     {
-        return basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/"
-            + basecall_strand_subgroup(st) + "/Fastq";
+        return basecall_events_pack_path(gr, st) + "/params";
     }
-    static std::string basecall_model_path(const std::string& bc_gr, unsigned st)
+    static std::string basecall_alignment_path(std::string const & gr)
     {
-        return basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/"
-            + basecall_strand_subgroup(st) + "/Model";
+        return basecall_strand_group_path(gr, 2) + "/Alignment";
     }
-    static std::string basecall_model_file_path(const std::string& bc_gr, unsigned st)
+    static std::string basecall_alignment_pack_path(std::string const & gr)
     {
-        assert(st < 2);
-        return basecall_root_path() + "/" + basecall_group_prefix() + bc_gr
-            + "/Summary/basecall_1d_" + (st == 0? "template" : "complement") + "/model_file";
+        return basecall_alignment_path(gr) + "_Pack";
     }
-    static std::string basecall_events_path(const std::string& bc_gr, unsigned st)
+    static std::string basecall_config_path(std::string const & gr)
     {
-        return basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/"
-            + basecall_strand_subgroup(st) + "/Events";
+        return basecall_group_path(gr) + "/Configuration";
     }
-    static std::string basecall_event_alignment_path(const std::string& bc_gr)
+    static std::string basecall_summary_path(std::string const & gr)
     {
-        return basecall_root_path() + "/" + basecall_group_prefix() + bc_gr + "/"
-            + basecall_strand_subgroup(2) + "/Alignment";
+        return basecall_group_path(gr) + "/Summary";
     }
+    //
+    // Packers
+    //
+    // Huffman coders, each looked up by name through Huffman_Packer::get_coder()
+    static Huffman_Packer const & rw_coder()
+    {
+        return Huffman_Packer::get_coder("fast5_rw_1");
+    }
+    static Huffman_Packer const & ed_skip_coder()
+    {
+        return Huffman_Packer::get_coder("fast5_ed_skip_1");
+    }
+    static Huffman_Packer const & ed_len_coder()
+    {
+        return Huffman_Packer::get_coder("fast5_ed_len_1");
+    }
+    static Huffman_Packer const & fq_bp_coder()
+    {
+        return Huffman_Packer::get_coder("fast5_fq_bp_1");
+    }
+    static Huffman_Packer const & fq_qv_coder()
+    {
+        return Huffman_Packer::get_coder("fast5_fq_qv_1");
+    }
+    static Huffman_Packer const & ev_rel_skip_coder()
+    {
+        return Huffman_Packer::get_coder("fast5_ev_rel_skip_1");
+    }
+    static Huffman_Packer const & ev_move_coder()
+    {
+        return Huffman_Packer::get_coder("fast5_ev_move_1");
+    }
+    // bit packer, obtained through Bit_Packer::get_packer()
+    static Bit_Packer const & bit_packer()
+    {
+        return Bit_Packer::get_packer();
+    }
 }; // class File
 
 } // namespace fast5
diff --git a/src/fast5_version.hpp b/src/fast5_version.hpp
new file mode 100644
index 0000000..5bb54e1
--- /dev/null
+++ b/src/fast5_version.hpp
@@ -0,0 +1,16 @@
+#ifndef __FAST5_VERSION_HPP
+#define __FAST5_VERSION_HPP
+// Defines the fast5 version string.
+namespace fast5
+{
+
+namespace // unnamed: `version` has internal linkage in every file including this header
+{
+
+static char const * const version = "0.6.2"; // fast5 version string
+
+} // unnamed namespace
+
+} // namespace fast5
+
+#endif // __FAST5_VERSION_HPP
diff --git a/src/hdf5-mod.cpp b/src/hdf5-mod.cpp
index 314bdb3..818d36d 100644
--- a/src/hdf5-mod.cpp
+++ b/src/hdf5-mod.cpp
@@ -1,3 +1,10 @@
+//
+// Part of: https://github.com/mateidavid/fast5
+//
+// Copyright (c) 2015-2017 Matei David, Ontario Institute for Cancer Research
+// MIT License
+//
+
 #include <cassert>
 #include <iostream>
 #include <string>
@@ -226,6 +233,10 @@ int main(int argc, char* argv[])
                 clog << a << endl;
             }
 
+            // reopen for reading
+            f.close();
+            f.open(file_name);
+
             //
             // test reading compound
             //
@@ -295,6 +306,7 @@ int main(int argc, char* argv[])
                 }
 
             }
+            f.close();
         }
         catch (hdf5_tools::Exception& e)
         {
diff --git a/src/hdf5_tools.hpp b/src/hdf5_tools.hpp
index 252b1a2..6fa9124 100644
--- a/src/hdf5_tools.hpp
+++ b/src/hdf5_tools.hpp
@@ -1,7 +1,8 @@
 //
-// The MIT License (MIT)
+// Part of: https://github.com/mateidavid/fast5
 //
-// Copyright (c) 2015 Matei David, Ontario Institute for Cancer Research
+// Copyright (c) 2015-2017 Matei David, Ontario Institute for Cancer Research
+// MIT License
 //
 
 #ifndef __HDF5_TOOLS_HPP
@@ -20,6 +21,7 @@
 #include <deque>
 #include <set>
 #include <map>
+#include <queue>
 #include <limits>
 #include <type_traits>
 
@@ -413,6 +415,11 @@ struct Util
                     [] (void * vp) { return *reinterpret_cast< H5T_class_t * >(vp) != H5T_NO_CLASS; }
                   }
                 },
+                { (void(*)())&H5Tget_cset,
+                  { "H5Tget_cset",
+                    [] (void * vp) { return *reinterpret_cast< H5T_cset_t * >(vp) != H5T_CSET_ERROR; }
+                  }
+                },
                 { (void(*)())&H5Tget_member_index,
                   { "H5Tget_member_index",
                     [] (void * vp) { return *reinterpret_cast< int * >(vp) >= 0; }
@@ -453,6 +460,11 @@ struct Util
                     [] (void * vp) { return *reinterpret_cast< htri_t * >(vp) >= 0; }
                   }
                 },
+                { (void(*)())&H5Tset_cset,
+                  { "H5Tset_cset",
+                    [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
+                  }
+                },
                 { (void(*)())&H5Tset_size,
                   { "H5Tset_size",
                     [] (void * vp) { return *reinterpret_cast< herr_t * >(vp) >= 0; }
@@ -831,6 +843,7 @@ struct Reader_Base
         {
             file_dtype_is_vlen_str = false;
         }
+        // datatype size
         file_dtype_size = Util::wrap(H5Tget_size, file_dtype_id_holder.id);
     }
     HDF_Object_Holder obj_id_holder;
@@ -877,10 +890,13 @@ struct String_reader
         HDF_Object_Holder mem_dtype_id_holder;
         if (file_stype_class == H5T_STRING) // stored as a string
         {
+            auto file_stype_cset = Util::wrap(H5Tget_cset, file_stype_id);
             if (Util::wrap(H5Tis_variable_str, file_stype_id)) // stored as a varlen string
             {
                 // compute mem_type
-                mem_dtype_id_holder = mem_type_wrapper(Util::make_str_type(-1));
+                auto mem_stype_id_holder = Util::make_str_type(-1);
+                Util::wrap(H5Tset_cset, mem_stype_id_holder.id, file_stype_cset);
+                mem_dtype_id_holder = mem_type_wrapper(std::move(mem_stype_id_holder));
                 // prepare buffer to receive data
                 std::vector< char * > charptr_buff(res.size(), nullptr);
                 // perform the read
@@ -899,7 +915,9 @@ struct String_reader
             {
                 // compute mem_type
                 size_t file_stype_size = Util::wrap(H5Tget_size, file_stype_id);
-                mem_dtype_id_holder = mem_type_wrapper(Util::make_str_type(file_stype_size + 1));
+                auto mem_stype_id_holder = Util::make_str_type(file_stype_size + 1);
+                Util::wrap(H5Tset_cset, mem_stype_id_holder.id, file_stype_cset);
+                mem_dtype_id_holder = mem_type_wrapper(std::move(mem_stype_id_holder));
                 // prepare buffer to receieve data
                 std::vector< char > char_buff(res.size() * (file_stype_size + 1), '\0');
                 // perform the read
@@ -999,6 +1017,8 @@ struct Reader_helper< 2, Data_Type >
             and not reader_base.file_dtype_is_vlen_str)
         {
             HDF_Object_Holder mem_dtype_id_holder(Util::make_str_type(sizeof(Data_Type)));
+            auto file_dtype_cset = Util::wrap(H5Tget_cset, reader_base.file_dtype_id_holder.id);
+            Util::wrap(H5Tset_cset, mem_dtype_id_holder.id, file_dtype_cset);
             reader_base.reader(mem_dtype_id_holder.id, out);
         }
         else // conversion needed
@@ -1409,6 +1429,8 @@ struct Writer< std::vector< In_Data_Type > >
 class File
 {
 public:
+    typedef std::map< std::string, std::string > Attr_Map;
+
     File() : _file_id(0) {}
     File(const std::string& file_name, bool rw = false) : _file_id(0) { open(file_name, rw); }
     File(const File&) = delete;
@@ -1421,7 +1443,7 @@ public:
 
     void create(const std::string& file_name, bool truncate = false)
     {
-        assert(not is_open());
+        if (is_open()) close();
         _file_name = file_name;
         _rw = true;
         _file_id = H5Fcreate(file_name.c_str(), truncate? H5F_ACC_TRUNC : H5F_ACC_EXCL, H5P_DEFAULT, H5P_DEFAULT);
@@ -1429,7 +1451,7 @@ public:
     }
     void open(const std::string& file_name, bool rw = false)
     {
-        assert(not is_open());
+        if (is_open()) close();
         _file_name = file_name;
         _rw = rw;
         _file_id = H5Fopen(file_name.c_str(), not rw? H5F_ACC_RDONLY : H5F_ACC_RDWR, H5P_DEFAULT);
@@ -1437,8 +1459,8 @@ public:
     }
     void close()
     {
-        assert(is_open());
-        assert(H5Fget_obj_count(_file_id, H5F_OBJ_ALL | H5F_OBJ_LOCAL) == 1);
+        if (not is_open()) return;
+        if (H5Fget_obj_count(_file_id, H5F_OBJ_ALL | H5F_OBJ_LOCAL) != 1) throw Exception(_file_name + ": HDF5 memory leak");
         int status = H5Fclose(_file_id);
         if (status < 0) throw Exception(_file_name + ": error in H5Fclose");
         _file_id = 0;
@@ -1466,49 +1488,55 @@ public:
     }
 
     /// Check if a group exists
-    bool group_exists(const std::string& loc_full_name) const
+    bool group_exists(std::string const & loc_full_name) const
     {
         assert(is_open());
-        assert(not loc_full_name.empty() and loc_full_name[0] == '/');
-        std::string loc_path;
-        std::string loc_name;
-        std::tie(loc_path, loc_name) = split_full_name(loc_full_name);
+        assert(not loc_full_name.empty() and loc_full_name.front() == '/');
+        if (loc_full_name == "/") return true;
+        auto && loc = split_full_name(loc_full_name);
         // check all path elements exist, except for what is to the right of the last '/'
         // sets active path
-        if (not path_exists(loc_path)) return false;
-        return check_object_type(loc_full_name, H5O_TYPE_GROUP);
+        return path_exists(loc.first) and check_object_type(loc_full_name, H5O_TYPE_GROUP);
     }
     /// Check if a dataset exists
-    bool dataset_exists(const std::string& loc_full_name) const
+    bool dataset_exists(std::string const & loc_full_name) const
     {
         assert(is_open());
-        assert(not loc_full_name.empty() and loc_full_name[0] == '/');
-        std::string loc_path;
-        std::string loc_name;
-        std::tie(loc_path, loc_name) = split_full_name(loc_full_name);
+        assert(not loc_full_name.empty() and loc_full_name.front() == '/');
+        if (loc_full_name == "/") return false;
+        auto && loc = split_full_name(loc_full_name);
         // check all path elements exist, except for what is to the right of the last '/'
         // sets active path
-        if (not path_exists(loc_path)) return false;
-        return check_object_type(loc_full_name, H5O_TYPE_DATASET);
+        return path_exists(loc.first) and check_object_type(loc_full_name, H5O_TYPE_DATASET);
+    }
+    bool group_or_dataset_exists(std::string const & loc_full_name) const
+    {
+        assert(is_open());
+        assert(not loc_full_name.empty() and loc_full_name.front() == '/');
+        if (loc_full_name == "/") return true;
+        auto && loc = split_full_name(loc_full_name);
+        // check all path elements exist, except for what is to the right of the last '/'
+        // sets active path
+        return (path_exists(loc.first) and
+                (check_object_type(loc_full_name, H5O_TYPE_DATASET) or
+                 check_object_type(loc_full_name, H5O_TYPE_GROUP)));
     }
     /// Check if attribute exists
-    bool attribute_exists(const std::string& loc_full_name) const
+    bool attribute_exists(std::string const & loc_full_name) const
     {
         assert(is_open());
-        assert(not loc_full_name.empty() and loc_full_name[0] == '/');
-        std::string loc_path;
-        std::string loc_name;
-        std::tie(loc_path, loc_name) = split_full_name(loc_full_name);
-        int status;
+        assert(not loc_full_name.empty() and loc_full_name.front() == '/');
+        if (loc_full_name == "/") return false;
+        auto && loc = split_full_name(loc_full_name);
         // check all path elements exist, except for what is to the right of the last '/'
         // sets active path
-        if (not path_exists(loc_path)) return false;
+        if (not group_or_dataset_exists(loc.first)) return false;
         // check if target is an attribute
-        status = H5Aexists_by_name(_file_id, loc_path.c_str(), loc_name.c_str(), H5P_DEFAULT);
+        int status = H5Aexists_by_name(_file_id, loc.first.c_str(), loc.second.c_str(), H5P_DEFAULT);
         if (status < 0) throw Exception("error in H5Aexists_by_name");
         return status > 0;
     }
-    bool exists(const std::string& loc_full_name) const
+    bool exists(std::string const & loc_full_name) const
     {
         return attribute_exists(loc_full_name) or dataset_exists(loc_full_name);
     }
@@ -1519,14 +1547,12 @@ public:
     {
         assert(is_open());
         assert(not loc_full_name.empty() and loc_full_name[0] == '/');
-        std::string loc_path;
-        std::string loc_name;
-        std::tie(loc_path, loc_name) = split_full_name(loc_full_name);
+        auto && loc = split_full_name(loc_full_name);
         Exception::active_path() = loc_full_name;
         detail::HDF_Object_Holder grp_id_holder(
-            detail::Util::wrap(H5Oopen, _file_id, loc_path.c_str(), H5P_DEFAULT),
+            detail::Util::wrap(H5Oopen, _file_id, loc.first.c_str(), H5P_DEFAULT),
             detail::Util::wrapped_closer(H5Oclose));
-        detail::Reader< Data_Storage >()(grp_id_holder.id, loc_name,
+        detail::Reader< Data_Storage >()(grp_id_holder.id, loc.second,
                                          out, std::forward< Args >(args)...);
     }
     /// Write attribute or dataset
@@ -1537,16 +1563,13 @@ public:
         assert(is_rw());
         assert(not loc_full_name.empty() and loc_full_name[0] == '/');
         assert(not exists(loc_full_name));
-        std::string loc_path;
-        std::string loc_name;
-        std::tie(loc_path, loc_name) = split_full_name(loc_full_name);
+        auto && loc = split_full_name(loc_full_name);
         Exception::active_path() = loc_full_name;
         detail::HDF_Object_Holder grp_id_holder;
-        std::string grp_path = loc_path != "/"? loc_path.substr(0, loc_path.size() - 1) : "/";
-        if (group_exists(grp_path) or dataset_exists(grp_path))
+        if (group_or_dataset_exists(loc.first))
         {
             grp_id_holder.load(
-                detail::Util::wrap(H5Oopen, _file_id, grp_path.c_str(), H5P_DEFAULT),
+                detail::Util::wrap(H5Oopen, _file_id, loc.first.c_str(), H5P_DEFAULT),
                 detail::Util::wrapped_closer(H5Oclose));
         }
         else
@@ -1556,10 +1579,10 @@ public:
                 detail::Util::wrapped_closer(H5Pclose));
             detail::Util::wrap(H5Pset_create_intermediate_group, lcpl_id_holder.id, 1);
             grp_id_holder.load(
-                detail::Util::wrap(H5Gcreate2, _file_id, grp_path.c_str(), lcpl_id_holder.id, H5P_DEFAULT, H5P_DEFAULT),
+                detail::Util::wrap(H5Gcreate2, _file_id, loc.first.c_str(), lcpl_id_holder.id, H5P_DEFAULT, H5P_DEFAULT),
                 detail::Util::wrapped_closer(H5Gclose));
         }
-        detail::Writer< In_Data_Storage >()(grp_id_holder.id, loc_name, as_ds, in, std::forward< Args >(args)...);
+        detail::Writer< In_Data_Storage >()(grp_id_holder.id, loc.second, as_ds, in, std::forward< Args >(args)...);
     }
     template < typename In_Data_Storage, typename ...Args >
     void write_dataset(const std::string& loc_full_name, const In_Data_Storage& in, Args&& ...args) const
@@ -1601,7 +1624,7 @@ public:
     {
         std::vector< std::string > res;
         Exception::active_path() = loc_full_name;
-        assert(group_exists(loc_full_name) or dataset_exists(loc_full_name));
+        assert(group_or_dataset_exists(loc_full_name));
         detail::HDF_Object_Holder id_holder(
             detail::Util::wrap(H5Oopen, _file_id, loc_full_name.c_str(), H5P_DEFAULT),
             detail::Util::wrapped_closer(H5Oclose));
@@ -1619,6 +1642,43 @@ public:
         }
         return res;
     } // get_attr_list
+    Attr_Map
+    get_attr_map(std::string const & path, bool recurse = false) const
+    {
+        Attr_Map res;
+        std::queue< std::string > q;
+        q.push("");
+        while (not q.empty())
+        {
+            auto pt = q.front();
+            q.pop();
+            auto full_path = pt.empty()? path : path + "/" + pt;
+            auto a_list = get_attr_list(full_path);
+            for (auto const & a : a_list)
+            {
+                std::string tmp;
+                read(full_path + "/" + a, tmp);
+                res[pt.empty()? a : pt + "/" + a] = tmp;
+            }
+            if (recurse and group_exists(full_path))
+            {
+                auto sg_l = list_group(full_path);
+                for (auto const & sg : sg_l)
+                {
+                    q.push(pt.empty()? sg : pt + "/" + sg);
+                }
+            }
+        }
+        return res;
+    } // get_attr_map()
+    void
+    add_attr_map(std::string const & path, Attr_Map const & attr_m) const
+    {
+        for (auto const & p : attr_m)
+        {
+            write_attribute(path + "/" + p.first, p.second);
+        }
+    } // add_attr_map()
     /// Return a list of struct field names in the given dataset/attribute
     std::vector< std::string > get_struct_members(const std::string& loc_full_name) const
     {
@@ -1630,11 +1690,9 @@ public:
         detail::HDF_Object_Holder type_id_holder;
         if (attribute_exists(loc_full_name))
         {
-            std::string loc_path;
-            std::string loc_name;
-            std::tie(loc_path, loc_name) = split_full_name(loc_full_name);
+            auto && loc = split_full_name(loc_full_name);
             attr_id_holder.load(
-                detail::Util::wrap(H5Aopen_by_name, _file_id, loc_path.c_str(), loc_name.c_str(),
+                detail::Util::wrap(H5Aopen_by_name, _file_id, loc.first.c_str(), loc.second.c_str(),
                                    H5P_DEFAULT, H5P_DEFAULT),
                 detail::Util::wrapped_closer(H5Aclose));
             type_id_holder.load(
@@ -1664,35 +1722,173 @@ public:
         return res;
     } // get_struct_members
 
+    /*
+    static void copy(File & src_f, File & dst_f, std::string const & path, bool shallow = false)
+    {
+        assert(src_f.is_open());
+        assert(dst_f.is_open());
+        assert(dst_f.is_rw());
+        assert(src_f.group_exists(path) or src_f.dataset_exists(path));
+        assert(not (dst_f.group_exists(path) or dst_f.dataset_exists(path)));
+        detail::HDF_Object_Holder ocpypl_id_holder(
+            detail::Util::wrap(H5Pcreate, H5P_OBJECT_COPY),
+            detail::Util::wrapped_closer(H5Pclose));
+        auto status = hdf5::H5Pset_copy_object(ocpypl_id_holder.id, H5O_COPY_MERGE_COMMITTED_DTYPE_FLAG);
+        if (status < 0) throw Exception("error in H5Pset_copy_object");
+        if (shallow)
+        {
+            status = hdf5::H5Pset_copy_object(ocpypl_id_holder.id, H5O_COPY_SHALLOW_HIERARCHY_FLAG);
+            if (status < 0) throw Exception("error in H5Pset_copy_object");
+        }
+        auto res = hdf5::H5Ocopy(src_f._file_id, path.c_str(),
+                                 dst_f._file_id, path.c_str(),
+                                 ocpypl_id_holder.id, H5P_DEFAULT);
+        if (res < 0) throw Exception("error in H5Ocopy");
+    } // copy
+    */
+
+    static void copy_attribute(File const & src_f, File const & dst_f,
+                               std::string const & src_full_path, std::string const & _dst_full_path = std::string())
+    {
+        if (not src_f.is_open()) throw Exception("source file not open");
+        if (not dst_f.is_open()) throw Exception("destination file not open");
+        if (not dst_f.is_rw()) throw Exception("destination file not writeable");
+        std::string const & dst_full_path = (_dst_full_path.empty()? src_full_path : _dst_full_path);
+        if (not src_f.attribute_exists(src_full_path)) throw Exception("source attribute missing");
+        if (dst_f.group_or_dataset_exists(dst_full_path) or
+            dst_f.attribute_exists(dst_full_path)) throw Exception("destination path exists");
+        // compute paths
+        auto && src_path = split_full_name(src_full_path);
+        auto && dst_path = split_full_name(dst_full_path);
+        // open source attribute
+        detail::HDF_Object_Holder src_attr_id_holder(
+            detail::Util::wrap(H5Aopen_by_name, src_f._file_id, src_path.first.c_str(), src_path.second.c_str(),
+                               H5P_DEFAULT, H5P_DEFAULT),
+            detail::Util::wrapped_closer(H5Aclose));
+        // open source attribute datatype
+        detail::HDF_Object_Holder src_attr_dtype_id_holder(
+            detail::Util::wrap(H5Aget_type, src_attr_id_holder.id),
+            detail::Util::wrapped_closer(H5Tclose));
+        if (hdf5::H5Tget_class(src_attr_dtype_id_holder.id) == H5T_INTEGER)
+        {
+            if (hdf5::H5Tget_sign(src_attr_dtype_id_holder.id) == H5T_SGN_NONE)
+            {
+                unsigned long long tmp;
+                src_f.read(src_full_path, tmp);
+                dst_f.write_attribute(dst_full_path, tmp, src_attr_dtype_id_holder.id);
+            }
+            else if (hdf5::H5Tget_sign(src_attr_dtype_id_holder.id) == H5T_SGN_2)
+            {
+                long long tmp;
+                src_f.read(src_full_path, tmp);
+                dst_f.write_attribute(dst_full_path, tmp, src_attr_dtype_id_holder.id);
+            }
+            else
+            {
+                throw Exception("error in H5Tget_sign");
+            }
+        }
+        else if (hdf5::H5Tget_class(src_attr_dtype_id_holder.id) == H5T_FLOAT)
+        {
+            long double tmp;
+            src_f.read(src_full_path, tmp);
+            dst_f.write_attribute(dst_full_path, tmp, src_attr_dtype_id_holder.id);
+        }
+        else if (hdf5::H5Tget_class(src_attr_dtype_id_holder.id) == H5T_STRING)
+        {
+            std::string tmp;
+            src_f.read(src_full_path, tmp);
+            auto is_varlen = hdf5::H5Tis_variable_str(src_attr_dtype_id_holder.id);
+            if (is_varlen < 0) throw Exception("error in H5Tis_variable_str");
+            if (is_varlen)
+            {
+                dst_f.write_attribute(dst_full_path, tmp, -1);
+            }
+            else
+            {
+                // not varlen; now deal with array-of-size-1 chars
+                int sz = hdf5::H5Tget_size(src_attr_dtype_id_holder.id);
+                if (sz == 0) throw Exception("error in H5Tget_size");
+                detail::HDF_Object_Holder src_attr_dspace_id_holder(
+                    detail::Util::wrap(H5Aget_space, src_attr_id_holder.id),
+                    detail::Util::wrapped_closer(H5Sclose));
+                auto dspace_type = hdf5::H5Sget_simple_extent_type(src_attr_dspace_id_holder.id);
+                if (dspace_type == H5S_SCALAR)
+                {
+                    dst_f.write_attribute(dst_full_path, tmp, 0);
+                }
+                else if (dspace_type == H5S_SIMPLE)
+                {
+                    if (sz != 1) throw Exception("unsupported attribute type for copying: extent of string of size > 1");
+                    std::vector< char[1] > tmp_v(tmp.size());
+                    for (unsigned i = 0; i < tmp.size(); ++i)
+                    {
+                        tmp_v[i][0] = tmp[i];
+                    }
+                    dst_f.write_attribute(dst_full_path, tmp_v);
+                }
+                else
+                {
+                    throw Exception("error in H5Sget_simple_extent_type");
+                }
+            }
+        }
+        else
+        {
+            throw Exception("unsupported attribute type for copying");
+        }
+    } // copy_attribute
+
+    static void
+    copy_attributes(File const & src_f, File const & dst_f, std::string const & path, bool recurse = false)
+    {
+        auto a_l = src_f.get_attr_list(not path.empty()? path : std::string("/"));
+        for (auto const & a : a_l)
+        {
+            copy_attribute(src_f, dst_f, path + "/" + a);
+        }
+        if (not recurse) return;
+        auto sg_l = src_f.list_group(not path.empty()? path : std::string("/"));
+        for (auto const & sg : sg_l)
+        {
+            if (src_f.group_exists(path + "/" + sg))
+            {
+                copy_attributes(src_f, dst_f, path + "/" + sg, true);
+            }
+        }
+    } // copy_attributes()
 private:
     std::string _file_name;
     hid_t _file_id;
     bool _rw;
 
     /// Split a full name into path and name
+    /// full_name must begin with '/', and not end with '/' unless it equals "/"
     static std::pair< std::string, std::string > split_full_name(const std::string& full_name)
     {
-        auto last_slash_pos = full_name.find_last_of('/');
-        std::string path = last_slash_pos != std::string::npos? full_name.substr(0, last_slash_pos + 1) : std::string();
-        std::string name = last_slash_pos != std::string::npos? full_name.substr(last_slash_pos + 1) : full_name;
-        return std::make_pair(path, name);
+        assert(not full_name.empty() and
+               full_name.front() == '/' and
+               (full_name.size() == 1 or full_name.back() != '/'));
+        if (full_name == "/") return std::make_pair(std::string("/"), std::string());
+        auto pos = full_name.find_last_of('/');
+        return (pos != std::string::npos
+                ? std::make_pair(full_name.substr(0, pos > 0? pos : 1), full_name.substr(pos + 1))
+                : std::make_pair(std::string(), std::string()));
     } // split_full_name
 
     /// Determine if a path to an element exists
-    bool path_exists(const std::string& full_path_name) const
+    bool path_exists(std::string const & full_path_name) const
     {
         assert(is_open());
-        assert(not full_path_name.empty()
-               and full_path_name[0] == '/'
-               and full_path_name[full_path_name.size() - 1] == '/');
+        assert(not full_path_name.empty() and full_path_name.front() == '/');
+        if (full_path_name == "/") return true;
         Exception::active_path() = full_path_name;
         // check all path elements exist, except for what is to the right of the last '/'
         size_t pos = 0;
-        while (true)
+        while (pos != std::string::npos)
         {
             ++pos;
             pos = full_path_name.find('/', pos);
-            if (pos == std::string::npos) break;
             std::string tmp = full_path_name.substr(0, pos);
             // check link exists
             if (not detail::Util::wrap(H5Lexists, _file_id, tmp.c_str(), H5P_DEFAULT)) return false;
@@ -1711,7 +1907,7 @@ private:
     } // path_exists()
 
     /// Check if a group exists
-    bool check_object_type(const std::string& loc_full_name, H5O_type_t type_id) const
+    bool check_object_type(std::string const & loc_full_name, H5O_type_t type_id) const
     {
         // check link exists
         if (loc_full_name != "/"
diff --git a/src/huffman-decode.cpp b/src/huffman-decode.cpp
new file mode 100644
index 0000000..80af4a7
--- /dev/null
+++ b/src/huffman-decode.cpp
@@ -0,0 +1,55 @@
+//
+// Part of: https://github.com/mateidavid/fast5
+//
+// Copyright (c) 2015-2017 Matei David, Ontario Institute for Cancer Research
+// MIT License
+//
+
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "fast5_pack.hpp"
+#include "logger.hpp"
+
+using namespace std;
+
+int main(int argc, char * argv[])
+{
+    logger::Logger::set_default_level(logger::level::debug);
+    if (argc != 2)
+    {
+        cerr << "use: " << argv[0] << " <codeword_file>" << endl;
+        exit(EXIT_FAILURE);
+    }
+    string cw_fn = argv[1];
+    ifstream cw_f(cw_fn);
+    fast5_pack::Huffman_Diff_Coder hc(cw_f, cw_fn);
+    string l;
+    map< string, string > cw_v_id;
+    vector< uint8_t > cw_v;
+    while (getline(cin, l))
+    {
+        if (l[0] == '#')
+        {
+            istringstream iss(l.substr(1));
+            string tmp0;
+            string tmp1;
+            getline(iss, tmp0, '=');
+            iss >> tmp1;
+            cw_v_id[tmp0] = tmp1;
+        }
+        else
+        {
+            unsigned x;
+            istringstream(l) >> x;
+            cw_v.push_back(x);
+        }
+    }
+    auto val_v = hc.decode<int16_t>(cw_v, cw_v_id);
+    for (auto x : val_v)
+    {
+        cout << x << endl;
+    }
+}
diff --git a/src/huffman-encode.cpp b/src/huffman-encode.cpp
new file mode 100644
index 0000000..20e4c33
--- /dev/null
+++ b/src/huffman-encode.cpp
@@ -0,0 +1,44 @@
+//
+// Part of: https://github.com/mateidavid/fast5
+//
+// Copyright (c) 2015-2017 Matei David, Ontario Institute for Cancer Research
+// MIT License
+//
+
+#include <iostream>
+#include <fstream>
+#include <string>
+#include <vector>
+
+#include "fast5_pack.hpp"
+#include "logger.hpp"
+
+using namespace std;
+
+int main(int argc, char * argv[])
+{
+    logger::Logger::set_default_level(logger::level::debug);
+    if (argc != 2)
+    {
+        cerr << "use: " << argv[0] << " <codeword_file>" << endl;
+        exit(EXIT_FAILURE);
+    }
+    string cw_fn = argv[1];
+    ifstream cw_f(cw_fn);
+    fast5_pack::Huffman_Diff_Coder hc(cw_f, cw_fn);
+    int16_t x;
+    std::vector< int16_t > val_v;
+    while (cin >> x)
+    {
+        val_v.push_back(x);
+    }
+    auto p = hc.encode(val_v);
+    for (auto const & p2 : p.second)
+    {
+        cout << "#" << p2.first << "=" << p2.second << endl;
+    }
+    for (auto y : p.first)
+    {
+        cout << (int)y << endl;
+    }
+}
diff --git a/src/hufftk b/src/hufftk
new file mode 100755
index 0000000..9706b2e
--- /dev/null
+++ b/src/hufftk
@@ -0,0 +1,171 @@
+#!/usr/bin/env python2
+
+import argparse
+import bisect
+import collections
+import logging
+import os
+
+import fast5
+
+import signal
+signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+
+def construct_codewords(val_d, cw_d, n, s, rg):
+    if n < rg[1] + 1:
+        cw_d[n] = s
+    else:
+        construct_codewords(val_d, cw_d, val_d[n][1][0], s + "0", rg)
+        construct_codewords(val_d, cw_d, val_d[n][1][1], s + "1", rg)
+
+def codeword_sort(s0, s1):
+    if len(s0) == len(s1):
+        return [-1, 1][s0 > s1]
+    else:
+        return len(s0) - len(s1)
+
+def load_rw(args):
+    # construct histogram
+    val_d = {val: [1, None] for val in range(args.rw_range[0] - 1, args.rw_range[1] + 1)}
+    #val_d[args.rw_range[0] - 1] = [1, None]
+    for fn in args.input:
+        f = fast5.File(fn)
+        if not f.have_raw_samples():
+            continue
+        a = f.get_raw_int_samples()
+        for i in range(1, len(a)):
+            val = a[i] - a[i-1]
+            if args.rw_range[0] <= val and val <= args.rw_range[1]:
+                val_idx = val
+            else:
+                val_idx = args.rw_range[0] - 1
+            val_d[val_idx][0] += 1
+    return val_d
+
+def load_ed_len(args):
+    """Count eventdetection event lengths over args.input; returns {value: [count, None]}."""
+    val_d = {val: [1, None] for val in range(args.ed_len_range[0] - 1, args.ed_len_range[1] + 1)}
+    # every count starts at 1; key ed_len_range[0] - 1 collects all out-of-range lengths
+    val_sum, val_cnt = 0, 0
+    for fn in args.input:
+        f = fast5.File(fn)
+        if not f.have_eventdetection_events(args.gr):
+            continue
+        d = f.get_eventdetection_events(args.gr)
+        for e in d:
+            val = e.length
+            val_sum += val
+            val_cnt += 1
+            if args.ed_len_range[0] <= val and val <= args.ed_len_range[1]:
+                val_idx = val
+            else:
+                val_idx = args.ed_len_range[0] - 1
+            val_d[val_idx][0] += 1
+    logger.debug("mean val: " + str(float(val_sum)/val_cnt if val_cnt else float("nan")))  # no events: avoid ZeroDivisionError
+    return val_d
+
+def load_fq_qv(args):
+    """Count fastq quality values (ord(c) - 33) over args.input; returns {value: [count, None]}."""
+    val_d = {val: [1, None] for val in range(args.fq_qv_range[0] - 1, args.fq_qv_range[1] + 1)}
+    val_sum, val_cnt = 0, 0
+    for fn in args.input:
+        f = fast5.File(fn)
+        if not f.have_basecall_fastq(0, args.gr):
+            continue
+        fq = f.get_basecall_fastq(0, args.gr)
+        qv = fq.split("\n")[3]  # 4th line of the record; split() on any whitespace breaks on headers with spaces
+        for c in qv:
+            val = ord(c) - 33
+            val_sum += val
+            val_cnt += 1
+            if args.fq_qv_range[0] <= val and val <= args.fq_qv_range[1]:
+                val_idx = val
+            else:
+                val_idx = args.fq_qv_range[0] - 1  # out-of-range values share this bucket
+            val_d[val_idx][0] += 1
+    logger.debug("mean val: " + str(float(val_sum)/val_cnt if val_cnt else float("nan")))  # no values: avoid ZeroDivisionError
+    return val_d
+
+def run_build_tree(val_d, rg):
+    # dump histogram
+    #for val in range(-args.range_width, args.range_width + 1):
+    #    print('%s\t%s\t%s' % (val, val_d[val], float(val_d[val])/val_count))
+    # initialize codes
+    kw_l = [(val_d[val][0], val) for val in range(rg[0] - 1, rg[1] + 1)]
+    kw_l.sort()
+    logger.debug("smallest frequency: " + str(kw_l[:10]))
+    logger.debug("highest frequency: " + str(kw_l[-10:]))
+    next_node = rg[1] + 1
+    # main loop
+    while len(kw_l) > 1:
+        e = list()
+        for i in range(2):
+            e.append(kw_l[0])
+            del kw_l[0]
+        logger.debug('e=' + str(e))
+        val_d[next_node] = [e[0][0] + e[1][0], [e[0][1], e[1][1]]]
+        logger.debug('next_node=' + str(next_node) + ' val=' + str(val_d[next_node]))
+        bisect.insort(kw_l, (val_d[next_node][0], next_node))
+        next_node += 1
+    # construct codewords
+    assert kw_l[0][1] > rg[1]
+    cw_d = dict()
+    construct_codewords(val_d, cw_d, kw_l[0][1], "", rg)
+    cw_key_l = cw_d.keys()
+    cw_key_l.sort(lambda w0, w1: codeword_sort(cw_d[w0], cw_d[w1]))
+    print("{")
+    for cw in cw_key_l:
+        print('"%s", "%s",' % (['.', cw][cw != rg[0] - 1], cw_d[cw]))
+    print("}")
+
+if __name__ == "__main__":
+    description = """
+    Toolkit for constructing Huffman codes for encoding fast5 files.
+    """
+    parser = argparse.ArgumentParser(description=description, epilog="")
+    parser.add_argument("--log-level", default="info", help="log level")
+    parser.add_argument("--gr", type=str, default="", help="Group")
+    parser.add_argument("--rw-range", default=[-100,100],
+                        help="Encoding range for raw sample differences.")
+    parser.add_argument("--ed-skip-range", default=[0,1],
+                        help="Encoding range for ed skip values.")
+    parser.add_argument("--ed-len-range", default=[1,100],
+                        help="Encoding range for ed length values.")
+    parser.add_argument("--fq-qv-range", default=[0,31],
+                        help="Encoding range for fq qv values.")
+    parser.add_argument("command", choices=["rw", "ed-skip", "ed-len", "fq-qv"])
+    parser.add_argument("input", nargs="*", help="Fast5 file")
+    args = parser.parse_args()
+
+    # Map the --log-level name onto a logging constant; reject unknown names.
+    numeric_log_level = getattr(logging, args.log_level.upper(), None)
+    if not isinstance(numeric_log_level, int):
+        raise ValueError("Invalid log level: '%s'" % args.log_level)
+    logging.basicConfig(level=numeric_log_level,
+                        format="%(asctime)s %(name)s.%(levelname)s %(message)s",
+                        datefmt="%Y/%m/%d %H:%M:%S")
+    logger = logging.getLogger(os.path.basename(__file__))
+    logger.debug("args: " + str(args))
+
+    # Defaults are already [lo, hi] lists; command-line values arrive as "lo,hi"
+    # strings and are parsed here (fq_qv_range included, so load_fq_qv gets ints).
+    for rg_name in ("rw_range", "ed_skip_range", "ed_len_range", "fq_qv_range"):
+        rg = getattr(args, rg_name)
+        if type(rg) != list:
+            rg = list((int(i) for i in rg.split(',')))[:2]
+            assert rg[0] < rg[1]
+            setattr(args, rg_name, rg)
+    logger.debug("args: " + str(args))
+
+    # Build the histogram for the chosen command and print its codeword table.
+    if args.command == "rw":
+        d = load_rw(args)
+        run_build_tree(d, args.rw_range)
+    elif args.command == "ed-len":
+        d = load_ed_len(args)
+        run_build_tree(d, args.ed_len_range)
+    elif args.command == "fq-qv":
+        d = load_fq_qv(args)
+        run_build_tree(d, args.fq_qv_range)
+    else:
+        logger.warning("command not implemented: " + args.command)
diff --git a/src/logger.hpp b/src/logger.hpp
new file mode 100644
index 0000000..2c69592
--- /dev/null
+++ b/src/logger.hpp
@@ -0,0 +1,378 @@
+/// Part of: https://github.com/mateidavid/hpptools
+/// Commit: 5a6f39c
+
+/// @author    Matei David, Ontario Institute for Cancer Research
+/// @version   1.0
+/// @date      2013-2017
+/// @copyright MIT Public License
+///
+/// Logger mechanism.
+///
+/// Properties:
+/// - thread-safe, non-garbled output (uses c++11's thread_local)
+/// - customizable ostream sink. by default, uses std::clog
+///
+/// Exports:
+/// - macro: LOG (takes 1, 2, or 3 arguments, see below)
+/// - macros: LOG_EXIT_, LOG_ABORT, LOG_EXIT, LOG_THROW_, and LOG_THROW
+/// - namespace logger
+/// - enum logger::level
+/// - class logger::Logger
+///
+/// To use:
+/// - In source code, use:
+///
+///     LOG(info) << "hello" << endl;
+///     // or 
+///     LOG("main", info) << "hello" << endl;
+///     // or 
+///     LOG("main", info, sink_os) << "hello" << endl;
+///
+///   Here, "main" is the facility (a string) and info is the message level.
+///   Note that LOG is a macro which knows how to look up the name info
+///   inside the logger namespace. The macro introduces C++ code equivalent to:
+///
+///     if (...message should be ignored...) then; else sink_os
+///
+///   NOTE: As with assert(), the code in the output stream following the
+///   LOG() macro will ***not be executed*** if the level of the message is
+///   numerically greater (i.e. more verbose) than the log level of the facility.
+///
+/// - To set the default log level (for unspecified facilities):
+///
+///     logger::Logger::set_default_level(s);  // s: level name or number, e.g. "info" or "2"
+///
+/// - To set the log level for the "main" facility:
+///
+///     logger::Logger::set_facility_level("main", s);  // s: level name or number
+///
+/// - To parse a log facility level setting in the form "[<facility>:]<level>":
+///
+///     logger::Logger::set_level_from_option("alt:debug1", &cerr);
+///
+/// - By using these functions, one can set log levels using command-line
+///   parameters and achieve dynamic log level settings without recompiling.
+///
+/// - The macros LOG_EXIT_, LOG_ABORT, LOG_EXIT, LOG_THROW_, and LOG_THROW
+///   provide a way to specify what to do after logging the message.
+///
+///     LOG_EXIT_(exit_code)  << "print this message to std::cerr, call std::exit(exit_code)";
+///     LOG_ABORT             << "print this message to std::cerr, call std::abort()";
+///     LOG_EXIT              << "print this message to std::cerr, call std::exit(EXIT_FAILURE)";
+///     LOG_THROW_(Exception) << "throw Exception with this message";
+///     LOG_THROW             << "throw std::runtime_error with this message";
+
+#ifndef __LOGGER_HPP
+#define __LOGGER_HPP
+
+#include <string>
+#include <vector>
+#include <map>
+#include <sstream>
+#include <iostream>
+#include <mutex>
+#include <stdexcept>
+
+namespace logger
+{
+
+// Log levels, numbered error = 0 up to debug2 = 5; the LOG macros drop a message whose level is greater than its facility's level.
+enum level
+{
+    error = 0,  // lowest numeric value
+    warning,
+    info,
+    debug,
+    debug1,
+    debug2      // highest numeric value
+};
+
+class Logger  // One-shot message builder: text is streamed into an internal buffer and acted on when the object is destroyed.
+{
+public:
+    // Logging constructor: buffers the prefix "= <facility>.<level> <file>:<line> <func> "; the destructor writes the buffer to os.
+    Logger(std::string const & facility, level msg_level,
+           std::string const & file_name, unsigned line_num, std::string const & func_name,
+           std::ostream & os = std::clog)
+        : _os_p(&os)
+    {
+        _oss << "= " << facility << "." << int(msg_level)
+             << " " << file_name << ":" << line_num << " " << func_name << " ";
+        _on_destruct = [&] () {
+            _os_p->write(_oss.str().c_str(), _oss.str().size());
+        };
+    }
+    // Exiting constructor: the destructor writes the buffer to os, then calls std::abort() if exit_code < 0, else std::exit(exit_code).
+    Logger(int exit_code,
+           std::string const & file_name, unsigned line_num, std::string const & func_name,
+           std::ostream & os = std::cerr)
+        : _os_p(&os), _exit_code(exit_code)
+    {
+        _oss << file_name << ":" << line_num << " " << func_name << " ";
+        _on_destruct = [&] () {
+            _os_p->write(_oss.str().c_str(), _oss.str().size());
+            if (_exit_code < 0)
+            {
+                std::abort();
+            }
+            else
+            {
+                std::exit(_exit_code);
+            }
+        };
+    }
+    // Throwing constructor: the destructor throws Exception built from the buffered text; nothing is written to a stream.
+    // The first argument only serves to deduce Exception; the overload is enabled only for types derived from std::exception.
+    template <typename Exception>
+    Logger(Exception const &,
+           std::string const & file_name, unsigned line_num, std::string const & func_name,
+           typename std::enable_if<std::is_base_of<std::exception, Exception>::value>::type * = 0)
+    {
+        _oss << file_name << ":" << line_num << " " << func_name << " ";
+        _on_destruct = [&] () {
+            throw Exception(_oss.str());
+        };
+    }
+    // Destructor: runs the action installed by the constructor; declared noexcept(false) because that action may throw.
+    ~Logger() noexcept(false)
+    {
+        _on_destruct();
+    }
+    // Returns the internal buffer as an ostream l-value so the macros can chain operator<< onto a temporary Logger.
+    std::ostream & l_value() { return _oss; }
+
+    // Static accessors for the default and per-facility levels. NOTE(review): setters lock a function-local mutex, getters read without it.
+    static level get_default_level()
+    {
+        return default_level();
+    }
+    static void set_default_level(level l)
+    {
+        static std::mutex m;
+        std::lock_guard<std::mutex> lg(m);
+        default_level() = l;
+    }
+    static void set_default_level(int l)
+    {
+        set_default_level(get_level(l));
+    }
+    static void set_default_level(std::string const & s)
+    {
+        set_default_level(get_level(s));
+    }
+    static level get_facility_level(std::string const & facility)  // falls back to the default level when the facility has no entry
+    {
+        return (facility_level_map().count(facility) > 0?
+                facility_level_map().at(facility) : get_default_level());
+    }
+    static void set_facility_level(std::string const & facility, level l)
+    {
+        static std::mutex m;
+        std::lock_guard<std::mutex> lg(m);
+        facility_level_map()[facility] = l;
+    }
+    static void set_facility_level(std::string const & facility, int l)
+    {
+        set_facility_level(facility, get_level(l));
+    }
+    static void set_facility_level(std::string const & facility, std::string const & s)
+    {
+        set_facility_level(facility, get_level(s));
+    }
+    // Parses "[<facility>:]<level>": without ':' sets the default level, otherwise that facility's level; reports the result to *os_p if non-null.
+    static void set_level_from_option(std::string const & l, std::ostream * os_p = nullptr)
+    {
+        size_t i = l.find(':');
+        if (i == std::string::npos)
+        {
+            set_default_level(l);
+            if (os_p)
+            {
+                (*os_p) << "set default log level to: "
+                        << static_cast<int>(Logger::get_default_level()) << std::endl;
+            }
+        }
+        else
+        {
+            set_facility_level(l.substr(0, i), l.substr(i + 1));
+            if (os_p)
+            {
+                (*os_p) << "set log level of '" << l.substr(0, i) << "' to: "
+                        << static_cast<int>(Logger::get_facility_level(l.substr(0, i))) << std::endl;
+            }
+        }
+    }
+    static void set_levels_from_options(std::vector<std::string> const & v, std::ostream * os_p = nullptr)
+    {
+        for (auto const & l : v)
+        {
+            set_level_from_option(l, os_p);
+        }
+    }
+    // Public static helpers (used by the LOG macros) that normalize a level given as enum, int, or string.
+    static level get_level(level l) { return l; }
+    static level get_level(int i) { return static_cast<level>(i); }  // unchecked cast: no range validation
+    static level get_level(std::string const & s) { return level_from_string(s); }
+    // Per-thread slot in which the LOG macros store the message level between their statements.
+    static level& thread_local_last_level()
+    {
+        static thread_local level _last_level = error;
+        return _last_level;
+    }
+private:
+    std::ostringstream _oss;             // message buffer returned by l_value()
+    std::function<void()> _on_destruct;  // action run by the destructor; NOTE(review): <functional> is not among this header's includes
+    std::ostream * _os_p;                // output sink; not initialized by the throwing constructor, whose action does not use it
+    int _exit_code;                      // set only by the exiting constructor; the other constructors leave it uninitialized and unused
+
+    // Shared settings, held in function-local statics.
+    static level & default_level()
+    {
+        static level _default_level = error;  // until changed, only level-0 (error) messages pass the LOG filter
+        return _default_level;
+    }
+    static std::map<std::string, level> & facility_level_map()
+    {
+        static std::map<std::string, level> _facility_level_map;
+        return _facility_level_map;
+    }
+    // Converts a string to a level: a leading integer is cast as-is; otherwise s must be one of the six level names, else std::invalid_argument is thrown.
+    static level level_from_string(std::string const & s)
+    {
+        std::istringstream iss(s + "\n");  // the appended newline keeps the stream good() (no EOF) after a successful integer read
+        int tmp_int = -1;
+        iss >> tmp_int;
+        if (iss.good())
+        {
+            return level(tmp_int);  // NOTE(review): value is not range-checked against the enum
+        }
+        else
+        {
+            if (s == "error") return logger::error;
+            else if (s == "warning") return logger::warning;
+            else if (s == "info") return logger::info;
+            else if (s == "debug") return logger::debug;
+            else if (s == "debug1") return logger::debug1;
+            else if (s == "debug2") return logger::debug2;
+            else
+            {
+                std::ostringstream oss;
+                oss << "could not parse log level: " << s;
+                throw std::invalid_argument(oss.str());
+            }
+        }
+    }
+}; // class Logger
+
+} //namespace logger
+
+#define __FILENAME__ (std::string(__FILE__).find('/') != std::string::npos? std::string(__FILE__).substr(std::string(__FILE__).rfind('/') + 1) : std::string(__FILE__)) // __FILE__ with everything up to the last '/' removed, as a std::string
+
+/**
+ * LOG macro
+ *
+ * Synopsis:
+ *   LOG(facility, level_spec, sink) << message
+ *   LOG(facility, level_spec) << message
+ *   LOG(level_spec) << message
+ *
+ *   `facility`   : string
+ *   `level_spec` : integer, string, or logger level
+ *   `sink`       : sink ostream
+ * 
+ * Log to `facility` at logger level `level_spec` and dump output to `sink`.
+ * If sink is omitted, it defaults to std::clog.
+ * If `facility` is omitted (LOG has a single argument), the macro LOG_FACILITY
+ * is used instead, defaulting to "main".
+ */
+
+#define __LOG_3(facility, level_spec, sink)                                   \
+    { using namespace logger; logger::Logger::thread_local_last_level() = logger::Logger::get_level(level_spec); } \
+    if (logger::Logger::thread_local_last_level() > logger::Logger::get_facility_level(facility)) ; \
+    else logger::Logger(facility, logger::Logger::thread_local_last_level(), __FILENAME__, __LINE__, __func__, sink).l_value()
+
+#define __LOG_2(facility, level_spec)                                   \
+    { using namespace logger; logger::Logger::thread_local_last_level() = logger::Logger::get_level(level_spec); } \
+    if (logger::Logger::thread_local_last_level() > logger::Logger::get_facility_level(facility)) ; \
+    else logger::Logger(facility, logger::Logger::thread_local_last_level(), __FILENAME__, __LINE__, __func__).l_value()
+
+#define __LOG_1(level_spec) \
+    __LOG_2(LOG_FACILITY, level_spec) // one-argument form: the facility is taken from LOG_FACILITY
+
+// __LOG_aux1 -> __LOG_aux2: we need 2-level indirection so that __NARGS(...) is expanded before it is pasted onto __LOG_
+// http://stackoverflow.com/questions/1597007/creating-c-macro-with-and-line-token-concatenation-with-positioning-macr
+// http://stackoverflow.com/a/11763196/717706
+#ifdef WIN32
+#define __EXPAND(...) __VA_ARGS__
+#define __LOG_aux2(N, ...) __EXPAND(__LOG_ ## N (__VA_ARGS__))
+#else
+#define __LOG_aux2(N, ...) __LOG_ ## N (__VA_ARGS__)
+#endif
+
+#define __LOG_aux1(N, ...) __LOG_aux2(N, __VA_ARGS__)
+
+#define __NARGS_AUX(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, ...) _9 // selects its 10th argument
+
+#ifdef WIN32
+#define __NARGS(...) __EXPAND(__NARGS_AUX(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0))
+#else
+#define __NARGS(...) __NARGS_AUX(__VA_ARGS__, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 0) // the padding makes the 10th argument equal the argument count (1..9)
+#endif
+
+#ifndef LOG_FACILITY
+#define LOG_FACILITY "main"
+#endif
+
+#define LOG(...) __LOG_aux1(__NARGS(__VA_ARGS__), __VA_ARGS__) // dispatches to __LOG_1, __LOG_2 or __LOG_3 by argument count
+
+#define LOG_EXIT_(exit_code) logger::Logger((exit_code), __FILENAME__, __LINE__, __func__).l_value()
+#define LOG_ABORT LOG_EXIT_(-1) // a negative exit code selects std::abort() in Logger
+#define LOG_EXIT LOG_EXIT_(EXIT_FAILURE)
+
+#define LOG_THROW_(Exception) logger::Logger(Exception(""), __FILENAME__, __LINE__, __func__).l_value() // Exception("") is only a type tag
+#define LOG_THROW LOG_THROW_(std::runtime_error)
+
+#endif
+
+#ifdef SAMPLE_LOGGER
+
+/*
+
+Compile:
+
+g++ -std=c++11 -D SAMPLE_LOGGER -x c++ logger.hpp -o sample-logger
+
+Run:
+./sample-logger info
+./sample-logger info alt:debug1
+
+*/
+
+using namespace std;
+
+int main(int argc, char* argv[])  // demo driver; this code is compiled only when SAMPLE_LOGGER is defined
+{
+    if (argc < 2)
+    {
+        cerr << "Use: " << argv[0] << " <log_level_setting> ..." << endl
+             << "The program sends 5 log messages with decreasing priority (0=highest, 4=lowest)" << endl
+             << "to 2 facilities \"main\" and \"alt\". Command-line arguments are interpreted as" << endl
+             << "log facility level settings in the form [<facility>:]<level>." << endl;
+        return EXIT_FAILURE;
+    }
+    for (int i = 1; i < argc; ++i)  // apply every argument as a "[<facility>:]<level>" setting, echoing the result to cerr
+    {
+        cerr << "processing argument [" << argv[i] << "]" << endl;
+        logger::Logger::set_level_from_option(argv[i], &cerr);
+    }
+    vector<string> const level_name{ "error", "warning", "info", "debug", "debug1", "debug2" };
+    for (int l = 0; l < 5; ++l)  // levels 0..4 only; "debug2" (index 5) is never logged
+    {
+        LOG(level_name[l]) << "message at level " << l << " (" << level_name[l]  // level given as a string, facility LOG_FACILITY
+                           << ") for facility main" << endl;
+        LOG("alt", l) << "message at level " << l << " (" << level_name[l]  // level given as an int, explicit facility "alt"
+                      << ") for facility alt" << endl;
+    }
+}
+
+#endif
diff --git a/src/tmp.cpp b/src/tmp.cpp
index fb9a363..a767537 100644
--- a/src/tmp.cpp
+++ b/src/tmp.cpp
@@ -1,3 +1,10 @@
+//
+// Part of: https://github.com/mateidavid/fast5
+//
+// Copyright (c) 2015-2017 Matei David, Ontario Institute for Cancer Research
+// MIT License
+//
+
 #include <cassert>
 #include <exception>
 #include <functional>

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fast5.git



More information about the debian-med-commit mailing list