[med-svn] [Git][med-team/nanopolish][upstream] New upstream version 0.11.0

Thu Jan 24 08:55:05 GMT 2019

Andreas Tille pushed to branch upstream at Debian Med / nanopolish


Commits:
64e79bf2 by Andreas Tille at 2019-01-24T06:33:42Z
New upstream version 0.11.0
- - - - -


28 changed files:

- .travis.yml
- Makefile
- README.md
- + docs/source/quickstart_polya.rst
- + etc/r9-models/r9.4_450bps.gpc.6mer.template.model
- scripts/nanopolish_makerange.py
- src/alignment/nanopolish_eventalign.cpp
- + src/builtin_models/r9_4_450bps_gpc_6mer_template_model.inl
- src/common/nanopolish_alphabet.cpp
- src/common/nanopolish_alphabet.h
- src/common/nanopolish_common.h
- src/common/nanopolish_fast5_io.cpp
- src/common/nanopolish_fast5_io.h
- src/common/nanopolish_variant.cpp
- src/nanopolish_call_methylation.cpp
- src/nanopolish_index.cpp
- src/nanopolish_methyltrain.cpp
- src/nanopolish_raw_loader.cpp
- src/nanopolish_read_db.cpp
- src/nanopolish_squiggle_read.cpp
- src/nanopolish_squiggle_read.h
- src/nanopolish_vcf2fasta.cpp
- src/pore_model/nanopolish_builtin_models.h
- src/thirdparty/scrappie/scrappie_common.c
- − src/thirdparty/scrappie/sse_mathfun.h
- − src/thirdparty/scrappie/util.c
- − src/thirdparty/scrappie/util.h
- src/training_core.cpp


Changes:

=====================================
.travis.yml
=====================================
@@ -1,11 +1,46 @@
 # travis.yml for github.com/jts/nanopolish
 
-language: cpp
+dist: trusty
+sudo: false
+language: generic
+cache: apt
+git:
+    depth: 1
+
+matrix:
+    include:
+    # Set env for both nanoplish and the dependency hdf5.
+    - env:
+        - CC=gcc-4.8
+        - CXX=g++-4.8
+        - AR=gcc-ar-4.8
+        - NM=gcc-nm-4.8
+        - RANLIB=gcc-ranlib-4.8
+    - env:
+        - CC=gcc-8
+        - CXX=g++-8
+        - AR=gcc-ar-8
+        - NM=gcc-nm-8
+        - RANLIB=gcc-ranlib-8
 
 # Install and export newer gcc
 before_install:
     - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
     - sudo apt-get update -qq
-    - sudo apt-get install -qq g++-4.8
+    - |
+      if [[ "${CC}" =~ ^gcc- ]]; then
+          echo "Installing ${CC}."
+          sudo apt-get install -qq "${CC}"
+      fi
+    - |
+      if [[ "${CXX}" =~ ^g\+\+- ]]; then
+          echo "Installing ${CXX}."
+          sudo apt-get install -qq "${CXX}"
+      fi
 
-script: make CXX=g++-4.8 nanopolish && make CXX=g++-4.8 test
+script:
+    # Suppress all compiler warnings for hdf5 Makefile
+    # to display the log without downloading the raw log on Travis log page.
+    # Travis finishs with error when exceeding the limit of 4 MB of log length.
+    - export H5_CFLAGS="-w"
+    - make && make test


=====================================
Makefile
=====================================
@@ -8,17 +8,21 @@ SUBDIRS := src src/hmm src/thirdparty src/thirdparty/scrappie src/common src/ali
 #
 
 #Basic flags every build needs
-LIBS=-lz
+LIBS = -lz
 CXXFLAGS ?= -g -O3
-CXXFLAGS += -std=c++11 -fopenmp -fsigned-char
-CFLAGS ?= -O3 -std=c99
+CXXFLAGS += -std=c++11 -fopenmp -fsigned-char -D_FILE_OFFSET_BITS=64 #D_FILE_OFFSET_BITS=64 makes nanopolish work in 32 bit systems
+CFLAGS ?= -O3 -std=c99 -fsigned-char -D_FILE_OFFSET_BITS=64 
 CXX ?= g++
 CC ?= gcc
 
 # Change the value of HDF5, EIGEN, or HTS below to any value to disable compilation of bundled code
-HDF5?=install
-EIGEN?=install
-HTS?=install
+HDF5 ?= install
+EIGEN ?= install
+HTS ?= install
+
+HDF5_VERSION ?= 1.8.14
+HDF5_CONFIG_ARGS ?= --enable-threadsafe
+EIGEN_VERSION ?= 3.2.5
 
 # Check operating system, OSX doesn't have -lrt
 UNAME_S := $(shell uname -s)
@@ -28,52 +32,53 @@ endif
 
 # Default to automatically installing hdf5
 ifeq ($(HDF5), install)
-    H5_LIB=./lib/libhdf5.a
-    H5_INCLUDE=-I./include
+    H5_LIB = ./lib/libhdf5.a
+    H5_INCLUDE = -I./include
     LIBS += -ldl
 else
     # Use system-wide hdf5
-    H5_LIB=
-    H5_INCLUDE=
+    H5_LIB =
+    H5_INCLUDE =
     LIBS += -lhdf5
 endif
 
 # Default to automatically installing EIGEN
 ifeq ($(EIGEN), install)
-    EIGEN_CHECK=eigen/INSTALL
+    EIGEN_CHECK = eigen/INSTALL
 else
     # Use system-wide eigen
-    EIGEN_CHECK=
+    EIGEN_CHECK =
 endif
 
 # Default to build and link the libhts submodule
 ifeq ($(HTS), install)
-    HTS_LIB=./htslib/libhts.a
-    HTS_INCLUDE=-I./htslib
+    HTS_LIB = ./htslib/libhts.a
+    HTS_INCLUDE = -I./htslib
 else
     # Use system-wide htslib
-    HTS_LIB=
-    HTS_INCLUDE=
+    HTS_LIB =
+    HTS_INCLUDE =
     LIBS += -lhts
 endif
 
 # Include the header-only fast5 library
-FAST5_INCLUDE=-I./fast5/include
+FAST5_INCLUDE = -I./fast5/include
 
 # Include the header-only eigen library
-EIGEN_INCLUDE=-I./eigen/
+EIGEN_INCLUDE = -I./eigen/
 
 # Include the src subdirectories
-NP_INCLUDE=$(addprefix -I./, $(SUBDIRS))
+NP_INCLUDE = $(addprefix -I./, $(SUBDIRS))
 
 # Add include flags
 CPPFLAGS += $(H5_INCLUDE) $(HTS_INCLUDE) $(FAST5_INCLUDE) $(NP_INCLUDE) $(EIGEN_INCLUDE)
 
 # Main programs to build
-PROGRAM=nanopolish
-TEST_PROGRAM=nanopolish_test
+PROGRAM = nanopolish
+TEST_PROGRAM = nanopolish_test
 
-all: $(PROGRAM) $(TEST_PROGRAM)
+.PHONY: all
+all: depend $(PROGRAM)
 
 #
 # Build libhts
@@ -85,16 +90,23 @@ htslib/libhts.a:
 # If this library is a dependency the user wants HDF5 to be downloaded and built.
 #
 lib/libhdf5.a:
-	if [ ! -e hdf5-1.8.14.tar.gz ]; then wget https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.8/hdf5-1.8.14/src/hdf5-1.8.14.tar.gz; fi
-	tar -xzf hdf5-1.8.14.tar.gz || exit 255
-	cd hdf5-1.8.14 && ./configure --enable-threadsafe --prefix=`pwd`/.. && make && make install
+	if [ ! -e hdf5-$(HDF5_VERSION).tar.gz ]; then \
+		version_major_minior=`echo "$(HDF5_VERSION)" | sed -E 's/\.[0-9]+$$//'`; \
+		wget https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-$${version_major_minior}/hdf5-$(HDF5_VERSION)/src/hdf5-$(HDF5_VERSION).tar.gz; \
+	fi
 
+	tar -xzf hdf5-$(HDF5_VERSION).tar.gz || exit 255
+	cd hdf5-$(HDF5_VERSION) && \
+        ./configure --enable-threadsafe --libdir=`pwd`/../lib --includedir=`pwd`/../include --prefix=`pwd`/.. && \
+		make && make install
 
 # Download and install eigen if not already downloaded
 eigen/INSTALL:
-	if [ ! -e 3.2.5.tar.bz2 ]; then wget http://bitbucket.org/eigen/eigen/get/3.2.5.tar.bz2; fi
-	tar -xjvf 3.2.5.tar.bz2 || exit 255
-	mv eigen-eigen-bdd17ee3b1b3 eigen || exit 255
+	if [ ! -e $(EIGEN_VERSION).tar.bz2 ]; then \
+		wget http://bitbucket.org/eigen/eigen/get/$(EIGEN_VERSION).tar.bz2; \
+	fi
+	tar -xjf $(EIGEN_VERSION).tar.bz2 || exit 255
+	mv eigen-eigen-* eigen || exit 255
 
 #
 # Source files
@@ -103,22 +115,20 @@ eigen/INSTALL:
 # Find the source files by searching subdirectories
 CPP_SRC := $(foreach dir, $(SUBDIRS), $(wildcard $(dir)/*.cpp))
 C_SRC := $(foreach dir, $(SUBDIRS), $(wildcard $(dir)/*.c))
-EXE_SRC=src/main/nanopolish.cpp src/test/nanopolish_test.cpp
+EXE_SRC = src/main/nanopolish.cpp src/test/nanopolish_test.cpp
 
 # Automatically generated object names
-CPP_OBJ=$(CPP_SRC:.cpp=.o)
-C_OBJ=$(C_SRC:.c=.o)
+CPP_OBJ = $(CPP_SRC:.cpp=.o)
+C_OBJ = $(C_SRC:.c=.o)
 
 # Generate dependencies
-PHONY=depend
+.PHONY: depend
 depend: .depend
 
 .depend: $(CPP_SRC) $(C_SRC) $(EXE_SRC) $(H5_LIB) $(EIGEN_CHECK)
 	rm -f ./.depend
 	$(CXX) $(CXXFLAGS) $(CPPFLAGS) -MM $(CPP_SRC) $(C_SRC) > ./.depend;
 
-include .depend
-
 # Compile objects
 .cpp.o:
 	$(CXX) -o $@ -c $(CXXFLAGS) $(CPPFLAGS) -fPIC $<
@@ -134,8 +144,11 @@ $(PROGRAM): src/main/nanopolish.o $(CPP_OBJ) $(C_OBJ) $(HTS_LIB) $(H5_LIB) $(EIG
 $(TEST_PROGRAM): src/test/nanopolish_test.o $(CPP_OBJ) $(C_OBJ) $(HTS_LIB) $(H5_LIB)
 	$(CXX) -o $@ $(CXXFLAGS) $(CPPFLAGS) -fPIC $< $(CPP_OBJ) $(C_OBJ) $(HTS_LIB) $(H5_LIB) $(LIBS) $(LDFLAGS)
 
+.PHONY: test
 test: $(TEST_PROGRAM)
 	./$(TEST_PROGRAM)
 
+.PHONY: clean
 clean:
-	rm -f $(PROGRAM) $(TEST_PROGRAM) $(CPP_OBJ) $(C_OBJ) src/main/nanopolish.o src/test/nanopolish_test.o
+	rm -f $(PROGRAM) $(TEST_PROGRAM) $(CPP_OBJ) $(C_OBJ) \
+		src/main/nanopolish.o src/test/nanopolish_test.o


=====================================
README.md
=====================================
@@ -6,6 +6,8 @@ Software package for signal-level analysis of Oxford Nanopore sequencing data. N
 
 ## Release notes
 
+* 0.11.0: support for multi-fast5 files. `nanopolish methyltrain` now subsamples input data, improving speed and memory usage
+
 * 0.10.2: added new program `nanopolish polya` to estimate the length of poly-A tails on direct RNA reads (by @paultsw)
 
 * 0.10.1: `nanopolish variants --consensus` now only outputs a VCF file instead of a fasta sequence. The VCF file describes the changes that need to be made to turn the draft sequence into the polished assembly. A new program, `nanopolish vcf2fasta`, is provided to generate the polished genome (this replaces `nanopolish_merge.py`, see usage instructions below). This change is to avoid issues when merging segments that end on repeat boundaries (reported by Michael Wykes and Chris Wright).


=====================================
docs/source/quickstart_polya.rst
=====================================
@@ -0,0 +1,135 @@
+.. _quickstart_polya:
+
+Quickstart - how to estimate poly(A) tail lengths from nanopore native RNA reads
+=================================================================================
+
+Owing to homopolymer effects and the proximity to the sequencing adapters, the polyadenylated tails of reads obtained from nanopore native RNA sequencing are improperly basecalled, making their lengths difficult to quantify. We developed the `polya` subprogram to use an alternative statistical model to estimate these tail lengths.
+
+In this quickstart tutorial, we'll show you how to estimate polyadenylated tail lengths step-by-step, starting from nothing but raw fast5 files. We'll basecall the fast5 files with Oxford Nanopore Technologies' *albacore* basecaller, before aligning the resulting reads with *minimap2*, indexing the files with *nanopolish index*, and finally segmenting the reads and calling the tail lengths with *nanopolish polya*.
+
+We'll be following the steps taken in our `benchmarking analysis <https://github.com/paultsw/polya_analysis>`_ workflow that accompanies `our publication <https://www.biorxiv.org/content/early/2018/11/09/459529>`_. In each step below, we'll generate files in the working directory instead of making subdirectories, except in the case of large datasets.
+
+Software requirements
+---------------------
+* `nanopolish <https://github.com/jts/nanopolish>`_ `>= v10.2`
+* `minimap2 <https://github.com/lh3/minimap2>`_ `>= v2.12`
+* `samtools <http://www.htslib.org/>`_ `>= v1.9`
+* `albacore >= v2.3.3` (from Oxford Nanopore's private customer portal)
+
+Download raw fast5 data and basecall
+------------------------------------
+Let's start by downloading a dataset of fast5 files from the European Nucleotide Archive. We'll download a tarball of fast5 files containing reads that are known to have polyadenylated tail lengths of 30nt. ::
+
+    mkdir data && mkdir data/fastqs
+    wget ftp://ftp.sra.ebi.ac.uk/vol1/ERA158/ERA1580896/oxfordnanopore_native/30xpolyA.tar.gz -O 30xpolyA.tar.gz && mv 30xpolyA.tar.gz data/
+    tar -xzf data/30xpolyA.tar.gz -C data/
+    read_fast5_basecaller.py --worker_threads=8 -f FLO-MIN107 -k SQK-RNA001 -q 0 -s data/fastqs -i data/30xpolyA/fast5/pass
+    cat data/fastqs/workspace/pass/*.fastq > data/30xpolyA.fastq
+
+In the above, change the value of the `-f` and `-k` arguments based on your flow-cell and sequencing kit, as the basecaller's accuracy is highly dependent upon these settings.
+
+Our directory structure should now look something like this: ::
+
+    (current directory)
+    |- data/
+    |-- 30xpolyA.fastq
+    |-- 30xpolyA.tar.gz
+    |-- 30xpolyA/
+    |--- fast5/
+    |---- pass/
+    |----- raw_read1.fast5
+    |----- (... more raw fast5's here ...)
+    |-- fastqs/
+    |--- workspace/
+    |---- pass/
+
+Index with nanopolish index
+---------------------------
+Let's construct an index for our reads with nanopolish's `index` subprogram. This constructs a fast lookup table that tells our program where to find the raw fast5 file for each
+basecalled read. ::
+
+    nanopolish index --directory=data/30xpolyA/fast5/pass --sequencing-summary=data/fastqs/sequencing_summary.txt data/30xpolyA.fastq
+
+This should generate a collection of files in the same directory that contains the `30xpolyA.fastq`.
+
+Align with minimap2 and format the BAM file
+-------------------------------------------
+Now we'll align our basecalled mRNA sequences in the fastq file with our reference. First download a reference fasta: ::
+
+    wget https://raw.githubusercontent.com/paultsw/polya_analysis/master/data/references/enolase_reference.fas
+    wget https://raw.githubusercontent.com/paultsw/polya_analysis/master/data/references/enolase_reference.fai
+    mv enolase_reference.* data/
+
+Note that our directory structure should now look like this: ::
+
+    (current directory)
+    |- data/
+    |-- enolase_reference.fas
+    |-- enolase_reference.fai
+    |-- (... same structure as above ...)
+
+Let's run `minimap2` to align our basecalled reads to this reference and generate a collection of SAM/BAM files with `samtools`: ::
+
+    minimap2 -a -x map-ont data/enolase_reference.fas data/30xpolyA.fastq | samtools view -b - -o data/30xpolyA.bam
+    cd data && samtools sort -T tmp -o 30xpolyA.sorted.bam 30xpolyA.bam && samtools index 30xpolyA.sorted.bam && cd ..
+
+Note that we used `-x map-ont`, which is typically for unspliced reads (e.g. genomic DNA) coming from nanopore sequencers. In typical native mRNA sequencing, you should use `-x splice`,
+which is a splice-aware setting (and uses a different gap cost in the alignment). We're using `map-ont` due to the fact that our reads come from a control dataset with no splicing.
+
+There should be three more files in the `data` directory now: `30xpolyA.bam`, `30xpolyA.sorted.bam`, and `30xpolyA.sorted.bam.bai`.
+
+Segmentation and tail length estimation with nanopolish polya
+-------------------------------------------------------------
+Finally, we can run the polyadenylation estimator: ::
+
+    nanopolish polya --threads=8 --reads=data/30xpolyA.fastq --bam=data/30xpolyA.sorted.bam --genome=data/enolase_reference.fas > polya_results.tsv
+
+Set the `--threads` flag to the number of parallel threads you want to use. Generally speaking, a larger number of threads tends to lower the compute time, but there are diminishing
+returns to a higher value and performance can actually decrease if your CPU is incapable of supporting your desired number of parallel threads. The best number of threads to use is
+highly dependent upon your hardware.
+
+Interpreting the output TSV file
+--------------------------------
+We'll end this quickstart with a look at the output of the polya program. Let's look at the top five lines of the `polya_results.tsv` file we've just generated: ::
+
+    head -20 polya_results.tsv | column -t
+    readname                              contig   position  leader_start  adapter_start  polya_start  transcript_start  read_rate  polya_length  qc_tag
+    d6f42b79-90c6-4edd-8c8f-8a7ce0ac6ecb  YHR174W  0         54.0          3446.0         7216.0       8211.0            130.96     38.22         PASS
+    453f3f3e-d22f-4d9c-81a6-8576e23390ed  YHR174W  0         228.0         5542.0         10298.0      11046.0           130.96     27.48         PASS
+    e02d9858-0c04-4d86-8dba-18a47d9ac005  YHR174W  0         221.0         1812.0         7715.0       8775.0            97.16      29.16         PASS
+    b588dee2-2c5b-410c-91e1-fe8140f4f837  YHR174W  0         22.0          8338.0         13432.0      14294.0           130.96     32.43         PASS
+    af9dfee2-1711-4083-b109-487b99895e0a  YHR174W  0         889.0         3679.0         6140.0       7168.0            130.96     39.65         PASS
+    93f98a86-3b18-48cf-8c4d-15cf277911e2  YHR174W  0         144.0         1464.0         5615.0       6515.0            120.48     30.96         SUFFCLIP
+    af9dfee2-1711-4083-b109-487b99895e0a  YHR174W  0         889.0         3679.0         6140.0       7168.0            130.96     39.65         SUFFCLIP
+    93f98a86-3b18-48cf-8c4d-15cf277911e2  YHR174W  0         144.0         1464.0         5615.0       6515.0            120.48     30.96         PASS
+    ca8d4059-9d82-45ee-aa07-4b8b351618b3  YHR174W  0         1.0           2157.0         4255.0       5862.0            111.56     54.48         PASS
+    3493c123-78d4-4f7c-add0-cbb249aef00a  YHR174W  0         78.0          1938.0         4829.0       5491.0            136.91     25.05         PASS
+    f5ff1802-3fdd-479a-8888-c72de01bbaea  YHR174W  0         150.0         3476.0         7233.0       7932.0            130.96     25.35         PASS
+    bb929728-2ed8-42b0-a5a5-eea4bfd62673  YHR174W  0         91.0          1061.0         6241.0       6910.0            111.56     19.74         PASS
+    17cf3fef-1acb-4045-8252-e9c00fedfb7c  YHR174W  0         447.0         6004.0         20058.0      20964.0           100.40     25.17         ADAPTER
+    e3e38de6-8a99-4029-a067-261f470517ca  YHR174W  0         41.0          1588.0         4057.0       5303.0            130.96     49.13         PASS
+    66f55b56-c22e-4e6d-999e-50687bed6fb7  YHR174W  0         191.0         3160.0         9218.0       10030.0           125.50     28.79         PASS
+    56c116d7-9286-4b57-8329-e74928b11b38  YHR174W  0         13.0          1516.0         5845.0       6773.0            130.96     35.30         PASS
+    5ca1392c-c48f-4135-85d3-271bd4ee7a13  YHR174W  0         1.0           1976.0         4854.0       5947.0            136.91     44.64         PASS
+    66b5a0ef-b8e6-475e-bf20-04b96154a67f  YHR174W  0         98.0          3847.0         7066.0       7925.0            120.48     29.32         PASS
+    34bf2187-5816-4744-8e6a-3250b5247e02  YHR174W  0         1.0           2897.0         6885.0       7547.0            125.50     22.54         PASS
+
+Each row corresponds to the output for a given read. The columns have the following interpretation:
+* `readname` refers to the unique ID associated to this particular read. This string is also used to look up the corresponding fast5 file, e.g. by looking
+  through the `readdb` file generated by `nanopolish index`.
+* `contig` refers to the reference sequence that this read aligns to, and is taken from the BAM file.
+* `position` is the 5' starting position of the alignment to the reference sequence, and also comes from the BAM file.
+* `leader_start`, `adapter_start`, `polya_start`, and `transcript_start` are the indices of the signal segmentation generated by the underlying model within
+  `nanopolish`. Briefly, there are four biologically-meaningful regions of the raw sequence of electrical current readings within each fast5 file; these four
+  entries denote the starting index of each of these consecutive regions. The indices start from 0 and are oriented in the 3'-to-5' direction (due to the
+  inherent orientation of the native RNA nanopore sequencing protocol). A full exposition of this segmentation algorithm is available in the
+  `supplementary note<https://www.biorxiv.org/content/biorxiv/suppl/2018/11/09/459529.DC1/459529-2.pdf>`_ to our associated publication.
+* `read_rate` is the estimated translocation rate (in units of nucleotides/second) of the read through the pore. The translocation rate is non-uniform during
+  the sequencing process of even a single molecule, so this is ultimately a summary statistic of the dynamic, time-varying rate.
+* `polya_length` is the estimated polyadenylated tail length, in number of nucleotides. That this value is a float rather than an integer reflects the fact
+  that our estimated tail length is the output of an estimator based on the translocation rate.
+* `qc_tag` is an additional flag used to indicate the validity of the estimate. Generally speaking, you should only use rows of the output file with this value
+  set to `PASS`; all other rows with (e.g.) the `qc_tag` set to `SUFFCLIP`, `ADAPTER`, etc. display signs of irregularity that indicate that we believe the
+  estimate to be unreliable. You can easily filter away all rows with the tag set to anything other than `PASS` using a `grep`: ::
+
+    grep 'PASS' polya_results.tsv > polya_results.pass_only.tsv


=====================================
etc/r9-models/r9.4_450bps.gpc.6mer.template.model
=====================================
The diff for this file was not included because it is too large.

=====================================
scripts/nanopolish_makerange.py
=====================================
@@ -1,3 +1,4 @@
+#! /usr/bin/env python
 from __future__ import print_function
 
 import sys


=====================================
src/alignment/nanopolish_eventalign.cpp
=====================================
@@ -243,7 +243,11 @@ void emit_tsv_header(FILE* fp)
 
 void emit_sam_header(samFile* fp, const bam_hdr_t* hdr)
 {
-    sam_hdr_write(fp, hdr);
+    int ret = sam_hdr_write(fp, hdr);
+    if(ret < 0) {
+        fprintf(stderr, "error writing sam header\n");
+        exit(EXIT_FAILURE);
+    }
 }
 
 std::string cigar_ops_to_string(const std::vector<uint32_t>& ops)
@@ -382,7 +386,12 @@ void emit_event_alignment_sam(htsFile* fp,
     int stride = alignments.front().event_idx < alignments.back().event_idx ? 1 : -1;
     bam_aux_append(event_record, "ES", 'i', 4, reinterpret_cast<uint8_t*>(&stride));
 
-    sam_write1(fp, base_hdr, event_record);
+    int ret = sam_write1(fp, base_hdr, event_record);
+    if(ret < 0) {
+        fprintf(stderr, "error writing sam record\n");
+        exit(EXIT_FAILURE);
+    }
+
     bam_destroy1(event_record); // automatically frees malloc'd segment
 }
 


=====================================
src/builtin_models/r9_4_450bps_gpc_6mer_template_model.inl
=====================================
The diff for this file was not included because it is too large.

=====================================
src/common/nanopolish_alphabet.cpp
=====================================
@@ -94,6 +94,39 @@ const char* MethylCpGAlphabet::_recognition_sites[] = { "CG" };
 const char* MethylCpGAlphabet::_recognition_sites_methylated[] = { "MG" };
 const char* MethylCpGAlphabet::_recognition_sites_methylated_complement[] = { "GM" };
 
+//
+// methyl-cytosine in GC context
+//
+const uint8_t MethylGpCAlphabet::_rank[256] = {
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,1,0,0,0,2,0,0,0,0,0,3,0,0,
+    0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+};
+
+const char* MethylGpCAlphabet::_name = "gpc";
+const char* MethylGpCAlphabet::_base = "ACGMT";
+const char* MethylGpCAlphabet::_complement = "TGCGA";
+const uint32_t MethylGpCAlphabet::_size = 5;
+
+const uint32_t MethylGpCAlphabet::_num_recognition_sites = 1;
+const uint32_t MethylGpCAlphabet::_recognition_length = 2;
+const char* MethylGpCAlphabet::_recognition_sites[] = { "GC" };
+const char* MethylGpCAlphabet::_recognition_sites_methylated[] = { "GM" };
+const char* MethylGpCAlphabet::_recognition_sites_methylated_complement[] = { "MG" };
+
 //
 // Dam methylation: methyl-adenine in GATC context
 //
@@ -163,6 +196,7 @@ const char* MethylDcmAlphabet::_recognition_sites_methylated_complement[] = { "G
 // Global objects
 DNAAlphabet gDNAAlphabet;
 MethylCpGAlphabet gMCpGAlphabet;
+MethylGpCAlphabet gMethylGpCAlphabet;
 MethylDamAlphabet gMethylDamAlphabet;
 MethylDcmAlphabet gMethylDcmAlphabet;
 UtoTRNAAlphabet gUtoTRNAAlphabet;
@@ -171,6 +205,7 @@ std::vector<const Alphabet*> get_alphabet_list()
 {
     std::vector<const Alphabet*> list = { &gDNAAlphabet, 
                                           &gMCpGAlphabet, 
+                                          &gMethylGpCAlphabet,
                                           &gMethylDamAlphabet,
                                           &gMethylDcmAlphabet,
                                           &gUtoTRNAAlphabet };


=====================================
src/common/nanopolish_alphabet.h
=====================================
@@ -355,6 +355,26 @@ struct MethylCpGAlphabet : public Alphabet
     }
 };
 
+//
+// methyl-cytosine in GC context
+//
+struct MethylGpCAlphabet : public Alphabet
+{
+    // member variables, expanded by macrocs
+    BASIC_MEMBER_BOILERPLATE
+    METHYLATION_MEMBER_BOILERPLATE
+
+    // member functions
+    BASIC_ACCESSOR_BOILERPLATE
+    METHYLATION_ACCESSOR_BOILERPLATE
+
+    // does this alphabet contain all of the nucleotides in bases?
+    virtual inline bool contains_all(const char *bases) const
+    {
+        return strspn(bases, _base) == strlen(bases);
+    }
+};
+
 //
 // Dam methylation: methyl-adenine in GATC context
 // 
@@ -398,6 +418,7 @@ struct MethylDcmAlphabet : public Alphabet
 // Global alphabet objects that can be re-used
 extern DNAAlphabet gDNAAlphabet;
 extern MethylCpGAlphabet gMCpGAlphabet;
+extern MethylGpCAlphabet gMethylGpCAlphabet;
 extern MethylDamAlphabet gMethylDamAlphabet;
 extern MethylDcmAlphabet gMethylDcmAlphabet;
 extern UtoTRNAAlphabet gUtoTRNAAlphabet;


=====================================
src/common/nanopolish_common.h
=====================================
@@ -18,7 +18,7 @@
 #include "logsum.h"
 
 #define PACKAGE_NAME "nanopolish"
-#define PACKAGE_VERSION "0.10.2"
+#define PACKAGE_VERSION "0.11.0"
 #define PACKAGE_BUGREPORT "https://github.com/jts/nanopolish/issues"
 
 //


=====================================
src/common/nanopolish_fast5_io.cpp
=====================================
@@ -8,65 +8,91 @@
 //
 #include <string.h>
 #include <math.h>
+#include <assert.h>
 #include "nanopolish_fast5_io.h"
 
 //#define DEBUG_FAST5_IO 1
 
-#define RAW_ROOT "/Raw/Reads/"
+#define LEGACY_FAST5_RAW_ROOT "/Raw/Reads/"
+
 int verbose = 0;
 
 //
-hid_t fast5_open(const std::string& filename)
+fast5_file fast5_open(const std::string& filename)
 {
-    hid_t hdf5file = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
-    return hdf5file;
+    fast5_file fh;
+    fh.hdf5_file = H5Fopen(filename.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
+
+    // Check for attribute that indicates whether it is single or multi-fast5
+    // see: https://community.nanoporetech.com/posts/multi-fast5-format
+    const std::string indicator_p1 = "/UniqueGlobalKey/";
+    const std::string indicator_p2 = indicator_p1 + "tracking_id/";
+    bool has_indicator = H5Lexists(fh.hdf5_file, indicator_p1.c_str(), H5P_DEFAULT) && H5Lexists(fh.hdf5_file, indicator_p2.c_str(), H5P_DEFAULT);
+    fh.is_multi_fast5 = !has_indicator;
+    return fh;
 }
 
 //
-void fast5_close(hid_t hdf5_file)
+bool fast5_is_open(fast5_file& fh)
 {
-    H5Fclose(hdf5_file);
+    return fh.hdf5_file >= 0;
 }
 
 //
-std::string fast5_get_raw_read_group(hid_t hdf5_file)
+void fast5_close(fast5_file& fh)
 {
-    std::string read_name = fast5_get_raw_read_name(hdf5_file);
-    if(read_name != "") {
-        return std::string(RAW_ROOT) + read_name;
-    } else {
-        return "";
-    }
+    H5Fclose(fh.hdf5_file);
 }
 
 //
-std::string fast5_get_raw_read_name(hid_t hdf5_file)
+std::vector<std::string> fast5_get_multi_read_groups(fast5_file& fh)
 {
-    // This code is From scrappie's fast5_interface
-
-    // retrieve the size of the read name
-    ssize_t size =
-        H5Lget_name_by_idx(hdf5_file, RAW_ROOT, H5_INDEX_NAME, H5_ITER_INC, 0, NULL,
-                           0, H5P_DEFAULT);
-
-    if (size < 0) {
-        return "";
+    std::vector<std::string> out;
+    size_t buffer_size = 0;
+    char* buffer = NULL;
+
+    // get the number of groups in the root group
+    H5G_info_t group_info;
+    int ret = H5Gget_info_by_name(fh.hdf5_file, "/", &group_info, H5P_DEFAULT);
+    if(ret < 0) {
+        fprintf(stderr, "error getting group info\n");
+        exit(EXIT_FAILURE);
     }
 
-    // copy the read name out of the fast5
-    char* name = (char*)calloc(1 + size, sizeof(char));
-    H5Lget_name_by_idx(hdf5_file, RAW_ROOT, H5_INDEX_NAME, H5_ITER_INC, 0, name,
-                       1 + size, H5P_DEFAULT);
+    for(size_t group_idx = 0; group_idx < group_info.nlinks; ++group_idx) {
+
+        // retrieve the size of this group name
+        ssize_t size = H5Lget_name_by_idx(fh.hdf5_file, "/", H5_INDEX_NAME, H5_ITER_INC, group_idx, NULL, 0, H5P_DEFAULT);
+
+        if(size < 0) {
+            fprintf(stderr, "error getting group name size\n");
+            exit(EXIT_FAILURE);
+        }
+        size += 1; // for null terminator
+           
+        if(size > buffer_size) {
+            buffer = (char*)realloc(buffer, size);
+            buffer_size = size;
+        }
+    
+        // copy the group name
+        H5Lget_name_by_idx(fh.hdf5_file, "/", H5_INDEX_NAME, H5_ITER_INC, group_idx, buffer, buffer_size, H5P_DEFAULT);
+        buffer[size] = '\0';
+        out.push_back(buffer);
+    }
 
-    // cleanup
-    std::string out(name);
-    free(name);
+    free(buffer);
+    buffer = NULL;
+    buffer_size = 0;
     return out;
 }
 
 //
-std::string fast5_get_read_id(hid_t hdf5_file)
+std::string fast5_get_read_id_single_fast5(fast5_file& fh)
 {
+    // this function is not supported for multi-fast5 files
+    assert(!fh.is_multi_fast5);
+
     int ret;
     hid_t read_name_attribute, raw_group, attribute_type;
     size_t storage_size = 0;
@@ -75,16 +101,16 @@ std::string fast5_get_read_id(hid_t hdf5_file)
     std::string out = "";
     
     // Get the path to the raw read group
-    std::string raw_read_group = fast5_get_raw_read_group(hdf5_file);
+    std::string raw_read_group = fast5_get_raw_read_group(fh, "");
     if(raw_read_group == "") {
         return out;
     }
 
-    return fast5_get_fixed_string_attribute(hdf5_file, raw_read_group, "read_id");
+    return fast5_get_string_attribute(fh, raw_read_group, "read_id");
 }
 
 //
-raw_table fast5_get_raw_samples(hid_t hdf5_file, fast5_raw_scaling scaling)
+raw_table fast5_get_raw_samples(fast5_file& fh, const std::string& read_id, fast5_raw_scaling scaling)
 {
     float* rawptr = NULL;
     hid_t space;
@@ -94,12 +120,12 @@ raw_table fast5_get_raw_samples(hid_t hdf5_file, fast5_raw_scaling scaling)
     raw_table rawtbl = { 0, 0, 0, NULL };
 
     // mostly from scrappie
-    std::string raw_read_group = fast5_get_raw_read_group(hdf5_file);
+    std::string raw_read_group = fast5_get_raw_read_group(fh, read_id);
 
     // Create data set name
     std::string signal_path = raw_read_group + "/Signal";
 
-    hid_t dset = H5Dopen(hdf5_file, signal_path.c_str(), H5P_DEFAULT);
+    hid_t dset = H5Dopen(fh.hdf5_file, signal_path.c_str(), H5P_DEFAULT);
     if (dset < 0) {
 #ifdef DEBUG_FAST5_IO
         fprintf(stderr, "Failed to open dataset '%s' to read raw signal from.\n", signal_path.c_str());
@@ -140,12 +166,27 @@ raw_table fast5_get_raw_samples(hid_t hdf5_file, fast5_raw_scaling scaling)
     return rawtbl;
 }
 
+std::string fast5_get_context_tags_group(fast5_file& fh, const std::string& read_id)
+{
+    std::string group = fh.is_multi_fast5 ? "/read_" + read_id + "/context_tags"
+                                          : "/UniqueGlobalKey/context_tags";
+    return group;
+}
+
 //
-std::string fast5_get_experiment_type(hid_t hdf5_file)
+std::string fast5_get_sequencing_kit(fast5_file& fh, const std::string& read_id)
 {
-    return fast5_get_fixed_string_attribute(hdf5_file, "/UniqueGlobalKey/context_tags", "experiment_type");
+    std::string group = fast5_get_context_tags_group(fh, read_id);
+    return fast5_get_string_attribute(fh, group.c_str(), "sequencing_kit");
 }
 
+std::string fast5_get_experiment_type(fast5_file& fh, const std::string& read_id)
+{
+    std::string group = fast5_get_context_tags_group(fh, read_id);
+    return fast5_get_string_attribute(fh, group.c_str(), "experiment_type");
+}
+
+
 // from scrappie
 float fast5_read_float_attribute(hid_t group, const char *attribute) {
     float val = NAN;
@@ -171,16 +212,18 @@ float fast5_read_float_attribute(hid_t group, const char *attribute) {
 }
 
 //
-fast5_raw_scaling fast5_get_channel_params(hid_t hdf5_file)
+fast5_raw_scaling fast5_get_channel_params(fast5_file& fh, const std::string& read_id)
 {
     // from scrappie
     fast5_raw_scaling scaling = { NAN, NAN, NAN, NAN };
-    const char *scaling_path = "/UniqueGlobalKey/channel_id";
 
-    hid_t scaling_group = H5Gopen(hdf5_file, scaling_path, H5P_DEFAULT);
+    std::string scaling_path = fh.is_multi_fast5 ? "/read_" + read_id + "/channel_id"
+                                                 :  "/UniqueGlobalKey/channel_id";
+
+    hid_t scaling_group = H5Gopen(fh.hdf5_file, scaling_path.c_str(), H5P_DEFAULT);
     if (scaling_group < 0) {
 #ifdef DEBUG_FAST5_IO
-        fprintf(stderr, "Failed to group %s.", scaling_path);
+        fprintf(stderr, "Failed to open group %s\n", scaling_path.c_str());
 #endif
         return scaling;
     }
@@ -200,23 +243,61 @@ fast5_raw_scaling fast5_get_channel_params(hid_t hdf5_file)
 //
 
 //
-std::string fast5_get_fixed_string_attribute(hid_t hdf5_file, const std::string& group_name, const std::string& attribute_name)
+std::string fast5_get_raw_root(fast5_file& fh, const std::string& read_id)
 {
-    size_t storage_size;
-    char* buffer;
-    hid_t group, attribute, attribute_type;
-    int ret;
+    return fh.is_multi_fast5 ? "/read_" + read_id + "/Raw" : "/Raw/Reads";
+}
+
+//
+std::string fast5_get_raw_read_group(fast5_file& fh, const std::string& read_id)
+{
+    if(fh.is_multi_fast5) {
+        return "/read_" + read_id + "/Raw";
+    } else {
+        std::string internal_read_name = fast5_get_raw_read_internal_name(fh);
+        return internal_read_name != "" ? std::string(LEGACY_FAST5_RAW_ROOT) + "/" + internal_read_name : "";
+    }
+}
+
+//
+std::string fast5_get_raw_read_internal_name(fast5_file& fh)
+{
+    // This code is From scrappie's fast5_interface
+
+    // retrieve the size of the read name
+    ssize_t size =
+        H5Lget_name_by_idx(fh.hdf5_file, LEGACY_FAST5_RAW_ROOT, H5_INDEX_NAME, H5_ITER_INC, 0, NULL, 0, H5P_DEFAULT);
+
+    if (size < 0) {
+        return "";
+    }
+
+    // copy the read name out of the fast5
+    char* name = (char*)calloc(1 + size, sizeof(char));
+    H5Lget_name_by_idx(fh.hdf5_file, LEGACY_FAST5_RAW_ROOT, H5_INDEX_NAME, H5_ITER_INC, 0, name, 1 + size, H5P_DEFAULT);
+
+    // cleanup
+    std::string out(name);
+    free(name);
+    return out;
+}
+
+//
+std::string fast5_get_string_attribute(fast5_file& fh, const std::string& group_name, const std::string& attribute_name)
+{
+    hid_t group, attribute, attribute_type, native_type;
     std::string out;
 
     // according to http://hdf-forum.184993.n3.nabble.com/check-if-dataset-exists-td194725.html
     // we should use H5Lexists to check for the existence of a group/dataset using an arbitrary path
-    ret = H5Lexists(hdf5_file, group_name.c_str(), H5P_DEFAULT);
+    // HDF5 1.8 returns 0 on the root group, so we explicitly check for it
+    int ret = group_name == "/" ? 1 : H5Lexists(fh.hdf5_file, group_name.c_str(), H5P_DEFAULT);
     if(ret <= 0) {
         return "";
     }
 
-    // Open the group /Raw/Reads/Read_nnn
-    group = H5Gopen(hdf5_file, group_name.c_str(), H5P_DEFAULT);
+    // Open the group containing the attribute we want
+    group = H5Gopen(fh.hdf5_file, group_name.c_str(), H5P_DEFAULT);
     if(group < 0) {
 #ifdef DEBUG_FAST5_IO
         fprintf(stderr, "could not open group %s\n", group_name.c_str());
@@ -241,25 +322,61 @@ std::string fast5_get_fixed_string_attribute(hid_t hdf5_file, const std::string&
 
     // Get data type and check it is a fixed-length string
     attribute_type = H5Aget_type(attribute);
-    if(H5Tis_variable_str(attribute_type)) {
+    if(attribute_type < 0) {
 #ifdef DEBUG_FAST5_IO
-        fprintf(stderr, "variable length string detected -- ignoring attribute\n");
+        fprintf(stderr, "failed to get attribute type %s\n", attribute_name.c_str());
 #endif
         goto close_type;
     }
 
-    // Get the storage size and allocate
-    storage_size = H5Aget_storage_size(attribute);
-    buffer = (char*)calloc(storage_size + 1, sizeof(char));
+    if(H5Tget_class(attribute_type) != H5T_STRING) {
+#ifdef DEBUG_FAST5_IO
+        fprintf(stderr, "attribute %s is not a string\n", attribute_name.c_str());
+#endif
+        goto close_type;
+    }
 
-    // finally read the attribute
-    ret = H5Aread(attribute, attribute_type, buffer);
-    if(ret >= 0) {
+    native_type = H5Tget_native_type(attribute_type, H5T_DIR_ASCEND);
+    if(native_type < 0) {
+#ifdef DEBUG_FAST5_IO
+        fprintf(stderr, "failed to get native type for %s\n", attribute_name.c_str());
+#endif
+        goto close_native_type;
+    }
+
+    if(H5Tis_variable_str(attribute_type) > 0) {
+        // variable length string
+        char* buffer;
+        ret = H5Aread(attribute, native_type, &buffer);
+        if(ret < 0) {
+            fprintf(stderr, "error reading attribute %s\n", attribute_name.c_str());
+            exit(EXIT_FAILURE);
+        }
         out = buffer;
+        free(buffer);
+        buffer = NULL;
+
+    } else {
+        // fixed length string
+        size_t storage_size;
+        char* buffer;
+
+        // Get the storage size and allocate
+        storage_size = H5Aget_storage_size(attribute);
+        buffer = (char*)calloc(storage_size + 1, sizeof(char));
+
+        // finally read the attribute
+        ret = H5Aread(attribute, attribute_type, buffer);
+        if(ret >= 0) {
+            out = buffer;
+        }
+
+        // clean up
+        free(buffer);
     }
 
-    // clean up
-    free(buffer);
+close_native_type:
+    H5Tclose(native_type);    
 close_type:
     H5Tclose(attribute_type);
 close_attr:
@@ -269,4 +386,3 @@ close_group:
 
     return out;
 }
-


=====================================
src/common/nanopolish_fast5_io.h
=====================================
@@ -21,45 +21,75 @@ extern "C" {
 
 // From scrappie
 typedef struct {
-    //  Information for scaling raw data from ADC values to pA
+    // Parameters for scaling raw data from ADC values to pA
     float digitisation;
     float offset;
     float range;
     float sample_rate;
 } fast5_raw_scaling;
 
+//
+struct fast5_file
+{
+    hid_t hdf5_file;
+    bool is_multi_fast5;
+};
 
 //
 // External API
 //
 
-// open the file and return the hdf ID
-hid_t fast5_open(const std::string& filename);
+//
+// Basic I/O
+//
+
+// open the file and return handle
+fast5_file fast5_open(const std::string& filename);
+
+// check if file is open
+bool fast5_is_open(fast5_file& fh);
 
 // close the file
-void fast5_close(hid_t hdf5_file);
+void fast5_close(fast5_file& fh);
 
-// get the raw samples from this file
-raw_table fast5_get_raw_samples(hid_t hdf5_file, fast5_raw_scaling scaling);
+//
+// Functions to get the names of the read(s) contained in the file
+//
 
-// get the name of the raw read in the file (eg Read_1234)
-std::string fast5_get_raw_read_name(hid_t hdf5_file);
+// Get the identifier of a read from the hdf5 file (eg 00041f-....)
+std::string fast5_get_read_id_single_fast5(fast5_file& fh);
+
+// Get a vector of read groups for a multi-fast5 file (eg [read_00041f-..., read_1243fe-....])
+std::vector<std::string> fast5_get_multi_read_groups(fast5_file& fh);
 
-// get the name of the raw read group (eg /Raw/Read/Read_1234)
-std::string fast5_get_raw_read_group(hid_t hdf5_file);
+//
+// Functions to get the samples or metadata
+//
 
-// Get the identifier of a read from the hdf5 file
-std::string fast5_get_read_id(hid_t hdf5_file);
+// get the raw samples from this file
+raw_table fast5_get_raw_samples(fast5_file& fh, const std::string& read_id, fast5_raw_scaling scaling);
+
+// Get the sequencing kit
+std::string fast5_get_sequencing_kit(fast5_file& fh, const std::string& read_id);
 
 // Get the experiment type attribute
-std::string fast5_get_experiment_type(hid_t hdf5_file);
+std::string fast5_get_experiment_type(fast5_file& fh, const std::string& read_id);
 
 // Get sample rate, and ADC-to-pA scalings
-fast5_raw_scaling fast5_get_channel_params(hid_t hdf5_file);
+fast5_raw_scaling fast5_get_channel_params(fast5_file& fh, const std::string& read_id);
 
 //
 // Internal utility functions
 //
-std::string fast5_get_fixed_string_attribute(hid_t hdf5_file, const std::string& group_name, const std::string& attribute_name);
+
+// get the name of the raw read in the file (eg Read_1234)
+std::string fast5_get_raw_read_internal_name(fast5_file& fh);
+
+// get the name of the raw read group (eg /Raw/Read/Read_1234 or /read_00041f-.../Raw/)
+std::string fast5_get_raw_read_group(fast5_file& fh, const std::string& read_id);
+
+//
+std::string fast5_get_string_attribute(fast5_file& fh, const std::string& group_name, const std::string& attribute_name);
+
 
 #endif


=====================================
src/common/nanopolish_variant.cpp
=====================================
@@ -391,7 +391,13 @@ std::vector<Variant> simple_call(VariantGroup& variant_group,
         v.add_info("TotalReads", group_reads.size());
         v.add_info("AlleleCount", var_count);
         v.add_info("SupportFraction", read_variant_support[vi] / group_reads.size());
-        v.genotype = make_genotype(var_count, ploidy);
+
+        if(group_reads.size() > 0) {
+            v.genotype = make_genotype(var_count, ploidy);
+        } else {
+            v.genotype = ".";
+        }
+
         output_variants.push_back(v);
     }
 


=====================================
src/nanopolish_call_methylation.cpp
=====================================
@@ -98,7 +98,7 @@ static const char *CALL_METHYLATION_USAGE_MESSAGE =
 "  -r, --reads=FILE                     the ONT reads are in fasta FILE\n"
 "  -b, --bam=FILE                       the reads aligned to the genome assembly are in bam FILE\n"
 "  -g, --genome=FILE                    the genome we are computing a consensus for is in FILE\n"
-"  -q, --methylation=STRING             the type of methylation (cpg,dam,dcm)\n"
+"  -q, --methylation=STRING             the type of methylation (cpg,gpc,dam,dcm)\n"
 "  -t, --threads=NUM                    use NUM threads (default: 1)\n"
 "      --progress                       print out a progress message\n"
 "  -K  --batchsize=NUM                  the batch size (default: 512)\n"


=====================================
src/nanopolish_index.cpp
=====================================
@@ -55,6 +55,7 @@ namespace opt
 }
 static std::ostream* os_p;
 
+//
 void index_file_from_map(ReadDB& read_db, const std::string& fn, const std::map<std::string, std::string>& fast5_to_read_name_map)
 {
     PROFILE_FUNC("index_file_from_map")
@@ -73,24 +74,38 @@ void index_file_from_map(ReadDB& read_db, const std::string& fn, const std::map<
             fprintf(stderr, "Could not find read %s in sequencing summary file\n", fn.c_str());
         }
     }
-} // process_file
+}
 
+//
 void index_file_from_fast5(ReadDB& read_db, const std::string& fn, const std::map<std::string, std::string>& fast5_to_read_name_map)
 {
     PROFILE_FUNC("index_file_from_fast5")
 
-    hid_t hdf5_file = fast5_open(fn);
-    if(hdf5_file < 0) {
+    fast5_file f5_file = fast5_open(fn);
+    if(!fast5_is_open(f5_file)) {
         fprintf(stderr, "could not open fast5 file: %s\n", fn.c_str());
     }
 
-    std::string read_id = fast5_get_read_id(hdf5_file);
-    if(read_id != "") {
-        read_db.add_signal_path(read_id, fn);
+    if(f5_file.is_multi_fast5) {
+        std::vector<std::string> read_groups = fast5_get_multi_read_groups(f5_file);
+        std::string prefix = "read_";
+        for(size_t group_idx = 0; group_idx < read_groups.size(); ++group_idx) {
+            std::string group_name = read_groups[group_idx];
+            if(group_name.find(prefix) == 0) {
+                std::string read_id = group_name.substr(prefix.size());
+                read_db.add_signal_path(read_id, fn);
+            }
+        }
+    } else {
+        std::string read_id = fast5_get_read_id_single_fast5(f5_file);
+        if(read_id != "") {
+            read_db.add_signal_path(read_id, fn);
+        }
     }
-    fast5_close(hdf5_file);
-} // process_file
+    fast5_close(f5_file);
+}
 
+//
 void index_path(ReadDB& read_db, const std::string& path, const std::map<std::string, std::string>& fast5_to_read_name_map)
 {
     fprintf(stderr, "[readdb] indexing %s\n", path.c_str());
@@ -114,7 +129,7 @@ void index_path(ReadDB& read_db, const std::string& path, const std::map<std::st
             }
         }
     }
-} // process_path
+}
 
 // read sequencing summary files from the fofn and add them to the list
 void process_summary_fofn()
@@ -137,6 +152,7 @@ void process_summary_fofn()
     }
 }
 
+//
 void exit_bad_header(const std::string& str, const std::string& filename)
 {
     fprintf(stderr, "Could not find %s column in the header of %s\n", str.c_str(), filename.c_str());
@@ -148,8 +164,8 @@ void parse_sequencing_summary(const std::string& filename, std::map<std::string,
 {
     // open
     std::ifstream in_file(filename.c_str());
-    if(in_file.bad()) {
-        fprintf(stderr, "error: could not file %s\n", filename.c_str());
+    if(!in_file.good()) {
+        fprintf(stderr, "error: could not read file %s\n", filename.c_str());
         exit(EXIT_FAILURE);
     }
 
@@ -261,7 +277,6 @@ int index_main(int argc, char** argv)
 {
     parse_index_options(argc, argv);
 
-
     // Read a map from fast5 files to read name from the sequencing summary files (if any)
     process_summary_fofn();
     std::map<std::string, std::string> fast5_to_read_name_map;


=====================================
src/nanopolish_methyltrain.cpp
=====================================
@@ -23,6 +23,9 @@
 #include <omp.h>
 #include <getopt.h>
 #include <cstddef>
+#include <random>
+#include <functional>
+#include <unordered_map>
 #include "htslib/faidx.h"
 #include "nanopolish_methyltrain.h"
 #include "nanopolish_eventalign.h"
@@ -112,6 +115,7 @@ static const char *METHYLTRAIN_USAGE_MESSAGE =
 "      --max-reads=NUM                  stop after processing NUM reads in each round\n"
 "      --progress                       print out a progress message\n"
 "      --stdv                           enable stdv modelling\n"
+"      --max-events=NUM                 use NUM events for training (default: 1000)\n"
 "\nReport bugs to " PACKAGE_BUGREPORT "\n\n";
 
 namespace opt
@@ -141,6 +145,7 @@ namespace opt
     static unsigned min_distance_from_alignment_end = 5;
     static unsigned min_number_of_events_to_train = 100;
     static unsigned num_training_rounds = 5;
+    static unsigned max_events = 1000;
 }
 
 static const char* shortopts = "r:b:g:t:m:vnc";
@@ -160,7 +165,8 @@ enum { OPT_HELP = 1,
        OPT_P_SKIP_SELF,
        OPT_P_BAD,
        OPT_P_BAD_SELF,
-       OPT_MAX_READS
+       OPT_MAX_READS,
+       OPT_MAX_EVENTS
      };
 
 static const struct option longopts[] = {
@@ -188,6 +194,7 @@ static const struct option longopts[] = {
     { "filter-policy",      required_argument, NULL, OPT_FILTER_POLICY },
     { "rounds",             required_argument, NULL, OPT_NUM_ROUNDS },
     { "max-reads",          required_argument, NULL, OPT_MAX_READS },
+    { "max-events",         required_argument, NULL, OPT_MAX_EVENTS },
     { NULL, 0, NULL, 0 }
 };
 
@@ -299,6 +306,25 @@ bool recalibrate_model(SquiggleRead &sr,
     return recalibrated;
 }
 
+// Use reservoir sampling to limit the amount of events for each kmer
+void add_event(StateSummary& kmer_summary, StateTrainingData std, int event_count)
+{
+    // Add all events for each kmer until we have the maximum number of events to train
+    if(event_count <= opt::max_events) {
+        kmer_summary.events.push_back(std);
+    } else {
+        // Select a random number between 0 and the total number of events for this kmer
+        std::mt19937 rng(event_count);
+        std::uniform_int_distribution<int> gen(0, event_count);
+        int rs_location = gen(rng);
+
+        // Only update the training data if the random number is below the maximum number of events
+        if(rs_location < opt::max_events) {
+            kmer_summary.events[rs_location] = std;
+        }
+    }
+}
+
 // Update the training data with aligned events from a read
 void add_aligned_events(const ReadDB& read_db,
                         const faidx_t* fai,
@@ -311,7 +337,8 @@ void add_aligned_events(const ReadDB& read_db,
                         const std::string& training_alphabet,
                         size_t training_k,
                         size_t round,
-                        ModelTrainingMap& training)
+                        ModelTrainingMap& training,
+                        std::unordered_map<uint32_t, int>& event_count)
 {
     // Load a squiggle read for the mapped read
     std::string read_name = bam_get_qname(record);
@@ -429,7 +456,10 @@ void add_aligned_events(const ReadDB& read_db,
             if(use_for_training) {
                 StateTrainingData std(sr, ea, rank, prev_kmer, next_kmer);
                 #pragma omp critical(kmer)
-                kmer_summary.events.push_back(std);
+                {
+                    event_count[rank]++;
+                    add_event(kmer_summary, std, event_count[rank]);
+                }
             }
 
             if(ea.hmm_state == 'M')  {
@@ -473,6 +503,7 @@ void parse_methyltrain_options(int argc, char** argv)
             case OPT_P_BAD: arg >> g_p_bad; break;
             case OPT_P_BAD_SELF: arg >> g_p_bad_self; break;
             case OPT_MAX_READS: arg >> opt::max_reads; break;
+            case OPT_MAX_EVENTS: arg >> opt::max_events; break;
             case OPT_HELP:
                 std::cout << METHYLTRAIN_USAGE_MESSAGE;
                 exit(EXIT_SUCCESS);
@@ -755,6 +786,7 @@ void train_one_round(const ReadDB& read_db,
     size_t num_reads_realigned = 0;
     size_t num_records_buffered = 0;
     Progress progress("[methyltrain]");
+    std::unordered_map<uint32_t, int> event_count;
 
     do {
         assert(num_records_buffered < records.size());
@@ -773,7 +805,7 @@ void train_one_round(const ReadDB& read_db,
                     add_aligned_events(read_db, fai, hdr, record, read_idx,
                                        clip_start, clip_end,
                                        kit_name, alphabet, k,
-                                       round, model_training_data);
+                                       round, model_training_data, event_count);
                 }
             }
 


=====================================
src/nanopolish_raw_loader.cpp
=====================================
@@ -69,6 +69,11 @@ SquiggleScalings estimate_scalings_using_mom(const std::string& sequence,
 #define move_down(curr_band) { curr_band.event_idx + 1, curr_band.kmer_idx }
 #define move_right(curr_band) { curr_band.event_idx, curr_band.kmer_idx + 1 }
 
+#define ALN_BANDWIDTH 100
+
+#define BAND_ARRAY(r, c) ( bands[((r)*(ALN_BANDWIDTH)+(c))] )
+#define TRACE_ARRAY(r, c) ( trace[((r)*(ALN_BANDWIDTH)+(c))] )
+
 std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read, const PoreModel& pore_model, const std::string& sequence)
 {
     size_t strand_idx = 0;
@@ -87,7 +92,7 @@ std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read,
     int max_gap_threshold = 50;
 
     // banding
-    int bandwidth = 100;
+    int bandwidth = ALN_BANDWIDTH;
     int half_bandwidth = bandwidth / 2;
  
     // transition penalties
@@ -115,15 +120,21 @@ std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read,
         kmer_ranks[i] = alphabet->kmer_rank(sequence.substr(i, k).c_str(), k);
     }
 
-    typedef std::vector<float> bandscore;
-    typedef std::vector<uint8_t> bandtrace;
-
-    std::vector<bandscore> bands(n_bands);
-    std::vector<bandtrace> trace(n_bands);
-
-    for(size_t i = 0; i < n_bands; ++i) {
-        bands[i].resize(bandwidth, -INFINITY);
-        trace[i].resize(bandwidth, 0);
+    float* bands = (float*)malloc(sizeof(float) * n_bands * bandwidth);
+    if(bands==NULL){
+        fprintf(stderr,"Memory allocation failed at %s\n",__func__);
+        exit(1);
+    }
+    uint8_t* trace = (uint8_t*)malloc(sizeof(uint8_t) * n_bands * bandwidth);
+    if(trace==NULL){
+        fprintf(stderr,"Memory allocation failed at %s\n",__func__);
+        exit(1);
+    }
+    for (size_t i = 0; i < n_bands; i++) {
+        for (int j = 0; j < bandwidth; j++) {
+            BAND_ARRAY(i,j) = -INFINITY;
+            TRACE_ARRAY(i,j) = 0;
+        }
     }
 
     // Keep track of the event/kmer index for the lower left corner of the band
@@ -146,18 +157,18 @@ std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read,
     int start_cell_offset = band_kmer_to_offset(0, -1);
     assert(is_offset_valid(start_cell_offset));
     assert(band_event_to_offset(0, -1) == start_cell_offset);
-    bands[0][start_cell_offset] = 0.0f;
-    
+    BAND_ARRAY(0,start_cell_offset) = 0.0f;
+
     // band 1: first event is trimmed
     int first_trim_offset = band_event_to_offset(1, 0);
     assert(kmer_at_offset(1, first_trim_offset) == -1);
     assert(is_offset_valid(first_trim_offset));
-    bands[1][first_trim_offset] = lp_trim;
-    trace[1][first_trim_offset] = FROM_U;
+    BAND_ARRAY(1,first_trim_offset) = lp_trim;
+    TRACE_ARRAY(1,first_trim_offset) = FROM_U;
 
     int fills = 0;
 #ifdef DEBUG_ADAPTIVE
-    fprintf(stderr, "[trim] bi: %d o: %d e: %d k: %d s: %.2lf\n", 1, first_trim_offset, 0, -1, bands[1][first_trim_offset]);
+    fprintf(stderr, "[trim] bi: %d o: %d e: %d k: %d s: %.2lf\n", 1, first_trim_offset, 0, -1, BAND_ARRAY(1,first_trim_offset));
 #endif
 
     // fill in remaining bands
@@ -165,11 +176,11 @@ std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read,
         // Determine placement of this band according to Suzuki's adaptive algorithm
         // When both ll and ur are out-of-band (ob) we alternate movements
         // otherwise we decide based on scores
-        float ll = bands[band_idx - 1][0];
-        float ur = bands[band_idx - 1][bandwidth - 1];
+        float ll = BAND_ARRAY(band_idx - 1,0);
+        float ur = BAND_ARRAY(band_idx - 1,bandwidth - 1);
         bool ll_ob = ll == -INFINITY;
         bool ur_ob = ur == -INFINITY;
-        
+
         bool right = false;
         if(ll_ob && ur_ob) {
             right = band_idx % 2 == 1;
@@ -206,10 +217,10 @@ std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read,
         if(is_offset_valid(trim_offset)) {
             int event_idx = event_at_offset(band_idx, trim_offset);
             if(event_idx >= 0 && event_idx < n_events) {
-                bands[band_idx][trim_offset] = lp_trim * (event_idx + 1);
-                trace[band_idx][trim_offset] = FROM_U;
+                BAND_ARRAY(band_idx,trim_offset) = lp_trim * (event_idx + 1);
+                TRACE_ARRAY(band_idx,trim_offset) = FROM_U;
             } else {
-                bands[band_idx][trim_offset] = -INFINITY;
+                BAND_ARRAY(band_idx,trim_offset) = -INFINITY;
             }
         }
 
@@ -231,11 +242,11 @@ std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read,
             int kmer_idx = kmer_at_offset(band_idx, offset);
 
             size_t kmer_rank = kmer_ranks[kmer_idx];
- 
-            int offset_up   = band_event_to_offset(band_idx - 1, event_idx - 1); 
+
+            int offset_up   = band_event_to_offset(band_idx - 1, event_idx - 1);
             int offset_left = band_kmer_to_offset(band_idx - 1, kmer_idx - 1);
             int offset_diag = band_kmer_to_offset(band_idx - 2, kmer_idx - 1);
- 
+
 #ifdef DEBUG_ADAPTIVE
             // verify loop conditions
             assert(kmer_idx >= 0 && kmer_idx < n_kmers);
@@ -245,10 +256,10 @@ std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read,
             assert(offset >= 0 && offset < bandwidth);
 #endif
 
-            float up   = is_offset_valid(offset_up)   ? bands[band_idx - 1][offset_up]   : -INFINITY;
-            float left = is_offset_valid(offset_left) ? bands[band_idx - 1][offset_left] : -INFINITY;
-            float diag = is_offset_valid(offset_diag) ? bands[band_idx - 2][offset_diag] : -INFINITY;
- 
+            float up   = is_offset_valid(offset_up)   ? BAND_ARRAY(band_idx - 1,offset_up)   : -INFINITY;
+            float left = is_offset_valid(offset_left) ? BAND_ARRAY(band_idx - 1,offset_left) : -INFINITY;
+            float diag = is_offset_valid(offset_diag) ? BAND_ARRAY(band_idx - 2,offset_diag) : -INFINITY;
+
             float lp_emission = log_probability_match_r9(read, pore_model, kmer_rank, event_idx, strand_idx);
             float score_d = diag + lp_step + lp_emission;
             float score_u = up + lp_stay + lp_emission;
@@ -261,18 +272,18 @@ std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read,
             from = max_score == score_u ? FROM_U : from;
             max_score = score_l > max_score ? score_l : max_score;
             from = max_score == score_l ? FROM_L : from;
-    
+
 #ifdef DEBUG_ADAPTIVE
             fprintf(stderr, "[adafill] offset-up: %d offset-diag: %d offset-left: %d\n", offset_up, offset_diag, offset_left);
             fprintf(stderr, "[adafill] up: %.2lf diag: %.2lf left: %.2lf\n", up, diag, left);
             fprintf(stderr, "[adafill] bi: %d o: %d e: %d k: %d s: %.2lf f: %d emit: %.2lf\n", band_idx, offset, event_idx, kmer_idx, max_score, from, lp_emission);
 #endif
-            bands[band_idx][offset] = max_score;
-            trace[band_idx][offset] = from;
+            BAND_ARRAY(band_idx,offset) = max_score;
+            TRACE_ARRAY(band_idx,offset) = from;
             fills += 1;
         }
     }
-    
+
     /*
     // Debug, print some of the score matrix
     for(int col = 0; col <= 10; ++col) {
@@ -294,7 +305,7 @@ std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read,
     double sum_emission = 0;
     double n_aligned_events = 0;
     std::vector<AlignedPair> out;
-    
+
     float max_score = -INFINITY;
     int curr_event_idx = 0;
     int curr_kmer_idx = n_kmers -1;
@@ -302,17 +313,16 @@ std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read,
     // Find best score between an event and the last k-mer. after trimming the remaining evnets
     for(int event_idx = 0; event_idx < n_events; ++event_idx) {
         int band_idx = event_kmer_to_band(event_idx, curr_kmer_idx);
-        assert(band_idx < bands.size());
         int offset = band_event_to_offset(band_idx, event_idx);
         if(is_offset_valid(offset)) {
-            float s = bands[band_idx][offset] + (n_events - event_idx) * lp_trim;
+            float s = BAND_ARRAY(band_idx,offset)  + (n_events - event_idx) * lp_trim;
             if(s > max_score) {
                 max_score = s;
                 curr_event_idx = event_idx;
             }
         }
     }
-    
+
 #ifdef DEBUG_ADAPTIVE
     fprintf(stderr, "[adaback] ei: %d ki: %d s: %.2f\n", curr_event_idx, curr_kmer_idx, max_score);
 #endif
@@ -320,12 +330,12 @@ std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read,
     int curr_gap = 0;
     int max_gap = 0;
     while(curr_kmer_idx >= 0 && curr_event_idx >= 0) {
-        
+
         // emit alignment
         out.push_back({curr_kmer_idx, curr_event_idx});
 #ifdef DEBUG_ADAPTIVE
         fprintf(stderr, "[adaback] ei: %d ki: %d\n", curr_event_idx, curr_kmer_idx);
-#endif       
+#endif
         // qc stats
         size_t kmer_rank = alphabet->kmer_rank(sequence.substr(curr_kmer_idx, k).c_str(), k);
         sum_emission += log_probability_match_r9(read, pore_model, kmer_rank, curr_event_idx, strand_idx);
@@ -335,7 +345,7 @@ std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read,
         int offset = band_event_to_offset(band_idx, curr_event_idx);
         assert(band_kmer_to_offset(band_idx, curr_kmer_idx) == offset);
 
-        uint8_t from = trace[band_idx][offset];
+        uint8_t from = TRACE_ARRAY(band_idx,offset);
         if(from == FROM_D) {
             curr_kmer_idx -= 1;
             curr_event_idx -= 1;
@@ -347,7 +357,7 @@ std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read,
             curr_kmer_idx -= 1;
             curr_gap += 1;
             max_gap = std::max(curr_gap, max_gap);
-        }   
+        }
     }
     std::reverse(out.begin(), out.end());
     
@@ -361,6 +371,9 @@ std::vector<AlignedPair> adaptive_banded_simple_event_align(SquiggleRead& read,
         out.clear();
     }
 
+    free(bands);
+    free(trace);
+
     //fprintf(stderr, "ada\t%s\t%s\t%.2lf\t%zu\t%.2lf\t%d\t%d\t%d\n", read.read_name.substr(0, 6).c_str(), failed ? "FAILED" : "OK", events_per_kmer, sequence.size(), avg_log_emission, curr_event_idx, max_gap, fills);
     return out;
 }


=====================================
src/nanopolish_read_db.cpp
=====================================
@@ -139,7 +139,7 @@ void ReadDB::import_reads(const std::string& input_filename, const std::string&
             path = fields.back();
 
             // as a sanity check we require the path name to end in ".fast5"
-            if(path.substr(path.length() - 6) != ".fast5") {
+            if(path.length() < 6 || path.substr(path.length() - 6) != ".fast5") {
                 path = "";
             }
         }


=====================================
src/nanopolish_squiggle_read.cpp
=====================================
@@ -82,17 +82,17 @@ SquiggleRead::SquiggleRead(const std::string& name, const ReadDB& read_db, const
         return;
     }
 
-
     // Get the read type from the fast5 file
-    hid_t hdf5_file = fast5_open(fast5_path);
-    if(hdf5_file >= 0) {
+    fast5_file f5_file = fast5_open(fast5_path);
+    if(fast5_is_open(f5_file)) {
 
-        //fprintf(stderr, "file: %s\n", fast5_path.c_str());
-        std::string experiment_type = fast5_get_experiment_type(hdf5_file);
-        //fprintf(stderr, "type: %s\n", experiment_type.c_str());
+        std::string sequencing_kit = fast5_get_sequencing_kit(f5_file, this->read_name);
+        std::string experiment_type = fast5_get_experiment_type(f5_file, this->read_name);
 
         // Try to detect whether this read is DNA or RNA
-        this->nucleotide_type = experiment_type == "rna" || experiment_type == "internal_rna" ? SRNT_RNA : SRNT_DNA;
+        // Fix issue 531: experiment_type in fast5 is "rna" for cDNA kit dcs108
+        bool rna_experiment = experiment_type == "rna" || experiment_type == "internal_rna";
+        this->nucleotide_type = rna_experiment && sequencing_kit != "sqk-dcs108" ? SRNT_RNA : SRNT_DNA;
 
         // Did this read come from nanopolish extract?
         bool is_event_read = is_extract_read_name(this->read_name);
@@ -115,10 +115,10 @@ SquiggleRead::SquiggleRead(const std::string& name, const ReadDB& read_db, const
             this->f_p = nullptr;
         } else {
             this->read_sequence = read_db.get_read_sequence(read_name);
-            load_from_raw(hdf5_file, flags);
+            load_from_raw(f5_file, flags);
         }
 
-        fast5_close(hdf5_file);
+        fast5_close(f5_file);
 
     } else {
         fprintf(stderr, "[warning] fast5 file is unreadable and will be skipped: %s\n", fast5_path.c_str());
@@ -271,7 +271,7 @@ void SquiggleRead::load_from_events(const uint32_t flags)
 }
 
 //
-void SquiggleRead::load_from_raw(hid_t hdf5_file, const uint32_t flags)
+void SquiggleRead::load_from_raw(fast5_file& f5_file, const uint32_t flags)
 {
     // File not in db, can't load
     if(this->fast5_path == "" || this->read_sequence == "") {
@@ -304,11 +304,11 @@ void SquiggleRead::load_from_raw(hid_t hdf5_file, const uint32_t flags)
     assert(this->base_model[strand_idx] != NULL);
 
     // Read the sample rate
-    auto channel_params = fast5_get_channel_params(hdf5_file);
+    auto channel_params = fast5_get_channel_params(f5_file, this->read_name);
     this->sample_rate = channel_params.sample_rate;
 
     // Read the actual samples
-    raw_table rt = fast5_get_raw_samples(hdf5_file, channel_params);
+    raw_table rt = fast5_get_raw_samples(f5_file, this->read_name, channel_params);
 
     // trim using scrappie's internal method
     // parameters taken directly from scrappie defaults


=====================================
src/nanopolish_squiggle_read.h
=====================================
@@ -15,6 +15,7 @@
 #include "nanopolish_eventalign.h"
 #include "nanopolish_read_db.h"
 #include "nanopolish_pore_model_set.h"
+#include "nanopolish_fast5_io.h"
 #include <string>
 
 enum PoreType
@@ -303,7 +304,7 @@ class SquiggleRead
         void load_from_events(const uint32_t flags);
 
         // Load all read data from raw samples
-        void load_from_raw(hid_t hdf5_file, const uint32_t flags);
+        void load_from_raw(fast5_file& f5_file, const uint32_t flags);
 
         // Version-specific intialization functions
         void _load_R7(uint32_t si);


=====================================
src/nanopolish_vcf2fasta.cpp
=====================================
@@ -47,6 +47,7 @@ static const char *VCF2FASTA_USAGE_MESSAGE =
 "      --version                        display version\n"
 "      --help                           display this help and exit\n"
 "  -g, --genome=FILE                    the input genome is in FILE\n"
+"  -f, --fofn=FILE                      read the list of VCF files to use from FILE\n"
 "      --skip-checks                    skip the sanity checks\n"
 "\nReport bugs to " PACKAGE_BUGREPORT "\n\n";
 
@@ -54,11 +55,12 @@ namespace opt
 {
     static unsigned int verbose;
     static std::vector<std::string> input_vcf_files;
+    static std::string vcf_fofn;
     static std::string genome_file;
     static bool skip_checks = false;
 }
 
-static const char* shortopts = "g:v";
+static const char* shortopts = "g:f:v";
 
 enum { OPT_HELP = 1, OPT_VERSION, OPT_SKIP_CHECKS };
 
@@ -67,7 +69,8 @@ static const struct option longopts[] = {
     { "help",          no_argument,       NULL, OPT_HELP },
     { "version",       no_argument,       NULL, OPT_VERSION },
     { "skip-checks",   no_argument,       NULL, OPT_SKIP_CHECKS },
-    { "genome",      required_argument, NULL, 'g' },
+    { "genome",        required_argument, NULL, 'g' },
+    { "fofn",          required_argument, NULL, 'f' },
     { NULL, 0, NULL, 0 }
 };
 
@@ -80,6 +83,7 @@ void parse_vcf2fasta_options(int argc, char** argv)
             case '?': die = true; break;
             case 'v': opt::verbose++; break;
             case 'g': arg >> opt::genome_file; break;
+            case 'f': arg >> opt::vcf_fofn; break;
             case OPT_SKIP_CHECKS: opt::skip_checks = true; break;
             case OPT_HELP:
                 std::cout << VCF2FASTA_USAGE_MESSAGE;
@@ -95,7 +99,7 @@ void parse_vcf2fasta_options(int argc, char** argv)
         die = true;
     }
 
-    if (argc - optind < 1) {
+    if (argc - optind < 1 && opt::vcf_fofn.empty()) {
         std::cerr << SUBPROGRAM ": not enough arguments\n";
         die = true;
     }
@@ -109,6 +113,15 @@ void parse_vcf2fasta_options(int argc, char** argv)
     for(; optind < argc; ++optind) {
         opt::input_vcf_files.push_back(argv[optind]);
     }
+
+    // add files from the fofn
+    if(!opt::vcf_fofn.empty()) {
+        std::ifstream infile(opt::vcf_fofn);
+        std::string line;
+        while(getline(infile, line)) {
+            opt::input_vcf_files.push_back(line);
+        }
+    }
 }
 
 int vcf2fasta_main(int argc, char** argv)
@@ -134,10 +147,10 @@ int vcf2fasta_main(int argc, char** argv)
             if(line[0] == '#') {
 
                 // check for window coordinates
-                if(line.find("nanopolish_window") != std::string::npos) {
-                    std::vector<std::string> fields = split(line, '=');
-                    assert(fields.size() == 2);
-                    window_str = fields[1];
+                std::string window_key = "nanopolish_window=";
+                size_t key_pos = line.find(window_key);
+                if(key_pos != std::string::npos) {
+                    window_str = line.substr(key_pos + window_key.size());
                 }
             } else {
                 Variant v(line);


=====================================
src/pore_model/nanopolish_builtin_models.h
=====================================
@@ -21,6 +21,7 @@
 #include "builtin_models/r9_250bps_nucleotide_6mer_complement_pop2_model.inl"
 #include "builtin_models/r9_250bps_nucleotide_6mer_template_model.inl"
 #include "builtin_models/r9_4_450bps_cpg_6mer_template_model.inl"
+#include "builtin_models/r9_4_450bps_gpc_6mer_template_model.inl"
 #include "builtin_models/r9_4_450bps_nucleotide_5mer_template_model.inl"
 #include "builtin_models/r9_4_450bps_nucleotide_6mer_template_model.inl"
 #include "builtin_models/r9_4_70bps_u_to_t_rna_5mer_template_model.inl"
@@ -29,19 +30,20 @@
 
 // Autogenerated from convert_all_models.py
 const static std::vector<PoreModel> builtin_models({
-        initialize_r9_250bps_cpg_6mer_template_model_builtin(),
-        initialize_r9_250bps_nucleotide_5mer_complement_pop1_model_builtin(),
-        initialize_r9_250bps_nucleotide_5mer_complement_pop2_model_builtin(),
-        initialize_r9_250bps_nucleotide_5mer_template_model_builtin(),
-        initialize_r9_250bps_nucleotide_6mer_complement_pop1_model_builtin(),
-        initialize_r9_250bps_nucleotide_6mer_complement_pop2_model_builtin(),
-        initialize_r9_250bps_nucleotide_6mer_template_model_builtin(),
-        initialize_r9_4_450bps_cpg_6mer_template_model_builtin(),
-        initialize_r9_4_450bps_nucleotide_5mer_template_model_builtin(),
-        initialize_r9_4_450bps_nucleotide_6mer_template_model_builtin(),
-        initialize_r9_4_70bps_u_to_t_rna_5mer_template_model_builtin(),
-        initialize_r9_4_450bps_dcm_6mer_template_model_builtin(),
-        initialize_r9_4_450bps_dam_6mer_template_model_builtin()
+	initialize_r9_250bps_cpg_6mer_template_model_builtin(),
+	initialize_r9_250bps_nucleotide_5mer_complement_pop1_model_builtin(),
+	initialize_r9_250bps_nucleotide_5mer_complement_pop2_model_builtin(),
+	initialize_r9_250bps_nucleotide_5mer_template_model_builtin(),
+	initialize_r9_250bps_nucleotide_6mer_complement_pop1_model_builtin(),
+	initialize_r9_250bps_nucleotide_6mer_complement_pop2_model_builtin(),
+	initialize_r9_250bps_nucleotide_6mer_template_model_builtin(),
+	initialize_r9_4_450bps_cpg_6mer_template_model_builtin(),
+	initialize_r9_4_450bps_gpc_6mer_template_model_builtin(),
+	initialize_r9_4_450bps_nucleotide_5mer_template_model_builtin(),
+	initialize_r9_4_450bps_nucleotide_6mer_template_model_builtin(),
+	initialize_r9_4_70bps_u_to_t_rna_5mer_template_model_builtin(),
+	initialize_r9_4_450bps_dcm_6mer_template_model_builtin(),
+	initialize_r9_4_450bps_dam_6mer_template_model_builtin()
 });
 
 #endif


=====================================
src/thirdparty/scrappie/scrappie_common.c
=====================================
@@ -1,6 +1,123 @@
 #include "scrappie_common.h"
 #include "scrappie_stdlib.h"
-#include "util.h"
+//#include "util.h"
+#include <math.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+int floatcmp(const void *x, const void *y) {
+    float d = *(float *)x - *(float *)y;
+    if (d > 0) {
+        return 1;
+    }
+    return -1;
+}
+
+/**  Quantiles from n array
+ *
+ *  Using a relatively inefficent qsort resulting in O(n log n)
+ *  performance but better performance is possible for small np.
+ *  The array p is modified inplace, containing which quantiles to
+ *  calculation on input and the quantiles on output; on error, p
+ *  is filled with the value NAN.
+ *
+ *  @param x An array to calculate quantiles from
+ *  @param nx Length of array x
+ *  @param p An array containing quantiles to calculate [in/out]
+ *  @param np Length of array p
+ *
+ *  @return void
+ **/
+void quantilef(const float *x, size_t nx, float *p, size_t np) {
+    if (NULL == p) {
+        return;
+    }
+    for (int i = 0; i < np; i++) {
+        assert(p[i] >= 0.0f && p[i] <= 1.0f);
+    }
+    if (NULL == x) {
+        for (int i = 0; i < np; i++) {
+            p[i] = NAN;
+        }
+        return;
+    }
+    // Sort array
+    float *space = malloc(nx * sizeof(float));
+    if (NULL == space) {
+        for (int i = 0; i < np; i++) {
+            p[i] = NAN;
+        }
+        return;
+    }
+    memcpy(space, x, nx * sizeof(float));
+    qsort(space, nx, sizeof(float), floatcmp);
+
+    // Extract quantiles
+    for (int i = 0; i < np; i++) {
+        const size_t idx = p[i] * (nx - 1);
+        const float remf = p[i] * (nx - 1) - idx;
+        if (idx < nx - 1) {
+            p[i] = (1.0 - remf) * space[idx] + remf * space[idx + 1];
+        } else {
+            // Should only occur when p is exactly 1.0
+            p[i] = space[idx];
+        }
+    }
+
+    free(space);
+    return;
+}
+
+/** Median of an array
+ *
+ *  Using a relatively inefficent qsort resulting in O(n log n)
+ *  performance but O(n) is possible.
+ *
+ *  @param x An array to calculate median of
+ *  @param n Length of array
+ *
+ *  @return Median of array on success, NAN otherwise.
+ **/
+float medianf(const float *x, size_t n) {
+    float p = 0.5;
+    quantilef(x, n, &p, 1);
+    return p;
+}
+
+/** Median Absolute Deviation of an array
+ *
+ *  @param x An array to calculate the MAD of
+ *  @param n Length of array
+ *  @param med Median of the array.  If NAN then median is calculated.
+ *
+ *  @return MAD of array on success, NAN otherwise.
+ **/
+float madf(const float *x, size_t n, const float *med) {
+    const float mad_scaling_factor = 1.4826;
+    if (NULL == x) {
+        return NAN;
+    }
+    if (1 == n) {
+        return 0.0f;
+    }
+
+    float *absdiff = malloc(n * sizeof(float));
+    if (NULL == absdiff) {
+        return NAN;
+    }
+
+    const float _med = (NULL == med) ? medianf(x, n) : *med;
+
+    for (size_t i = 0; i < n; i++) {
+        absdiff[i] = fabsf(x[i] - _med);
+    }
+
+    const float mad = medianf(absdiff, n);
+    free(absdiff);
+    return mad * mad_scaling_factor;
+}
+
 
 raw_table trim_and_segment_raw(raw_table rt, int trim_start, int trim_end, int varseg_chunk, float varseg_thresh) {
     RETURN_NULL_IF(NULL == rt.raw, (raw_table){0});


=====================================
src/thirdparty/scrappie/sse_mathfun.h deleted
=====================================
@@ -1,722 +0,0 @@
-/* SIMD (SSE1+MMX or SSE2) implementation of sin, cos, exp and log
-
-   Inspired by Intel Approximate Math library, and based on the
-   corresponding algorithms of the cephes math library
-
-   The default is to use the SSE1 version. If you define USE_SSE2 the
-   the SSE2 intrinsics will be used in place of the MMX intrinsics. Do
-   not expect any significant performance improvement with SSE2.
-*/
-
-/* Copyright (C) 2007  Julien Pommier
-
-  This software is provided 'as-is', without any express or implied
-  warranty.  In no event will the authors be held liable for any damages
-  arising from the use of this software.
-
-  Permission is granted to anyone to use this software for any purpose,
-  including commercial applications, and to alter it and redistribute it
-  freely, subject to the following restrictions:
-
-  1. The origin of this software must not be misrepresented; you must not
-     claim that you wrote the original software. If you use this software
-     in a product, an acknowledgment in the product documentation would be
-     appreciated but is not required.
-  2. Altered source versions must be plainly marked as such, and must not be
-     misrepresented as being the original software.
-  3. This notice may not be removed or altered from any source distribution.
-
-  (this is the zlib license)
-*/
-
-/* Copyright (C) 2016 Oxford Nanopore technologies
-
-   This software contain modifications (C) Oxford Nanopore Technologies
-   and is not the original software, which can be obtained from
-   http://gruntthepeon.free.fr/ssemath/
-*/
-#pragma once
-#ifndef SSE_MATHFUN_H
-#define SSE_MATHFUN_H
-
-
-#include <xmmintrin.h>
-
-/* yes I know, the top of this file is quite ugly */
-
-#ifdef _MSC_VER /* visual c++ */
-# define ALIGN16_BEG __declspec(align(16))
-# define ALIGN16_END
-#else /* gcc or icc */
-# define ALIGN16_BEG
-# define ALIGN16_END __attribute__((aligned(16)))
-#endif
-
-/* __m128 is ugly to write */
-typedef __m128 v4sf;  // vector of 4 float (sse1)
-
-#ifdef USE_SSE2
-# include <emmintrin.h>
-typedef __m128i v4si; // vector of 4 int (sse2)
-#else
-typedef __m64 v2si;   // vector of 2 int (mmx)
-#endif
-
-/* declare some SSE constants -- why can't I figure a better way to do that? */
-#define _PS_CONST(Name, Val)                                            \
-  static const ALIGN16_BEG float _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
-#define _PI32_CONST(Name, Val)                                            \
-  static const ALIGN16_BEG int _pi32_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
-#define _PS_CONST_TYPE(Name, Type, Val)                                 \
-  static const ALIGN16_BEG Type _ps_##Name[4] ALIGN16_END = { Val, Val, Val, Val }
-
-_PS_CONST(1  , 1.0f);
-_PS_CONST(0p5, 0.5f);
-/* the smallest non denormalized float number */
-_PS_CONST_TYPE(min_norm_pos, int, 0x00800000);
-_PS_CONST_TYPE(mant_mask, int, 0x7f800000);
-_PS_CONST_TYPE(inv_mant_mask, int, ~0x7f800000);
-
-_PS_CONST_TYPE(sign_mask, int, (int)0x80000000);
-_PS_CONST_TYPE(inv_sign_mask, int, ~0x80000000);
-
-_PI32_CONST(1, 1);
-_PI32_CONST(inv1, ~1);
-_PI32_CONST(2, 2);
-_PI32_CONST(4, 4);
-_PI32_CONST(0x7f, 0x7f);
-
-_PS_CONST(cephes_SQRTHF, 0.707106781186547524);
-_PS_CONST(cephes_log_p0, 7.0376836292E-2);
-_PS_CONST(cephes_log_p1, - 1.1514610310E-1);
-_PS_CONST(cephes_log_p2, 1.1676998740E-1);
-_PS_CONST(cephes_log_p3, - 1.2420140846E-1);
-_PS_CONST(cephes_log_p4, + 1.4249322787E-1);
-_PS_CONST(cephes_log_p5, - 1.6668057665E-1);
-_PS_CONST(cephes_log_p6, + 2.0000714765E-1);
-_PS_CONST(cephes_log_p7, - 2.4999993993E-1);
-_PS_CONST(cephes_log_p8, + 3.3333331174E-1);
-_PS_CONST(cephes_log_q1, -2.12194440e-4);
-_PS_CONST(cephes_log_q2, 0.693359375);
-
-#ifndef USE_SSE2
-typedef union xmm_mm_union {
-  __m128 xmm;
-  __m64 mm[2];
-} xmm_mm_union;
-
-#define COPY_XMM_TO_MM(xmm_, mm0_, mm1_) {          \
-    xmm_mm_union u; u.xmm = xmm_;                   \
-    mm0_ = u.mm[0];                                 \
-    mm1_ = u.mm[1];                                 \
-}
-
-#define COPY_MM_TO_XMM(mm0_, mm1_, xmm_) {                         \
-    xmm_mm_union u; u.mm[0]=mm0_; u.mm[1]=mm1_; xmm_ = u.xmm;      \
-  }
-
-#endif // USE_SSE2
-
-/* natural logarithm computed for 4 simultaneous float
-   return NaN for x <= 0
-*/
-static inline v4sf __attribute__((__always_inline__)) log_ps(v4sf x) {
-#ifdef USE_SSE2
-  v4si emm0;
-#else
-  v2si mm0, mm1;
-#endif
-  v4sf one = *(v4sf*)_ps_1;
-
-  v4sf invalid_mask = _mm_cmple_ps(x, _mm_setzero_ps());
-
-  x = _mm_max_ps(x, *(v4sf*)_ps_min_norm_pos);  /* cut off denormalized stuff */
-
-#ifndef USE_SSE2
-  /* part 1: x = frexpf(x, &e); */
-  COPY_XMM_TO_MM(x, mm0, mm1);
-  mm0 = _mm_srli_pi32(mm0, 23);
-  mm1 = _mm_srli_pi32(mm1, 23);
-#else
-  emm0 = _mm_srli_epi32(_mm_castps_si128(x), 23);
-#endif
-  /* keep only the fractional part */
-  x = _mm_and_ps(x, *(v4sf*)_ps_inv_mant_mask);
-  x = _mm_or_ps(x, *(v4sf*)_ps_0p5);
-
-#ifndef USE_SSE2
-  /* now e=mm0:mm1 contain the really base-2 exponent */
-  mm0 = _mm_sub_pi32(mm0, *(v2si*)_pi32_0x7f);
-  mm1 = _mm_sub_pi32(mm1, *(v2si*)_pi32_0x7f);
-  v4sf e = _mm_cvtpi32x2_ps(mm0, mm1);
-  _mm_empty(); /* bye bye mmx */
-#else
-  emm0 = _mm_sub_epi32(emm0, *(v4si*)_pi32_0x7f);
-  v4sf e = _mm_cvtepi32_ps(emm0);
-#endif
-
-  e = _mm_add_ps(e, one);
-
-  /* part2:
-     if( x < SQRTHF ) {
-       e -= 1;
-       x = x + x - 1.0;
-     } else { x = x - 1.0; }
-  */
-  v4sf mask = _mm_cmplt_ps(x, *(v4sf*)_ps_cephes_SQRTHF);
-  v4sf tmp = _mm_and_ps(x, mask);
-  x = _mm_sub_ps(x, one);
-  e = _mm_sub_ps(e, _mm_and_ps(one, mask));
-  x = _mm_add_ps(x, tmp);
-
-
-  v4sf z = _mm_mul_ps(x,x);
-
-  v4sf y = *(v4sf*)_ps_cephes_log_p0;
-  y = _mm_mul_ps(y, x);
-  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p1);
-  y = _mm_mul_ps(y, x);
-  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p2);
-  y = _mm_mul_ps(y, x);
-  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p3);
-  y = _mm_mul_ps(y, x);
-  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p4);
-  y = _mm_mul_ps(y, x);
-  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p5);
-  y = _mm_mul_ps(y, x);
-  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p6);
-  y = _mm_mul_ps(y, x);
-  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p7);
-  y = _mm_mul_ps(y, x);
-  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_log_p8);
-  y = _mm_mul_ps(y, x);
-
-  y = _mm_mul_ps(y, z);
-
-
-  tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q1);
-  y = _mm_add_ps(y, tmp);
-
-
-  tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
-  y = _mm_sub_ps(y, tmp);
-
-  tmp = _mm_mul_ps(e, *(v4sf*)_ps_cephes_log_q2);
-  x = _mm_add_ps(x, y);
-  x = _mm_add_ps(x, tmp);
-  x = _mm_or_ps(x, invalid_mask); // negative arg will be NAN
-  return x;
-}
-
-_PS_CONST(exp_hi,	88.3762626647949f);
-_PS_CONST(exp_lo,	-88.3762626647949f);
-
-_PS_CONST(cephes_LOG2EF, 1.44269504088896341);
-_PS_CONST(cephes_exp_C1, 0.693359375);
-_PS_CONST(cephes_exp_C2, -2.12194440e-4);
-
-_PS_CONST(cephes_exp_p0, 1.9875691500E-4);
-_PS_CONST(cephes_exp_p1, 1.3981999507E-3);
-_PS_CONST(cephes_exp_p2, 8.3334519073E-3);
-_PS_CONST(cephes_exp_p3, 4.1665795894E-2);
-_PS_CONST(cephes_exp_p4, 1.6666665459E-1);
-_PS_CONST(cephes_exp_p5, 5.0000001201E-1);
-
-static inline __attribute__((__always_inline__)) v4sf exp_ps(v4sf x) {
-  v4sf tmp = _mm_setzero_ps(), fx;
-#ifdef USE_SSE2
-  v4si emm0;
-#else
-  v2si mm0, mm1;
-#endif
-  v4sf one = *(v4sf*)_ps_1;
-
-  x = _mm_min_ps(x, *(v4sf*)_ps_exp_hi);
-  x = _mm_max_ps(x, *(v4sf*)_ps_exp_lo);
-
-  /* express exp(x) as exp(g + n*log(2)) */
-  fx = _mm_mul_ps(x, *(v4sf*)_ps_cephes_LOG2EF);
-  fx = _mm_add_ps(fx, *(v4sf*)_ps_0p5);
-
-  /* how to perform a floorf with SSE: just below */
-#ifndef USE_SSE2
-  /* step 1 : cast to int */
-  tmp = _mm_movehl_ps(tmp, fx);
-  mm0 = _mm_cvttps_pi32(fx);
-  mm1 = _mm_cvttps_pi32(tmp);
-  /* step 2 : cast back to float */
-  tmp = _mm_cvtpi32x2_ps(mm0, mm1);
-#else
-  emm0 = _mm_cvttps_epi32(fx);
-  tmp  = _mm_cvtepi32_ps(emm0);
-#endif
-  /* if greater, substract 1 */
-  v4sf mask = _mm_cmpgt_ps(tmp, fx);
-  mask = _mm_and_ps(mask, one);
-  fx = _mm_sub_ps(tmp, mask);
-
-  tmp = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C1);
-  v4sf z = _mm_mul_ps(fx, *(v4sf*)_ps_cephes_exp_C2);
-  x = _mm_sub_ps(x, tmp);
-  x = _mm_sub_ps(x, z);
-
-  z = _mm_mul_ps(x,x);
-
-  v4sf y = *(v4sf*)_ps_cephes_exp_p0;
-  y = _mm_mul_ps(y, x);
-  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p1);
-  y = _mm_mul_ps(y, x);
-  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p2);
-  y = _mm_mul_ps(y, x);
-  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p3);
-  y = _mm_mul_ps(y, x);
-  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p4);
-  y = _mm_mul_ps(y, x);
-  y = _mm_add_ps(y, *(v4sf*)_ps_cephes_exp_p5);
-  y = _mm_mul_ps(y, z);
-  y = _mm_add_ps(y, x);
-  y = _mm_add_ps(y, one);
-
-  /* build 2^n */
-#ifndef USE_SSE2
-  z = _mm_movehl_ps(z, fx);
-  mm0 = _mm_cvttps_pi32(fx);
-  mm1 = _mm_cvttps_pi32(z);
-  mm0 = _mm_add_pi32(mm0, *(v2si*)_pi32_0x7f);
-  mm1 = _mm_add_pi32(mm1, *(v2si*)_pi32_0x7f);
-  mm0 = _mm_slli_pi32(mm0, 23);
-  mm1 = _mm_slli_pi32(mm1, 23);
-
-  v4sf pow2n;
-  COPY_MM_TO_XMM(mm0, mm1, pow2n);
-  _mm_empty();
-#else
-  emm0 = _mm_cvttps_epi32(fx);
-  emm0 = _mm_add_epi32(emm0, *(v4si*)_pi32_0x7f);
-  emm0 = _mm_slli_epi32(emm0, 23);
-  v4sf pow2n = _mm_castsi128_ps(emm0);
-#endif
-  y = _mm_mul_ps(y, pow2n);
-  return y;
-}
-
-_PS_CONST(minus_cephes_DP1, -0.78515625);
-_PS_CONST(minus_cephes_DP2, -2.4187564849853515625e-4);
-_PS_CONST(minus_cephes_DP3, -3.77489497744594108e-8);
-_PS_CONST(sincof_p0, -1.9515295891E-4);
-_PS_CONST(sincof_p1,  8.3321608736E-3);
-_PS_CONST(sincof_p2, -1.6666654611E-1);
-_PS_CONST(coscof_p0,  2.443315711809948E-005);
-_PS_CONST(coscof_p1, -1.388731625493765E-003);
-_PS_CONST(coscof_p2,  4.166664568298827E-002);
-_PS_CONST(cephes_FOPI, 1.27323954473516); // 4 / M_PI
-
-
-/* evaluation of 4 sines at onces, using only SSE1+MMX intrinsics so
-   it runs also on old athlons XPs and the pentium III of your grand
-   mother.
-
-   The code is the exact rewriting of the cephes sinf function.
-   Precision is excellent as long as x < 8192 (I did not bother to
-   take into account the special handling they have for greater values
-   -- it does not return garbage for arguments over 8192, though, but
-   the extra precision is missing).
-
-   Note that it is such that sinf((float)M_PI) = 8.74e-8, which is the
-   surprising but correct result.
-
-   Performance is also surprisingly good, 1.33 times faster than the
-   macos vsinf SSE2 function, and 1.5 times faster than the
-   __vrs4_sinf of amd's ACML (which is only available in 64 bits). Not
-   too bad for an SSE1 function (with no special tuning) !
-   However the latter libraries probably have a much better handling of NaN,
-   Inf, denormalized and other special arguments..
-
-   On my core 1 duo, the execution of this function takes approximately 95 cycles.
-
-   From what I have observed on the experiments with Intel AMath lib, switching to an
-   SSE2 version would improve the perf by only 10%.
-
-   Since it is based on SSE intrinsics, it has to be compiled at -O2 to
-   deliver full speed.
-*/
-static v4sf sin_ps(v4sf x) { // any x
-  v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
-
-#ifdef USE_SSE2
-  v4si emm0, emm2;
-#else
-  v2si mm0, mm1, mm2, mm3;
-#endif
-  sign_bit = x;
-  /* take the absolute value */
-  x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
-  /* extract the sign bit (upper one) */
-  sign_bit = _mm_and_ps(sign_bit, *(v4sf*)_ps_sign_mask);
-
-  /* scale by 4/Pi */
-  y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
-
-#ifdef USE_SSE2
-  /* store the integer part of y in mm0 */
-  emm2 = _mm_cvttps_epi32(y);
-  /* j=(j+1) & (~1) (see the cephes sources) */
-  emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
-  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
-  y = _mm_cvtepi32_ps(emm2);
-
-  /* get the swap sign flag */
-  emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
-  emm0 = _mm_slli_epi32(emm0, 29);
-  /* get the polynom selection mask
-     there is one polynom for 0 <= x <= Pi/4
-     and another one for Pi/4<x<=Pi/2
-
-     Both branches will be computed.
-  */
-  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
-  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
-
-  v4sf swap_sign_bit = _mm_castsi128_ps(emm0);
-  v4sf poly_mask = _mm_castsi128_ps(emm2);
-  sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
-
-#else
-  /* store the integer part of y in mm0:mm1 */
-  xmm2 = _mm_movehl_ps(xmm2, y);
-  mm2 = _mm_cvttps_pi32(y);
-  mm3 = _mm_cvttps_pi32(xmm2);
-  /* j=(j+1) & (~1) (see the cephes sources) */
-  mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
-  mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
-  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
-  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
-  y = _mm_cvtpi32x2_ps(mm2, mm3);
-  /* get the swap sign flag */
-  mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
-  mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
-  mm0 = _mm_slli_pi32(mm0, 29);
-  mm1 = _mm_slli_pi32(mm1, 29);
-  /* get the polynom selection mask */
-  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
-  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
-  mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
-  mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
-  v4sf swap_sign_bit, poly_mask;
-  COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit);
-  COPY_MM_TO_XMM(mm2, mm3, poly_mask);
-  sign_bit = _mm_xor_ps(sign_bit, swap_sign_bit);
-  _mm_empty(); /* good-bye mmx */
-#endif
-
-  /* The magic pass: "Extended precision modular arithmetic"
-     x = ((x - y * DP1) - y * DP2) - y * DP3; */
-  xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
-  xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
-  xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
-  xmm1 = _mm_mul_ps(y, xmm1);
-  xmm2 = _mm_mul_ps(y, xmm2);
-  xmm3 = _mm_mul_ps(y, xmm3);
-  x = _mm_add_ps(x, xmm1);
-  x = _mm_add_ps(x, xmm2);
-  x = _mm_add_ps(x, xmm3);
-
-  /* Evaluate the first polynom  (0 <= x <= Pi/4) */
-  y = *(v4sf*)_ps_coscof_p0;
-  v4sf z = _mm_mul_ps(x,x);
-
-  y = _mm_mul_ps(y, z);
-  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
-  y = _mm_mul_ps(y, z);
-  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
-  y = _mm_mul_ps(y, z);
-  y = _mm_mul_ps(y, z);
-  v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
-  y = _mm_sub_ps(y, tmp);
-  y = _mm_add_ps(y, *(v4sf*)_ps_1);
-
-  /* Evaluate the second polynom  (Pi/4 <= x <= 0) */
-
-  v4sf y2 = *(v4sf*)_ps_sincof_p0;
-  y2 = _mm_mul_ps(y2, z);
-  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
-  y2 = _mm_mul_ps(y2, z);
-  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
-  y2 = _mm_mul_ps(y2, z);
-  y2 = _mm_mul_ps(y2, x);
-  y2 = _mm_add_ps(y2, x);
-
-  /* select the correct result from the two polynoms */
-  xmm3 = poly_mask;
-  y2 = _mm_and_ps(xmm3, y2); //, xmm3);
-  y = _mm_andnot_ps(xmm3, y);
-  y = _mm_add_ps(y,y2);
-  /* update the sign */
-  y = _mm_xor_ps(y, sign_bit);
-  return y;
-}
-
-/* almost the same as sin_ps */
-static v4sf cos_ps(v4sf x) { // any x
-  v4sf xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
-#ifdef USE_SSE2
-  v4si emm0, emm2;
-#else
-  v2si mm0, mm1, mm2, mm3;
-#endif
-  /* take the absolute value */
-  x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
-
-  /* scale by 4/Pi */
-  y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
-
-#ifdef USE_SSE2
-  /* store the integer part of y in mm0 */
-  emm2 = _mm_cvttps_epi32(y);
-  /* j=(j+1) & (~1) (see the cephes sources) */
-  emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
-  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
-  y = _mm_cvtepi32_ps(emm2);
-
-  emm2 = _mm_sub_epi32(emm2, *(v4si*)_pi32_2);
-
-  /* get the swap sign flag */
-  emm0 = _mm_andnot_si128(emm2, *(v4si*)_pi32_4);
-  emm0 = _mm_slli_epi32(emm0, 29);
-  /* get the polynom selection mask */
-  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
-  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
-
-  v4sf sign_bit = _mm_castsi128_ps(emm0);
-  v4sf poly_mask = _mm_castsi128_ps(emm2);
-#else
-  /* store the integer part of y in mm0:mm1 */
-  xmm2 = _mm_movehl_ps(xmm2, y);
-  mm2 = _mm_cvttps_pi32(y);
-  mm3 = _mm_cvttps_pi32(xmm2);
-
-  /* j=(j+1) & (~1) (see the cephes sources) */
-  mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
-  mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
-  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
-  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
-
-  y = _mm_cvtpi32x2_ps(mm2, mm3);
-
-
-  mm2 = _mm_sub_pi32(mm2, *(v2si*)_pi32_2);
-  mm3 = _mm_sub_pi32(mm3, *(v2si*)_pi32_2);
-
-  /* get the swap sign flag in mm0:mm1 and the
-     polynom selection mask in mm2:mm3 */
-
-  mm0 = _mm_andnot_si64(mm2, *(v2si*)_pi32_4);
-  mm1 = _mm_andnot_si64(mm3, *(v2si*)_pi32_4);
-  mm0 = _mm_slli_pi32(mm0, 29);
-  mm1 = _mm_slli_pi32(mm1, 29);
-
-  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
-  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
-
-  mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
-  mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
-
-  v4sf sign_bit, poly_mask;
-  COPY_MM_TO_XMM(mm0, mm1, sign_bit);
-  COPY_MM_TO_XMM(mm2, mm3, poly_mask);
-  _mm_empty(); /* good-bye mmx */
-#endif
-  /* The magic pass: "Extended precision modular arithmetic"
-     x = ((x - y * DP1) - y * DP2) - y * DP3; */
-  xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
-  xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
-  xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
-  xmm1 = _mm_mul_ps(y, xmm1);
-  xmm2 = _mm_mul_ps(y, xmm2);
-  xmm3 = _mm_mul_ps(y, xmm3);
-  x = _mm_add_ps(x, xmm1);
-  x = _mm_add_ps(x, xmm2);
-  x = _mm_add_ps(x, xmm3);
-
-  /* Evaluate the first polynom  (0 <= x <= Pi/4) */
-  y = *(v4sf*)_ps_coscof_p0;
-  v4sf z = _mm_mul_ps(x,x);
-
-  y = _mm_mul_ps(y, z);
-  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
-  y = _mm_mul_ps(y, z);
-  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
-  y = _mm_mul_ps(y, z);
-  y = _mm_mul_ps(y, z);
-  v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
-  y = _mm_sub_ps(y, tmp);
-  y = _mm_add_ps(y, *(v4sf*)_ps_1);
-
-  /* Evaluate the second polynom  (Pi/4 <= x <= 0) */
-
-  v4sf y2 = *(v4sf*)_ps_sincof_p0;
-  y2 = _mm_mul_ps(y2, z);
-  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
-  y2 = _mm_mul_ps(y2, z);
-  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
-  y2 = _mm_mul_ps(y2, z);
-  y2 = _mm_mul_ps(y2, x);
-  y2 = _mm_add_ps(y2, x);
-
-  /* select the correct result from the two polynoms */
-  xmm3 = poly_mask;
-  y2 = _mm_and_ps(xmm3, y2); //, xmm3);
-  y = _mm_andnot_ps(xmm3, y);
-  y = _mm_add_ps(y,y2);
-  /* update the sign */
-  y = _mm_xor_ps(y, sign_bit);
-
-  return y;
-}
-
-/* since sin_ps and cos_ps are almost identical, sincos_ps could replace both of them..
-   it is almost as fast, and gives you a free cosine with your sine */
-static void sincos_ps(v4sf x, v4sf *s, v4sf *c) {
-  v4sf xmm1, xmm2, xmm3 = _mm_setzero_ps(), sign_bit_sin, y;
-#ifdef USE_SSE2
-  v4si emm0, emm2, emm4;
-#else
-  v2si mm0, mm1, mm2, mm3, mm4, mm5;
-#endif
-  sign_bit_sin = x;
-  /* take the absolute value */
-  x = _mm_and_ps(x, *(v4sf*)_ps_inv_sign_mask);
-  /* extract the sign bit (upper one) */
-  sign_bit_sin = _mm_and_ps(sign_bit_sin, *(v4sf*)_ps_sign_mask);
-
-  /* scale by 4/Pi */
-  y = _mm_mul_ps(x, *(v4sf*)_ps_cephes_FOPI);
-
-#ifdef USE_SSE2
-  /* store the integer part of y in emm2 */
-  emm2 = _mm_cvttps_epi32(y);
-
-  /* j=(j+1) & (~1) (see the cephes sources) */
-  emm2 = _mm_add_epi32(emm2, *(v4si*)_pi32_1);
-  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_inv1);
-  y = _mm_cvtepi32_ps(emm2);
-
-  emm4 = emm2;
-
-  /* get the swap sign flag for the sine */
-  emm0 = _mm_and_si128(emm2, *(v4si*)_pi32_4);
-  emm0 = _mm_slli_epi32(emm0, 29);
-  v4sf swap_sign_bit_sin = _mm_castsi128_ps(emm0);
-
-  /* get the polynom selection mask for the sine*/
-  emm2 = _mm_and_si128(emm2, *(v4si*)_pi32_2);
-  emm2 = _mm_cmpeq_epi32(emm2, _mm_setzero_si128());
-  v4sf poly_mask = _mm_castsi128_ps(emm2);
-#else
-  /* store the integer part of y in mm2:mm3 */
-  xmm3 = _mm_movehl_ps(xmm3, y);
-  mm2 = _mm_cvttps_pi32(y);
-  mm3 = _mm_cvttps_pi32(xmm3);
-
-  /* j=(j+1) & (~1) (see the cephes sources) */
-  mm2 = _mm_add_pi32(mm2, *(v2si*)_pi32_1);
-  mm3 = _mm_add_pi32(mm3, *(v2si*)_pi32_1);
-  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_inv1);
-  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_inv1);
-
-  y = _mm_cvtpi32x2_ps(mm2, mm3);
-
-  mm4 = mm2;
-  mm5 = mm3;
-
-  /* get the swap sign flag for the sine */
-  mm0 = _mm_and_si64(mm2, *(v2si*)_pi32_4);
-  mm1 = _mm_and_si64(mm3, *(v2si*)_pi32_4);
-  mm0 = _mm_slli_pi32(mm0, 29);
-  mm1 = _mm_slli_pi32(mm1, 29);
-  v4sf swap_sign_bit_sin;
-  COPY_MM_TO_XMM(mm0, mm1, swap_sign_bit_sin);
-
-  /* get the polynom selection mask for the sine */
-
-  mm2 = _mm_and_si64(mm2, *(v2si*)_pi32_2);
-  mm3 = _mm_and_si64(mm3, *(v2si*)_pi32_2);
-  mm2 = _mm_cmpeq_pi32(mm2, _mm_setzero_si64());
-  mm3 = _mm_cmpeq_pi32(mm3, _mm_setzero_si64());
-  v4sf poly_mask;
-  COPY_MM_TO_XMM(mm2, mm3, poly_mask);
-#endif
-
-  /* The magic pass: "Extended precision modular arithmetic"
-     x = ((x - y * DP1) - y * DP2) - y * DP3; */
-  xmm1 = *(v4sf*)_ps_minus_cephes_DP1;
-  xmm2 = *(v4sf*)_ps_minus_cephes_DP2;
-  xmm3 = *(v4sf*)_ps_minus_cephes_DP3;
-  xmm1 = _mm_mul_ps(y, xmm1);
-  xmm2 = _mm_mul_ps(y, xmm2);
-  xmm3 = _mm_mul_ps(y, xmm3);
-  x = _mm_add_ps(x, xmm1);
-  x = _mm_add_ps(x, xmm2);
-  x = _mm_add_ps(x, xmm3);
-
-#ifdef USE_SSE2
-  emm4 = _mm_sub_epi32(emm4, *(v4si*)_pi32_2);
-  emm4 = _mm_andnot_si128(emm4, *(v4si*)_pi32_4);
-  emm4 = _mm_slli_epi32(emm4, 29);
-  v4sf sign_bit_cos = _mm_castsi128_ps(emm4);
-#else
-  /* get the sign flag for the cosine */
-  mm4 = _mm_sub_pi32(mm4, *(v2si*)_pi32_2);
-  mm5 = _mm_sub_pi32(mm5, *(v2si*)_pi32_2);
-  mm4 = _mm_andnot_si64(mm4, *(v2si*)_pi32_4);
-  mm5 = _mm_andnot_si64(mm5, *(v2si*)_pi32_4);
-  mm4 = _mm_slli_pi32(mm4, 29);
-  mm5 = _mm_slli_pi32(mm5, 29);
-  v4sf sign_bit_cos;
-  COPY_MM_TO_XMM(mm4, mm5, sign_bit_cos);
-  _mm_empty(); /* good-bye mmx */
-#endif
-
-  sign_bit_sin = _mm_xor_ps(sign_bit_sin, swap_sign_bit_sin);
-
-
-  /* Evaluate the first polynom  (0 <= x <= Pi/4) */
-  v4sf z = _mm_mul_ps(x,x);
-  y = *(v4sf*)_ps_coscof_p0;
-
-  y = _mm_mul_ps(y, z);
-  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p1);
-  y = _mm_mul_ps(y, z);
-  y = _mm_add_ps(y, *(v4sf*)_ps_coscof_p2);
-  y = _mm_mul_ps(y, z);
-  y = _mm_mul_ps(y, z);
-  v4sf tmp = _mm_mul_ps(z, *(v4sf*)_ps_0p5);
-  y = _mm_sub_ps(y, tmp);
-  y = _mm_add_ps(y, *(v4sf*)_ps_1);
-
-  /* Evaluate the second polynom  (Pi/4 <= x <= 0) */
-
-  v4sf y2 = *(v4sf*)_ps_sincof_p0;
-  y2 = _mm_mul_ps(y2, z);
-  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p1);
-  y2 = _mm_mul_ps(y2, z);
-  y2 = _mm_add_ps(y2, *(v4sf*)_ps_sincof_p2);
-  y2 = _mm_mul_ps(y2, z);
-  y2 = _mm_mul_ps(y2, x);
-  y2 = _mm_add_ps(y2, x);
-
-  /* select the correct result from the two polynoms */
-  xmm3 = poly_mask;
-  v4sf ysin2 = _mm_and_ps(xmm3, y2);
-  v4sf ysin1 = _mm_andnot_ps(xmm3, y);
-  y2 = _mm_sub_ps(y2,ysin2);
-  y = _mm_sub_ps(y, ysin1);
-
-  xmm1 = _mm_add_ps(ysin1,ysin2);
-  xmm2 = _mm_add_ps(y,y2);
-
-  /* update the sign */
-  *s = _mm_xor_ps(xmm1, sign_bit_sin);
-  *c = _mm_xor_ps(xmm2, sign_bit_cos);
-}
-#endif /* SSE_MATHFUN_H */


=====================================
src/thirdparty/scrappie/util.c deleted
=====================================
@@ -1,288 +0,0 @@
-// Too many calls to quantile lead to high failure rate for `scrappie raw`
-#define BANANA 1
-#include <assert.h>
-#include <err.h>
-#include <math.h>
-#include "scrappie_stdlib.h"
-#include "util.h"
-
-int argmaxf(const float *x, int n) {
-    assert(n > 0);
-    if (NULL == x) {
-        return -1;
-    }
-    int imax = 0;
-    float vmax = x[0];
-    for (int i = 1; i < n; i++) {
-        if (x[i] > vmax) {
-            vmax = x[i];
-            imax = i;
-        }
-    }
-    return imax;
-}
-
-int argminf(const float *x, int n) {
-    assert(n > 0);
-    if (NULL == x) {
-        return -1;
-    }
-    int imin = 0;
-    float vmin = x[0];
-    for (int i = 1; i < n; i++) {
-        if (x[i] > vmin) {
-            vmin = x[i];
-            imin = i;
-        }
-    }
-    return imin;
-}
-
-float valmaxf(const float *x, int n) {
-    assert(n > 0);
-    if (NULL == x) {
-        return NAN;
-    }
-    float vmax = x[0];
-    for (int i = 1; i < n; i++) {
-        if (x[i] > vmax) {
-            vmax = x[i];
-        }
-    }
-    return vmax;
-}
-
-float valminf(const float *x, int n) {
-    assert(n > 0);
-    if (NULL == x) {
-        return NAN;
-    }
-    float vmin = x[0];
-    for (int i = 1; i < n; i++) {
-        if (x[i] > vmin) {
-            vmin = x[i];
-        }
-    }
-    return vmin;
-}
-
-int floatcmp(const void *x, const void *y) {
-    float d = *(float *)x - *(float *)y;
-    if (d > 0) {
-        return 1;
-    }
-    return -1;
-}
-
-/**  Quantiles from n array
- *
- *  Using a relatively inefficent qsort resulting in O(n log n)
- *  performance but better performance is possible for small np.
- *  The array p is modified inplace, containing which quantiles to
- *  calculation on input and the quantiles on output; on error, p
- *  is filled with the value NAN.
- *
- *  @param x An array to calculate quantiles from
- *  @param nx Length of array x
- *  @param p An array containing quantiles to calculate [in/out]
- *  @param np Length of array p
- *
- *  @return void
- **/
-void quantilef(const float *x, size_t nx, float *p, size_t np) {
-    if (NULL == p) {
-        return;
-    }
-    for (int i = 0; i < np; i++) {
-        assert(p[i] >= 0.0f && p[i] <= 1.0f);
-    }
-    if (NULL == x) {
-        for (int i = 0; i < np; i++) {
-            p[i] = NAN;
-        }
-        return;
-    }
-    // Sort array
-    float *space = malloc(nx * sizeof(float));
-    if (NULL == space) {
-        for (int i = 0; i < np; i++) {
-            p[i] = NAN;
-        }
-        return;
-    }
-    memcpy(space, x, nx * sizeof(float));
-    qsort(space, nx, sizeof(float), floatcmp);
-
-    // Extract quantiles
-    for (int i = 0; i < np; i++) {
-        const size_t idx = p[i] * (nx - 1);
-        const float remf = p[i] * (nx - 1) - idx;
-        if (idx < nx - 1) {
-            p[i] = (1.0 - remf) * space[idx] + remf * space[idx + 1];
-        } else {
-            // Should only occur when p is exactly 1.0
-            p[i] = space[idx];
-        }
-    }
-
-    free(space);
-    return;
-}
-
-/** Median of an array
- *
- *  Using a relatively inefficent qsort resulting in O(n log n)
- *  performance but O(n) is possible.
- *
- *  @param x An array to calculate median of
- *  @param n Length of array
- *
- *  @return Median of array on success, NAN otherwise.
- **/
-float medianf(const float *x, size_t n) {
-    float p = 0.5;
-    quantilef(x, n, &p, 1);
-    return p;
-}
-
-/** Median Absolute Deviation of an array
- *
- *  @param x An array to calculate the MAD of
- *  @param n Length of array
- *  @param med Median of the array.  If NAN then median is calculated.
- *
- *  @return MAD of array on success, NAN otherwise.
- **/
-float madf(const float *x, size_t n, const float *med) {
-    const float mad_scaling_factor = 1.4826;
-    if (NULL == x) {
-        return NAN;
-    }
-    if (1 == n) {
-        return 0.0f;
-    }
-
-    float *absdiff = malloc(n * sizeof(float));
-    if (NULL == absdiff) {
-        return NAN;
-    }
-
-    const float _med = (NULL == med) ? medianf(x, n) : *med;
-
-    for (size_t i = 0; i < n; i++) {
-        absdiff[i] = fabsf(x[i] - _med);
-    }
-
-    const float mad = medianf(absdiff, n);
-    free(absdiff);
-    return mad * mad_scaling_factor;
-}
-
-/** Med-MAD normalisation of an array
- *
- *  Normalise an array using the median and MAD as measures of
- *  location and scale respectively.  The array is updated inplace.
- *
- *  @param x An array containing values to normalise
- *  @param n Length of array
- *  @return void
- **/
-void medmad_normalise_array(float *x, size_t n) {
-    if (NULL == x) {
-        return;
-    }
-    if (1 == n) {
-        x[0] = 0.0;
-        return;
-    }
-
-    const float xmed = medianf(x, n);
-    const float xmad = madf(x, n, &xmed);
-    for (int i = 0; i < n; i++) {
-        x[i] = (x[i] - xmed) / xmad;
-    }
-}
-
-/** Studentise array using Kahan summation algorithm
- *
- *  Studentise an array using the Kahan summation
- *  algorithm for numerical stability. The array is updated inplace.
- *  https://en.wikipedia.org/wiki/Kahan_summation_algorithm
- *
- *  @param x An array to normalise
- *  @param n Length of array
- *  @return void
- **/
-void studentise_array_kahan(float *x, size_t n) {
-    if (NULL == x) {
-        return;
-    }
-
-    double sum, sumsq, comp, compsq;
-    sumsq = sum = comp = compsq = 0.0;
-    for (int i = 0; i < n; i++) {
-        double d1 = x[i] - comp;
-        double sum_tmp = sum + d1;
-        comp = (sum_tmp - sum) - d1;
-        sum = sum_tmp;
-
-        double d2 = x[i] * x[i] - compsq;
-        double sumsq_tmp = sumsq + d2;
-        compsq = (sumsq_tmp - sumsq) - d2;
-        sumsq = sumsq_tmp;
-    }
-    sum /= n;
-    sumsq /= n;
-    sumsq -= sum * sum;
-
-    sumsq = sqrt(sumsq);
-
-    const float sumf = sum;
-    const float sumsqf = sumsq;
-    for (int i = 0; i < n; i++) {
-        x[i] = (x[i] - sumf) / sumsqf;
-    }
-}
-
-bool equality_array(double const * x, double const * y, size_t n, double const tol){
-
-    if(NULL == x || NULL == y){
-        return NULL == x && NULL == y;
-    }
-    for(size_t i=0 ; i < n ; i++){
-        if(fabs(x[i] - y[i]) > tol){
-            warnx("Failure at elt %zu: %f %f\n", i, x[i], y[i]);
-            return false;
-        }
-    }
-
-    return true;
-}
-
-bool equality_arrayf(float const * x, float const * y, size_t n, float const tol){
-    if(NULL == x || NULL == y){
-        return NULL == x && NULL == y;
-    }
-    for(size_t i=0 ; i < n ; i++){
-        if(fabsf(x[i] - y[i]) > tol){
-            warnx("Failure at elt %zu: %f %f\n", i, x[i], y[i]);
-            return false;
-        }
-    }
-
-    return true;
-}
-
-bool equality_arrayi(int const * x, int const * y, size_t n){
-    if(NULL == x || NULL == y){
-        return NULL == x && NULL == y;
-    }
-    for(size_t i=0 ; i < n ; i++){
-        if(x[i] != y[i]){
-            warnx("Failure at elt %zu: %d %d\n", i, x[i], y[i]);
-            return false;
-        }
-    }
-
-    return true;
-}


=====================================
src/thirdparty/scrappie/util.h deleted
=====================================
@@ -1,197 +0,0 @@
-#pragma once
-#ifndef UTIL_H
-#    define UTIL_H
-#    include <immintrin.h>
-#    include <math.h>
-#    include <stdbool.h>
-#    include <stdint.h>
-#    include <stdio.h>
-#    include "sse_mathfun.h"
-
-#    ifdef FAST_LOG
-#        define LOGFV fast_logfv
-#    else
-#        define LOGFV logfv
-#    endif
-
-#    ifdef FAST_EXP
-#        define EXPFV fast_expfv
-#    else
-#        define EXPFV expfv
-#    endif
-
-#    ifdef FAST_TANH
-#        define TANHFV fast_tanhfv
-#    else
-#        define TANHFV tanhfv
-#    endif
-
-#    ifdef FAST_LOGISTIC
-#        define LOGISTICFV fast_logisticfv
-#    else
-#        define LOGISTICFV logisticfv
-#    endif
-
-#    ifdef FAST_ELU
-#        define ELUFV fast_elufv
-#    else
-#        define ELUFV elufv
-#    endif
-
-/* Create a vector of  ones.  */
-extern __inline __m128 __attribute__ ((__gnu_inline__, __always_inline__))
-    _mm_setone_ps(void) {
-    return __extension__(__m128) {
-    1.0f, 1.0f, 1.0f, 1.0f};
-}
-
-extern __inline __m128d __attribute__ ((__gnu_inline__, __always_inline__))
-    _mm_setone_pd(void) {
-    return __extension__(__m128d) {
-    1.0, 1.0};
-}
-
-
-/**
- *   Standard implementations
- **/
-static inline float logisticf(float x) {
-    return 1.0f / (1.0f + expf(-x));
-}
-
-static inline float eluf(float x){
-    return (x >= 0.0f) ? x : expm1f(x);
-}
-
-// Constants for fast exp approximation.  See Schraudolph (1999)
-#    define _A 12102203.161561485f
-//  Minimum maximum relative error
-//#    define _B 1064986822.5027076f
-//  No bias at zero
-#    define _B 1065353216.0f
-//  Minimum RMS relative error
-//#    define _B 1064866803.6193156f
-//  Minimum mean relative error
-//#    define _B 1064807268.0425934f
-#    define _BOUND 88.02969193111305
-static inline float fast_expf(float x) {
-    x = fmaxf(-_BOUND, fminf(_BOUND, x));
-    union {
-        uint32_t i;
-        float f;
-    } value = {
-    .i = (uint32_t) (_A * x + _B)};
-    return value.f;
-}
-
-static inline float fast_logisticf(float x) {
-    return 1.0 / (1.0 + fast_expf(-x));
-}
-
-static inline float fast_tanhf(float x) {
-    const float y = fast_logisticf(x + x);
-    return y + y - 1.0;
-}
-
-static inline float fast_eluf(float x){
-    return (x >= 0.0f) ? x : (fast_expf(x) - 1.0);
-}
-
-/**
- *   Vectorised functions
- **/
-static inline __m128 __attribute__ ((__always_inline__)) expfv(__m128 x) {
-    __v4sf y = (__v4sf) x;
-    return (__m128) exp_ps(y);
-}
-
-static inline __m128 __attribute__ ((__always_inline__)) logfv(__m128 x) {
-    __v4sf y = (__v4sf) x;
-    return (__m128) log_ps(y);
-}
-
-static inline __m128 __attribute__ ((__always_inline__)) logisticfv(__m128 x) {
-    const __m128 ones = _mm_setone_ps();
-    return _mm_div_ps(ones, _mm_add_ps(ones, expfv(-x)));
-}
-
-static inline __m128 __attribute__ ((__always_inline__)) tanhfv(__m128 x) {
-    const __m128 y = logisticfv(_mm_add_ps(x, x));
-    return _mm_sub_ps(_mm_add_ps(y, y), _mm_setone_ps());
-}
-
-static inline __m128 __attribute__ ((__always_inline__)) elufv(__m128 x) {
-    if(0 == _mm_movemask_ps(x)){
-        // All positive, early return.
-        return x;
-    }
-    const __m128 mask = _mm_cmpge_ps(x, _mm_setzero_ps());
-    const __m128 y = expfv(x) - _mm_setone_ps();
-    return _mm_or_ps(_mm_and_ps(mask, x),  _mm_andnot_ps(mask, y));
-}
-
-/**
- *    Fast vectorised approximations
- **/
-static inline __m128 fast_expfv(__m128 x) {
-    const __m128 a = (__m128) (__v4sf) { _A, _A, _A, _A };
-    const __m128 b = (__m128) (__v4sf) { _B, _B, _B, _B };
-    const __m128 _bound = (__m128) (__v4sf) { _BOUND, _BOUND, _BOUND, _BOUND };
-    x = _mm_max_ps(-_bound, _mm_min_ps(_bound, x));
-
-    __m128 y = a * x + b;
-    return _mm_castsi128_ps(_mm_cvtps_epi32(y));
-}
-
-static inline __m128 fast_logfv(__m128 x) {
-#    define _Alogfv 8.262958294867817e-08f
-#    define _Blogfv 1064872507.1541044f
-    const __m128 a = (__m128) (__v4sf) { _Alogfv, _Alogfv, _Alogfv, _Alogfv };
-    const __m128 b = (__m128) (__v4sf) { _Blogfv, _Blogfv, _Blogfv, _Blogfv };
-    x = _mm_cvtepi32_ps(_mm_castps_si128(x));
-    return a * (x - b);
-}
-
-static inline __m128 __attribute__ ((__always_inline__)) fast_logisticfv(__m128 x) {
-    return _mm_rcp_ps(_mm_add_ps(_mm_setone_ps(), fast_expfv(-x)));
-}
-
-static inline __m128 __attribute__ ((__always_inline__)) fast_tanhfv(__m128 x) {
-    const __m128 y = fast_logisticfv(_mm_add_ps(x, x));
-    return _mm_sub_ps(_mm_add_ps(y, y), _mm_setone_ps());
-}
-
-static inline __m128 __attribute__ ((__always_inline__)) fast_elufv(__m128 x) {
-    if(0 == _mm_movemask_ps(x)){
-        // All positive, early return.
-        return x;
-    }
-    const __m128 mask = _mm_cmpge_ps(x, _mm_setzero_ps());
-    const __m128 y = fast_expfv(x) - _mm_setone_ps();
-    return _mm_or_ps(_mm_and_ps(mask, x),  _mm_andnot_ps(mask, y));
-}
-
-int argmaxf(const float *x, int n);
-int argminf(const float *x, int n);
-float valmaxf(const float *x, int n);
-float valminf(const float *x, int n);
-
-static inline int iceil(int x, int y) {
-    return (x + y - 1) / y;
-}
-
-static inline int ifloor(int x, int y) {
-    return x / y;
-}
-
-void quantilef(const float *x, size_t nx, float *p, size_t np);
-float medianf(const float *x, size_t n);
-float madf(const float *x, size_t n, const float *med);
-void medmad_normalise_array(float *x, size_t n);
-void studentise_array_kahan(float *x, size_t n);
-
-bool equality_array(double const * x, double const * y, size_t n, double const tol);
-bool equality_arrayf(float const * x, float const * y, size_t n, float const tol);
-bool equality_arrayi(int const * x, int const * y, size_t n);
-
-#endif                          /* UTIL_H */


=====================================
src/training_core.cpp
=====================================
@@ -8,18 +8,12 @@ using std::vector;
 using std::multiset;
 using std::endl;
 
-const bool use_multiset_logsum = 
-#ifndef USE_MULTISET_LOGSUM
-    false;
-#else
-    true;
-#endif
+const bool use_multiset_logsum = true;
 
 ParamMixture train_gaussian_mixture(const vector< StateTrainingData >& data, const ParamMixture& input_mixture)
 {
     size_t n_components = input_mixture.params.size();
     size_t n_data = data.size();
-    float log_n_data = std::log(n_data);
     assert(input_mixture.log_weights.size() == n_components);
     ParamMixture curr_mixture = input_mixture;
 
@@ -76,6 +70,16 @@ ParamMixture train_gaussian_mixture(const vector< StateTrainingData >& data, con
             }
         }
 
+        // compute logsum
+        //
+        logsumset< float > denom_terms(use_multiset_logsum);
+        for (size_t j = 0; j < n_components; ++j) {
+            for (size_t i = 0; i < n_data; ++i) {
+                denom_terms.add(log_resp[i][j]);
+            }
+        }
+        float log_n_data = denom_terms.val();
+
         // update weights
         //
         //   w'[j] := sum_i resp[i][j] / n_data



View it on GitLab: https://salsa.debian.org/med-team/nanopolish/commit/64e79bf282fc4c2aaad5fe45bfac8226b79d97bd

-- 
View it on GitLab: https://salsa.debian.org/med-team/nanopolish/commit/64e79bf282fc4c2aaad5fe45bfac8226b79d97bd
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190124/9fbba56f/attachment-0001.html>