[med-svn] [lofreq] 01/03: Imported Upstream version 2.1.2+ds

Afif Elghraoui afif-guest at moszumanska.debian.org
Thu Dec 24 08:07:20 UTC 2015


This is an automated email from the git hooks/post-receive script.

afif-guest pushed a commit to branch master
in repository lofreq.

commit 08970cb103c1a805d19cdcedeb0e39669aca929a
Author: Afif Elghraoui <afif at ghraoui.name>
Date:   Wed Dec 23 23:26:23 2015 -0800

    Imported Upstream version 2.1.2+ds
---
 .gitignore                                       |   41 +
 .travis.yml                                      |   13 +
 AUTHORS                                          |   24 +
 LICENSE                                          |   34 +
 Makefile.am                                      |   18 +
 NEWS                                             |    0
 README.md                                        |   64 +
 binary_installer.sh                              |   58 +
 bootstrap                                        |   23 +
 configure.ac                                     |  226 +
 devel-doc/FIXME                                  |   24 +
 devel-doc/autotools.README                       |   46 +
 devel-doc/best-practices.txt                     |   31 +
 devel-doc/debug.README                           |   42 +
 devel-doc/dist.README                            |   23 +
 devel-doc/docker.README                          |   40 +
 devel-doc/git.README                             |    7 +
 m4/ax_pthread.m4                                 |  317 +
 make_release.sh                                  |   35 +
 src/cdflib90.README                              |   46 +
 src/cdflib90/.gitignore                          |   15 +
 src/cdflib90/Makefile.am                         |    5 +
 src/cdflib90/cdflib.h                            |   90 +
 src/cdflib90/dcdflib.c                           | 9085 ++++++++++++++++++++++
 src/cdflib90/ipmpar.c                            |  426 +
 src/cdflib90/readme                              |  384 +
 src/lofreq/.gitignore                            |   19 +
 src/lofreq/Makefile.am                           |   32 +
 src/lofreq/bam_index.c                           |  110 +
 src/lofreq/bam_index.c.LICENSE                   |   37 +
 src/lofreq/bam_index.c.README                    |    2 +
 src/lofreq/bam_md_ext.c                          |  495 ++
 src/lofreq/bam_md_ext.h                          |   34 +
 src/lofreq/bedidx.c                              |  258 +
 src/lofreq/bedidx.c.LICENSE                      |   37 +
 src/lofreq/binom.c                               |  135 +
 src/lofreq/binom.h                               |   35 +
 src/lofreq/defaults.h                            |  111 +
 src/lofreq/fet.c                                 |  116 +
 src/lofreq/fet.c.LICENSE                         |   25 +
 src/lofreq/fet.h                                 |    6 +
 src/lofreq/kprobaln_ext.c                        |  307 +
 src/lofreq/kprobaln_ext.h                        |   50 +
 src/lofreq/lofreq_alnqual.c                      |  169 +
 src/lofreq/lofreq_alnqual.h                      |   35 +
 src/lofreq/lofreq_bamstats.c                     |  491 ++
 src/lofreq/lofreq_bamstats.h                     |   35 +
 src/lofreq/lofreq_call.c                         | 1558 ++++
 src/lofreq/lofreq_call.h                         |   33 +
 src/lofreq/lofreq_checkref.c                     |   75 +
 src/lofreq/lofreq_checkref.h                     |   33 +
 src/lofreq/lofreq_filter.c                       | 1262 +++
 src/lofreq/lofreq_filter.h                       |   33 +
 src/lofreq/lofreq_indelqual.c                    |  453 ++
 src/lofreq/lofreq_indelqual.h                    |   33 +
 src/lofreq/lofreq_index.c                        |   52 +
 src/lofreq/lofreq_index.h                        |    8 +
 src/lofreq/lofreq_main.c                         |  345 +
 src/lofreq/lofreq_uniq.c                         |  778 ++
 src/lofreq/lofreq_uniq.h                         |   33 +
 src/lofreq/lofreq_vcfset.c                       |  539 ++
 src/lofreq/lofreq_vcfset.h                       |   33 +
 src/lofreq/lofreq_viterbi.c                      |  477 ++
 src/lofreq/lofreq_viterbi.h                      |   35 +
 src/lofreq/log.c                                 |   48 +
 src/lofreq/log.h                                 |   55 +
 src/lofreq/multtest.c                            |  603 ++
 src/lofreq/multtest.h                            |   67 +
 src/lofreq/plp.c                                 | 1455 ++++
 src/lofreq/plp.h                                 |  173 +
 src/lofreq/samutils.c                            |  669 ++
 src/lofreq/samutils.h                            |   95 +
 src/lofreq/snpcaller.c                           | 1278 +++
 src/lofreq/snpcaller.h                           |  103 +
 src/lofreq/utils.c                               |  708 ++
 src/lofreq/utils.h                               |  141 +
 src/lofreq/vcf.c                                 |  941 +++
 src/lofreq/vcf.h                                 |  130 +
 src/lofreq/viterbi.c                             |  358 +
 src/lofreq/viterbi.h                             |   33 +
 src/scripts/Makefile.am                          |    4 +
 src/scripts/README                               |    1 +
 src/scripts/lofreq2_call_pparallel.py            |  742 ++
 src/scripts/lofreq2_local.py                     |   31 +
 src/scripts/lofreq2_somatic.py                   |  844 ++
 src/tools/.gitignore                             |    2 +
 src/tools/Makefile.am                            |   32 +
 src/tools/README                                 |    2 +
 src/tools/lofreq_star/__init__.py                |    2 +
 src/tools/lofreq_star/fdr.py                     |   56 +
 src/tools/lofreq_star/multiple_testing.py        |  113 +
 src/tools/lofreq_star/multiple_testing.py.README |   12 +
 src/tools/lofreq_star/multiple_testing.py.org    |  123 +
 src/tools/lofreq_star/utils.py                   |  142 +
 src/tools/phased_out/lofreq2_filter.py           |  577 ++
 src/tools/phased_out/lofreq2_vcfset.py           |  235 +
 src/tools/phased_out/vcf.py                      |  650 ++
 src/tools/phased_out/vcf.py.README               |   27 +
 src/tools/scripts/lofreq2_add_sample.py          |  312 +
 src/tools/scripts/lofreq2_analyze_somatic_fn.py  |  123 +
 src/tools/scripts/lofreq2_bias.py                |  356 +
 src/tools/scripts/lofreq2_cluster.py             |  304 +
 src/tools/scripts/lofreq2_indel_ovlp.py          |  115 +
 src/tools/scripts/lofreq2_local.py               |   31 +
 src/tools/scripts/lofreq2_vcfplot.py             |  613 ++
 src/tools/scripts/mutect_alt_allele_in_normal.py |  229 +
 src/tools/setup.py                               |   56 +
 src/tools/setup_conf.py.README                   |    2 +
 src/tools/setup_conf.py.in                       |    6 +
 tests/.gitignore                                 |    1 +
 tests/af_tests.sh                                |   59 +
 tests/alnqual.sh.FIXME                           |    2 +
 tests/bamstats.sh.FIXME                          |   40 +
 tests/baq-calls-less-than-nobaq.sh               |   50 +
 tests/bed.sh                                     |   13 +
 tests/bgzf_getline.supp                          |   22 +
 tests/binom_vs_poisson.FIXME                     |    3 +
 tests/bonf_auto_vs_dyn.sh                        |   84 +
 tests/consvar_noqual_filter.sh                   |   16 +
 tests/denv2-pseudoclonal-source-qual.sh          |   56 +
 tests/denv2-pseudoclonal.sh                      |   55 +
 tests/denv2-simulation.sh                        |   97 +
 tests/denv2-validation.sh                        |   65 +
 tests/diff_opts_same_out.sh.OLD                  |   65 +
 tests/doctest.sh                                 |   17 +
 tests/ecoli-clone_incl_parallel.sh               |   70 +
 tests/ecoli_spikein.sh                           |   66 +
 tests/exome_in_silico.sh.FIXME                   |    0
 tests/faidx_fetch_seq.supp                       |   12 +
 tests/fdr.sh                                     |   27 +
 tests/filter.sh                                  |  122 +
 tests/filter_c.sh                                |  181 +
 tests/filter_only_snvs_or_indels.sh              |   17 +
 tests/icgc-tcga-dream-indel_chr19.sh             |   86 +
 tests/icgc-tcga-dream-testproject.sh             |   67 +
 tests/indel_misc.sh                              |   31 +
 tests/indel_qual.sh                              |   34 +
 tests/indels.sh.FIXME                            |    3 +
 tests/lewis_known.sh.FIXME                       |    0
 tests/lib.sh                                     |   36 +
 tests/no_snvs_on_cons_indels.sh.FIXME            |    5 +
 tests/not-matching-ref.sh                        |   19 +
 tests/parallel.sh                                |   71 +
 tests/pseudomonas_jade.sh.FIXME                  |    0
 tests/pylint.rc                                  |  238 +
 tests/pylint.sh                                  |   25 +
 tests/run_all.sh                                 |   39 +
 tests/somatic_CHH966_chr22.sh                    |   38 +
 tests/uniq.sh                                    |   80 +
 tests/valgrind_call.sh                           |   75 +
 tests/valgrind_uniq.sh                           |   38 +
 tests/valgrind_vcfset.sh                         |   30 +
 tests/valid_vcf_output.sh                        |   22 +
 tests/vcf_setop.sh                               |  111 +
 tests/viterbi.sh                                 |   19 +
 155 files changed, 34242 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0640166
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,41 @@
+# auto generated files
+MANIFEST
+
+# backup files
+*~
+*#
+
+# objects
+*.so
+*.pyc
+build/
+lofreq/.fuse_hidden*
+.fuse_hidden*
+
+# tmp
+tmp/
+schmock*
+clang_output*
+nohup.out
+gmon.*
+_*
+
+# automake stuff which gets rebuilt after autoreconf
+Makefile
+Makefile.in
+aclocal.m4
+autom4te.cache/
+config.*
+configure
+install-sh
+libtool
+ltmain.sh
+missing
+depcomp
+INSTALL
+compile
+
+m4/*m4
+*.plist
+
+tests/*log
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..a9655a2
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,13 @@
+language: c
+compiler:
+  - gcc
+  - clang
+# Change this to your needs
+# Change this to your needs
+before_script:
+  - wget 'http://downloads.sourceforge.net/project/samtools/samtools/1.1/samtools-1.1.tar.bz2' -O /tmp/samtools-1.1.tar.bz2
+  - tar -xjf /tmp/samtools-1.1.tar.bz2
+  - cd samtools-1.1/
+  - make
+  - cd ..
+script: libtoolize; ./bootstrap && ./configure SAMTOOLS=${PWD}/samtools-1.1/ HTSLIB=${PWD}/samtools-1.1/htslib-1.1/ && make
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..b02f0b7
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,24 @@
+LoFreq-Star authors:
+- Niranjan Nagarajan:
+  had all the brilliant ideas
+- Grace Hui Ting Yeo:
+  implemented and tested the indel calling functions
+- Andreas Wilm:
+  did all the rest
+
+
+LoFreq-Star uses some BSD licensed pieces of external software. See
+READMEs in sub-directories for a list of authors whose software we
+incorporate.
+
+
+LoFreq-Star is based on LoFreq (<1.0), which is described here
+http://www.ncbi.nlm.nih.gov/pubmed/23066108. For that version:
+- Niranjan Nagarajan:
+  sketched out the core algorithm and implemented the pruned DP
+  properly
+- Grace Hui Ting Yeo:
+  implemented an early prototype of the EM (NQ) caller
+- Andreas Wilm:
+  did all the rest
+
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..5f67939
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,34 @@
+LoFreq is licensed under the MIT License (see below).
+
+Licenses for third party software that is part of the source:
+- cdflib90 (see src/cdflib90.README)
+- uthash (see src/uthash/LICENSE)
+
+Licenses for external libraries (part of the statically compiled binary):
+- samtools (see src/samtools-1.1.LICENSE)
+- htslib (see src/htslib-1.1.LICENSE)
+
+----------------------------------------------------------------------
+
+The MIT License (MIT)
+
+Copyright (c) 2013,2014 Genome Institute of Singapore
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..1f17078
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,18 @@
+# Top-level Makefile.am for LoFreq
+#
+# based on Makefile.am for Clustal Omega
+
+# we do not use GNU-style files: NEWS README AUTHORS ChangeLog
+# AUTOMAKE_OPTIONS = foreign
+
+if ENABLE_TOOLS
+TOOLS = src/tools
+endif
+
+LICENSES = LICENSE src/cdflib90.README src/uthash/LICENSE
+EXTRA_DIST = binary_installer.sh $(LICENSES)
+
+SUBDIRS = src/cdflib90 src/uthash src/lofreq src/scripts $(TOOLS) 
+bug-tests: all
+	cd tests && $(SHELL) run_all.sh;
+
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..e69de29
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..55082be
--- /dev/null
+++ b/README.md
@@ -0,0 +1,64 @@
+# LoFreq*: A sequence-quality aware, ultra-sensitive variant caller for NGS data
+
+
+
+## Note
+
+Most users will want to use either the binary or the source-code
+package. Both are distributed via
+[LoFreq's Sourceforge site](https://sourceforge.net/projects/lofreq/files/).
+The source hosted here on github is mainly for developers!
+
+
+
+## Building the Source
+
+### Current Build Status
+
+[![Build Status](https://travis-ci.org/CSB5/lofreq.svg?branch=master)](https://travis-ci.org/CSB5/lofreq)
+
+### Prerequisites
+
+You will need:
+
+- a C compiler (e.g. gcc or clang)
+- a Python 2.7 interpreter
+- zlib developer files
+- a compiled version of [samtools (>=1.1)](http://sourceforge.net/projects/samtools/files/samtools/1.1/samtools-1.1.tar.bz2/download)
+- a compiled version of htslib (>= 1.1; use the one that comes bundled with samtools!)
+
+### Compilation
+
+- Clone the repo (or download the current master as zip package and unpack)
+- Run `./bootstrap` to set up the required automake files
+  - If you get an error like `required file './ltmain.sh'
+    not found`, run `libtoolize` (or `glibtoolize`) first and then
+    `bootstrap` again
+  - Subsequent pulls won't require rerunning `./bootstrap`. This is
+    only necessary when changing `configure.ac` or any of the `Makefile.am`
+- Run `./configure` with the **absolute** path to samtools and htslib
+  (e.g. `./configure SAMTOOLS=/path-to-samtools HTSLIB=/path-to-htslib
+  [--prefix=inst-path]`)
+- Run `make`
+  - At this point you can already start using lofreq: `./bin/lofreq`
+- Run `make install` to properly install the package
+  - Default is `/usr/local/`. If `--prefix` was given to `configure`,
+    the corresponding argument is used
+  - Depending on the used prefix you might need to adjust your PATH (and PYTHONPATH).
+
+
+## Documentation
+
+- Simply calling `lofreq` on the command line will display a list of
+subcommands
+- `lofreq cmd` will then display help for `cmd`
+- See [LoFreq's website](http://csb5.github.io/lofreq/) for full documentation
+
+
+## License
+
+LoFreq is licensed under the MIT License (see LICENSE).
+
+Licenses for third party software that is part of the source:
+- cdflib90 (see src/cdflib90.README)
+- uthash (see src/uthash/LICENSE)
diff --git a/binary_installer.sh b/binary_installer.sh
new file mode 100755
index 0000000..f50d649
--- /dev/null
+++ b/binary_installer.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# Binary installer for LoFreq: copies the executables and installs the Python tools under a prefix.
+set -o pipefail
+
+PREFIX=/usr/local
+
+usage() {
+    # keep in sync with arg parsing below
+cat <<EOF
+$(basename "$0"): binary installer for LoFreq
+
+  Options:
+    -p | --prefix  : Install to this directory (default: $PREFIX)
+    -h | --help    : Display this help
+EOF
+}
+
+while [ "$1" != "" ]; do
+    case "$1" in
+        -p | --prefix )
+            shift
+            prefix=$1
+            ;;
+        -h | --help )
+            usage
+            exit
+            ;;
+        * )
+            echo "FATAL: unknown argument \"$1\"" 1>&2
+            usage
+            exit 1
+    esac
+    shift
+done
+
+test -z "$prefix" && prefix=$PREFIX
+echo "Using $prefix as installation prefix"
+
+echo "Installing binaries"
+test -d "$prefix/bin" || mkdir -p "$prefix/bin" || exit 1
+BINARIES="./src/lofreq/lofreq src/scripts/lofreq2_call_pparallel.py src/scripts/lofreq2_somatic.py"
+for f in $BINARIES; do  # intentionally unquoted: BINARIES is a space-separated list
+    if [ ! -s "$f" ]; then
+        echo "FATAL: can't find $f" 1>&2
+        exit 1
+    fi
+    cp "$f" "$prefix/bin/" || exit 1
+done
+# Abort if src/tools is missing rather than running setup.py from the wrong directory.
+echo "Installing Python tools"
+pushd ./src/tools >/dev/null || exit 1
+pythonpath=$(python setup.py install --prefix "$prefix" | \
+    grep 'egg-info$' | head -n1 | cut -f 2 -d ' ' | sed -e 's,LoFreq.*,,') || exit 1
+# pythonpath: 2nd field of the first 'egg-info' line printed by setup.py, cut before "LoFreq"
+echo "NOTE: Make sure $pythonpath is in your PYTHONPATH"
+popd >/dev/null
+
+echo "Successful exit"
diff --git a/bootstrap b/bootstrap
new file mode 100755
index 0000000..b580ebf
--- /dev/null
+++ b/bootstrap
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+# A bootstrapping script replacing autogen.sh and autoreconf. Brings
+# source tree into a state where end user can run configure and make.
+#
+# From https://www.sourceware.org/autobook/autobook/autobook_43.html:
+# "Autoconf comes with a program called autoreconf which essentially
+# does the work of the bootstrap script. autoreconf is rarely used
+# because, historically, has not been very well known, and only in
+# Autoconf 2.13 did it acquire the ability to work with Automake.
+# Unfortunately, even the Autoconf 2.13 autoreconf does not handle
+# libtoolize and some automake-related options that are frequently
+# nice to use.
+#
+# We recommend the bootstrap method, until autoreconf is fixed. At
+# this point bootstrap has not been standardized, so here is a version
+# of the script we used while writing this book"
+#
+
+aclocal && \
+	automake --gnu --add-missing && \
+	autoconf
+
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..8cccf35
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,226 @@
+# configure.ac for LoFreq
+
+AC_PREREQ(2.63)
+# 2.60 needed for AC_USE_SYSTEM_EXTENSIONS
+# 2.64 which allows to define a URL as well
+# 2.68 seems to have updated ax_pthread
+
+AC_INIT([LoFreq_Star], [2.1.2], [wilma at gis.a-star.edu.sg])
+
+# The AC_INIT macro can take any source file as an argument. It just
+# checks that the file is there, which should, in turn, mean that the
+# source directory is there. AC_INIT generates the following m4
+# macros, output variables and preprocessor symbols:
+AC_SUBST(PACKAGE_NAME)
+AC_SUBST(PACKAGE_TARNAME)
+AC_SUBST(PACKAGE_VERSION)
+AC_SUBST(PACKAGE_STRING)
+AC_SUBST(PACKAGE_BUGREPORT)
+
+#AC_REVISION([m4_esyscmd_s([git describe --always --dirty])])
+AC_REVISION([m4_esyscmd_s([git describe --always])])
+# saves revision to configure
+
+git_version=`git describe --always --dirty`
+if test x"$git_version" = x; then
+   AC_DEFINE(GIT_VERSION, ["unknown"])
+else
+   AC_DEFINE_UNQUOTED(GIT_VERSION, "$git_version")
+fi
+
+AC_MSG_NOTICE([Configuring $PACKAGE_NAME (version $PACKAGE_VERSION) for your system...
+
+            |             ____|                 
+            |       _ \   |     __|  _ \   _` | 
+            |      (   |  __|  |     __/  (   | 
+           _____| \___/  _|   _|   \___| \__, | 
+                                             _| 
+])
+#
+# Created with http://patorjk.com/software/taag/#p=display&f=Shadow&t=LoFreq
+
+
+# The AM_INIT_AUTOMAKE line adds several standard checks. This macro
+# is always needed for automake. Obsolete: It takes the program name
+# and version number as arguments.
+AM_INIT_AUTOMAKE([foreign])
+
+
+# This macro was introduced in Autoconf 2.60. If possible, enable
+# extensions to C or Posix on hosts that normally disable the
+# extensions.
+AC_USE_SYSTEM_EXTENSIONS
+
+# for cross compilation
+#
+AC_CANONICAL_HOST
+## AC_CANONICAL_BUILD
+## AC_CANONICAL_TARGET
+
+# Set the @SET_MAKE@ variable=make if $(MAKE) not set
+AC_PROG_MAKE_SET
+
+# AC_PROG_RANLIB will allow you to build code in sub-directories into
+# temporary libraries, which make will then link in with the rest of
+# the code.
+# AC_PROG_RANLIB rendered obsolete by LT_INIT
+#
+# disable-static: Need to disable static libraries, otherwise we can't
+# link them into the dynamically loaded Python extension
+#
+LT_INIT([disable-static])
+
+# AC_PROG_INSTALL will generate an install target so that users may
+# just type 'make install' to install the software.
+AC_PROG_INSTALL
+
+# check for C++ preprocessor and compiler and the library compiler
+# (might change the compiler flags)
+#
+# Determine a C compiler to use. If using the GNU C compiler, set
+# shell variable GCC to `yes'. If output variable CFLAGS was not
+# already set, set it to `-g -O2' for the GNU C compiler (`-O2' on
+# systems where GCC does not accept `-g'), or `-g' for other
+# compilers. 
+# Note, optimization level can be overridden by user with env var
+# CFLAGS
+AC_PROG_CC
+# Set output variable CPP to a command that runs the C preprocessor. 
+AC_PROG_CPP
+
+# If the C compiler cannot compile ISO Standard C (currently C99),
+# try to add an option to output variable `CC' to make it work.  If
+# the compiler does not support C99, fall back to supporting ANSI
+# C89 (ISO C90).
+# LoFreq: In theory we need for variadic macro support 
+# AC_PROG_CC_STDC
+#
+#If the C compiler is not in C99 mode by default, try to add an
+#option to output variable `CC' to make it so.  This macro tries
+#various options that select C99 on some system or another,
+#preferring extended functionality modes over strict conformance
+#modes.  It considers the compiler to be in C99 mode if it handles
+#`_Bool', `//' comments, flexible array members, `inline', signed
+#and unsigned `long long int', mixed code and declarations, named
+#initialization of structs, `restrict', `va_copy', varargs macros,
+#variable declarations in `for' loops, and variable length arrays.
+#After calling this macro you can check whether the C compiler has
+#been set to accept C99; if not, the shell variable
+#`ac_cv_prog_cc_c99' is set to `no'.
+AC_PROG_CC_C99
+
+
+
+# safety check: AC_CONFIG_SRCDIR([src/clustal.h])
+
+
+# Checks for standard header files.
+# obsolescent according to http://www.gnu.org/software/hello/manual/autoconf/Particular-Headers.html
+AC_HEADER_STDC
+# Guess the next one is obsolete as well then?
+# FIXME AC_CHECK_HEADERS(assert.h ctype.h float.h limits.h math.h memory.h stdarg.h stdio.h stdlib.h string.h time.h unistd.h)
+#  awk  '/^#include *</ {print $2}' $(find src/ -name \*.[ch] -or -name \*.cpp) | grep -v argtable2.h  | sort -u | tr -d '<>'
+
+# Checks for typedefs, structures, and compiler characteristics.
+AC_C_CONST
+AC_TYPE_SIZE_T
+AC_C_INLINE
+
+# Checks for library functions.
+# AC_CHECK_FUNCS(strdup strstr strchr erand48)
+# AW is there an automatic way to extract library functions used in the program?
+
+# pthread support
+#
+AC_MSG_CHECKING([if we have pthreads support])
+m4_include([m4/ax_pthread.m4])
+AC_CONFIG_MACRO_DIR([m4])
+AX_PTHREAD([
+        AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.])
+        CLIBS="$PTHREAD_LIBS $LIBS"
+        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+        LDFLAGS="$LDFLAGS $PTHREAD_CFLAGS"
+        CC="$PTHREAD_CC"],
+        AC_MSG_ERROR([No pthread support on this machine]))
+#AX_PTHREAD()
+
+
+# explicit libm check
+AC_CHECK_LIB(m, log,, AC_MSG_ERROR([Could not find libm]))
+AC_CHECK_LIB(z, gzread,, AC_MSG_ERROR([Could not find libz]))
+
+
+# http://www.gnu.org/software/automake/manual/html_node/Python.html
+AM_PATH_PYTHON([2.7])
+# argparse and a lot of other stuff is not available before 2.7
+
+# on by default
+AC_ARG_ENABLE([tools],
+    AS_HELP_STRING([--disable-tools], [Disable installation of tools]))
+AM_CONDITIONAL(ENABLE_TOOLS, [test "x$enable_tools" != "xno"])
+
+# ---   debugging
+#
+AC_MSG_CHECKING([whether to build with debug information])
+AC_ARG_ENABLE([debug],
+              [AS_HELP_STRING([--enable-debug],
+                              [Enable debugging (default=no)])],
+              [debugit="$enableval"],
+              [debugit=no])
+AC_MSG_RESULT([$debugit])
+if test x"$debugit" = x"yes"; then
+    # User variables CFLAGS/CXXFLAGS are used last during compilation and
+    # can therefore overwrite system settings.
+    #
+    # if debugging was requested, add debugging info at the end of
+    # C[XX]FLAGS, which makes sure previous optimisation flags are
+    # overwritten (normally C[XX]FLAGS should not be modified as they
+    # are user variables):
+    # switch off assert etc
+    # AC_DEFINE([DEBUG],[],[Debug Mode])
+    CFLAGS="${CFLAGS} -O0 -pedantic"
+    CXXFLAGS="${CXXFLAGS} -O0 -pedantic"
+else
+    # switch off assert etc
+    AC_DEFINE([NDEBUG],[],[No-debug Mode])
+    # Could set -O3 but that should be a user choice (env var CFLAGS)
+fi
+
+# currently samtools/libbam and htslib are not properly installed by default
+# so we use an envvar to point us to the directory
+#AC_LIB_LINKFLAGS([hts])
+#AC_LIB_LINKFLAGS([bam])
+AC_ARG_VAR(SAMTOOLS, [*Absolute* path to precompiled samtools source directory])
+if test x"$SAMTOOLS" = x""; then
+   AC_MSG_ERROR([Samtools directory not defined. Please use SAMTOOLS=/fullpath/to/samtoolsdir/])
+fi
+AC_ARG_VAR(HTSLIB, [*Absolute* path to precompiled htslib source directory])
+if test x"$HTSLIB" = x""; then
+   AC_MSG_ERROR([Htslib directory not defined. Please use HTSLIB=/fullpath/to/htslibdir/])
+fi
+AC_CHECK_FILE(${SAMTOOLS}/bam.h, [], [AC_MSG_ERROR([bam.h not found])])
+AC_CHECK_FILE(${HTSLIB}/htslib/hts.h, [], [AC_MSG_ERROR([hts.h not found])])
+
+AC_SUBST([AM_CFLAGS])
+AC_SUBST([AM_LDFLAGS])
+
+AC_DEFINE([_USE_KNETFILE], [1], [KNETFILE for samtools])
+# AC_DEFINE([SOURCEQUAL_IGNORES_INDELS], [1], [ignore indels in SQ computation as long as we can't predict them])
+
+
+#AC_ARG_ENABLE([alnerrprof],
+#        [AS_HELP_STRING([--enable-alnerrprof], [Enable use of alignment/mapping error profile (default=no)])],
+#        AC_DEFINE([USE_ALNERRPROF], [1], [support mapping error profile in LoFreq]))
+
+
+AC_CONFIG_FILES(Makefile
+        src/cdflib90/Makefile   
+        src/uthash/Makefile
+        src/lofreq/Makefile
+        src/scripts/Makefile        
+        src/tools/setup_conf.py        
+        src/tools/Makefile
+        )
+AC_OUTPUT
+
+AC_MSG_NOTICE([Done. Next, run 'make' to compile the source, followed by 'make install' to install it.])
diff --git a/devel-doc/FIXME b/devel-doc/FIXME
new file mode 100644
index 0000000..cd8ad28
--- /dev/null
+++ b/devel-doc/FIXME
@@ -0,0 +1,24 @@
+Todos:
+
+High Prio:
+==========
+
+- fix/check af computation for indels, esp. in presence of head/tails
+- support of indels during ignore vcf loading and source qual computation
+
+
+Med Prio:
+=========
+
+- what to do at high coverages where X>cap (e.g. Lewis' HepB)? replace already read values randomly?
+- add normal and tumor plp info to somatic calls. normal could be added in uniq
+- use indexing for source quality ignore vcfs if region was given
+
+Low Prio:
+=========
+
+- keep var info field with hash
+- profile code e.g.
+  $LOFREQ call -f mers_coronavirus_NC_019843.2.fa --verbose --debug NC_019843.2:2399-2399 dwgsim-ion-samba-mers-100kX-pool.bwamem.bam
+- parallelize viterbi avoiding overlapping reads. endpos of last read in last bin could be start pos for next bin
+- multiprocessing version of alnqual (best per chrom as we get overlapping reads if run per bin)
\ No newline at end of file
diff --git a/devel-doc/autotools.README b/devel-doc/autotools.README
new file mode 100644
index 0000000..3b81ab6
--- /dev/null
+++ b/devel-doc/autotools.README
@@ -0,0 +1,46 @@
+autotools
+=========
+
+autotools allow to automagically generate the configure and Makefile
+files that allow you to semi-automatically configure, compile and
+install the source, like this (GNU triple jump):
+$ ./configure
+$ make
+$ make install
+
+The only files you will ever have to edit as a developer are the
+configure.ac and Makefile.am files. autoconf looks for a file called
+configure.ac (or configure.in). It then creates the configure script,
+based on the macros which it finds. After editing configure.ac you
+should run autoreconf (which usually also happens automatically).
+Changes to Makefile, Makefile.in and configure will be overwritten!
+
+
+Setting things up
+================
+
+After first checkout and each time a file was added/removed you have
+to do
+the following to setup the automake environment:
+$ autoreconf (best with -Wall)
+and possibly
+$ autoreconf -i
+
+If autoreconf complains: "required file `./ltmain.sh' not found" then
+run
+$ [g]libtoolize
+
+If autoreconf complains: "required file `./config.guess|./config.sub|'
+not
+found" then run
+$ automake --add-missing
+and run autoreconf again.
+
+NOTE: all the above has been replaced with the bootstrap script
+
+Adding source files
+===================
+
+Normally, the only thing you will have to do is to just add your
+c-files to the corresponding Makefile.am _SOURCES variable.
+
diff --git a/devel-doc/best-practices.txt b/devel-doc/best-practices.txt
new file mode 100644
index 0000000..71586d1
--- /dev/null
+++ b/devel-doc/best-practices.txt
@@ -0,0 +1,31 @@
+- Align your reads with a good aligner. We recommend BWA-MEM. It's best to
+  add a read-group (required if GATK is used later) during the alignment
+- Recommended: For PE reads fix mate-pair information and clean BAM file
+  (Picard's FixMateInformation and CleanSam)
+(last two steps can be run with bwamem_wrapper.sh)
+
+- Recommended for Illumina data: realignment with 'lofreq viterbi'.
+
+- Mark PCR Duplicates with Picard's MarkDuplicates (Skip for high coverage amplicon data)
+- Left alignment of indels with GATK's IndelRealigner (untested alternative: Freebayes' bamleftalign)
+- Base-quality recalibration with GATK's BaseRecalibrator (also adds indel qualities
+  in versions >= 2). FIXME note about known.vcf
+(last three steps can be run with ngs_pipeline.py)
+
+- If LoFreq is used to predict indels and BQSR was not run (i.e. indel qualities are missing from the BAM
+  file), use
+  'lofreq indelqual'
+
+- Somatic:
+  -l for targeted sequencing and exomes etc (see elsewhere)
+  -d dbsnp. dbsnp matching your reference genome version. we recommend to remove
+   variants marked as somatic (zgrep SAO=[23]) and variants found in cosmic from it.
+
+- FIXME: lofreq parameters:
+  source qual
+  baq
+  indel
+  somatic
+  parallel
+ 
+
diff --git a/devel-doc/debug.README b/devel-doc/debug.README
new file mode 100644
index 0000000..d6972e6
--- /dev/null
+++ b/devel-doc/debug.README
@@ -0,0 +1,42 @@
+General
+-------
+
+Try
+$ make CFLAGS='-std=c99 -Wall -pedantic'
+every now and then. Ignore the following warnings:
+"ISO C does not permit named variadic macros"
+
+Enable debugging every now and then and run
+$ ./configure --enable-debug; make clean; make
+
+
+Memory checks
+-------------
+
+make clean; make CFLAGS='-O0 -g' LDFLAGS='-O0 -g'
+cd src/lofreq
+# and then for example
+valgrind --tool=memcheck --leak-check=full \
+  ./lofreq call -r consensus:100-200 -f ~/scratch/ref.fa  ../../../lofreq-test-data/denv2-multiplex-replicates/ACAGTG_1.bam  --verbose >/dev/null
+
+
+Profiling
+---------
+
+make clean; make CFLAGS='-pg' LDFLAGS='-pg'
+and then for example
+./src/lofreq/lofreq call \
+  -f tests/data/denv2-pseudoclonal/denv2-pseudoclonal_cons.fa \
+	  -l tests/data/denv2-pseudoclonal/denv2-pseudoclonal_incl.bed \
+   tests/data/denv2-pseudoclonal/denv2-pseudoclonal.bam
+gprof ./src/lofreq/lofreq gmon.out > gmon.txt
+
+
+static code checker
+-------------------
+
+To use clang's static code checker:
+$ scan-build ./configure
+$ scan-build make
+
+note, there are a lot of warnings produced for libbam
diff --git a/devel-doc/dist.README b/devel-doc/dist.README
new file mode 100644
index 0000000..472dd8c
--- /dev/null
+++ b/devel-doc/dist.README
@@ -0,0 +1,23 @@
+To create a new distribution:
+
+- Make sure the tests pass (test/run_all.sh)
+- Update version in configure.ac
+- Update top-level README
+- run 'make dist' to compile a tarball
+- Either
+  - Upload source and update the websites with info on new usage/bug-fixes/new function
+- or
+  - Unpack tarball
+  - ./configure --enable-static
+  - make
+  - compile against static libz if necessary, check with ldd ./src/lofreq/lofreq (or otool -L)
+  - find src -name \*.[choa] -or -name Makefile\*  | xargs rm;
+  -  rm -rf src//tools/build/
+  -  find src -name .deps | xargs rm -rf;
+  -  find src -name .libs | xargs rm -rf;
+  - cd .. and pack
+- Commit your changes
+- Tag this version
+  (e.g. git tag -a v0.3.1 -m 'my version 0.3.1'),
+  then git push and git push origin --tags
+
diff --git a/devel-doc/docker.README b/devel-doc/docker.README
new file mode 100644
index 0000000..760eb1e
--- /dev/null
+++ b/devel-doc/docker.README
@@ -0,0 +1,40 @@
+boot2docker init
+boot2docker start
+$(boot2docker shellinit)
+
+# ---
+
+docker run -t -i ubuntu:14.04 /bin/bash
+
+apt-get update
+apt-get upgrade
+
+apt-get install make
+apt-get install libc6-dev libc-dev
+apt-get install zlib1g-dev
+apt-get install python2.7
+apt-get install libncurses-dev
+apt-get install wget
+
+ln -s /usr/bin/python2.7 /usr/bin/python
+
+cd /usr/local/src
+wget -nd 'http://downloads.sourceforge.net/project/lofreq/lofreq_star-2.1.1.tar.gz'
+#
+wget 'http://downloads.sourceforge.net/project/samtools/samtools/1.1/samtools-1.1.tar.bz2'
+tar -xjf samtools-1.1.tar.bz2
+cd samtools-1.1
+make
+make install
+
+cd ..
+tar xvzf lofreq_star-2.1.1.tar.gz
+cd lofreq_star-2.1.1
+./configure SAMTOOLS=/usr/local/src/samtools-1.1/ HTSLIB=/usr/local/src/samtools-1.1/htslib-1.1/
+make
+make install
+
+# ---
+
+docker commit -m="Added samtools/htslib 1.1 and lofreq 2.1.1" -a="Andreas Wilm" 78c85ef2e74a andreaswilm/lofreq:v2.1.1
+docker push andreaswilm/lofreq
diff --git a/devel-doc/git.README b/devel-doc/git.README
new file mode 100644
index 0000000..7666083
--- /dev/null
+++ b/devel-doc/git.README
@@ -0,0 +1,7 @@
+Try to implement a rebase workflow AKA github workflow
+See
+http://mettadore.com/analysis/a-simple-git-rebase-workflow-explained/
+http://mettadore.com/analysis/the-ever-deployable-github-workflow/
+http://randyfay.com/content/rebase-workflow-git
+http://git-scm.com/book/en/Git-Branching-Rebasing
+http://isis.apache.org/contributors/git-cookbook.html
diff --git a/m4/ax_pthread.m4 b/m4/ax_pthread.m4
new file mode 100644
index 0000000..6d400ed
--- /dev/null
+++ b/m4/ax_pthread.m4
@@ -0,0 +1,317 @@
+# ===========================================================================
+#        http://www.gnu.org/software/autoconf-archive/ax_pthread.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
+#
+# DESCRIPTION
+#
+#   This macro figures out how to build C programs using POSIX threads. It
+#   sets the PTHREAD_LIBS output variable to the threads library and linker
+#   flags, and the PTHREAD_CFLAGS output variable to any special C compiler
+#   flags that are needed. (The user can also force certain compiler
+#   flags/libs to be tested by setting these environment variables.)
+#
+#   Also sets PTHREAD_CC to any special C compiler that is needed for
+#   multi-threaded programs (defaults to the value of CC otherwise). (This
+#   is necessary on AIX to use the special cc_r compiler alias.)
+#
+#   NOTE: You are assumed to not only compile your program with these flags,
+#   but also link it with them as well. e.g. you should link with
+#   $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS
+#
+#   If you are only building threads programs, you may wish to use these
+#   variables in your default LIBS, CFLAGS, and CC:
+#
+#     LIBS="$PTHREAD_LIBS $LIBS"
+#     CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+#     CC="$PTHREAD_CC"
+#
+#   In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant
+#   has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to that name
+#   (e.g. PTHREAD_CREATE_UNDETACHED on AIX).
+#
+#   Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the
+#   PTHREAD_PRIO_INHERIT symbol is defined when compiling with
+#   PTHREAD_CFLAGS.
+#
+#   ACTION-IF-FOUND is a list of shell commands to run if a threads library
+#   is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it
+#   is not found. If ACTION-IF-FOUND is not specified, the default action
+#   will define HAVE_PTHREAD.
+#
+#   Please let the authors know if this macro fails on any platform, or if
+#   you have any other suggestions or comments. This macro was based on work
+#   by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help
+#   from M. Frigo), as well as ac_pthread and hb_pthread macros posted by
+#   Alejandro Forero Cuervo to the autoconf macro repository. We are also
+#   grateful for the helpful feedback of numerous users.
+#
+#   Updated for Autoconf 2.68 by Daniel Richard G.
+#
+# LICENSE
+#
+#   Copyright (c) 2008 Steven G. Johnson <stevenj at alum.mit.edu>
+#   Copyright (c) 2011 Daniel Richard G. <skunk at iSKUNK.ORG>
+#
+#   This program is free software: you can redistribute it and/or modify it
+#   under the terms of the GNU General Public License as published by the
+#   Free Software Foundation, either version 3 of the License, or (at your
+#   option) any later version.
+#
+#   This program is distributed in the hope that it will be useful, but
+#   WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+#   Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License along
+#   with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#   As a special exception, the respective Autoconf Macro's copyright owner
+#   gives unlimited permission to copy, distribute and modify the configure
+#   scripts that are the output of Autoconf when processing the Macro. You
+#   need not follow the terms of the GNU General Public License when using
+#   or distributing such scripts, even though portions of the text of the
+#   Macro appear in them. The GNU General Public License (GPL) does govern
+#   all other use of the material that constitutes the Autoconf Macro.
+#
+#   This special exception to the GPL applies to versions of the Autoconf
+#   Macro released by the Autoconf Archive. When you make and distribute a
+#   modified version of the Autoconf Macro, you may extend this special
+#   exception to the GPL to apply to your modified version as well.
+
+#serial 20
+
+# AX_PTHREAD: probe for compiler/linker flags that make POSIX threads work
+# and publish them as PTHREAD_LIBS, PTHREAD_CFLAGS and PTHREAD_CC.
+# The former macro name ACX_PTHREAD is kept as an alias.
+AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD])
+AC_DEFUN([AX_PTHREAD], [
+AC_REQUIRE([AC_CANONICAL_HOST])
+AC_LANG_PUSH([C])
+ax_pthread_ok=no
+
+# We used to check for pthread.h first, but this fails if pthread.h
+# requires special compiler flags (e.g. on Tru64 or Sequent).
+# It gets checked for in the link test anyway.
+
+# First of all, check if the user has set any of the PTHREAD_LIBS,
+# etcetera environment variables, and if threads linking works using
+# them:
+if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then
+        save_CFLAGS="$CFLAGS"
+        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+        save_LIBS="$LIBS"
+        LIBS="$PTHREAD_LIBS $LIBS"
+        AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS])
+        AC_TRY_LINK_FUNC(pthread_join, ax_pthread_ok=yes)
+        AC_MSG_RESULT($ax_pthread_ok)
+        # The user-supplied flags did not link: discard them so that the
+        # candidate loop below starts from a clean slate.
+        if test x"$ax_pthread_ok" = xno; then
+                PTHREAD_LIBS=""
+                PTHREAD_CFLAGS=""
+        fi
+        LIBS="$save_LIBS"
+        CFLAGS="$save_CFLAGS"
+fi
+
+# We must check for the threads library under a number of different
+# names; the ordering is very important because some systems
+# (e.g. DEC) have both -lpthread and -lpthreads, where one of the
+# libraries is broken (non-POSIX).
+
+# Create a list of thread flags to try.  Items starting with a "-" are
+# C compiler flags, and other items are library names, except for "none"
+# which indicates that we try without any flags at all, and "pthread-config"
+# which is a program returning the flags for the Pth emulation library.
+
+ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config"
+
+# The ordering *is* (sometimes) important.  Some notes on the
+# individual items follow:
+
+# pthreads: AIX (must check this before -lpthread)
+# none: in case threads are in libc; should be tried before -Kthread and
+#       other compiler flags to prevent continual compiler warnings
+# -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
+# -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
+# lthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
+# -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads)
+# -pthreads: Solaris/gcc
+# -mthreads: Mingw32/gcc, Lynx/gcc
+# -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
+#      doesn't hurt to check since this sometimes defines pthreads too;
+#      also defines -D_REENTRANT)
+#      ... -mt is also the pthreads flag for HP/aCC
+# pthread: Linux, etcetera
+# --thread-safe: KAI C++
+# pthread-config: use pthread-config program (for GNU Pth library)
+
+# Host-specific candidates are put in front of the generic list.
+case ${host_os} in
+        solaris*)
+
+        # On Solaris (at least, for some versions), libc contains stubbed
+        # (non-functional) versions of the pthreads routines, so link-based
+        # tests will erroneously succeed.  (We need to link with -pthreads/-mt/
+        # -lpthread.)  (The stubs are missing pthread_cleanup_push, or rather
+        # a function called by this macro, so we could check for that, but
+        # who knows whether they'll stub that too in a future libc.)  So,
+        # we'll just look for -pthreads and -lpthread first:
+
+        ax_pthread_flags="-pthreads pthread -mt -pthread $ax_pthread_flags"
+        ;;
+
+        darwin*)
+        ax_pthread_flags="-pthread $ax_pthread_flags"
+        ;;
+esac
+
+# Only probe the candidate list when the user-supplied flags (if any)
+# did not already give a working link.
+if test x"$ax_pthread_ok" = xno; then
+for flag in $ax_pthread_flags; do
+
+        case $flag in
+                none)
+                AC_MSG_CHECKING([whether pthreads work without any flags])
+                ;;
+
+                -*)
+                AC_MSG_CHECKING([whether pthreads work with $flag])
+                PTHREAD_CFLAGS="$flag"
+                ;;
+
+                pthread-config)
+                AC_CHECK_PROG(ax_pthread_config, pthread-config, yes, no)
+                # Skip this candidate when the pthread-config program is
+                # not available.
+                if test x"$ax_pthread_config" = xno; then continue; fi
+                PTHREAD_CFLAGS="`pthread-config --cflags`"
+                PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
+                ;;
+
+                *)
+                AC_MSG_CHECKING([for the pthreads library -l$flag])
+                PTHREAD_LIBS="-l$flag"
+                ;;
+        esac
+
+        save_LIBS="$LIBS"
+        save_CFLAGS="$CFLAGS"
+        LIBS="$PTHREAD_LIBS $LIBS"
+        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+
+        # Check for various functions.  We must include pthread.h,
+        # since some functions may be macros.  (On the Sequent, we
+        # need a special flag -Kthread to make this header compile.)
+        # We check for pthread_join because it is in -lpthread on IRIX
+        # while pthread_create is in libc.  We check for pthread_attr_init
+        # due to DEC craziness with -lpthreads.  We check for
+        # pthread_cleanup_push because it is one of the few pthread
+        # functions on Solaris that doesn't have a non-functional libc stub.
+        # We try pthread_create on general principles.
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>
+                        static void routine(void *a) { a = 0; }
+                        static void *start_routine(void *a) { return a; }],
+                       [pthread_t th; pthread_attr_t attr;
+                        pthread_create(&th, 0, start_routine, 0);
+                        pthread_join(th, 0);
+                        pthread_attr_init(&attr);
+                        pthread_cleanup_push(routine, 0);
+                        pthread_cleanup_pop(0) /* ; */])],
+                [ax_pthread_ok=yes],
+                [])
+
+        LIBS="$save_LIBS"
+        CFLAGS="$save_CFLAGS"
+
+        AC_MSG_RESULT($ax_pthread_ok)
+        if test "x$ax_pthread_ok" = xyes; then
+                break;
+        fi
+
+        # This candidate failed: clear its flags before trying the next one.
+        PTHREAD_LIBS=""
+        PTHREAD_CFLAGS=""
+done
+fi
+
+# Various other checks:
+if test "x$ax_pthread_ok" = xyes; then
+        save_LIBS="$LIBS"
+        LIBS="$PTHREAD_LIBS $LIBS"
+        save_CFLAGS="$CFLAGS"
+        CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
+
+        # Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
+        AC_MSG_CHECKING([for joinable pthread attribute])
+        attr_name=unknown
+        for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
+            AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>],
+                           [int attr = $attr; return attr /* ; */])],
+                [attr_name=$attr; break],
+                [])
+        done
+        AC_MSG_RESULT($attr_name)
+        if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then
+            AC_DEFINE_UNQUOTED(PTHREAD_CREATE_JOINABLE, $attr_name,
+                               [Define to necessary symbol if this constant
+                                uses a non-standard name on your system.])
+        fi
+
+        # Extra preprocessor defines that are selected purely by host OS.
+        AC_MSG_CHECKING([if more special flags are required for pthreads])
+        flag=no
+        case ${host_os} in
+            aix* | freebsd* | darwin*) flag="-D_THREAD_SAFE";;
+            osf* | hpux*) flag="-D_REENTRANT";;
+            solaris*)
+            if test "$GCC" = "yes"; then
+                flag="-D_REENTRANT"
+            else
+                flag="-mt -D_REENTRANT"
+            fi
+            ;;
+        esac
+        AC_MSG_RESULT(${flag})
+        if test "x$flag" != xno; then
+            PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS"
+        fi
+
+        AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT],
+            ax_cv_PTHREAD_PRIO_INHERIT, [
+                AC_LINK_IFELSE([
+                    AC_LANG_PROGRAM([[#include <pthread.h>]], [[int i = PTHREAD_PRIO_INHERIT;]])],
+                    [ax_cv_PTHREAD_PRIO_INHERIT=yes],
+                    [ax_cv_PTHREAD_PRIO_INHERIT=no])
+            ])
+        AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes"],
+            AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], 1, [Have PTHREAD_PRIO_INHERIT.]))
+
+        LIBS="$save_LIBS"
+        CFLAGS="$save_CFLAGS"
+
+        # More AIX lossage: compile with *_r variant
+        if test "x$GCC" != xyes; then
+            case $host_os in
+                aix*)
+                AS_CASE(["x/$CC"],
+                  [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6],
+                  [#handle absolute path differently from PATH based program lookup
+                   AS_CASE(["x$CC"],
+                     [x/*],
+                     [AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])],
+                     [AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])])])
+                ;;
+            esac
+        fi
+fi
+
+# Fall back to the regular compiler when no special threads compiler
+# has been selected above.
+test -n "$PTHREAD_CC" || PTHREAD_CC="$CC"
+
+AC_SUBST(PTHREAD_LIBS)
+AC_SUBST(PTHREAD_CFLAGS)
+AC_SUBST(PTHREAD_CC)
+
+# Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
+if test x"$ax_pthread_ok" = xyes; then
+        ifelse([$1],,AC_DEFINE(HAVE_PTHREAD,1,[Define if you have POSIX threads libraries and header files.]),[$1])
+        :
+else
+        ax_pthread_ok=no
+        $2
+fi
+AC_LANG_POP
+])dnl AX_PTHREAD
diff --git a/make_release.sh b/make_release.sh
new file mode 100644
index 0000000..fd3012c
--- /dev/null
+++ b/make_release.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+# make_release.sh - build lofreq and pack a binary release tarball.
+#
+# Run from the top of the source tree.  Configures and builds for the
+# current platform (Darwin: MacPorts paths, final link against the static
+# MacPorts libz; Linux: a locally installed libz), then packs the lofreq
+# binary, the Python scripts, README and licence files into
+# lofreq_star-<version>.tgz, where <version> is read from the AC_INIT
+# line of configure.ac.
+#
+# Uses only POSIX sh constructs so that it behaves the same when started
+# as "sh make_release.sh" or "bash make_release.sh".
+# Exit status: 0 on success, 1 on any build or packaging failure.
+
+autoreconf || exit 1
+# best effort: make clean may fail when no Makefile has been generated yet
+make clean >/dev/null
+
+
+system=$(uname)
+case "$system" in
+Darwin)
+    # assuming MacOsX with MacPorts:
+    ./configure CFLAGS='-I/opt/local/include' LDFLAGS='-L/opt/local/lib' >/dev/null || exit 1
+    make -j 2 >/dev/null || exit 1
+    # relink inside a subshell: the working directory is restored
+    # automatically and nothing is removed if src/lofreq cannot be entered
+    (
+        cd src/lofreq || exit 1
+        rm -f lofreq
+        # repeat last line from make but replace -lz with MacPorts static one
+        cc -std=gnu99 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -Wall -I../libbam/ -I../lofreq_core/ -I/opt/local/include -D_THREAD_SAFE -pthread -D_THREAD_SAFE -pthread -o lofreq lofreq_main.o lofreq_uniq.o lofreq_snpcaller.o /opt/local/lib/libz.a  -L/opt/local/lib ../lofreq_core/liblofreq_core.a ../libbam/libbam.a ../cdflib90/libcdf.a  -lm -pthread || exit 1
+        # check with otool -L lofreq
+    ) || exit 1
+    ;;
+Linux)
+    # assuming GIS setup with locally statically compiled libz
+    ./configure CFLAGS='-I/home/wilma/local/include' LDFLAGS='-L/home/wilma/local/lib' || exit 1
+    make -j 2 || exit 1
+    ;;
+*)
+    echo "Unknown system: $system" 1>&2
+    exit 1
+    ;;
+esac
+
+# all systems:
+version=$(grep '^AC_INIT' configure.ac | cut -f 2 -d , | tr -d '[\[\]] ')
+if [ -z "$version" ]; then
+    echo "Could not determine version from configure.ac" 1>&2
+    exit 1
+fi
+reldir="lofreq_star-$version"
+# never overwrite or mix with an earlier release directory
+if [ -d "$reldir" ]; then
+    echo "Release directory $reldir already exists" 1>&2
+    exit 1
+fi
+mkdir "$reldir" || exit 1
+# use tar to preserve directory structure as if untouched src tree
+# but removing unwanted stuff
+# (the find output is left unquoted on purpose: it is split into the
+# list of files to pack)
+tar c src/zlib.LICENSE src/libbam/AUTHORS src/libbam/COPYING src/cdflib90.README src/lofreq/lofreq $(find src/lofreq_python/ -name \*py -or -name \*py.README | grep -v '/build/' | grep -v setup) | tar x --strip-components 1 -C "$reldir" || exit 1
+cp README LICENSE "$reldir"
+# ${reldir:?} aborts instead of expanding to "/*" should reldir ever be empty
+tar cvzf "$(basename "$reldir").tgz" "$reldir" && rm -rf "${reldir:?}"/* && rmdir "$reldir"
+
+echo "release in $(basename "$reldir").tgz. rename to architecture. unpack and test" 1>&2
diff --git a/src/cdflib90.README b/src/cdflib90.README
new file mode 100644
index 0000000..7d1b154
--- /dev/null
+++ b/src/cdflib90.README
@@ -0,0 +1,46 @@
+DCDFLIB Library of Routines for Cumulative Distribution Functions Inverses, and Other Parameters
+
+Downloaded from https://biostatistics.mdanderson.org/SoftwareDownload/SingleSoftware.aspx?Software_Id=21
+Quote from there:
+
+"""
+DCDFLIB (Double precision Cumulative Distribution Function LIBrary) is
+a collection of routines that calculate cumulative distribution
+functions, inverses, and parameters for common statistical
+distributions. DCDFLIB uses published algorithms where available, and
+literature citations are included in the documentation. Values
+associated with a statistical distribution include X, the upper limit
+of integration of the density, P, the cumulative distribution function
+evaluated at X, and auxiliary parameters such as degrees of freedom.
+Given all but one such value, a routine in DCDFLIB will calculate the
+one value. (CAVEAT: For the central and noncentral F, the cdf is
+sometimes not monotone in the degrees of freedom. For these
+parameters, there can be two answers. DCDFLIB finds an arbitrary one
+of the two in this case.) Routines are provided for the following
+distributions.
+
+Beta
+Binomial
+Chi-square
+Noncentral Chi-square
+F
+Noncentral F
+Gamma
+Negative Binomial
+Normal
+Poisson
+Student's t
+Noncentral t
+CDFLIB90 is a translation of DCDFLIB into Fortran 95 with improvements. Compared to the older package, it offers an improved packaging, improved speed (a better monotone function inverter is used), and perhaps a minute amount of increased accuracy.
+
+Contact: Barry W. Brown
+
+Software developed by John Venier and Dan Serachitopol
+"""
+
+
+
+
+
+
+
diff --git a/src/cdflib90/.gitignore b/src/cdflib90/.gitignore
new file mode 100644
index 0000000..8449759
--- /dev/null
+++ b/src/cdflib90/.gitignore
@@ -0,0 +1,15 @@
+# backup files
+*~
+*#
+
+# objects
+*.o
+*.a
+*.lo
+*.la
+lofreq_samtools
+
+Makefile
+Makefile.log
+.deps
+.libs
diff --git a/src/cdflib90/Makefile.am b/src/cdflib90/Makefile.am
new file mode 100644
index 0000000..470b97f
--- /dev/null
+++ b/src/cdflib90/Makefile.am
@@ -0,0 +1,5 @@
+AM_CFLAGS = -O2 -Wall -fPIC @AM_CFLAGS@
+AM_LDFLAGS = -shared -fPIC @AM_LDFLAGS@
+noinst_LIBRARIES = libcdf.a
+libcdf_a_SOURCES = cdflib.h dcdflib.c ipmpar.c
+EXTRA_DIST = readme
diff --git a/src/cdflib90/cdflib.h b/src/cdflib90/cdflib.h
new file mode 100644
index 0000000..d5b51a4
--- /dev/null
+++ b/src/cdflib90/cdflib.h
@@ -0,0 +1,90 @@
+/*
+ * cdflib.h -- prototypes for the cdflib90 routines (see dcdflib.c).
+ *
+ * All routines take their arguments by pointer.  The prototypes of the
+ * file-local helpers E0000 and E0001 are only compiled in when
+ * AW_AVOID_COMPILER_COMPLAINTS is defined; both guards use the same
+ * macro so that the two declarations are always enabled together.
+ */
+#ifndef CDFLIB_H
+#define CDFLIB_H
+
+double algdiv(double*,double*);
+double alngam(double*);
+double alnrel(double*);
+double apser(double*,double*,double*,double*);
+double basym(double*,double*,double*,double*);
+double bcorr(double*,double*);
+double betaln(double*,double*);
+double bfrac(double*,double*,double*,double*,double*,double*);
+void bgrat(double*,double*,double*,double*,double*,double*,int*i);
+double bpser(double*,double*,double*,double*);
+void bratio(double*,double*,double*,double*,double*,double*,int*);
+double brcmp1(int*,double*,double*,double*,double*);
+double brcomp(double*,double*,double*,double*);
+double bup(double*,double*,double*,double*,int*,double*);
+void cdfbet(int*,double*,double*,double*,double*,double*,double*,
+            int*,double*);
+void cdfbin(int*,double*,double*,double*,double*,double*,double*,
+            int*,double*);
+void cdfchi(int*,double*,double*,double*,double*,int*,double*);
+void cdfchn(int*,double*,double*,double*,double*,double*,int*,double*);
+void cdff(int*,double*,double*,double*,double*,double*,int*,double*);
+void cdffnc(int*,double*,double*,double*,double*,double*,double*,
+            int*s,double*);
+void cdfgam(int*,double*,double*,double*,double*,double*,int*,double*);
+void cdfnbn(int*,double*,double*,double*,double*,double*,double*,
+            int*,double*);
+void cdfnor(int*,double*,double*,double*,double*,double*,int*,double*);
+void cdfpoi(int*,double*,double*,double*,double*,int*,double*);
+void cdft(int*,double*,double*,double*,double*,int*,double*);
+void cdftnc(int*,double*,double*,double*,double*,double*,int*,double*);
+void cumbet(double*,double*,double*,double*,double*,double*);
+void cumbin(double*,double*,double*,double*,double*,double*);
+void cumchi(double*,double*,double*,double*);
+void cumchn(double*,double*,double*,double*,double*);
+void cumf(double*,double*,double*,double*,double*);
+void cumfnc(double*,double*,double*,double*,double*,double*);
+void cumgam(double*,double*,double*,double*);
+void cumnbn(double*,double*,double*,double*,double*,double*);
+void cumnor(double*,double*,double*);
+void cumpoi(double*,double*,double*,double*);
+void cumt(double*,double*,double*,double*);
+void cumtnc(double*,double*,double*,double*,double*);
+double devlpl(double [],int*,double*);
+double dinvnr(double *p,double *q);
+/* static helper: prototype is optional, see the note at the top */
+#ifdef AW_AVOID_COMPILER_COMPLAINTS
+static void E0000(int,int*,double*,double*,unsigned long*,
+                  unsigned long*,double*,double*,double*,
+                  double*,double*,double*,double*);
+#endif
+void dinvr(int*,double*,double*,unsigned long*,unsigned long*);
+void dstinv(double*,double*,double*,double*,double*,double*,
+            double*);
+double dt1(double*,double*,double*);
+/* static helper: prototype is optional, see the note at the top */
+#ifdef AW_AVOID_COMPILER_COMPLAINTS
+static void E0001(int,int*,double*,double*,double*,double*,
+                  unsigned long*,unsigned long*,double*,double*,
+                  double*,double*);
+#endif
+void dzror(int*,double*,double*,double*,double *,
+           unsigned long*,unsigned long*);
+void dstzr(double *zxlo,double *zxhi,double *zabstl,double *zreltl);
+double erf1(double*);
+double erfc1(int*,double*);
+double esum(int*,double*);
+double exparg(int*);
+double fpser(double*,double*,double*,double*);
+double gam1(double*);
+void gaminv(double*,double*,double*,double*,double*,int*);
+double gamln(double*);
+double gamln1(double*);
+double Xgamm(double*);
+void grat1(double*,double*,double*,double*,double*,double*);
+void gratio(double*,double*,double*,double*,int*);
+double gsumln(double*,double*);
+double psi(double*);
+double rcomp(double*,double*);
+double rexp(double*);
+double rlog(double*);
+double rlog1(double*);
+double spmpar(int*);
+double stvaln(double*);
+double fifdint(double);
+double fifdmax1(double,double);
+double fifdmin1(double,double);
+double fifdsign(double,double);
+long fifidint(double);
+long fifmod(long,long);
+void ftnstop(char*);
+extern int ipmpar(int*);
+
+#endif /* CDFLIB_H */
+
diff --git a/src/cdflib90/dcdflib.c b/src/cdflib90/dcdflib.c
new file mode 100644
index 0000000..7f6ea99
--- /dev/null
+++ b/src/cdflib90/dcdflib.c
@@ -0,0 +1,9085 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include "cdflib.h"
+/*
+ * A comment about ints and longs - whether ints or longs are used should
+ * make no difference, but where double r-values are assigned to ints the
+ * r-value is cast converted to a long, which is then assigned to the int
+ * to be compatible with the operation of fifidint.
+ */
+/*
+-----------------------------------------------------------------------
+
+     COMPUTATION OF LN(GAMMA(B)/GAMMA(A+B)) WHEN B .GE. 8
+
+                         --------
+
+     IN THIS ALGORITHM, DEL(X) IS THE FUNCTION DEFINED BY
+     LN(GAMMA(X)) = (X - 0.5)*LN(X) - X + 0.5*LN(2*PI) + DEL(X).
+
+     A and B are passed by pointer and are only read.  The result is
+     returned by value.  All scratch values are automatic variables and
+     the coefficients are static const, so this routine keeps no state
+     of its own between calls.
+
+-----------------------------------------------------------------------
+*/
+double algdiv(double *a,double *b)
+{
+static const double c0 = .833333333333333e-01;
+static const double c1 = -.277777777760991e-02;
+static const double c2 = .793650666825390e-03;
+static const double c3 = -.595202931351870e-03;
+static const double c4 = .837308034031215e-03;
+static const double c5 = -.165322962780713e-02;
+    double c,d,h,s3,s5,s7,s9,s11,t,u,v,w,x,x2,ratio;
+
+    /* The test is written as !(a <= b) so that a NaN argument takes the
+       same branch as in the historical goto-based code. */
+    if(!(*a <= *b)) {
+        h = *b/ *a;
+        c = 1.0e0/(1.0e0+h);
+        x = h/(1.0e0+h);
+        d = *a+(*b-0.5e0);
+    } else {
+        h = *a/ *b;
+        c = h/(1.0e0+h);
+        x = 1.0e0/(1.0e0+h);
+        d = *b+(*a-0.5e0);
+    }
+/*
+                SET SN = (1 - X**N)/(1 - X)
+*/
+    x2 = x*x;
+    s3 = 1.0e0+(x+x2);
+    s5 = 1.0e0+(x+x2*s3);
+    s7 = 1.0e0+(x+x2*s5);
+    s9 = 1.0e0+(x+x2*s7);
+    s11 = 1.0e0+(x+x2*s9);
+/*
+                SET W = DEL(B) - DEL(A + B)
+*/
+    t = pow(1.0e0/ *b,2.0);
+    w = ((((c5*s11*t+c4*s9)*t+c3*s7)*t+c2*s5)*t+c1*s3)*t+c0;
+    w *= (c/ *b);
+/*
+                    COMBINE THE RESULTS
+     (the order of the two subtractions depends on which of U and V
+      is larger, exactly as before)
+*/
+    ratio = *a/ *b;
+    u = d*alnrel(&ratio);
+    v = *a*(log(*b)-1.0e0);
+    if(!(u <= v)) return w-v-u;
+    return w-u-v;
+}
+double alngam(double *x)
+/*
+**********************************************************************
+
+     double alngam(double *x)
+                 double precision LN of the GAMma function
+
+
+                              Function
+
+
+     Returns the natural logarithm of GAMMA(X).
+
+
+                              Arguments
+
+
+     X --> value at which scaled log gamma is to be returned
+                    X is DOUBLE PRECISION (passed by pointer, only read)
+
+
+                              Method
+
+
+     If X .le. 6.0, then use recursion to get X below 3
+     then apply rational approximation number 5236 of
+     Hart et al, Computer Approximations, John Wiley and
+     Sons, NY, 1968.
+
+     If X .gt. 6.0, then use recursion to get X to at least 12 and
+     then use formula 5423 of the same source.
+
+
+                              Note
+
+
+     The scratch variables are automatic.  The coefficient tables and
+     the K1/K3/K5 length arguments stay static exactly as before,
+     because they are handed to devlpl() through non-const pointers.
+
+**********************************************************************
+*/
+{
+#define hln2pi 0.91893853320467274178e0
+static double coef[5] = {
+    0.83333333333333023564e-1,-0.27777777768818808e-2,0.79365006754279e-3,
+    -0.594997310889e-3,0.8065880899e-3
+};
+static double scoefd[4] = {
+    0.62003838007126989331e2,0.9822521104713994894e1,-0.8906016659497461257e1,
+    0.1000000000000000000e1
+};
+static double scoefn[9] = {
+    0.62003838007127258804e2,0.36036772530024836321e2,0.20782472531792126786e2,
+    0.6338067999387272343e1,0.215994312846059073e1,0.3980671310203570498e0,
+    0.1093115956710439502e0,0.92381945590275995e-2,0.29737866448101651e-2
+};
+static int K1 = 9;
+static int K3 = 4;
+static int K5 = 5;
+    double result,offset,prod,xx;
+    int i,n;
+    double T2,T4,T6;
+
+    if(*x <= 6.0e0) {
+        prod = 1.0e0;
+        xx = *x;
+        /* recursion downwards: bring the argument to 3 or below */
+        if(*x > 3.0e0) {
+            while(xx > 3.0e0) {
+                xx -= 1.0e0;
+                prod *= xx;
+            }
+        }
+        /* recursion upwards: bring the argument to 2 or above */
+        if(*x < 2.0e0) {
+            while(xx < 2.0e0) {
+                prod /= xx;
+                xx += 1.0e0;
+            }
+        }
+/*
+     COMPUTE RATIONAL APPROXIMATION TO GAMMA(X)
+*/
+        T2 = xx-2.0e0;
+        T4 = xx-2.0e0;
+        result = devlpl(scoefn,&K1,&T2)/devlpl(scoefd,&K3,&T4);
+        result *= prod;
+        result = log(result);
+        return result;
+    }
+
+    offset = hln2pi;
+/*
+     IF NECESSARY MAKE X AT LEAST 12 AND CARRY CORRECTION IN OFFSET
+*/
+    n = fifidint(12.0e0-*x);
+    if(n > 0) {
+        prod = 1.0e0;
+        for(i=1; i<=n; i++) prod *= (*x+(double)(i-1));
+        offset -= log(prod);
+        xx = *x+(double)n;
+    } else {
+        xx = *x;
+    }
+/*
+     COMPUTE POWER SERIES
+*/
+    T6 = 1.0e0/pow(xx,2.0);
+    result = devlpl(coef,&K5,&T6)/xx;
+    result += (offset+(xx-0.5e0)*log(xx)-xx);
+    return result;
+#undef hln2pi
+}
+double alnrel(double *a)
+/*
+-----------------------------------------------------------------------
+            EVALUATION OF THE FUNCTION LN(1 + A)
+
+     A is passed by pointer and only read.  For ABS(A) .LE. 0.375 a
+     rational approximation in T = A/(A + 2) is used, otherwise
+     LOG(1 + A) is evaluated directly.  All scratch values are
+     automatic variables, so the routine keeps no state between calls.
+-----------------------------------------------------------------------
+*/
+{
+static const double p1 = -.129418923021993e+01;
+static const double p2 = .405303492862024e+00;
+static const double p3 = -.178874546012214e-01;
+static const double q1 = -.162752256355323e+01;
+static const double q2 = .747811014037616e+00;
+static const double q3 = -.845104217945565e-01;
+    double t,t2,w,x;
+
+    if(fabs(*a) > 0.375e0) {
+        x = 1.e0+*a;
+        return log(x);
+    }
+    t = *a/(*a+2.0e0);
+    t2 = t*t;
+    w = (((p3*t2+p2)*t2+p1)*t2+1.0e0)/(((q3*t2+q2)*t2+q1)*t2+1.0e0);
+    return 2.0e0*t*w;
+}
+double apser(double *a,double *b,double *x,double *eps)
+/*
+-----------------------------------------------------------------------
+     APSER YIELDS THE INCOMPLETE BETA RATIO I(SUB(1-X))(B,A) FOR
+     A .LE. MIN(EPS,EPS*B), B*X .LE. 1, AND X .LE. 0.5. USED WHEN
+     A IS VERY SMALL. USE ONLY IF ABOVE INEQUALITIES ARE SATISFIED.
+
+     All four arguments are passed by pointer and only read.  The
+     series is summed until a term is no larger in magnitude than
+     5*EPS*ABS(C).  All scratch values are automatic variables.
+-----------------------------------------------------------------------
+*/
+{
+static const double g = .577215664901533e0;
+    double aj,bx,c,j,s,t,tol;
+
+    bx = *b * *x;
+    t = *x-bx;
+    if(*b * *eps > 2.e-2) {
+        c = log(bx)+g+t;
+    } else {
+        c = log(*x)+psi(b)+g+t;
+    }
+    tol = 5.0e0 * *eps * fabs(c);
+    j = 1.0e0;
+    s = 0.0e0;
+    /* at least one term is always added, as in the original loop */
+    do {
+        j += 1.0e0;
+        t *= (*x-bx/j);
+        aj = t/j;
+        s += aj;
+    } while(fabs(aj) > tol);
+    return -(*a*(c+s));
+}
+double basym(double *a,double *b,double *lambda,double *eps)
+/*
+-----------------------------------------------------------------------
+     ASYMPTOTIC EXPANSION FOR IX(A,B) FOR LARGE A AND B.
+     LAMBDA = (A + B)*Y - B  AND EPS IS THE TOLERANCE USED.
+     IT IS ASSUMED THAT LAMBDA IS NONNEGATIVE AND THAT
+     A AND B ARE GREATER THAN OR EQUAL TO 15.
+
+     ARGUMENTS (ALL READ THROUGH POINTERS, NONE ASSIGNED HERE)
+        A, B   - PARAMETERS OF IX(A,B)
+        LAMBDA - AS DEFINED ABOVE
+        EPS    - RELATIVE TOLERANCE FOR TRUNCATING THE SERIES
+
+     RETURNS E0*EXP(-F)*EXP(-BCORR(A,B))*SUM, WHERE
+     F = A*RLOG1(-LAMBDA/A) + B*RLOG1(LAMBDA/B) AND SUM IS THE
+     TRUNCATED SERIES.  RETURNS 0 WHEN EXP(-F) EVALUATES TO ZERO.
+
+     H IS THE RATIO MIN(A,B)/MAX(A,B).  THE SERIES IS EXTENDED BY
+     TWO TERMS (T0 AND T1) ON EACH PASS OF THE LOOP OVER
+     N = 2,4,...,NUM, AND THE LOOP IS LEFT EARLY ONCE
+     ABS(T0) + ABS(T1) .LE. EPS*SUM.
+-----------------------------------------------------------------------
+*/
+{
+static double e0 = 1.12837916709551e0;
+static double e1 = .353553390593274e0;
+static int num = 20;
+/*
+------------------------
+     ****** NUM IS THE MAXIMUM VALUE THAT N CAN TAKE IN THE DO LOOP
+            ENDING AT STATEMENT 50. IT IS REQUIRED THAT NUM BE EVEN.
+            THE ARRAYS A0, B0, C, D HAVE DIMENSION NUM + 1.
+            (THE ARRAYS BELOW ARE DECLARED WITH THE LITERAL SIZE 21,
+            SO CHANGING NUM REQUIRES CHANGING THOSE DECLARATIONS TOO.)
+------------------------
+     E0 = 2/SQRT(PI)
+     E1 = 2**(-3/2)
+------------------------
+*/
+static int K3 = 1;
+static double basym,bsum,dsum,f,h,h2,hn,j0,j1,r,r0,r1,s,sum,t,t0,t1,u,w,w0,z,z0,
+    z2,zn,znm1;
+static int i,im1,imj,j,m,mm1,mmj,n,np1;
+static double a0[21],b0[21],c[21],d[21],T1,T2;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    basym = 0.0e0;
+    if(*a >= *b) goto S10;
+    h = *a/ *b;
+    r0 = 1.0e0/(1.0e0+h);
+    r1 = (*b-*a)/ *b;
+    w0 = 1.0e0/sqrt(*a*(1.0e0+h));
+    goto S20;
+S10:
+    h = *b/ *a;
+    r0 = 1.0e0/(1.0e0+h);
+    r1 = (*b-*a)/ *a;
+    w0 = 1.0e0/sqrt(*b*(1.0e0+h));
+S20:
+    T1 = -(*lambda/ *a);
+    T2 = *lambda/ *b;
+    f = *a*rlog1(&T1)+*b*rlog1(&T2);
+    t = exp(-f);
+    if(t == 0.0e0) return basym;
+    z0 = sqrt(f);
+    z = 0.5e0*(z0/e1);
+    z2 = f+f;
+    a0[0] = 2.0e0/3.0e0*r1;
+    c[0] = -(0.5e0*a0[0]);
+    d[0] = -c[0];
+    j0 = 0.5e0/e0*erfc1(&K3,&z0);
+    j1 = e1;
+    sum = j0+d[0]*w0*j1;
+    s = 1.0e0;
+    h2 = h*h;
+    hn = 1.0e0;
+    w = w0;
+    znm1 = z;
+    zn = z2;
+    for(n=2; n<=num; n+=2) {
+        hn = h2*hn;
+        a0[n-1] = 2.0e0*r0*(1.0e0+h*hn)/((double)n+2.0e0);
+        np1 = n+1;
+        s += hn;
+        a0[np1-1] = 2.0e0*r1*s/((double)n+3.0e0);
+        for(i=n; i<=np1; i++) {
+            r = -(0.5e0*((double)i+1.0e0));
+            b0[0] = r*a0[0];
+            for(m=2; m<=i; m++) {
+                bsum = 0.0e0;
+                mm1 = m-1;
+                for(j=1; j<=mm1; j++) {
+                    mmj = m-j;
+                    bsum += (((double)j*r-(double)mmj)*a0[j-1]*b0[mmj-1]);
+                }
+                b0[m-1] = r*a0[m-1]+bsum/(double)m;
+            }
+            c[i-1] = b0[i-1]/((double)i+1.0e0);
+            dsum = 0.0e0;
+            im1 = i-1;
+            for(j=1; j<=im1; j++) {
+                imj = i-j;
+                dsum += (d[imj-1]*c[j-1]);
+            }
+            d[i-1] = -(dsum+c[i-1]);
+        }
+        j0 = e1*znm1+((double)n-1.0e0)*j0;
+        j1 = e1*zn+(double)n*j1;
+        znm1 = z2*znm1;
+        zn = z2*zn;
+        w = w0*w;
+        t0 = d[n-1]*w*j0;
+        w = w0*w;
+        t1 = d[np1-1]*w*j1;
+        sum += (t0+t1);
+        if(fabs(t0)+fabs(t1) <= *eps*sum) goto S80;
+    }
+S80:
+    u = exp(-bcorr(a,b));
+    basym = e0*t*u*sum;
+    return basym;
+}
+double bcorr(double *a0,double *b0)
+/*
+-----------------------------------------------------------------------
+
+     EVALUATION OF  DEL(A0) + DEL(B0) - DEL(A0 + B0)  WHERE
+     LN(GAMMA(A)) = (A - 0.5)*LN(A) - A + 0.5*LN(2*PI) + DEL(A).
+     IT IS ASSUMED THAT A0 .GE. 8 AND B0 .GE. 8.
+
+     A0 AND B0 ARE ONLY READ THROUGH THE POINTERS.  INTERNALLY THE
+     SMALLER OF THE TWO IS CALLED LO AND THE LARGER HI.
+
+-----------------------------------------------------------------------
+*/
+{
+/*
+     COEFFICIENTS OF THE POLYNOMIAL USED FOR DEL
+*/
+static const double k0 = .833333333333333e-01;
+static const double k1 = -.277777777760991e-02;
+static const double k2 = .793650666825390e-03;
+static const double k3 = -.595202931351870e-03;
+static const double k4 = .837308034031215e-03;
+static const double k5 = -.165322962780713e-02;
+    double lo,hi,ratio,frac,x,xsq,s3,s5,s7,s9,s11,inv2,tail;
+
+    lo = fifdmin1(*a0,*b0);
+    hi = fifdmax1(*a0,*b0);
+    ratio = lo/hi;
+    frac = ratio/(1.0e0+ratio);
+    x = 1.0e0/(1.0e0+ratio);
+    xsq = x*x;
+/*
+                SET SN = (1 - X**N)/(1 - X)
+*/
+    s3 = 1.0e0+(x+xsq);
+    s5 = 1.0e0+(x+xsq*s3);
+    s7 = 1.0e0+(x+xsq*s5);
+    s9 = 1.0e0+(x+xsq*s7);
+    s11 = 1.0e0+(x+xsq*s9);
+/*
+                SET TAIL = DEL(HI) - DEL(LO + HI)
+*/
+    inv2 = pow(1.0e0/hi,2.0);
+    tail = ((((k5*s11*inv2+k4*s9)*inv2+k3*s7)*inv2+k2*s5)*inv2+k1*s3)*inv2+k0;
+    tail *= (frac/hi);
+/*
+                   COMPUTE  DEL(LO) + TAIL
+*/
+    inv2 = pow(1.0e0/lo,2.0);
+    return (((((k5*inv2+k4)*inv2+k3)*inv2+k2)*inv2+k1)*inv2+k0)/lo+tail;
+}
+double betaln(double *a0,double *b0)
+/*
+-----------------------------------------------------------------------
+     EVALUATION OF THE LOGARITHM OF THE BETA FUNCTION
+-----------------------------------------------------------------------
+     E = 0.5*LN(2*PI)
+--------------------------
+     A0 AND B0 ARE ONLY READ THROUGH THE POINTERS.  THE ROUTINE WORKS
+     ON LOCAL COPIES A = MIN(A0,B0) AND B = MAX(A0,B0), WHICH IT MAY
+     REDUCE IN STEPS OF 1, AND SELECTS A METHOD FROM THE SIZE OF A
+     (A .LT. 1, 1 .LE. A .LT. 8, A .GE. 8) AND OF B.
+
+     W ACCUMULATES THE LOG OF THE FACTORS REMOVED WHILE REDUCING A,
+     AND Z THE FACTORS REMOVED WHILE REDUCING B.
+--------------------------
+*/
+{
+static double e = .918938533204673e0;
+static double betaln,a,b,c,h,u,v,w,z;
+static int i,n;
+static double T1;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    a = fifdmin1(*a0,*b0);
+    b = fifdmax1(*a0,*b0);
+    if(a >= 8.0e0) goto S100;
+    if(a >= 1.0e0) goto S20;
+/*
+-----------------------------------------------------------------------
+                   PROCEDURE WHEN A .LT. 1
+-----------------------------------------------------------------------
+*/
+    if(b >= 8.0e0) goto S10;
+    T1 = a+b;
+    betaln = gamln(&a)+(gamln(&b)-gamln(&T1));
+    return betaln;
+S10:
+    betaln = gamln(&a)+algdiv(&a,&b);
+    return betaln;
+S20:
+/*
+-----------------------------------------------------------------------
+                PROCEDURE WHEN 1 .LE. A .LT. 8
+-----------------------------------------------------------------------
+*/
+    if(a > 2.0e0) goto S40;
+    if(b > 2.0e0) goto S30;
+    betaln = gamln(&a)+gamln(&b)-gsumln(&a,&b);
+    return betaln;
+S30:
+    w = 0.0e0;
+    if(b < 8.0e0) goto S60;
+    betaln = gamln(&a)+algdiv(&a,&b);
+    return betaln;
+S40:
+/*
+                REDUCTION OF A WHEN B .LE. 1000
+                (A IS LOWERED BY 1, N = INT(A - 1) TIMES)
+*/
+    if(b > 1000.0e0) goto S80;
+    n = (long)(a - 1.0e0);
+    w = 1.0e0;
+    for(i=1; i<=n; i++) {
+        a -= 1.0e0;
+        h = a/b;
+        w *= (h/(1.0e0+h));
+    }
+    w = log(w);
+    if(b < 8.0e0) goto S60;
+    betaln = w+gamln(&a)+algdiv(&a,&b);
+    return betaln;
+S60:
+/*
+                 REDUCTION OF B WHEN B .LT. 8
+                 (REACHED WITH W = 0 FROM S30, OR WITH W FROM THE
+                 REDUCTION OF A ABOVE)
+*/
+    n = (long)(b - 1.0e0);
+    z = 1.0e0;
+    for(i=1; i<=n; i++) {
+        b -= 1.0e0;
+        z *= (b/(a+b));
+    }
+    betaln = w+log(z)+(gamln(&a)+(gamln(&b)-gsumln(&a,&b)));
+    return betaln;
+S80:
+/*
+                REDUCTION OF A WHEN B .GT. 1000
+                (THE FACTOR B**N IS KEPT OUT OF THE PRODUCT W AND
+                APPLIED AS N*LN(B) IN THE RESULT)
+*/
+    n = (long)(a - 1.0e0);
+    w = 1.0e0;
+    for(i=1; i<=n; i++) {
+        a -= 1.0e0;
+        w *= (a/(1.0e0+a/b));
+    }
+    betaln = log(w)-(double)n*log(b)+(gamln(&a)+algdiv(&a,&b));
+    return betaln;
+S100:
+/*
+-----------------------------------------------------------------------
+                   PROCEDURE WHEN A .GE. 8
+-----------------------------------------------------------------------
+     THE TWO RETURNS BELOW DIFFER ONLY IN THE ORDER IN WHICH U AND V
+     ARE SUBTRACTED: THE SMALLER OF THE TWO IS SUBTRACTED FIRST.
+-----------------------------------------------------------------------
+*/
+    w = bcorr(&a,&b);
+    h = a/b;
+    c = h/(1.0e0+h);
+    u = -((a-0.5e0)*log(c));
+    v = b*alnrel(&h);
+    if(u <= v) goto S110;
+    betaln = -(0.5e0*log(b))+e+w-v-u;
+    return betaln;
+S110:
+    betaln = -(0.5e0*log(b))+e+w-u-v;
+    return betaln;
+}
+double bfrac(double *a,double *b,double *x,double *y,double *lambda,
+	     double *eps)
+/*
+-----------------------------------------------------------------------
+     CONTINUED FRACTION EXPANSION FOR IX(A,B) WHEN A,B .GT. 1.
+     IT IS ASSUMED THAT  LAMBDA = (A + B)*Y - B.
+
+     ARGUMENTS (NONE ARE ASSIGNED HERE; A, B, X, Y ARE ALSO PASSED
+     ON TO BRCOMP)
+        A, B   - PARAMETERS OF IX(A,B)
+        X, Y   - ARGUMENT AND ITS COMPLEMENT
+        LAMBDA - AS DEFINED ABOVE
+        EPS    - RELATIVE TOLERANCE ON SUCCESSIVE CONVERGENTS
+
+     RETURNS BRCOMP(A,B,X,Y)*R, WHERE R IS THE LAST CONVERGENT
+     ANP1/BNP1 OF THE CONTINUED FRACTION.  RETURNS 0 AT ONCE WHEN
+     BRCOMP RETURNS 0.  IF A CONVERGENT EVALUATES TO NAN THE
+     ITERATION STOPS AND NAN IS RETURNED.
+-----------------------------------------------------------------------
+*/
+{
+static double bfrac,alpha,an,anp1,beta,bn,bnp1,c,c0,c1,e,n,p,r,r0,s,t,w,yp1;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    bfrac = brcomp(a,b,x,y);
+    if(bfrac == 0.0e0) return bfrac;
+    c = 1.0e0+*lambda;
+    c0 = *b/ *a;
+    c1 = 1.0e0+1.0e0/ *a;
+    yp1 = *y+1.0e0;
+    n = 0.0e0;
+    p = 1.0e0;
+    s = *a+1.0e0;
+    an = 0.0e0;
+    bn = anp1 = 1.0e0;
+    bnp1 = c/c1;
+    r = c1/c;
+S10:
+/*
+        CONTINUED FRACTION CALCULATION
+*/
+    n += 1.0e0;
+    t = n/ *a;
+    w = n*(*b-n)**x;
+    e = *a/s;
+    alpha = p*(p+c0)*e*e*(w**x);
+    e = (1.0e0+t)/(c1+t+t);
+    beta = n+w/s+e*(c+n*yp1);
+    p = 1.0e0+t;
+    s += 2.0e0;
+/*
+        UPDATE AN, BN, ANP1, AND BNP1
+*/
+    t = alpha*an+beta*anp1;
+    an = anp1;
+    anp1 = t;
+    t = alpha*bn+beta*bnp1;
+    bn = bnp1;
+    bnp1 = t;
+    r0 = r;
+    r = anp1/bnp1;
+/*
+        CONVERGENCE TEST.  IT IS WRITTEN AS THE NEGATION OF "GREATER
+        THAN" SO THAT A NAN CONVERGENT ALSO ENDS THE ITERATION: EVERY
+        ORDERED COMPARISON WITH NAN IS FALSE, SO TESTING
+        ABS(R-R0) .LE. EPS*R DIRECTLY COULD NEVER SUCCEED ONCE R IS
+        NAN AND THE LOOP WOULD NOT TERMINATE.  FOR ALL OTHER VALUES
+        THE TWO FORMS ARE EQUIVALENT.
+*/
+    if(!(fabs(r-r0) > *eps*r)) goto S20;
+/*
+        RESCALE AN, BN, ANP1, AND BNP1
+*/
+    an /= bnp1;
+    bn /= bnp1;
+    anp1 = r;
+    bnp1 = 1.0e0;
+    goto S10;
+S20:
+/*
+                 TERMINATION
+*/
+    bfrac *= r;
+    return bfrac;
+}
+void bgrat(double *a,double *b,double *x,double *y,double *w,
+	   double *eps,int *ierr)
+/*
+-----------------------------------------------------------------------
+     ASYMPTOTIC EXPANSION FOR IX(A,B) WHEN A IS LARGER THAN B.
+     THE RESULT OF THE EXPANSION IS ADDED TO W. IT IS ASSUMED
+     THAT A .GE. 15 AND B .LE. 1.  EPS IS THE TOLERANCE USED.
+     IERR IS A VARIABLE THAT REPORTS THE STATUS OF THE RESULTS.
+
+     ARGUMENTS
+        A, B, X, Y, EPS - NOT ASSIGNED BY THIS ROUTINE (A, B AND EPS
+               ARE ALSO PASSED ON TO GAM1, ALGDIV AND GRAT1)
+        W    - WHEN IERR = 0, U*SUM HAS BEEN ADDED TO THE VALUE
+               POINTED TO; LEFT UNCHANGED WHEN IERR = 1
+        IERR - SET TO 0 ON SUCCESS AND TO 1 WHEN THE EXPANSION CANNOT
+               BE COMPUTED (B*Z = 0, U = 0, OR THE PARTIAL SUM
+               BECOMES NONPOSITIVE)
+
+     LNX HOLDS LN(X); IT IS FORMED AS ALNREL(-Y) WHEN Y .LE. 0.375.
+     AT MOST 30 TERMS ARE USED.  IF THE TOLERANCE TEST IS NOT MET
+     WITHIN 30 TERMS THE PARTIAL SUM IS STILL ADDED TO W AND IERR IS
+     SET TO 0.
+-----------------------------------------------------------------------
+*/
+{
+static double bm1,bp2n,cn,coef,dj,j,l,lnx,n2,nu,p,q,r,s,sum,t,t2,u,v,z;
+static int i,n,nm1;
+static double c[30],d[30],T1;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    bm1 = *b-0.5e0-0.5e0;
+    nu = *a+0.5e0*bm1;
+    if(*y > 0.375e0) goto S10;
+    T1 = -*y;
+    lnx = alnrel(&T1);
+    goto S20;
+S10:
+    lnx = log(*x);
+S20:
+    z = -(nu*lnx);
+    if(*b*z == 0.0e0) goto S70;
+/*
+                 COMPUTATION OF THE EXPANSION
+                 SET R = EXP(-Z)*Z**B/GAMMA(B)
+*/
+    r = *b*(1.0e0+gam1(b))*exp(*b*log(z));
+    r *= (exp(*a*lnx)*exp(0.5e0*bm1*lnx));
+    u = algdiv(b,a)+*b*log(nu);
+    u = r*exp(-u);
+    if(u == 0.0e0) goto S70;
+    grat1(b,&z,&r,&p,&q,eps);
+    v = 0.25e0*pow(1.0e0/nu,2.0);
+    t2 = 0.25e0*lnx*lnx;
+    l = *w/u;
+    j = q/r;
+    sum = j;
+    t = cn = 1.0e0;
+    n2 = 0.0e0;
+    for(n=1; n<=30; n++) {
+        bp2n = *b+n2;
+        j = (bp2n*(bp2n+1.0e0)*j+(z+bp2n+1.0e0)*t)*v;
+        n2 += 2.0e0;
+        t *= t2;
+        cn /= (n2*(n2+1.0e0));
+        c[n-1] = cn;
+        s = 0.0e0;
+        if(n == 1) goto S40;
+        nm1 = n-1;
+        coef = *b-(double)n;
+        for(i=1; i<=nm1; i++) {
+            s += (coef*c[i-1]*d[n-i-1]);
+            coef += *b;
+        }
+S40:
+        d[n-1] = bm1*cn+s/(double)n;
+        dj = d[n-1]*j;
+        sum += dj;
+        if(sum <= 0.0e0) goto S70;
+        if(fabs(dj) <= *eps*(sum+l)) goto S60;
+    }
+S60:
+/*
+                    ADD THE RESULTS TO W
+*/
+    *ierr = 0;
+    *w += (u*sum);
+    return;
+S70:
+/*
+               THE EXPANSION CANNOT BE COMPUTED
+*/
+    *ierr = 1;
+    return;
+}
+double bpser(double *a,double *b,double *x,double *eps)
+/*
+-----------------------------------------------------------------------
+     POWER SERIES EXPANSION FOR EVALUATING IX(A,B) WHEN B .LE. 1
+     OR B*X .LE. 0.7.  EPS IS THE TOLERANCE USED.
+
+     A, B, X AND EPS ARE NOT ASSIGNED BY THIS ROUTINE (A AND B ARE
+     ALSO PASSED ON TO BETALN AND GAM1).  A0 = MIN(A,B) AND
+     B0 = MAX(A,B) ARE LOCAL WORKING COPIES; B0 MAY BE REDUCED.
+
+     RETURNS 0 AT ONCE WHEN X = 0.  OTHERWISE THE LEADING FACTOR
+     X**A/(A*BETA(A,B)) IS FORMED FIRST, BY A METHOD CHOSEN FROM THE
+     SIZES OF A0 AND B0.  THAT FACTOR IS RETURNED WITHOUT SUMMING THE
+     SERIES WHEN IT IS 0 OR WHEN A .LE. 0.1*EPS.
+
+     THE SERIES LOOP HAS NO ITERATION LIMIT; IT ENDS WHEN THE TERM W
+     SATISFIES ABS(W) .LE. TOL, WITH TOL = EPS/A.
+-----------------------------------------------------------------------
+*/
+{
+static double bpser,a0,apb,b0,c,n,sum,t,tol,u,w,z;
+static int i,m;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    bpser = 0.0e0;
+    if(*x == 0.0e0) return bpser;
+/*
+-----------------------------------------------------------------------
+            COMPUTE THE FACTOR X**A/(A*BETA(A,B))
+-----------------------------------------------------------------------
+*/
+    a0 = fifdmin1(*a,*b);
+    if(a0 < 1.0e0) goto S10;
+    z = *a*log(*x)-betaln(a,b);
+    bpser = exp(z)/ *a;
+    goto S100;
+S10:
+    b0 = fifdmax1(*a,*b);
+    if(b0 >= 8.0e0) goto S90;
+    if(b0 > 1.0e0) goto S40;
+/*
+            PROCEDURE FOR A0 .LT. 1 AND B0 .LE. 1
+*/
+    bpser = pow(*x,*a);
+    if(bpser == 0.0e0) return bpser;
+    apb = *a+*b;
+    if(apb > 1.0e0) goto S20;
+    z = 1.0e0+gam1(&apb);
+    goto S30;
+S20:
+    u = *a+*b-1.e0;
+    z = (1.0e0+gam1(&u))/apb;
+S30:
+    c = (1.0e0+gam1(a))*(1.0e0+gam1(b))/z;
+    bpser *= (c*(*b/apb));
+    goto S100;
+S40:
+/*
+         PROCEDURE FOR A0 .LT. 1 AND 1 .LT. B0 .LT. 8
+         (B0 IS LOWERED BY 1, M = INT(B0 - 1) TIMES, AND ONCE MORE
+         AT S60)
+*/
+    u = gamln1(&a0);
+    m = (long)(b0 - 1.0e0);
+    if(m < 1) goto S60;
+    c = 1.0e0;
+    for(i=1; i<=m; i++) {
+        b0 -= 1.0e0;
+        c *= (b0/(a0+b0));
+    }
+    u = log(c)+u;
+S60:
+    z = *a*log(*x)-u;
+    b0 -= 1.0e0;
+    apb = a0+b0;
+    if(apb > 1.0e0) goto S70;
+    t = 1.0e0+gam1(&apb);
+    goto S80;
+S70:
+    u = a0+b0-1.e0;
+    t = (1.0e0+gam1(&u))/apb;
+S80:
+    bpser = exp(z)*(a0/ *a)*(1.0e0+gam1(&b0))/t;
+    goto S100;
+S90:
+/*
+            PROCEDURE FOR A0 .LT. 1 AND B0 .GE. 8
+*/
+    u = gamln1(&a0)+algdiv(&a0,&b0);
+    z = *a*log(*x)-u;
+    bpser = a0/ *a*exp(z);
+S100:
+    if(bpser == 0.0e0 || *a <= 0.1e0**eps) return bpser;
+/*
+-----------------------------------------------------------------------
+                     COMPUTE THE SERIES
+-----------------------------------------------------------------------
+*/
+    sum = n = 0.0e0;
+    c = 1.0e0;
+    tol = *eps/ *a;
+S110:
+    n += 1.0e0;
+    c *= ((0.5e0+(0.5e0-*b/n))**x);
+    w = c/(*a+n);
+    sum += w;
+    if(fabs(w) > tol) goto S110;
+    bpser *= (1.0e0+*a*sum);
+    return bpser;
+}
+void bratio(double *a,double *b,double *x,double *y,double *w,
+	    double *w1,int *ierr)
+/*
+-----------------------------------------------------------------------
+ 
+            EVALUATION OF THE INCOMPLETE BETA FUNCTION IX(A,B)
+ 
+                     --------------------
+ 
+     IT IS ASSUMED THAT A AND B ARE NONNEGATIVE, AND THAT X .LE. 1
+     AND Y = 1 - X.  BRATIO ASSIGNS W AND W1 THE VALUES
+ 
+                      W  = IX(A,B)
+                      W1 = 1 - IX(A,B)
+ 
+     IERR IS A VARIABLE THAT REPORTS THE STATUS OF THE RESULTS.
+     IF NO INPUT ERRORS ARE DETECTED THEN IERR IS SET TO 0 AND
+     W AND W1 ARE COMPUTED. OTHERWISE, IF AN ERROR IS DETECTED,
+     THEN W AND W1 ARE ASSIGNED THE VALUE 0 AND IERR IS SET TO
+     ONE OF THE FOLLOWING VALUES ...
+ 
+        IERR = 1  IF A OR B IS NEGATIVE
+        IERR = 2  IF A = B = 0
+        IERR = 3  IF X .LT. 0 OR X .GT. 1
+        IERR = 4  IF Y .LT. 0 OR Y .GT. 1
+        IERR = 5  IF X + Y .NE. 1
+        IERR = 6  IF X = A = 0
+        IERR = 7  IF Y = B = 0
+ 
+     IMPLEMENTATION NOTES
+ 
+        A, B, X AND Y ARE NOT ASSIGNED HERE; THE WORK IS DONE ON THE
+        LOCAL COPIES A0, B0, X0, Y0.
+ 
+        THE TEST FOR IERR = 5 ACCEPTS ABS(X + Y - 1) .LE. 3*EPS.
+ 
+        IND = 1 RECORDS THAT THE COPIES WERE TAKEN WITH (A,X) AND
+        (B,Y) INTERCHANGED.  IN THAT CASE W AND W1 ARE INTERCHANGED
+        AGAIN AT S250 BEFORE RETURNING.
+ 
+        THE STATUS IERR1 RETURNED BY BGRAT IS STORED BUT NOT
+        EXAMINED.
+ 
+--------------------
+     WRITTEN BY ALFRED H. MORRIS, JR.
+        NAVAL SURFACE WARFARE CENTER
+        DAHLGREN, VIRGINIA
+     REVISED ... NOV 1991
+-----------------------------------------------------------------------
+*/
+{
+static int K1 = 1;
+static double a0,b0,eps,lambda,t,x0,y0,z;
+static int ierr1,ind,n;
+static double T2,T3,T4,T5;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+     ****** EPS IS A MACHINE DEPENDENT CONSTANT. EPS IS THE SMALLEST
+            FLOATING POINT NUMBER FOR WHICH 1.0 + EPS .GT. 1.0
+            (OBTAINED FROM SPMPAR(1); AFTER THE ARGUMENT CHECKS IT IS
+            RAISED TO AT LEAST 1.E-15)
+*/
+    eps = spmpar(&K1);
+    *w = *w1 = 0.0e0;
+    if(*a < 0.0e0 || *b < 0.0e0) goto S270;
+    if(*a == 0.0e0 && *b == 0.0e0) goto S280;
+    if(*x < 0.0e0 || *x > 1.0e0) goto S290;
+    if(*y < 0.0e0 || *y > 1.0e0) goto S300;
+    z = *x+*y-0.5e0-0.5e0;
+    if(fabs(z) > 3.0e0*eps) goto S310;
+    *ierr = 0;
+    if(*x == 0.0e0) goto S210;
+    if(*y == 0.0e0) goto S230;
+    if(*a == 0.0e0) goto S240;
+    if(*b == 0.0e0) goto S220;
+    eps = fifdmax1(eps,1.e-15);
+    if(fifdmax1(*a,*b) < 1.e-3*eps) goto S260;
+    ind = 0;
+    a0 = *a;
+    b0 = *b;
+    x0 = *x;
+    y0 = *y;
+    if(fifdmin1(a0,b0) > 1.0e0) goto S40;
+/*
+             PROCEDURE FOR A0 .LE. 1 OR B0 .LE. 1
+             (THE COPIES ARE INTERCHANGED FIRST WHEN X .GT. 0.5)
+*/
+    if(*x <= 0.5e0) goto S10;
+    ind = 1;
+    a0 = *b;
+    b0 = *a;
+    x0 = *y;
+    y0 = *x;
+S10:
+    if(b0 < fifdmin1(eps,eps*a0)) goto S90;
+    if(a0 < fifdmin1(eps,eps*b0) && b0*x0 <= 1.0e0) goto S100;
+    if(fifdmax1(a0,b0) > 1.0e0) goto S20;
+    if(a0 >= fifdmin1(0.2e0,b0)) goto S110;
+    if(pow(x0,a0) <= 0.9e0) goto S110;
+    if(x0 >= 0.3e0) goto S120;
+    n = 20;
+    goto S140;
+S20:
+    if(b0 <= 1.0e0) goto S110;
+    if(x0 >= 0.3e0) goto S120;
+    if(x0 >= 0.1e0) goto S30;
+    if(pow(x0*b0,a0) <= 0.7e0) goto S110;
+S30:
+    if(b0 > 15.0e0) goto S150;
+    n = 20;
+    goto S140;
+S40:
+/*
+             PROCEDURE FOR A0 .GT. 1 AND B0 .GT. 1
+             (THE COPIES ARE INTERCHANGED, AND LAMBDA REPLACED BY ITS
+             ABSOLUTE VALUE, WHEN LAMBDA IS NEGATIVE)
+*/
+    if(*a > *b) goto S50;
+    lambda = *a-(*a+*b)**x;
+    goto S60;
+S50:
+    lambda = (*a+*b)**y-*b;
+S60:
+    if(lambda >= 0.0e0) goto S70;
+    ind = 1;
+    a0 = *b;
+    b0 = *a;
+    x0 = *y;
+    y0 = *x;
+    lambda = fabs(lambda);
+S70:
+    if(b0 < 40.0e0 && b0*x0 <= 0.7e0) goto S110;
+    if(b0 < 40.0e0) goto S160;
+    if(a0 > b0) goto S80;
+    if(a0 <= 100.0e0) goto S130;
+    if(lambda > 0.03e0*a0) goto S130;
+    goto S200;
+S80:
+    if(b0 <= 100.0e0) goto S130;
+    if(lambda > 0.03e0*b0) goto S130;
+    goto S200;
+S90:
+/*
+            EVALUATION OF THE APPROPRIATE ALGORITHM
+            (S90 FPSER, S100 APSER, S110 AND S120 BPSER, S130 BFRAC,
+            S140 BUP FOLLOWED BY BGRAT, S150 BGRAT, S160 TO S190 BUP
+            FOLLOWED BY BPSER OR BGRAT, S200 BASYM.  THE COMPLEMENT
+            IS ALWAYS FORMED AS 0.5 + (0.5 - VALUE).)
+*/
+    *w = fpser(&a0,&b0,&x0,&eps);
+    *w1 = 0.5e0+(0.5e0-*w);
+    goto S250;
+S100:
+    *w1 = apser(&a0,&b0,&x0,&eps);
+    *w = 0.5e0+(0.5e0-*w1);
+    goto S250;
+S110:
+    *w = bpser(&a0,&b0,&x0,&eps);
+    *w1 = 0.5e0+(0.5e0-*w);
+    goto S250;
+S120:
+    *w1 = bpser(&b0,&a0,&y0,&eps);
+    *w = 0.5e0+(0.5e0-*w1);
+    goto S250;
+S130:
+    T2 = 15.0e0*eps;
+    *w = bfrac(&a0,&b0,&x0,&y0,&lambda,&T2);
+    *w1 = 0.5e0+(0.5e0-*w);
+    goto S250;
+S140:
+    *w1 = bup(&b0,&a0,&y0,&x0,&n,&eps);
+    b0 += (double)n;
+S150:
+    T3 = 15.0e0*eps;
+    bgrat(&b0,&a0,&y0,&x0,w1,&T3,&ierr1);
+    *w = 0.5e0+(0.5e0-*w1);
+    goto S250;
+S160:
+    n = (long)(b0);
+    b0 -= (double)n;
+    if(b0 != 0.0e0) goto S170;
+    n -= 1;
+    b0 = 1.0e0;
+S170:
+    *w = bup(&b0,&a0,&y0,&x0,&n,&eps);
+    if(x0 > 0.7e0) goto S180;
+    *w += bpser(&a0,&b0,&x0,&eps);
+    *w1 = 0.5e0+(0.5e0-*w);
+    goto S250;
+S180:
+    if(a0 > 15.0e0) goto S190;
+    n = 20;
+    *w += bup(&a0,&b0,&x0,&y0,&n,&eps);
+    a0 += (double)n;
+S190:
+    T4 = 15.0e0*eps;
+    bgrat(&a0,&b0,&x0,&y0,w,&T4,&ierr1);
+    *w1 = 0.5e0+(0.5e0-*w);
+    goto S250;
+S200:
+    T5 = 100.0e0*eps;
+    *w = basym(&a0,&b0,&lambda,&T5);
+    *w1 = 0.5e0+(0.5e0-*w);
+    goto S250;
+S210:
+/*
+               TERMINATION OF THE PROCEDURE
+               (S210 TO S240 HANDLE X = 0, B = 0, Y = 0 AND A = 0;
+               S250 UNDOES THE INTERCHANGE RECORDED IN IND)
+*/
+    if(*a == 0.0e0) goto S320;
+S220:
+    *w = 0.0e0;
+    *w1 = 1.0e0;
+    return;
+S230:
+    if(*b == 0.0e0) goto S330;
+S240:
+    *w = 1.0e0;
+    *w1 = 0.0e0;
+    return;
+S250:
+    if(ind == 0) return;
+    t = *w;
+    *w = *w1;
+    *w1 = t;
+    return;
+S260:
+/*
+           PROCEDURE FOR A AND B .LT. 1.E-3*EPS
+*/
+    *w = *b/(*a+*b);
+    *w1 = *a/(*a+*b);
+    return;
+S270:
+/*
+                       ERROR RETURN
+                       (W AND W1 WERE ALREADY SET TO 0 ON ENTRY)
+*/
+    *ierr = 1;
+    return;
+S280:
+    *ierr = 2;
+    return;
+S290:
+    *ierr = 3;
+    return;
+S300:
+    *ierr = 4;
+    return;
+S310:
+    *ierr = 5;
+    return;
+S320:
+    *ierr = 6;
+    return;
+S330:
+    *ierr = 7;
+    return;
+}
+double brcmp1(int *mu,double *a,double *b,double *x,double *y)
+/*
+-----------------------------------------------------------------------
+          EVALUATION OF  EXP(MU) * (X**A*Y**B/BETA(A,B))
+
+     MU, A, B, X AND Y ARE NOT ASSIGNED BY THIS ROUTINE (MU IS PASSED
+     ON TO ESUM; A AND B ARE ALSO PASSED ON TO BETALN, GAM1 AND
+     BCORR).  A0 = MIN(A,B) AND B0 = MAX(A,B) ARE LOCAL WORKING
+     COPIES; B0 MAY BE REDUCED.
+
+     THE FACTOR EXP(MU) IS APPLIED THROUGH ESUM(MU,Z), WHICH IS
+     CALLED WITH THE EXPONENT Z INSTEAD OF CALLING EXP(Z) DIRECTLY.
+
+     FOR MIN(A,B) .LT. 8, LN(X) AND LN(Y) ARE FORMED FIRST.  WHEN
+     X .LE. 0.375, LN(Y) IS TAKEN AS ALNREL(-X); OTHERWISE WHEN
+     Y .LE. 0.375, LN(X) IS TAKEN AS ALNREL(-Y).  THIS RELIES ON
+     Y = 1 - X.
+-----------------------------------------------------------------------
+*/
+{
+static double Const = .398942280401433e0;
+static double brcmp1,a0,apb,b0,c,e,h,lambda,lnx,lny,t,u,v,x0,y0,z;
+static int i,n;
+/*
+-----------------
+     CONST = 1/SQRT(2*PI)
+-----------------
+*/
+static double T1,T2,T3,T4;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    a0 = fifdmin1(*a,*b);
+    if(a0 >= 8.0e0) goto S130;
+    if(*x > 0.375e0) goto S10;
+    lnx = log(*x);
+    T1 = -*x;
+    lny = alnrel(&T1);
+    goto S30;
+S10:
+    if(*y > 0.375e0) goto S20;
+    T2 = -*y;
+    lnx = alnrel(&T2);
+    lny = log(*y);
+    goto S30;
+S20:
+    lnx = log(*x);
+    lny = log(*y);
+S30:
+    z = *a*lnx+*b*lny;
+    if(a0 < 1.0e0) goto S40;
+    z -= betaln(a,b);
+    brcmp1 = esum(mu,&z);
+    return brcmp1;
+S40:
+/*
+-----------------------------------------------------------------------
+              PROCEDURE FOR A .LT. 1 OR B .LT. 1
+-----------------------------------------------------------------------
+*/
+    b0 = fifdmax1(*a,*b);
+    if(b0 >= 8.0e0) goto S120;
+    if(b0 > 1.0e0) goto S70;
+/*
+                   ALGORITHM FOR B0 .LE. 1
+*/
+    brcmp1 = esum(mu,&z);
+    if(brcmp1 == 0.0e0) return brcmp1;
+    apb = *a+*b;
+    if(apb > 1.0e0) goto S50;
+    z = 1.0e0+gam1(&apb);
+    goto S60;
+S50:
+    u = *a+*b-1.e0;
+    z = (1.0e0+gam1(&u))/apb;
+S60:
+    c = (1.0e0+gam1(a))*(1.0e0+gam1(b))/z;
+    brcmp1 = brcmp1*(a0*c)/(1.0e0+a0/b0);
+    return brcmp1;
+S70:
+/*
+                ALGORITHM FOR 1 .LT. B0 .LT. 8
+                (B0 IS LOWERED BY 1, N = INT(B0 - 1) TIMES, AND ONCE
+                MORE AT S90)
+*/
+    u = gamln1(&a0);
+    n = (long)(b0 - 1.0e0);
+    if(n < 1) goto S90;
+    c = 1.0e0;
+    for(i=1; i<=n; i++) {
+        b0 -= 1.0e0;
+        c *= (b0/(a0+b0));
+    }
+    u = log(c)+u;
+S90:
+    z -= u;
+    b0 -= 1.0e0;
+    apb = a0+b0;
+    if(apb > 1.0e0) goto S100;
+    t = 1.0e0+gam1(&apb);
+    goto S110;
+S100:
+    u = a0+b0-1.e0;
+    t = (1.0e0+gam1(&u))/apb;
+S110:
+    brcmp1 = a0*esum(mu,&z)*(1.0e0+gam1(&b0))/t;
+    return brcmp1;
+S120:
+/*
+                   ALGORITHM FOR B0 .GE. 8
+*/
+    u = gamln1(&a0)+algdiv(&a0,&b0);
+    T3 = z-u;
+    brcmp1 = a0*esum(mu,&T3);
+    return brcmp1;
+S130:
+/*
+-----------------------------------------------------------------------
+              PROCEDURE FOR A .GE. 8 AND B .GE. 8
+-----------------------------------------------------------------------
+     H = MIN(A,B)/MAX(A,B).  X0 AND Y0 ARE A/(A + B) AND B/(A + B)
+     WRITTEN IN TERMS OF H.  U AND V USE RLOG1 WHEN ABS(E) .LE. 0.6
+     AND AN EXPLICIT LOGARITHM OTHERWISE.
+-----------------------------------------------------------------------
+*/
+    if(*a > *b) goto S140;
+    h = *a/ *b;
+    x0 = h/(1.0e0+h);
+    y0 = 1.0e0/(1.0e0+h);
+    lambda = *a-(*a+*b)**x;
+    goto S150;
+S140:
+    h = *b/ *a;
+    x0 = 1.0e0/(1.0e0+h);
+    y0 = h/(1.0e0+h);
+    lambda = (*a+*b)**y-*b;
+S150:
+    e = -(lambda/ *a);
+    if(fabs(e) > 0.6e0) goto S160;
+    u = rlog1(&e);
+    goto S170;
+S160:
+    u = e-log(*x/x0);
+S170:
+    e = lambda/ *b;
+    if(fabs(e) > 0.6e0) goto S180;
+    v = rlog1(&e);
+    goto S190;
+S180:
+    v = e-log(*y/y0);
+S190:
+    T4 = -(*a*u+*b*v);
+    z = esum(mu,&T4);
+    brcmp1 = Const*sqrt(*b*x0)*z*exp(-bcorr(a,b));
+    return brcmp1;
+}
+double brcomp(double *a,double *b,double *x,double *y)
+/*
+-----------------------------------------------------------------------
+               EVALUATION OF X**A*Y**B/BETA(A,B)
+
+     A, B, X AND Y ARE NOT ASSIGNED BY THIS ROUTINE (A AND B ARE
+     ALSO PASSED ON TO BETALN, GAM1 AND BCORR).  A0 = MIN(A,B) AND
+     B0 = MAX(A,B) ARE LOCAL WORKING COPIES; B0 MAY BE REDUCED.
+
+     RETURNS 0 AT ONCE WHEN X = 0 OR Y = 0.
+
+     FOR MIN(A,B) .LT. 8, LN(X) AND LN(Y) ARE FORMED FIRST.  WHEN
+     X .LE. 0.375, LN(Y) IS TAKEN AS ALNREL(-X); OTHERWISE WHEN
+     Y .LE. 0.375, LN(X) IS TAKEN AS ALNREL(-Y).  THIS RELIES ON
+     Y = 1 - X.
+-----------------------------------------------------------------------
+*/
+{
+static double Const = .398942280401433e0;
+static double brcomp,a0,apb,b0,c,e,h,lambda,lnx,lny,t,u,v,x0,y0,z;
+static int i,n;
+/*
+-----------------
+     CONST = 1/SQRT(2*PI)
+-----------------
+*/
+static double T1,T2;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    brcomp = 0.0e0;
+    if(*x == 0.0e0 || *y == 0.0e0) return brcomp;
+    a0 = fifdmin1(*a,*b);
+    if(a0 >= 8.0e0) goto S130;
+    if(*x > 0.375e0) goto S10;
+    lnx = log(*x);
+    T1 = -*x;
+    lny = alnrel(&T1);
+    goto S30;
+S10:
+    if(*y > 0.375e0) goto S20;
+    T2 = -*y;
+    lnx = alnrel(&T2);
+    lny = log(*y);
+    goto S30;
+S20:
+    lnx = log(*x);
+    lny = log(*y);
+S30:
+    z = *a*lnx+*b*lny;
+    if(a0 < 1.0e0) goto S40;
+    z -= betaln(a,b);
+    brcomp = exp(z);
+    return brcomp;
+S40:
+/*
+-----------------------------------------------------------------------
+              PROCEDURE FOR A .LT. 1 OR B .LT. 1
+-----------------------------------------------------------------------
+*/
+    b0 = fifdmax1(*a,*b);
+    if(b0 >= 8.0e0) goto S120;
+    if(b0 > 1.0e0) goto S70;
+/*
+                   ALGORITHM FOR B0 .LE. 1
+*/
+    brcomp = exp(z);
+    if(brcomp == 0.0e0) return brcomp;
+    apb = *a+*b;
+    if(apb > 1.0e0) goto S50;
+    z = 1.0e0+gam1(&apb);
+    goto S60;
+S50:
+    u = *a+*b-1.e0;
+    z = (1.0e0+gam1(&u))/apb;
+S60:
+    c = (1.0e0+gam1(a))*(1.0e0+gam1(b))/z;
+    brcomp = brcomp*(a0*c)/(1.0e0+a0/b0);
+    return brcomp;
+S70:
+/*
+                ALGORITHM FOR 1 .LT. B0 .LT. 8
+                (B0 IS LOWERED BY 1, N = INT(B0 - 1) TIMES, AND ONCE
+                MORE AT S90)
+*/
+    u = gamln1(&a0);
+    n = (long)(b0 - 1.0e0);
+    if(n < 1) goto S90;
+    c = 1.0e0;
+    for(i=1; i<=n; i++) {
+        b0 -= 1.0e0;
+        c *= (b0/(a0+b0));
+    }
+    u = log(c)+u;
+S90:
+    z -= u;
+    b0 -= 1.0e0;
+    apb = a0+b0;
+    if(apb > 1.0e0) goto S100;
+    t = 1.0e0+gam1(&apb);
+    goto S110;
+S100:
+    u = a0+b0-1.e0;
+    t = (1.0e0+gam1(&u))/apb;
+S110:
+    brcomp = a0*exp(z)*(1.0e0+gam1(&b0))/t;
+    return brcomp;
+S120:
+/*
+                   ALGORITHM FOR B0 .GE. 8
+*/
+    u = gamln1(&a0)+algdiv(&a0,&b0);
+    brcomp = a0*exp(z-u);
+    return brcomp;
+S130:
+/*
+-----------------------------------------------------------------------
+              PROCEDURE FOR A .GE. 8 AND B .GE. 8
+-----------------------------------------------------------------------
+     H = MIN(A,B)/MAX(A,B).  X0 AND Y0 ARE A/(A + B) AND B/(A + B)
+     WRITTEN IN TERMS OF H.  U AND V USE RLOG1 WHEN ABS(E) .LE. 0.6
+     AND AN EXPLICIT LOGARITHM OTHERWISE.
+-----------------------------------------------------------------------
+*/
+    if(*a > *b) goto S140;
+    h = *a/ *b;
+    x0 = h/(1.0e0+h);
+    y0 = 1.0e0/(1.0e0+h);
+    lambda = *a-(*a+*b)**x;
+    goto S150;
+S140:
+    h = *b/ *a;
+    x0 = 1.0e0/(1.0e0+h);
+    y0 = h/(1.0e0+h);
+    lambda = (*a+*b)**y-*b;
+S150:
+    e = -(lambda/ *a);
+    if(fabs(e) > 0.6e0) goto S160;
+    u = rlog1(&e);
+    goto S170;
+S160:
+    u = e-log(*x/x0);
+S170:
+    e = lambda/ *b;
+    if(fabs(e) > 0.6e0) goto S180;
+    v = rlog1(&e);
+    goto S190;
+S180:
+    v = e-log(*y/y0);
+S190:
+    z = exp(-(*a*u+*b*v));
+    brcomp = Const*sqrt(*b*x0)*z*exp(-bcorr(a,b));
+    return brcomp;
+}
+double bup(double *a,double *b,double *x,double *y,int *n,double *eps)
+/*
+-----------------------------------------------------------------------
+     EVALUATION OF IX(A,B) - IX(A+N,B) WHERE N IS A POSITIVE INTEGER.
+     EPS IS THE TOLERANCE USED.
+
+     A, B, X, Y, N AND EPS ARE NOT ASSIGNED BY THIS ROUTINE (A, B, X
+     AND Y ARE ALSO PASSED ON TO BRCMP1).
+
+     THE RESULT IS BRCMP1(MU,A,B,X,Y)/A TIMES W, WHERE W IS THE SUM
+     OF THE SERIES TERMS D.  WHEN N = 1, OR WHEN THE LEADING FACTOR
+     IS 0, THAT FACTOR IS RETURNED DIRECTLY.
+
+     SCALING IS USED ONLY WHEN N .NE. 1, A .GE. 1 AND
+     A + B .GE. 1.1*(A + 1).  THEN
+     MU = MIN(INT(ABS(EXPARG(1))),INT(EXPARG(0))) AND THE FIRST TERM
+     IS D = EXP(-MU).  OTHERWISE MU = 0 AND D = 1.
+
+     THE TERMS UP TO INDEX K ARE ADDED WITHOUT A TOLERANCE TEST; THE
+     REMAINING TERMS ARE ADDED UNTIL ONE SATISFIES D .LE. EPS*W OR
+     N - 1 TERMS HAVE BEEN ADDED.
+-----------------------------------------------------------------------
+*/
+{
+static int K1 = 1;
+static int K2 = 0;
+static double bup,ap1,apb,d,l,r,t,w;
+static int i,k,kp1,mu,nm1;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+          OBTAIN THE SCALING FACTOR EXP(-MU) AND
+             EXP(MU)*(X**A*Y**B/BETA(A,B))/A
+*/
+    apb = *a+*b;
+    ap1 = *a+1.0e0;
+    mu = 0;
+    d = 1.0e0;
+    if(*n == 1 || *a < 1.0e0) goto S10;
+    if(apb < 1.1e0*ap1) goto S10;
+    mu = (long)(fabs(exparg(&K1)));
+    k = (long)(exparg(&K2));
+    if(k < mu) mu = k;
+    t = mu;
+    d = exp(-t);
+S10:
+    bup = brcmp1(&mu,a,b,x,y)/ *a;
+    if(*n == 1 || bup == 0.0e0) return bup;
+    nm1 = *n-1;
+    w = d;
+/*
+          LET K BE THE INDEX OF THE MAXIMUM TERM
+          (K STAYS 0 WHEN B .LE. 1 OR R .LT. 1; K = N - 1 WHEN
+          Y .LE. 1.E-4; OTHERWISE K = MIN(N - 1,INT(R)))
+*/
+    k = 0;
+    if(*b <= 1.0e0) goto S50;
+    if(*y > 1.e-4) goto S20;
+    k = nm1;
+    goto S30;
+S20:
+    r = (*b-1.0e0)**x/ *y-*a;
+    if(r < 1.0e0) goto S50;
+    t = nm1;
+    k = (long)(t);
+    if(r < t) k = (long)(r);
+S30:
+/*
+          ADD THE INCREASING TERMS OF THE SERIES
+*/
+    for(i=1; i<=k; i++) {
+        l = i-1;
+        d = (apb+l)/(ap1+l)**x*d;
+        w += d;
+    }
+    if(k == nm1) goto S70;
+S50:
+/*
+          ADD THE REMAINING TERMS OF THE SERIES
+*/
+    kp1 = k+1;
+    for(i=kp1; i<=nm1; i++) {
+        l = i-1;
+        d = (apb+l)/(ap1+l)**x*d;
+        w += d;
+        if(d <= *eps*w) goto S70;
+    }
+S70:
+/*
+               TERMINATE THE PROCEDURE
+*/
+    bup *= w;
+    return bup;
+}
+void cdfbet(int *which,double *p,double *q,double *x,double *y,
+	    double *a,double *b,int *status,double *bound)
+/**********************************************************************
+
+      void cdfbet(int *which,double *p,double *q,double *x,double *y,
+            double *a,double *b,int *status,double *bound)
+
+               Cumulative Distribution Function
+                         BETa Distribution
+
+
+                              Function
+
+
+     Calculates any one parameter of the beta distribution given
+     values for the others.
+
+
+                              Arguments
+
+
+     WHICH --> Integer indicating which of the next four argument
+               values is to be calculated from the others.
+               Legal range: 1..4
+               iwhich = 1 : Calculate P and Q from X,Y,A and B
+               iwhich = 2 : Calculate X and Y from P,Q,A and B
+               iwhich = 3 : Calculate A from P,Q,X,Y and B
+               iwhich = 4 : Calculate B from P,Q,X,Y and A
+
+     P <--> The integral from 0 to X of the beta
+            density.
+            Input range: [0, 1].
+
+     Q <--> 1-P.
+            Input range: [0, 1].
+            P + Q = 1.0.
+
+     X <--> Upper limit of integration of beta density.
+            Input range: [0,1].
+            Search range: [0,1]
+
+     Y <--> 1-X.
+            Input range: [0,1].
+            Search range: [0,1]
+            X + Y = 1.0.
+
+     A <--> The first parameter of the beta density.
+            Input range: (0, +infinity).
+            Search range: [1D-100,1D100]
+
+     B <--> The second parameter of the beta density.
+            Input range: (0, +infinity).
+            Search range: [1D-100,1D100]
+
+     STATUS <-- 0 if calculation completed correctly
+               -I if input parameter number I is out of range
+                1 if answer appears to be lower than lowest
+                  search bound
+                2 if answer appears to be higher than greatest
+                  search bound
+                3 if P + Q .ne. 1
+                4 if X + Y .ne. 1
+
+     BOUND <-- Undefined if STATUS is 0
+
+               Bound exceeded by parameter number I if STATUS
+               is negative.
+
+               Lower search bound if STATUS is 1.
+
+               Upper search bound if STATUS is 2.
+
+
+                              Method
+
+
+     Cumulative distribution function  (P)  is calculated directly by
+     code associated with the following reference.
+
+     Didonato, A. R. and Morris,  A.   H.  Algorithm 708: Significant
+     Digit Computation of the Incomplete  Beta  Function Ratios.  ACM
+     Trans. Math.  Softw. 18 (1992), 360-373.
+
+     Computation of other parameters involves a search for a value that
+     produces  the desired  value  of P.   The search relies  on  the
+     monotonicity of P with the other parameter.
+
+
+                              Note
+
+
+     The beta density is proportional to
+               t^(A-1) * (1-t)^(B-1)
+
+**********************************************************************/
+{
+#define tol 1.0e-8
+#define atol 1.0e-50
+#define zero 1.0e-100
+#define inf 1.0e100
+#define one 1.0e0
+static int K1 = 1;
+static double K2 = 0.0e0;
+static double K3 = 1.0e0;
+static double K8 = 0.5e0;
+static double K9 = 5.0e0;
+static double fx,xhi,xlo,cum,ccum,xy,pq;
+static unsigned long qhi,qleft,qporq;
+static double T4,T5,T6,T7,T10,T11,T12,T13,T14,T15;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+     Check arguments
+*/
+    if(!(*which < 1 || *which > 4)) goto S30;
+    if(!(*which < 1)) goto S10;
+    *bound = 1.0e0;
+    goto S20;
+S10:
+    *bound = 4.0e0;
+S20:
+    *status = -1;
+    return;
+S30:
+    if(*which == 1) goto S70;
+/*
+     P
+*/
+    if(!(*p < 0.0e0 || *p > 1.0e0)) goto S60;
+    if(!(*p < 0.0e0)) goto S40;
+    *bound = 0.0e0;
+    goto S50;
+S40:
+    *bound = 1.0e0;
+S50:
+    *status = -2;
+    return;
+S70:
+S60:
+    if(*which == 1) goto S110;
+/*
+     Q
+*/
+    if(!(*q < 0.0e0 || *q > 1.0e0)) goto S100;
+    if(!(*q < 0.0e0)) goto S80;
+    *bound = 0.0e0;
+    goto S90;
+S80:
+    *bound = 1.0e0;
+S90:
+    *status = -3;
+    return;
+S110:
+S100:
+    if(*which == 2) goto S150;
+/*
+     X
+*/
+    if(!(*x < 0.0e0 || *x > 1.0e0)) goto S140;
+    if(!(*x < 0.0e0)) goto S120;
+    *bound = 0.0e0;
+    goto S130;
+S120:
+    *bound = 1.0e0;
+S130:
+    *status = -4;
+    return;
+S150:
+S140:
+    if(*which == 2) goto S190;
+/*
+     Y
+*/
+    if(!(*y < 0.0e0 || *y > 1.0e0)) goto S180;
+    if(!(*y < 0.0e0)) goto S160;
+    *bound = 0.0e0;
+    goto S170;
+S160:
+    *bound = 1.0e0;
+S170:
+    *status = -5;
+    return;
+S190:
+S180:
+    if(*which == 3) goto S210;
+/*
+     A
+*/
+    if(!(*a <= 0.0e0)) goto S200;
+    *bound = 0.0e0;
+    *status = -6;
+    return;
+S210:
+S200:
+    if(*which == 4) goto S230;
+/*
+     B
+*/
+    if(!(*b <= 0.0e0)) goto S220;
+    *bound = 0.0e0;
+    *status = -7;
+    return;
+S230:
+S220:
+    if(*which == 1) goto S270;
+/*
+     P + Q
+*/
+    pq = *p+*q;
+    if(!(fabs(pq-0.5e0-0.5e0) > 3.0e0*spmpar(&K1))) goto S260;
+    if(!(pq < 0.0e0)) goto S240;
+    *bound = 0.0e0;
+    goto S250;
+S240:
+    *bound = 1.0e0;
+S250:
+    *status = 3;
+    return;
+S270:
+S260:
+    if(*which == 2) goto S310;
+/*
+     X + Y
+*/
+    xy = *x+*y;
+    if(!(fabs(xy-0.5e0-0.5e0) > 3.0e0*spmpar(&K1))) goto S300;
+    if(!(xy < 0.0e0)) goto S280;
+    *bound = 0.0e0;
+    goto S290;
+S280:
+    *bound = 1.0e0;
+S290:
+    *status = 4;
+    return;
+S310:
+S300:
+    if(!(*which == 1)) qporq = *p <= *q;
+/*
+     Select the minimum of P or Q
+     Calculate ANSWERS
+*/
+    if(1 == *which) {
+/*
+     Calculating P and Q
+*/
+        cumbet(x,y,a,b,p,q);
+        *status = 0;
+    }
+    else if(2 == *which) {
+/*
+     Calculating X and Y
+*/
+        T4 = atol;
+        T5 = tol;
+        dstzr(&K2,&K3,&T4,&T5);
+        if(!qporq) goto S340;
+        *status = 0;
+        dzror(status,x,&fx,&xlo,&xhi,&qleft,&qhi);
+        *y = one-*x;
+S320:
+        if(!(*status == 1)) goto S330;
+        cumbet(x,y,a,b,&cum,&ccum);
+        fx = cum-*p;
+        dzror(status,x,&fx,&xlo,&xhi,&qleft,&qhi);
+        *y = one-*x;
+        goto S320;
+S330:
+        goto S370;
+S340:
+        *status = 0;
+        dzror(status,y,&fx,&xlo,&xhi,&qleft,&qhi);
+        *x = one-*y;
+S350:
+        if(!(*status == 1)) goto S360;
+        cumbet(x,y,a,b,&cum,&ccum);
+        fx = ccum-*q;
+        dzror(status,y,&fx,&xlo,&xhi,&qleft,&qhi);
+        *x = one-*y;
+        goto S350;
+S370:
+S360:
+        if(!(*status == -1)) goto S400;
+        if(!qleft) goto S380;
+        *status = 1;
+        *bound = 0.0e0;
+        goto S390;
+S380:
+        *status = 2;
+        *bound = 1.0e0;
+S400:
+S390:
+        ;
+    }
+    else if(3 == *which) {
+/*
+     Computing A
+*/
+        *a = 5.0e0;
+        T6 = zero;
+        T7 = inf;
+        T10 = atol;
+        T11 = tol;
+        dstinv(&T6,&T7,&K8,&K8,&K9,&T10,&T11);
+        *status = 0;
+        dinvr(status,a,&fx,&qleft,&qhi);
+S410:
+        if(!(*status == 1)) goto S440;
+        cumbet(x,y,a,b,&cum,&ccum);
+        if(!qporq) goto S420;
+        fx = cum-*p;
+        goto S430;
+S420:
+        fx = ccum-*q;
+S430:
+        dinvr(status,a,&fx,&qleft,&qhi);
+        goto S410;
+S440:
+        if(!(*status == -1)) goto S470;
+        if(!qleft) goto S450;
+        *status = 1;
+        *bound = zero;
+        goto S460;
+S450:
+        *status = 2;
+        *bound = inf;
+S470:
+S460:
+        ;
+    }
+    else if(4 == *which) {
+/*
+     Computing B
+*/
+        *b = 5.0e0;
+        T12 = zero;
+        T13 = inf;
+        T14 = atol;
+        T15 = tol;
+        dstinv(&T12,&T13,&K8,&K8,&K9,&T14,&T15);
+        *status = 0;
+        dinvr(status,b,&fx,&qleft,&qhi);
+S480:
+        if(!(*status == 1)) goto S510;
+        cumbet(x,y,a,b,&cum,&ccum);
+        if(!qporq) goto S490;
+        fx = cum-*p;
+        goto S500;
+S490:
+        fx = ccum-*q;
+S500:
+        dinvr(status,b,&fx,&qleft,&qhi);
+        goto S480;
+S510:
+        if(!(*status == -1)) goto S540;
+        if(!qleft) goto S520;
+        *status = 1;
+        *bound = zero;
+        goto S530;
+S520:
+        *status = 2;
+        *bound = inf;
+S530:
+        ;
+    }
+S540:
+    return;
+#undef tol
+#undef atol
+#undef zero
+#undef inf
+#undef one
+}
+void cdfbin(int *which,double *p,double *q,double *s,double *xn,
+	    double *pr,double *ompr,int *status,double *bound)
+/**********************************************************************
+
+      void cdfbin(int *which,double *p,double *q,double *s,double *xn,
+            double *pr,double *ompr,int *status,double *bound)
+
+               Cumulative Distribution Function
+                         BINomial distribution
+
+
+                              Function
+
+
+     Calculates any one parameter of the binomial
+     distribution given values for the others.
+
+
+                              Arguments
+
+
+     WHICH --> Integer indicating which of the next four argument
+               values is to be calculated from the others.
+               Legal range: 1..4
+               iwhich = 1 : Calculate P and Q from S,XN,PR and OMPR
+               iwhich = 2 : Calculate S from P,Q,XN,PR and OMPR
+               iwhich = 3 : Calculate XN from P,Q,S,PR and OMPR
+               iwhich = 4 : Calculate PR and OMPR from P,Q,S and XN
+
+     P <--> The cumulation from 0 to S of the binomial distribution.
+            (Probability of S or fewer successes in XN trials each
+            with probability of success PR.)
+            Input range: [0,1].
+
+     Q <--> 1-P.
+            Input range: [0, 1].
+            P + Q = 1.0.
+
+     S <--> The number of successes observed.
+            Input range: [0, XN]
+            Search range: [0, XN]
+
+     XN  <--> The number of binomial trials.
+              Input range: (0, +infinity).
+              Search range: [1E-100, 1E100]
+
+     PR  <--> The probability of success in each binomial trial.
+              Input range: [0,1].
+              Search range: [0,1]
+
+     OMPR  <--> 1-PR
+              Input range: [0,1].
+              Search range: [0,1]
+              PR + OMPR = 1.0
+
+     STATUS <-- 0 if calculation completed correctly
+               -I if input parameter number I is out of range
+                1 if answer appears to be lower than lowest
+                  search bound
+                2 if answer appears to be higher than greatest
+                  search bound
+                3 if P + Q .ne. 1
+                4 if PR + OMPR .ne. 1
+
+     BOUND <-- Undefined if STATUS is 0
+
+               Bound exceeded by parameter number I if STATUS
+               is negative.
+
+               Lower search bound if STATUS is 1.
+
+               Upper search bound if STATUS is 2.
+
+
+                              Method
+
+
+     Formula  26.5.24    of   Abramowitz  and    Stegun,  Handbook   of
+     Mathematical   Functions (1966) is   used  to reduce the  binomial
+     distribution  to  the  cumulative incomplete    beta distribution.
+
+     Computation of other parameters involves a search for a value that
+     produces  the desired  value  of P.   The search relies  on  the
+     monotonicity of P with the other parameter.
+
+
+**********************************************************************/
+{
+#define atol 1.0e-50
+#define tol 1.0e-8
+#define zero 1.0e-100
+#define inf 1.0e100
+#define one 1.0e0
+static int K1 = 1;
+static double K2 = 0.0e0;
+static double K3 = 0.5e0;
+static double K4 = 5.0e0;
+static double K11 = 1.0e0;
+static double fx,xhi,xlo,cum,ccum,pq,prompr;
+static unsigned long qhi,qleft,qporq;
+static double T5,T6,T7,T8,T9,T10,T12,T13;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+     Check arguments: WHICH must lie in 1..4, else STATUS = -1
+*/
+    if(!(*which < 1 || *which > 4)) goto S30;
+    if(!(*which < 1)) goto S10;
+    *bound = 1.0e0;
+    goto S20;
+S10:
+    *bound = 4.0e0;
+S20:
+    *status = -1;
+    return;
+S30:
+    if(*which == 1) goto S70;
+/*
+     P must lie in [0,1] (not checked when WHICH == 1)
+*/
+    if(!(*p < 0.0e0 || *p > 1.0e0)) goto S60;
+    if(!(*p < 0.0e0)) goto S40;
+    *bound = 0.0e0;
+    goto S50;
+S40:
+    *bound = 1.0e0;
+S50:
+    *status = -2;
+    return;
+S70:
+S60:
+    if(*which == 1) goto S110;
+/*
+     Q must lie in [0,1] (not checked when WHICH == 1)
+*/
+    if(!(*q < 0.0e0 || *q > 1.0e0)) goto S100;
+    if(!(*q < 0.0e0)) goto S80;
+    *bound = 0.0e0;
+    goto S90;
+S80:
+    *bound = 1.0e0;
+S90:
+    *status = -3;
+    return;
+S110:
+S100:
+    if(*which == 3) goto S130;
+/*
+     XN must be positive (not checked when WHICH == 3)
+*/
+    if(!(*xn <= 0.0e0)) goto S120;
+    *bound = 0.0e0;
+    *status = -5;
+    return;
+S130:
+S120:
+    if(*which == 2) goto S170;
+/*
+     S must be >= 0 and, unless XN is the unknown, <= XN (skipped when WHICH == 2)
+*/
+    if(!(*s < 0.0e0 || *which != 3 && *s > *xn)) goto S160;
+    if(!(*s < 0.0e0)) goto S140;
+    *bound = 0.0e0;
+    goto S150;
+S140:
+    *bound = *xn;
+S150:
+    *status = -4;
+    return;
+S170:
+S160:
+    if(*which == 4) goto S210;
+/*
+     PR must lie in [0,1] (not checked when WHICH == 4)
+*/
+    if(!(*pr < 0.0e0 || *pr > 1.0e0)) goto S200;
+    if(!(*pr < 0.0e0)) goto S180;
+    *bound = 0.0e0;
+    goto S190;
+S180:
+    *bound = 1.0e0;
+S190:
+    *status = -6;
+    return;
+S210:
+S200:
+    if(*which == 4) goto S250;
+/*
+     OMPR must lie in [0,1] (not checked when WHICH == 4)
+*/
+    if(!(*ompr < 0.0e0 || *ompr > 1.0e0)) goto S240;
+    if(!(*ompr < 0.0e0)) goto S220;
+    *bound = 0.0e0;
+    goto S230;
+S220:
+    *bound = 1.0e0;
+S230:
+    *status = -7;
+    return;
+S250:
+S240:
+    if(*which == 1) goto S290;
+/*
+     P + Q must equal 1 to within 3*spmpar(1) (skipped when WHICH == 1)
+*/
+    pq = *p+*q;
+    if(!(fabs(pq-0.5e0-0.5e0) > 3.0e0*spmpar(&K1))) goto S280;
+    if(!(pq < 0.0e0)) goto S260;
+    *bound = 0.0e0;
+    goto S270;
+S260:
+    *bound = 1.0e0;
+S270:
+    *status = 3;
+    return;
+S290:
+S280:
+    if(*which == 4) goto S330;
+/*
+     PR + OMPR must equal 1 to within 3*spmpar(1) (skipped when WHICH == 4)
+*/
+    prompr = *pr+*ompr;
+    if(!(fabs(prompr-0.5e0-0.5e0) > 3.0e0*spmpar(&K1))) goto S320;
+    if(!(prompr < 0.0e0)) goto S300;
+    *bound = 0.0e0;
+    goto S310;
+S300:
+    *bound = 1.0e0;
+S310:
+    *status = 4;
+    return;
+S330:
+S320:
+    if(!(*which == 1)) qporq = *p <= *q;
+/*
+     qporq is nonzero when P <= Q: the searches below then match on P,
+     otherwise on Q.  Calculate ANSWERS
+*/
+    if(1 == *which) {
+/*
+     Calculating P and Q
+*/
+        cumbin(s,xn,pr,ompr,p,q);
+        *status = 0;
+    }
+    else if(2 == *which) {
+/*
+     Calculating S: re-evaluate fx and call dinvr again while STATUS == 1
+*/
+        *s = 5.0e0;
+        T5 = atol;
+        T6 = tol;
+        dstinv(&K2,xn,&K3,&K3,&K4,&T5,&T6);
+        *status = 0;
+        dinvr(status,s,&fx,&qleft,&qhi);
+S340:
+        if(!(*status == 1)) goto S370;
+        cumbin(s,xn,pr,ompr,&cum,&ccum);
+        if(!qporq) goto S350;
+        fx = cum-*p;
+        goto S360;
+S350:
+        fx = ccum-*q;
+S360:
+        dinvr(status,s,&fx,&qleft,&qhi);
+        goto S340;
+S370:
+        if(!(*status == -1)) goto S400;
+        if(!qleft) goto S380;
+        *status = 1;
+        *bound = 0.0e0;
+        goto S390;
+S380:
+        *status = 2;
+        *bound = *xn;
+S400:
+S390:
+        ;
+    }
+    else if(3 == *which) {
+/*
+     Calculating XN: re-evaluate fx and call dinvr again while STATUS == 1
+*/
+        *xn = 5.0e0;
+        T7 = zero;
+        T8 = inf;
+        T9 = atol;
+        T10 = tol;
+        dstinv(&T7,&T8,&K3,&K3,&K4,&T9,&T10);
+        *status = 0;
+        dinvr(status,xn,&fx,&qleft,&qhi);
+S410:
+        if(!(*status == 1)) goto S440;
+        cumbin(s,xn,pr,ompr,&cum,&ccum);
+        if(!qporq) goto S420;
+        fx = cum-*p;
+        goto S430;
+S420:
+        fx = ccum-*q;
+S430:
+        dinvr(status,xn,&fx,&qleft,&qhi);
+        goto S410;
+S440:
+        if(!(*status == -1)) goto S470;
+        if(!qleft) goto S450;
+        *status = 1;
+        *bound = zero;
+        goto S460;
+S450:
+        *status = 2;
+        *bound = inf;
+S470:
+S460:
+        ;
+    }
+    else if(4 == *which) {
+/*
+     Calculating PR and OMPR: iterate with dzror while STATUS == 1
+*/
+        T12 = atol;
+        T13 = tol;
+        dstzr(&K2,&K11,&T12,&T13);
+        if(!qporq) goto S500;
+        *status = 0;
+        dzror(status,pr,&fx,&xlo,&xhi,&qleft,&qhi);
+        *ompr = one-*pr;
+S480:
+        if(!(*status == 1)) goto S490;
+        cumbin(s,xn,pr,ompr,&cum,&ccum);
+        fx = cum-*p;
+        dzror(status,pr,&fx,&xlo,&xhi,&qleft,&qhi);
+        *ompr = one-*pr;
+        goto S480;
+S490:
+        goto S530;
+S500:
+        *status = 0;
+        dzror(status,ompr,&fx,&xlo,&xhi,&qleft,&qhi);
+        *pr = one-*ompr;
+S510:
+        if(!(*status == 1)) goto S520;
+        cumbin(s,xn,pr,ompr,&cum,&ccum);
+        fx = ccum-*q;
+        dzror(status,ompr,&fx,&xlo,&xhi,&qleft,&qhi);
+        *pr = one-*ompr;
+        goto S510;
+S530:
+S520:
+        if(!(*status == -1)) goto S560;
+        if(!qleft) goto S540;
+        *status = 1;
+        *bound = 0.0e0;
+        goto S550;
+S540:
+        *status = 2;
+        *bound = 1.0e0;
+S550:
+        ;
+    }
+S560:
+    return;
+#undef atol
+#undef tol
+#undef zero
+#undef inf
+#undef one
+}
+void cdfchi(int *which,double *p,double *q,double *x,double *df,
+	    int *status,double *bound)
+/**********************************************************************
+
+      void cdfchi(int *which,double *p,double *q,double *x,double *df,
+            int *status,double *bound)
+
+               Cumulative Distribution Function
+               CHI-Square distribution
+
+
+                              Function
+
+
+     Calculates any one parameter of the chi-square
+     distribution given values for the others.
+
+
+                              Arguments
+
+
+     WHICH --> Integer indicating which of the next three argument
+               values is to be calculated from the others.
+               Legal range: 1..3
+               iwhich = 1 : Calculate P and Q from X and DF
+               iwhich = 2 : Calculate X from P,Q and DF
+               iwhich = 3 : Calculate DF from P,Q and X
+
+     P <--> The integral from 0 to X of the chi-square
+            distribution.
+            Input range: [0, 1].
+
+     Q <--> 1-P.
+            Input range: (0, 1].
+            P + Q = 1.0.
+
+     X <--> Upper limit of integration of the
+            chi-square distribution.
+            Input range: [0, +infinity).
+            Search range: [0,1E100]
+
+     DF <--> Degrees of freedom of the
+             chi-square distribution.
+             Input range: (0, +infinity).
+             Search range: [ 1E-100, 1E100]
+
+     STATUS <-- 0 if calculation completed correctly
+               -I if input parameter number I is out of range
+                1 if answer appears to be lower than lowest
+                  search bound
+                2 if answer appears to be higher than greatest
+                  search bound
+                3 if P + Q .ne. 1
+               10 indicates error returned from cumgam.  See
+                  references in cdfgam
+
+     BOUND <-- Undefined if STATUS is 0
+
+               Bound exceeded by parameter number I if STATUS
+               is negative.
+
+               Lower search bound if STATUS is 1.
+
+               Upper search bound if STATUS is 2.
+
+
+                              Method
+
+
+     Formula    26.4.19   of Abramowitz  and     Stegun, Handbook  of
+     Mathematical Functions   (1966) is used   to reduce the chi-square
+     distribution to the incomplete gamma distribution.
+
+     Computation of other parameters involves a search for a value that
+     produces  the desired  value  of P.   The search relies  on  the
+     monotonicity of P with the other parameter.
+
+**********************************************************************/
+{
+#define tol 1.0e-8
+#define atol 1.0e-50
+#define zero 1.0e-100
+#define inf 1.0e100
+static int K1 = 1;
+static double K2 = 0.0e0;
+static double K4 = 0.5e0;
+static double K5 = 5.0e0;
+static double fx,cum,ccum,pq,porq;
+static unsigned long qhi,qleft,qporq;
+static double T3,T6,T7,T8,T9,T10,T11;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+     Check arguments: WHICH must lie in 1..3, else STATUS = -1
+*/
+    if(!(*which < 1 || *which > 3)) goto S30;
+    if(!(*which < 1)) goto S10;
+    *bound = 1.0e0;
+    goto S20;
+S10:
+    *bound = 3.0e0;
+S20:
+    *status = -1;
+    return;
+S30:
+    if(*which == 1) goto S70;
+/*
+     P must lie in [0,1] (not checked when WHICH == 1)
+*/
+    if(!(*p < 0.0e0 || *p > 1.0e0)) goto S60;
+    if(!(*p < 0.0e0)) goto S40;
+    *bound = 0.0e0;
+    goto S50;
+S40:
+    *bound = 1.0e0;
+S50:
+    *status = -2;
+    return;
+S70:
+S60:
+    if(*which == 1) goto S110;
+/*
+     Q must lie in (0,1] (not checked when WHICH == 1)
+*/
+    if(!(*q <= 0.0e0 || *q > 1.0e0)) goto S100;
+    if(!(*q <= 0.0e0)) goto S80;
+    *bound = 0.0e0;
+    goto S90;
+S80:
+    *bound = 1.0e0;
+S90:
+    *status = -3;
+    return;
+S110:
+S100:
+    if(*which == 2) goto S130;
+/*
+     X must be non-negative (not checked when WHICH == 2)
+*/
+    if(!(*x < 0.0e0)) goto S120;
+    *bound = 0.0e0;
+    *status = -4;
+    return;
+S130:
+S120:
+    if(*which == 3) goto S150;
+/*
+     DF must be positive (not checked when WHICH == 3)
+*/
+    if(!(*df <= 0.0e0)) goto S140;
+    *bound = 0.0e0;
+    *status = -5;
+    return;
+S150:
+S140:
+    if(*which == 1) goto S190;
+/*
+     P + Q must equal 1 to within 3*spmpar(1) (skipped when WHICH == 1)
+*/
+    pq = *p+*q;
+    if(!(fabs(pq-0.5e0-0.5e0) > 3.0e0*spmpar(&K1))) goto S180;
+    if(!(pq < 0.0e0)) goto S160;
+    *bound = 0.0e0;
+    goto S170;
+S160:
+    *bound = 1.0e0;
+S170:
+    *status = 3;
+    return;
+S190:
+S180:
+    if(*which == 1) goto S220;
+/*
+     Select the minimum of P or Q into porq (skipped when WHICH == 1)
+*/
+    qporq = *p <= *q;
+    if(!qporq) goto S200;
+    porq = *p;
+    goto S210;
+S200:
+    porq = *q;
+S220:
+S210:
+/*
+     Calculate ANSWERS
+*/
+    if(1 == *which) {
+/*
+     Calculating P and Q; porq is unset here, so test the results directly
+*/
+        *status = 0;
+        cumchi(x,df,p,q);
+        if(*p > 1.5e0 || *q > 1.5e0) {
+            *status = 10;
+            return;
+        }
+    }
+    else if(2 == *which) {
+/*
+     Calculating X: re-evaluate fx and call dinvr again while STATUS == 1
+*/
+        *x = 5.0e0;
+        T3 = inf;
+        T6 = atol;
+        T7 = tol;
+        dstinv(&K2,&T3,&K4,&K4,&K5,&T6,&T7);
+        *status = 0;
+        dinvr(status,x,&fx,&qleft,&qhi);
+S230:
+        if(!(*status == 1)) goto S270;
+        cumchi(x,df,&cum,&ccum);
+        if(!qporq) goto S240;
+        fx = cum-*p;
+        goto S250;
+S240:
+        fx = ccum-*q;
+S250:
+        if(!(fx+porq > 1.5e0)) goto S260;
+        *status = 10;
+        return;
+S260:
+        dinvr(status,x,&fx,&qleft,&qhi);
+        goto S230;
+S270:
+        if(!(*status == -1)) goto S300;
+        if(!qleft) goto S280;
+        *status = 1;
+        *bound = 0.0e0;
+        goto S290;
+S280:
+        *status = 2;
+        *bound = inf;
+S300:
+S290:
+        ;
+    }
+    else if(3 == *which) {
+/*
+     Calculating DF: re-evaluate fx and call dinvr again while STATUS == 1
+*/
+        *df = 5.0e0;
+        T8 = zero;
+        T9 = inf;
+        T10 = atol;
+        T11 = tol;
+        dstinv(&T8,&T9,&K4,&K4,&K5,&T10,&T11);
+        *status = 0;
+        dinvr(status,df,&fx,&qleft,&qhi);
+S310:
+        if(!(*status == 1)) goto S350;
+        cumchi(x,df,&cum,&ccum);
+        if(!qporq) goto S320;
+        fx = cum-*p;
+        goto S330;
+S320:
+        fx = ccum-*q;
+S330:
+        if(!(fx+porq > 1.5e0)) goto S340;
+        *status = 10;
+        return;
+S340:
+        dinvr(status,df,&fx,&qleft,&qhi);
+        goto S310;
+S350:
+        if(!(*status == -1)) goto S380;
+        if(!qleft) goto S360;
+        *status = 1;
+        *bound = zero;
+        goto S370;
+S360:
+        *status = 2;
+        *bound = inf;
+S370:
+        ;
+    }
+S380:
+    return;
+#undef tol
+#undef atol
+#undef zero
+#undef inf
+}
+void cdfchn(int *which,double *p,double *q,double *x,double *df,
+	    double *pnonc,int *status,double *bound)
+/**********************************************************************
+
+      void cdfchn(int *which,double *p,double *q,double *x,double *df,
+            double *pnonc,int *status,double *bound)
+
+               Cumulative Distribution Function
+               Non-central Chi-Square
+
+
+                              Function
+
+
+     Calculates any one parameter of the non-central chi-square
+     distribution given values for the others.
+
+
+                              Arguments
+
+
+     WHICH --> Integer indicating which of the next four argument
+               values is to be calculated from the others.
+               Input range: 1..4
+               iwhich = 1 : Calculate P and Q from X, DF and PNONC
+               iwhich = 2 : Calculate X from P,DF and PNONC
+               iwhich = 3 : Calculate DF from P,X and PNONC
+               iwhich = 4 : Calculate PNONC from P,X and DF
+
+     P <--> The integral from 0 to X of the non-central chi-square
+            distribution.
+            Input range: [0, 1-1E-16).
+
+     Q <--> 1-P.
+            Q is not used by this subroutine and is only included
+            for similarity with other cdf* routines.
+
+     X <--> Upper limit of integration of the non-central
+            chi-square distribution.
+            Input range: [0, +infinity).
+            Search range: [0,1E100]
+
+     DF <--> Degrees of freedom of the non-central
+             chi-square distribution.
+             Input range: (0, +infinity).
+             Search range: [ 1E-100, 1E100]
+
+     PNONC <--> Non-centrality parameter of the non-central
+                chi-square distribution.
+                Input range: [0, +infinity).
+                Search range: [0,1E4]
+
+     STATUS <-- 0 if calculation completed correctly
+               -I if input parameter number I is out of range
+                1 if answer appears to be lower than lowest
+                  search bound
+                2 if answer appears to be higher than greatest
+                  search bound
+
+     BOUND <-- Undefined if STATUS is 0
+
+               Bound exceeded by parameter number I if STATUS
+               is negative.
+
+               Lower search bound if STATUS is 1.
+
+               Upper search bound if STATUS is 2.
+
+
+                              Method
+
+
+     Formula  26.4.25   of   Abramowitz   and   Stegun,  Handbook  of
+     Mathematical  Functions (1966) is used to compute the cumulative
+     distribution function.
+
+     Computation of other parameters involves a search for a value that
+     produces  the desired  value  of P.   The search relies  on  the
+     monotonicity of P with the other parameter.
+
+
+                            WARNING
+
+     The computation time  required for this  routine is proportional
+     to the noncentrality  parameter  (PNONC).  Very large  values of
+     this parameter can consume immense  computer resources.  This is
+     why the search range is bounded by 10,000.
+
+**********************************************************************/
+{
+#define tent4 1.0e4
+#define tol 1.0e-8
+#define atol 1.0e-50
+#define zero 1.0e-100
+#define one ( 1.0e0 - 1.0e-16 )
+#define inf 1.0e100
+static double K1 = 0.0e0;
+static double K3 = 0.5e0;
+static double K4 = 5.0e0;
+static double fx,cum,ccum;
+static unsigned long qhi,qleft;
+static double T2,T5,T6,T7,T8,T9,T10,T11,T12,T13;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+     Check arguments: WHICH must lie in 1..4, else STATUS = -1
+*/
+    if(!(*which < 1 || *which > 4)) goto S30;
+    if(!(*which < 1)) goto S10;
+    *bound = 1.0e0;
+    goto S20;
+S10:
+    *bound = 4.0e0;
+S20:
+    *status = -1;
+    return;
+S30:
+    if(*which == 1) goto S70;
+/*
+     P must lie in [0, 1-1E-16] (not checked when WHICH == 1)
+*/
+    if(!(*p < 0.0e0 || *p > one)) goto S60;
+    if(!(*p < 0.0e0)) goto S40;
+    *bound = 0.0e0;
+    goto S50;
+S40:
+    *bound = one;
+S50:
+    *status = -2;
+    return;
+S70:
+S60:
+    if(*which == 2) goto S90;
+/*
+     X must be non-negative (not checked when WHICH == 2)
+*/
+    if(!(*x < 0.0e0)) goto S80;
+    *bound = 0.0e0;
+    *status = -4;
+    return;
+S90:
+S80:
+    if(*which == 3) goto S110;
+/*
+     DF must be positive (not checked when WHICH == 3)
+*/
+    if(!(*df <= 0.0e0)) goto S100;
+    *bound = 0.0e0;
+    *status = -5;
+    return;
+S110:
+S100:
+    if(*which == 4) goto S130;
+/*
+     PNONC must be non-negative (not checked when WHICH == 4)
+*/
+    if(!(*pnonc < 0.0e0)) goto S120;
+    *bound = 0.0e0;
+    *status = -6;
+    return;
+S130:
+S120:
+/*
+     Calculate ANSWERS
+*/
+    if(1 == *which) {
+/*
+     Calculating P and Q
+*/
+        cumchn(x,df,pnonc,p,q);
+        *status = 0;
+    }
+    else if(2 == *which) {
+/*
+     Calculating X: re-evaluate fx and call dinvr again while STATUS == 1
+*/
+        *x = 5.0e0;
+        T2 = inf;
+        T5 = atol;
+        T6 = tol;
+        dstinv(&K1,&T2,&K3,&K3,&K4,&T5,&T6);
+        *status = 0;
+        dinvr(status,x,&fx,&qleft,&qhi);
+S140:
+        if(!(*status == 1)) goto S150;
+        cumchn(x,df,pnonc,&cum,&ccum);
+        fx = cum-*p;
+        dinvr(status,x,&fx,&qleft,&qhi);
+        goto S140;
+S150:
+        if(!(*status == -1)) goto S180;
+        if(!qleft) goto S160;
+        *status = 1;
+        *bound = 0.0e0;
+        goto S170;
+S160:
+        *status = 2;
+        *bound = inf;
+S180:
+S170:
+        ;
+    }
+    else if(3 == *which) {
+/*
+     Calculating DF: re-evaluate fx and call dinvr again while STATUS == 1
+*/
+        *df = 5.0e0;
+        T7 = zero;
+        T8 = inf;
+        T9 = atol;
+        T10 = tol;
+        dstinv(&T7,&T8,&K3,&K3,&K4,&T9,&T10);
+        *status = 0;
+        dinvr(status,df,&fx,&qleft,&qhi);
+S190:
+        if(!(*status == 1)) goto S200;
+        cumchn(x,df,pnonc,&cum,&ccum);
+        fx = cum-*p;
+        dinvr(status,df,&fx,&qleft,&qhi);
+        goto S190;
+S200:
+        if(!(*status == -1)) goto S230;
+        if(!qleft) goto S210;
+        *status = 1;
+        *bound = zero;
+        goto S220;
+S210:
+        *status = 2;
+        *bound = inf;
+S230:
+S220:
+        ;
+    }
+    else if(4 == *which) {
+/*
+     Calculating PNONC; the bounds handed to dstinv are K1 (0) and tent4
+*/
+        *pnonc = 5.0e0;
+        T11 = tent4;
+        T12 = atol;
+        T13 = tol;
+        dstinv(&K1,&T11,&K3,&K3,&K4,&T12,&T13);
+        *status = 0;
+        dinvr(status,pnonc,&fx,&qleft,&qhi);
+S240:
+        if(!(*status == 1)) goto S250;
+        cumchn(x,df,pnonc,&cum,&ccum);
+        fx = cum-*p;
+        dinvr(status,pnonc,&fx,&qleft,&qhi);
+        goto S240;
+S250:
+        if(!(*status == -1)) goto S280;
+        if(!qleft) goto S260;
+        *status = 1;
+        *bound = 0.0e0;
+        goto S270;
+S260:
+        *status = 2;
+        *bound = tent4;
+S270:
+        ;
+    }
+S280:
+    return;
+#undef tent4
+#undef tol
+#undef atol
+#undef zero
+#undef one
+#undef inf
+}
+void cdff(int *which,double *p,double *q,double *f,double *dfn,
+	  double *dfd,int *status,double *bound)
+/**********************************************************************
+
+      void cdff(int *which,double *p,double *q,double *f,double *dfn,
+          double *dfd,int *status,double *bound)
+
+               Cumulative Distribution Function
+               F distribution
+
+
+                              Function
+
+
+     Calculates any one parameter of the F distribution
+     given values for the others.
+
+
+                              Arguments
+
+
+     WHICH --> Integer indicating which of the next four argument
+               values is to be calculated from the others.
+               Legal range: 1..4
+               iwhich = 1 : Calculate P and Q from F,DFN and DFD
+               iwhich = 2 : Calculate F from P,Q,DFN and DFD
+               iwhich = 3 : Calculate DFN from P,Q,F and DFD
+               iwhich = 4 : Calculate DFD from P,Q,F and DFN
+
+       P <--> The integral from 0 to F of the f-density.
+              Input range: [0,1].
+
+       Q <--> 1-P.
+              Input range: (0, 1].
+              P + Q = 1.0.
+
+       F <--> Upper limit of integration of the f-density.
+              Input range: [0, +infinity).
+              Search range: [0,1E100]
+
+     DFN <--> Degrees of freedom of the numerator sum of squares.
+               Input range: (0, +infinity).
+               Search range: [ 1E-100, 1E100]
+
+     DFD <--> Degrees of freedom of the denominator sum of squares.
+               Input range: (0, +infinity).
+               Search range: [ 1E-100, 1E100]
+
+     STATUS <-- 0 if calculation completed correctly
+               -I if input parameter number I is out of range
+                1 if answer appears to be lower than lowest
+                  search bound
+                2 if answer appears to be higher than greatest
+                  search bound
+                3 if P + Q .ne. 1
+
+     BOUND <-- Undefined if STATUS is 0
+
+               Bound exceeded by parameter number I if STATUS
+               is negative.
+
+               Lower search bound if STATUS is 1.
+
+               Upper search bound if STATUS is 2.
+
+
+                              Method
+
+
+     Formula   26.6.2   of   Abramowitz   and   Stegun,  Handbook  of
+     Mathematical  Functions (1966) is used to reduce the computation
+     of the  cumulative  distribution function for the  F  variate to
+     that of an incomplete beta.
+
+     Computation of other parameters involves a search for a value that
+     produces  the desired  value  of P.   The search relies  on  the
+     monotonicity of P with the other parameter.
+
+                              WARNING
+
+     The value of the  cumulative  F distribution is  not necessarily
+     monotone in  either degrees of freedom.  There  thus may  be two
+     values  that  provide a given CDF  value.   This routine assumes
+     monotonicity and will find an arbitrary one of the two values.
+
+**********************************************************************/
+{
+#define tol 1.0e-8
+#define atol 1.0e-50
+#define zero 1.0e-100
+#define inf 1.0e100
+static int K1 = 1;
+static double K2 = 0.0e0;
+static double K4 = 0.5e0;
+static double K5 = 5.0e0;
+static double pq,fx,cum,ccum;
+static unsigned long qhi,qleft,qporq;
+static double T3,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+     Check arguments: WHICH must lie in 1..4, else STATUS = -1
+*/
+    if(!(*which < 1 || *which > 4)) goto S30;
+    if(!(*which < 1)) goto S10;
+    *bound = 1.0e0;
+    goto S20;
+S10:
+    *bound = 4.0e0;
+S20:
+    *status = -1;
+    return;
+S30:
+    if(*which == 1) goto S70;
+/*
+     P must lie in [0,1] (not checked when WHICH == 1)
+*/
+    if(!(*p < 0.0e0 || *p > 1.0e0)) goto S60;
+    if(!(*p < 0.0e0)) goto S40;
+    *bound = 0.0e0;
+    goto S50;
+S40:
+    *bound = 1.0e0;
+S50:
+    *status = -2;
+    return;
+S70:
+S60:
+    if(*which == 1) goto S110;
+/*
+     Q must lie in (0,1] (not checked when WHICH == 1)
+*/
+    if(!(*q <= 0.0e0 || *q > 1.0e0)) goto S100;
+    if(!(*q <= 0.0e0)) goto S80;
+    *bound = 0.0e0;
+    goto S90;
+S80:
+    *bound = 1.0e0;
+S90:
+    *status = -3;
+    return;
+S110:
+S100:
+    if(*which == 2) goto S130;
+/*
+     F must be non-negative (not checked when WHICH == 2)
+*/
+    if(!(*f < 0.0e0)) goto S120;
+    *bound = 0.0e0;
+    *status = -4;
+    return;
+S130:
+S120:
+    if(*which == 3) goto S150;
+/*
+     DFN must be positive (not checked when WHICH == 3)
+*/
+    if(!(*dfn <= 0.0e0)) goto S140;
+    *bound = 0.0e0;
+    *status = -5;
+    return;
+S150:
+S140:
+    if(*which == 4) goto S170;
+/*
+     DFD must be positive (not checked when WHICH == 4)
+*/
+    if(!(*dfd <= 0.0e0)) goto S160;
+    *bound = 0.0e0;
+    *status = -6;
+    return;
+S170:
+S160:
+    if(*which == 1) goto S210;
+/*
+     P + Q must equal 1 to within 3*spmpar(1) (skipped when WHICH == 1)
+*/
+    pq = *p+*q;
+    if(!(fabs(pq-0.5e0-0.5e0) > 3.0e0*spmpar(&K1))) goto S200;
+    if(!(pq < 0.0e0)) goto S180;
+    *bound = 0.0e0;
+    goto S190;
+S180:
+    *bound = 1.0e0;
+S190:
+    *status = 3;
+    return;
+S210:
+S200:
+    if(!(*which == 1)) qporq = *p <= *q;
+/*
+     qporq is nonzero when P <= Q: the searches below then match on P,
+     otherwise on Q.  Calculate ANSWERS
+*/
+    if(1 == *which) {
+/*
+     Calculating P and Q
+*/
+        cumf(f,dfn,dfd,p,q);
+        *status = 0;
+    }
+    else if(2 == *which) {
+/*
+     Calculating F: re-evaluate fx and call dinvr again while STATUS == 1
+*/
+        *f = 5.0e0;
+        T3 = inf;
+        T6 = atol;
+        T7 = tol;
+        dstinv(&K2,&T3,&K4,&K4,&K5,&T6,&T7);
+        *status = 0;
+        dinvr(status,f,&fx,&qleft,&qhi);
+S220:
+        if(!(*status == 1)) goto S250;
+        cumf(f,dfn,dfd,&cum,&ccum);
+        if(!qporq) goto S230;
+        fx = cum-*p;
+        goto S240;
+S230:
+        fx = ccum-*q;
+S240:
+        dinvr(status,f,&fx,&qleft,&qhi);
+        goto S220;
+S250:
+        if(!(*status == -1)) goto S280;
+        if(!qleft) goto S260;
+        *status = 1;
+        *bound = 0.0e0;
+        goto S270;
+S260:
+        *status = 2;
+        *bound = inf;
+S280:
+S270:
+        ;
+    }
+    else if(3 == *which) {
+/*
+     Calculating DFN: re-evaluate fx and call dinvr again while STATUS == 1
+*/
+        *dfn = 5.0e0;
+        T8 = zero;
+        T9 = inf;
+        T10 = atol;
+        T11 = tol;
+        dstinv(&T8,&T9,&K4,&K4,&K5,&T10,&T11);
+        *status = 0;
+        dinvr(status,dfn,&fx,&qleft,&qhi);
+S290:
+        if(!(*status == 1)) goto S320;
+        cumf(f,dfn,dfd,&cum,&ccum);
+        if(!qporq) goto S300;
+        fx = cum-*p;
+        goto S310;
+S300:
+        fx = ccum-*q;
+S310:
+        dinvr(status,dfn,&fx,&qleft,&qhi);
+        goto S290;
+S320:
+        if(!(*status == -1)) goto S350;
+        if(!qleft) goto S330;
+        *status = 1;
+        *bound = zero;
+        goto S340;
+S330:
+        *status = 2;
+        *bound = inf;
+S350:
+S340:
+        ;
+    }
+    else if(4 == *which) {
+/*
+     Calculating DFD: re-evaluate fx and call dinvr again while STATUS == 1
+*/
+        *dfd = 5.0e0;
+        T12 = zero;
+        T13 = inf;
+        T14 = atol;
+        T15 = tol;
+        dstinv(&T12,&T13,&K4,&K4,&K5,&T14,&T15);
+        *status = 0;
+        dinvr(status,dfd,&fx,&qleft,&qhi);
+S360:
+        if(!(*status == 1)) goto S390;
+        cumf(f,dfn,dfd,&cum,&ccum);
+        if(!qporq) goto S370;
+        fx = cum-*p;
+        goto S380;
+S370:
+        fx = ccum-*q;
+S380:
+        dinvr(status,dfd,&fx,&qleft,&qhi);
+        goto S360;
+S390:
+        if(!(*status == -1)) goto S420;
+        if(!qleft) goto S400;
+        *status = 1;
+        *bound = zero;
+        goto S410;
+S400:
+        *status = 2;
+        *bound = inf;
+S410:
+        ;
+    }
+S420:
+    return;
+#undef tol
+#undef atol
+#undef zero
+#undef inf
+}
+void cdffnc(int *which,double *p,double *q,double *f,double *dfn,
+	    double *dfd,double *phonc,int *status,double *bound)
+/**********************************************************************
+
+      void cdffnc(int *which,double *p,double *q,double *f,double *dfn,
+            double *dfd,double *phonc,int *status,double *bound)
+
+               Cumulative Distribution Function
+               Non-central F distribution
+
+
+                              Function
+
+
+     Calculates any one parameter of the Non-central F
+     distribution given values for the others.
+
+
+                              Arguments
+
+
+     WHICH --> Integer indicating which of the next five argument
+               values is to be calculated from the others.
+               Legal range: 1..5
+               iwhich = 1 : Calculate P and Q from F,DFN,DFD and PNONC
+               iwhich = 2 : Calculate F from P,Q,DFN,DFD and PNONC
+               iwhich = 3 : Calculate DFN from P,Q,F,DFD and PNONC
+               iwhich = 4 : Calculate DFD from P,Q,F,DFN and PNONC
+               iwhich = 5 : Calculate PNONC from P,Q,F,DFN and DFD
+
+       P <--> The integral from 0 to F of the non-central f-density.
+              Input range: [0,1-1E-16].
+              (The code rejects P only when P < 0 or P > 1-1E-16.)
+
+       Q <--> 1-P.
+              Q is never read as an input by this routine and is only
+              included for similarity with other cdf* routines.  When
+              WHICH is 1 the pointer is handed to cumfnc together
+              with P.
+
+       F <--> Upper limit of integration of the non-central f-density.
+              Input range: [0, +infinity).
+              Search range: [0,1E100]
+
+     DFN <--> Degrees of freedom of the numerator sum of squares.
+              Input range: (0, +infinity).
+              Search range: [ 1E-100, 1E100]
+
+     DFD <--> Degrees of freedom of the denominator sum of squares.
+              Input range: (0, +infinity).
+              Search range: [ 1E-100, 1E100]
+
+     PNONC <-> The non-centrality parameter.  In the C signature this
+               argument is spelled "phonc" (the in-code comments say
+               PHONC); the name is kept because it is part of the
+               interface.
+               Input range: [0,infinity)
+               Search range: [0,1E4]
+
+     STATUS <-- 0 if calculation completed correctly
+               -I if input parameter number I is out of range,
+                  counting WHICH as parameter 1.  This routine can
+                  return -1 (WHICH), -2 (P), -4 (F), -5 (DFN),
+                  -6 (DFD) and -7 (PNONC); Q is never validated, so
+                  -3 is not produced.
+                1 if answer appears to be lower than lowest
+                  search bound
+                2 if answer appears to be higher than greatest
+                  search bound
+                3 if P + Q .ne. 1  (listed for uniformity with the
+                  other cdf* routines; this routine contains no
+                  P + Q check and never assigns 3)
+
+     BOUND <-- Undefined if STATUS is 0
+
+               Bound exceeded by parameter number I if STATUS
+               is negative.
+
+               Lower search bound if STATUS is 1.
+
+               Upper search bound if STATUS is 2.
+
+
+                              Method
+
+
+     Formula  26.6.20   of   Abramowitz   and   Stegun,  Handbook  of
+     Mathematical  Functions (1966) is used to compute the cumulative
+     distribution function.
+
+     Computation of other parameters involves a search for a value
+     that produces the desired  value  of P.   The search relies  on
+     the monotonicity of P with the other parameter.
+
+                            WARNING
+
+     The computation time  required for this  routine is proportional
+     to the noncentrality  parameter  (PNONC).  Very large  values of
+     this parameter can consume immense  computer resources.  This is
+     why the search range is bounded by 10,000.
+
+                              WARNING
+
+     The  value  of the  cumulative  noncentral F distribution is not
+     necessarily monotone in either degrees  of freedom.  There  thus
+     may be two values that provide a given  CDF value.  This routine
+     assumes monotonicity  and will find  an arbitrary one of the two
+     values.
+
+**********************************************************************/
+{
+#define tent4 1.0e4
+#define tol 1.0e-8
+#define atol 1.0e-50
+#define zero 1.0e-100
+#define one ( 1.0e0 - 1.0e-16 )
+#define inf 1.0e100
+static double K1 = 0.0e0;
+static double K3 = 0.5e0;
+static double K4 = 5.0e0;
+static double fx,cum,ccum;
+static unsigned long qhi,qleft;
+static double T2,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15,T16,T17;
+/*
+     ..
+     The macros above are the search limits and tolerances handed to
+     dstinv: tent4 caps the PNONC search, zero/inf bound the degrees
+     of freedom searches, one is the largest accepted P.  The K and T
+     objects exist so that constants can be passed by address.  Every
+     local is static, so its value is shared between calls.
+     ..
+     .. Executable Statements ..
+*/
+/*
+     Check arguments
+     Each check is skipped for the parameter that WHICH asks to be
+     computed.  On failure BOUND receives the violated limit, STATUS
+     the negated parameter number, and the routine returns at once.
+     WHICH itself: BOUND is 1 when WHICH < 1, otherwise 5.
+*/
+    if(!(*which < 1 || *which > 5)) goto S30;
+    if(!(*which < 1)) goto S10;
+    *bound = 1.0e0;
+    goto S20;
+S10:
+    *bound = 5.0e0;
+S20:
+    *status = -1;
+    return;
+S30:
+    if(*which == 1) goto S70;
+/*
+     P
+     Must satisfy 0 <= P <= one (one is 1 - 1E-16).
+*/
+    if(!(*p < 0.0e0 || *p > one)) goto S60;
+    if(!(*p < 0.0e0)) goto S40;
+    *bound = 0.0e0;
+    goto S50;
+S40:
+    *bound = one;
+S50:
+    *status = -2;
+    return;
+S70:
+S60:
+    if(*which == 2) goto S90;
+/*
+     F
+     Must be non-negative.
+*/
+    if(!(*f < 0.0e0)) goto S80;
+    *bound = 0.0e0;
+    *status = -4;
+    return;
+S90:
+S80:
+    if(*which == 3) goto S110;
+/*
+     DFN
+     Must be strictly positive.
+*/
+    if(!(*dfn <= 0.0e0)) goto S100;
+    *bound = 0.0e0;
+    *status = -5;
+    return;
+S110:
+S100:
+    if(*which == 4) goto S130;
+/*
+     DFD
+     Must be strictly positive.
+*/
+    if(!(*dfd <= 0.0e0)) goto S120;
+    *bound = 0.0e0;
+    *status = -6;
+    return;
+S130:
+S120:
+    if(*which == 5) goto S150;
+/*
+     PHONC
+     (the non-centrality parameter PNONC) must be non-negative.
+*/
+    if(!(*phonc < 0.0e0)) goto S140;
+    *bound = 0.0e0;
+    *status = -7;
+    return;
+S150:
+S140:
+/*
+     Calculate ANSWERS
+     WHICH = 1 evaluates the CDF directly.  WHICH = 2..5 all use the
+     same pattern: seed the unknown with 5, configure the search with
+     dstinv, then call dinvr repeatedly.  While dinvr leaves STATUS
+     at 1 the loop evaluates FX = CDF - P at the current trial value
+     and calls dinvr again.  When the loop ends with STATUS = -1 the
+     result is reported as STATUS 1 (QLEFT set, BOUND = lower limit)
+     or STATUS 2 (BOUND = upper limit); any other final STATUS is
+     returned unchanged.
+*/
+    if(1 == *which) {
+/*
+     Calculating P
+     (cumfnc receives both the P and the Q pointers)
+*/
+        cumfnc(f,dfn,dfd,phonc,p,q);
+        *status = 0;
+    }
+    else if(2 == *which) {
+/*
+     Calculating F
+     Search limits passed to dstinv: 0 and inf (1E100).
+*/
+        *f = 5.0e0;
+        T2 = inf;
+        T5 = atol;
+        T6 = tol;
+        dstinv(&K1,&T2,&K3,&K3,&K4,&T5,&T6);
+        *status = 0;
+        dinvr(status,f,&fx,&qleft,&qhi);
+S160:
+        if(!(*status == 1)) goto S170;
+        cumfnc(f,dfn,dfd,phonc,&cum,&ccum);
+        fx = cum-*p;
+        dinvr(status,f,&fx,&qleft,&qhi);
+        goto S160;
+S170:
+        if(!(*status == -1)) goto S200;
+        if(!qleft) goto S180;
+        *status = 1;
+        *bound = 0.0e0;
+        goto S190;
+S180:
+        *status = 2;
+        *bound = inf;
+S200:
+S190:
+        ;
+    }
+    else if(3 == *which) {
+/*
+     Calculating DFN
+     Search limits passed to dstinv: zero (1E-100) and inf (1E100).
+*/
+        *dfn = 5.0e0;
+        T7 = zero;
+        T8 = inf;
+        T9 = atol;
+        T10 = tol;
+        dstinv(&T7,&T8,&K3,&K3,&K4,&T9,&T10);
+        *status = 0;
+        dinvr(status,dfn,&fx,&qleft,&qhi);
+S210:
+        if(!(*status == 1)) goto S220;
+        cumfnc(f,dfn,dfd,phonc,&cum,&ccum);
+        fx = cum-*p;
+        dinvr(status,dfn,&fx,&qleft,&qhi);
+        goto S210;
+S220:
+        if(!(*status == -1)) goto S250;
+        if(!qleft) goto S230;
+        *status = 1;
+        *bound = zero;
+        goto S240;
+S230:
+        *status = 2;
+        *bound = inf;
+S250:
+S240:
+        ;
+    }
+    else if(4 == *which) {
+/*
+     Calculating DFD
+     Search limits passed to dstinv: zero (1E-100) and inf (1E100).
+*/
+        *dfd = 5.0e0;
+        T11 = zero;
+        T12 = inf;
+        T13 = atol;
+        T14 = tol;
+        dstinv(&T11,&T12,&K3,&K3,&K4,&T13,&T14);
+        *status = 0;
+        dinvr(status,dfd,&fx,&qleft,&qhi);
+S260:
+        if(!(*status == 1)) goto S270;
+        cumfnc(f,dfn,dfd,phonc,&cum,&ccum);
+        fx = cum-*p;
+        dinvr(status,dfd,&fx,&qleft,&qhi);
+        goto S260;
+S270:
+        if(!(*status == -1)) goto S300;
+        if(!qleft) goto S280;
+        *status = 1;
+        *bound = zero;
+        goto S290;
+S280:
+        *status = 2;
+        *bound = inf;
+S300:
+S290:
+        ;
+    }
+    else if(5 == *which) {
+/*
+     Calculating PHONC
+     Search limits passed to dstinv: 0 and tent4 (1E4).
+*/
+        *phonc = 5.0e0;
+        T15 = tent4;
+        T16 = atol;
+        T17 = tol;
+        dstinv(&K1,&T15,&K3,&K3,&K4,&T16,&T17);
+        *status = 0;
+        dinvr(status,phonc,&fx,&qleft,&qhi);
+S310:
+        if(!(*status == 1)) goto S320;
+        cumfnc(f,dfn,dfd,phonc,&cum,&ccum);
+        fx = cum-*p;
+        dinvr(status,phonc,&fx,&qleft,&qhi);
+        goto S310;
+S320:
+        if(!(*status == -1)) goto S350;
+        if(!qleft) goto S330;
+        *status = 1;
+        *bound = 0.0e0;
+        goto S340;
+S330:
+        *status = 2;
+        *bound = tent4;
+S340:
+        ;
+    }
+S350:
+    return;
+#undef tent4
+#undef tol
+#undef atol
+#undef zero
+#undef one
+#undef inf
+}
+void cdfgam(int *which,double *p,double *q,double *x,double *shape,
+	    double *scale,int *status,double *bound)
+/**********************************************************************
+
+      void cdfgam(int *which,double *p,double *q,double *x,double *shape,
+            double *scale,int *status,double *bound)
+
+               Cumulative Distribution Function
+                         GAMma Distribution
+
+
+                              Function
+
+
+     Calculates any one parameter of the gamma
+     distribution given values for the others.
+
+
+                              Arguments
+
+
+     WHICH --> Integer indicating which of the next four argument
+               values is to be calculated from the others.
+               Legal range: 1..4
+               iwhich = 1 : Calculate P and Q from X,SHAPE and SCALE
+               iwhich = 2 : Calculate X from P,Q,SHAPE and SCALE
+               iwhich = 3 : Calculate SHAPE from P,Q,X and SCALE
+               iwhich = 4 : Calculate SCALE from P,Q,X and SHAPE
+
+     P <--> The integral from 0 to X of the gamma density.
+            Input range: [0,1].
+
+     Q <--> 1-P.
+            Input range: (0, 1].
+            P + Q = 1.0.
+
+     X <--> The upper limit of integration of the gamma density.
+            Input range: [0, +infinity).
+            Search range: [0,1E100]
+
+     SHAPE <--> The shape parameter of the gamma density.
+                Input range: (0, +infinity).
+                Search range: [1E-100,1E100]
+
+     SCALE <--> The scale parameter of the gamma density.
+                Input range: (0, +infinity).
+                Search range: (1E-100,1E100]
+
+     STATUS <-- 0 if calculation completed correctly
+               -I if input parameter number I is out of range
+                  (WHICH counts as parameter 1, P as 2, ... SCALE
+                  as 6)
+                1 if answer appears to be lower than lowest
+                  search bound
+                2 if answer appears to be higher than greatest
+                  search bound
+                3 if P + Q .ne. 1
+                10 if the gamma or inverse gamma routine cannot
+                   compute the answer.  Usually happens only for
+                   X and SHAPE very large (gt 1E10 or more)
+
+     BOUND <-- Undefined if STATUS is 0
+
+               Bound exceeded by parameter number I if STATUS
+               is negative.
+
+               Lower search bound if STATUS is 1.
+
+               Upper search bound if STATUS is 2.
+
+
+                              Method
+
+
+     Cumulative distribution function (P) is calculated directly by
+     the code associated with:
+
+     DiDonato, A. R. and Morris, A. H. Computation of the  incomplete
+     gamma function  ratios  and their  inverse.   ACM  Trans.  Math.
+     Softw. 12 (1986), 377-393.
+
+     X and SCALE are obtained from the inverse incomplete gamma
+     routine (gaminv).  SHAPE is obtained by a search for a value
+     that produces the desired value of P; the search relies on the
+     monotonicity of P with SHAPE.
+
+
+                              Note
+
+
+
+     The gamma density is proportional to
+       T**(SHAPE - 1) * EXP(- SCALE * T)
+
+     X = 0 is accepted as an input, and the SCALE computation divides
+     by X without a guard.
+
+**********************************************************************/
+{
+#define tol 1.0e-8
+#define atol 1.0e-50
+#define zero 1.0e-100
+#define inf 1.0e100
+static int K1 = 1;
+static double K5 = 0.5e0;
+static double K6 = 5.0e0;
+static double xx,fx,xscale,cum,ccum,pq;
+static int ierr;
+static unsigned long qhi,qleft,qporq;
+static double T2,T3,T4,T7,T8,T9;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+     Check arguments
+     Each check is skipped for the quantity that WHICH asks to be
+     computed.  On failure BOUND receives the violated limit and
+     STATUS the negated parameter number.
+*/
+    if(!(*which < 1 || *which > 4)) goto S30;
+    if(!(*which < 1)) goto S10;
+    *bound = 1.0e0;
+    goto S20;
+S10:
+    *bound = 4.0e0;
+S20:
+    *status = -1;
+    return;
+S30:
+    if(*which == 1) goto S70;
+/*
+     P
+*/
+    if(!(*p < 0.0e0 || *p > 1.0e0)) goto S60;
+    if(!(*p < 0.0e0)) goto S40;
+    *bound = 0.0e0;
+    goto S50;
+S40:
+    *bound = 1.0e0;
+S50:
+    *status = -2;
+    return;
+S70:
+S60:
+    if(*which == 1) goto S110;
+/*
+     Q
+*/
+    if(!(*q <= 0.0e0 || *q > 1.0e0)) goto S100;
+    if(!(*q <= 0.0e0)) goto S80;
+    *bound = 0.0e0;
+    goto S90;
+S80:
+    *bound = 1.0e0;
+S90:
+    *status = -3;
+    return;
+S110:
+S100:
+    if(*which == 2) goto S130;
+/*
+     X
+*/
+    if(!(*x < 0.0e0)) goto S120;
+    *bound = 0.0e0;
+    *status = -4;
+    return;
+S130:
+S120:
+    if(*which == 3) goto S150;
+/*
+     SHAPE
+*/
+    if(!(*shape <= 0.0e0)) goto S140;
+    *bound = 0.0e0;
+    *status = -5;
+    return;
+S150:
+S140:
+    if(*which == 4) goto S170;
+/*
+     SCALE
+*/
+    if(!(*scale <= 0.0e0)) goto S160;
+    *bound = 0.0e0;
+    *status = -6;
+    return;
+S170:
+S160:
+    if(*which == 1) goto S210;
+/*
+     P + Q
+     Must equal 1 to within 3*spmpar(1).
+*/
+    pq = *p+*q;
+    if(!(fabs(pq-0.5e0-0.5e0) > 3.0e0*spmpar(&K1))) goto S200;
+    if(!(pq < 0.0e0)) goto S180;
+    *bound = 0.0e0;
+    goto S190;
+S180:
+    *bound = 1.0e0;
+S190:
+    *status = 3;
+    return;
+S210:
+S200:
+/*
+     Select the minimum of P or Q
+     QPORQ records whether P is the smaller of the two; the SHAPE
+     search uses it to decide which tail to match.  It is only
+     meaningful when P and Q are inputs.
+*/
+    if(!(*which == 1)) qporq = *p <= *q;
+/*
+     Calculate ANSWERS
+*/
+    if(1 == *which) {
+/*
+     Calculating P
+     A value of P above 1.5 coming back from cumgam is reported as
+     STATUS 10, the same failure test the SHAPE search below applies
+     to CUM.  The original tested PORQ here, which is never assigned
+     on this path, so the failure could not be reported.
+*/
+        *status = 0;
+        xscale = *x**scale;
+        cumgam(&xscale,shape,p,q);
+        if(*p > 1.5e0) *status = 10;
+    }
+    else if(2 == *which) {
+/*
+     Computing X
+     gaminv solves for the scaled abscissa XX; a negative IERR is a
+     failure.  X is XX divided by SCALE.
+*/
+        T2 = -1.0e0;
+        gaminv(shape,&xx,&T2,p,q,&ierr);
+        if(ierr < 0.0e0) {
+            *status = 10;
+            return;
+        }
+        else  {
+            *x = xx/ *scale;
+            *status = 0;
+        }
+    }
+    else if(3 == *which) {
+/*
+     Computing SHAPE
+     Seed SHAPE with 5 and configure the search with dstinv.  While
+     dinvr leaves STATUS at 1, evaluate the CDF at the trial SHAPE,
+     form FX against the smaller of P and Q, and call dinvr again.
+*/
+        *shape = 5.0e0;
+        xscale = *x**scale;
+        T3 = zero;
+        T4 = inf;
+        T7 = atol;
+        T8 = tol;
+        dstinv(&T3,&T4,&K5,&K5,&K6,&T7,&T8);
+        *status = 0;
+        dinvr(status,shape,&fx,&qleft,&qhi);
+S250:
+        if(!(*status == 1)) goto S290;
+        cumgam(&xscale,shape,&cum,&ccum);
+        if(!qporq) goto S260;
+        fx = cum-*p;
+        goto S270;
+S260:
+        fx = ccum-*q;
+S270:
+/*
+     A tail value above 1.5 means cumgam could not compute the CDF.
+*/
+        if(!((qporq && cum > 1.5e0) || (!qporq && ccum > 1.5e0))) goto S280;
+        *status = 10;
+        return;
+S280:
+        dinvr(status,shape,&fx,&qleft,&qhi);
+        goto S250;
+S290:
+/*
+     STATUS -1 after the loop: report which search bound was hit.
+*/
+        if(!(*status == -1)) goto S320;
+        if(!qleft) goto S300;
+        *status = 1;
+        *bound = zero;
+        goto S310;
+S300:
+        *status = 2;
+        *bound = inf;
+S320:
+S310:
+        ;
+    }
+    else if(4 == *which) {
+/*
+     Computing SCALE
+     Same inverse as for X; SCALE is XX divided by X.
+*/
+        T9 = -1.0e0;
+        gaminv(shape,&xx,&T9,p,q,&ierr);
+        if(ierr < 0.0e0) {
+            *status = 10;
+            return;
+        }
+        else  {
+            *scale = xx/ *x;
+            *status = 0;
+        }
+    }
+    return;
+#undef tol
+#undef atol
+#undef zero
+#undef inf
+}
+void cdfnbn(int *which,double *p,double *q,double *s,double *xn,
+	    double *pr,double *ompr,int *status,double *bound)
+/**********************************************************************
+
+      void cdfnbn(int *which,double *p,double *q,double *s,double *xn,
+            double *pr,double *ompr,int *status,double *bound)
+
+               Cumulative Distribution Function
+               Negative BiNomial distribution
+
+
+                              Function
+
+
+     Calculates any one parameter of the negative binomial
+     distribution given values for the others.
+
+     The  cumulative  negative   binomial  distribution  returns  the
+     probability that there  will be  S or fewer failures before  the
+     XNth success in binomial trials each of which has probability of
+     success PR.
+
+     The individual term of the negative binomial is the probability of
+     S failures before XN successes and is
+          Choose( S, XN+S-1 ) * PR^(XN) * (1-PR)^S
+
+
+                              Arguments
+
+
+     WHICH --> Integer indicating which of the next four argument
+               values is to be calculated from the others.
+               Legal range: 1..4
+               iwhich = 1 : Calculate P and Q from S,XN,PR and OMPR
+               iwhich = 2 : Calculate S from P,Q,XN,PR and OMPR
+               iwhich = 3 : Calculate XN from P,Q,S,PR and OMPR
+               iwhich = 4 : Calculate PR and OMPR from P,Q,S and XN
+
+     P <--> The cumulation from 0 to S of the  negative
+            binomial distribution.
+            Input range: [0,1].
+
+     Q <--> 1-P.
+            Input range: (0, 1].
+            P + Q = 1.0.
+
+     S <--> The upper limit of cumulation of the negative binomial
+            distribution.
+            There are S or fewer failures before the XNth success.
+            Input range: [0, +infinity).
+            Search range: [0, 1E100]
+
+     XN  <--> The number of successes.
+              Input range: [0, +infinity).
+              Search range: [0, 1E100]
+
+     PR  <--> The probability of success in each binomial trial.
+              Input range: [0,1].
+              Search range: [0,1].
+
+     OMPR  <--> 1-PR
+              Input range: [0,1].
+              Search range: [0,1]
+              PR + OMPR = 1.0
+
+     STATUS <-- 0 if calculation completed correctly
+               -I if input parameter number I is out of range
+                  (WHICH counts as parameter 1, P as 2, Q as 3,
+                  S as 4, XN as 5, PR as 6, OMPR as 7)
+                1 if answer appears to be lower than lowest
+                  search bound
+                2 if answer appears to be higher than greatest
+                  search bound
+                3 if P + Q .ne. 1
+                4 if PR + OMPR .ne. 1
+
+     BOUND <-- Undefined if STATUS is 0
+
+               Bound exceeded by parameter number I if STATUS
+               is negative.
+
+               Lower search bound if STATUS is 1.
+
+               Upper search bound if STATUS is 2.
+
+
+                              Method
+
+
+     Formula   26.5.26   of   Abramowitz  and  Stegun,  Handbook   of
+     Mathematical Functions (1966) is used  to  reduce calculation of
+     the cumulative distribution  function to that of  an  incomplete
+     beta.
+
+     Computation of other parameters involves a search for a value
+     that produces  the desired  value  of P.   The search relies on
+     the monotonicity of P with the other parameter.
+
+**********************************************************************/
+{
+#define tol 1.0e-8
+#define atol 1.0e-50
+#define inf 1.0e100
+#define one 1.0e0
+static int K1 = 1;
+static double K2 = 0.0e0;
+static double K4 = 0.5e0;
+static double K5 = 5.0e0;
+static double K11 = 1.0e0;
+static double fx,xhi,xlo,pq,prompr,cum,ccum;
+static unsigned long qhi,qleft,qporq;
+static double T3,T6,T7,T8,T9,T10,T12,T13;
+/*
+     ..
+     The K and T objects exist so that constants can be passed by
+     address to dstinv, dstzr and spmpar.  Every local is static, so
+     its value is shared between calls.
+     ..
+     .. Executable Statements ..
+*/
+/*
+     Check arguments
+     Each check is skipped for the quantity that WHICH asks to be
+     computed.  On failure BOUND receives the violated limit, STATUS
+     the negated parameter number, and the routine returns at once.
+     WHICH itself: BOUND is 1 when WHICH < 1, otherwise 4.
+*/
+    if(!(*which < 1 || *which > 4)) goto S30;
+    if(!(*which < 1)) goto S10;
+    *bound = 1.0e0;
+    goto S20;
+S10:
+    *bound = 4.0e0;
+S20:
+    *status = -1;
+    return;
+S30:
+    if(*which == 1) goto S70;
+/*
+     P
+     Must satisfy 0 <= P <= 1.
+*/
+    if(!(*p < 0.0e0 || *p > 1.0e0)) goto S60;
+    if(!(*p < 0.0e0)) goto S40;
+    *bound = 0.0e0;
+    goto S50;
+S40:
+    *bound = 1.0e0;
+S50:
+    *status = -2;
+    return;
+S70:
+S60:
+    if(*which == 1) goto S110;
+/*
+     Q
+     Must satisfy 0 < Q <= 1.
+*/
+    if(!(*q <= 0.0e0 || *q > 1.0e0)) goto S100;
+    if(!(*q <= 0.0e0)) goto S80;
+    *bound = 0.0e0;
+    goto S90;
+S80:
+    *bound = 1.0e0;
+S90:
+    *status = -3;
+    return;
+S110:
+S100:
+    if(*which == 2) goto S130;
+/*
+     S
+     Must be non-negative.
+*/
+    if(!(*s < 0.0e0)) goto S120;
+    *bound = 0.0e0;
+    *status = -4;
+    return;
+S130:
+S120:
+    if(*which == 3) goto S150;
+/*
+     XN
+     Must be non-negative.
+*/
+    if(!(*xn < 0.0e0)) goto S140;
+    *bound = 0.0e0;
+    *status = -5;
+    return;
+S150:
+S140:
+    if(*which == 4) goto S190;
+/*
+     PR
+     Must satisfy 0 <= PR <= 1.
+*/
+    if(!(*pr < 0.0e0 || *pr > 1.0e0)) goto S180;
+    if(!(*pr < 0.0e0)) goto S160;
+    *bound = 0.0e0;
+    goto S170;
+S160:
+    *bound = 1.0e0;
+S170:
+    *status = -6;
+    return;
+S190:
+S180:
+    if(*which == 4) goto S230;
+/*
+     OMPR
+     Must satisfy 0 <= OMPR <= 1.
+*/
+    if(!(*ompr < 0.0e0 || *ompr > 1.0e0)) goto S220;
+    if(!(*ompr < 0.0e0)) goto S200;
+    *bound = 0.0e0;
+    goto S210;
+S200:
+    *bound = 1.0e0;
+S210:
+    *status = -7;
+    return;
+S230:
+S220:
+    if(*which == 1) goto S270;
+/*
+     P + Q
+     Must equal 1 to within 3*spmpar(1); otherwise STATUS is 3 and
+     BOUND is 0 when the sum is negative, else 1.
+*/
+    pq = *p+*q;
+    if(!(fabs(pq-0.5e0-0.5e0) > 3.0e0*spmpar(&K1))) goto S260;
+    if(!(pq < 0.0e0)) goto S240;
+    *bound = 0.0e0;
+    goto S250;
+S240:
+    *bound = 1.0e0;
+S250:
+    *status = 3;
+    return;
+S270:
+S260:
+    if(*which == 4) goto S310;
+/*
+     PR + OMPR
+     Same tolerance test as for P + Q; failure gives STATUS 4.
+*/
+    prompr = *pr+*ompr;
+    if(!(fabs(prompr-0.5e0-0.5e0) > 3.0e0*spmpar(&K1))) goto S300;
+    if(!(prompr < 0.0e0)) goto S280;
+    *bound = 0.0e0;
+    goto S290;
+S280:
+    *bound = 1.0e0;
+S290:
+    *status = 4;
+    return;
+S310:
+S300:
+    if(!(*which == 1)) qporq = *p <= *q;
+/*
+     Select the minimum of P or Q
+     (QPORQ, set just above whenever P and Q are inputs, is true when
+     P <= Q.  The searches below match CUM against P when it is true
+     and CCUM against Q otherwise.)
+     Calculate ANSWERS
+*/
+    if(1 == *which) {
+/*
+     Calculating P
+     (cumnbn receives both the P and the Q pointers)
+*/
+        cumnbn(s,xn,pr,ompr,p,q);
+        *status = 0;
+    }
+    else if(2 == *which) {
+/*
+     Calculating S
+     Seed S with 5, configure the search on [0, inf] with dstinv,
+     then call dinvr repeatedly.  While dinvr leaves STATUS at 1 the
+     loop evaluates FX at the trial S and calls dinvr again.  A final
+     STATUS of -1 is converted to 1 (QLEFT set, BOUND = 0) or
+     2 (BOUND = inf).
+*/
+        *s = 5.0e0;
+        T3 = inf;
+        T6 = atol;
+        T7 = tol;
+        dstinv(&K2,&T3,&K4,&K4,&K5,&T6,&T7);
+        *status = 0;
+        dinvr(status,s,&fx,&qleft,&qhi);
+S320:
+        if(!(*status == 1)) goto S350;
+        cumnbn(s,xn,pr,ompr,&cum,&ccum);
+        if(!qporq) goto S330;
+        fx = cum-*p;
+        goto S340;
+S330:
+        fx = ccum-*q;
+S340:
+        dinvr(status,s,&fx,&qleft,&qhi);
+        goto S320;
+S350:
+        if(!(*status == -1)) goto S380;
+        if(!qleft) goto S360;
+        *status = 1;
+        *bound = 0.0e0;
+        goto S370;
+S360:
+        *status = 2;
+        *bound = inf;
+S380:
+S370:
+        ;
+    }
+    else if(3 == *which) {
+/*
+     Calculating XN
+     Same search pattern as for S, on [0, inf], with XN as the
+     unknown.
+*/
+        *xn = 5.0e0;
+        T8 = inf;
+        T9 = atol;
+        T10 = tol;
+        dstinv(&K2,&T8,&K4,&K4,&K5,&T9,&T10);
+        *status = 0;
+        dinvr(status,xn,&fx,&qleft,&qhi);
+S390:
+        if(!(*status == 1)) goto S420;
+        cumnbn(s,xn,pr,ompr,&cum,&ccum);
+        if(!qporq) goto S400;
+        fx = cum-*p;
+        goto S410;
+S400:
+        fx = ccum-*q;
+S410:
+        dinvr(status,xn,&fx,&qleft,&qhi);
+        goto S390;
+S420:
+        if(!(*status == -1)) goto S450;
+        if(!qleft) goto S430;
+        *status = 1;
+        *bound = 0.0e0;
+        goto S440;
+S430:
+        *status = 2;
+        *bound = inf;
+S450:
+S440:
+        ;
+    }
+    else if(4 == *which) {
+/*
+     Calculating PR and OMPR
+     The solver here is dstzr/dzror, set up on the interval [0, 1].
+     When QPORQ is true the unknown driven by dzror is PR (matching
+     CUM to P) and OMPR is refreshed as 1 - PR after every call;
+     otherwise the unknown is OMPR (matching CCUM to Q) and PR is
+     refreshed as 1 - OMPR.  Both paths join at S510/S500, where a
+     final STATUS of -1 is converted to 1 (QLEFT set, BOUND = 0) or
+     2 (BOUND = 1).
+*/
+        T12 = atol;
+        T13 = tol;
+        dstzr(&K2,&K11,&T12,&T13);
+        if(!qporq) goto S480;
+        *status = 0;
+        dzror(status,pr,&fx,&xlo,&xhi,&qleft,&qhi);
+        *ompr = one-*pr;
+S460:
+        if(!(*status == 1)) goto S470;
+        cumnbn(s,xn,pr,ompr,&cum,&ccum);
+        fx = cum-*p;
+        dzror(status,pr,&fx,&xlo,&xhi,&qleft,&qhi);
+        *ompr = one-*pr;
+        goto S460;
+S470:
+        goto S510;
+S480:
+        *status = 0;
+        dzror(status,ompr,&fx,&xlo,&xhi,&qleft,&qhi);
+        *pr = one-*ompr;
+S490:
+        if(!(*status == 1)) goto S500;
+        cumnbn(s,xn,pr,ompr,&cum,&ccum);
+        fx = ccum-*q;
+        dzror(status,ompr,&fx,&xlo,&xhi,&qleft,&qhi);
+        *pr = one-*ompr;
+        goto S490;
+S510:
+S500:
+        if(!(*status == -1)) goto S540;
+        if(!qleft) goto S520;
+        *status = 1;
+        *bound = 0.0e0;
+        goto S530;
+S520:
+        *status = 2;
+        *bound = 1.0e0;
+S530:
+        ;
+    }
+S540:
+    return;
+#undef tol
+#undef atol
+#undef inf
+#undef one
+}
+void cdfnor(int *which,double *p,double *q,double *x,double *mean,
+	    double *sd,int *status,double *bound)
+/**********************************************************************
+
+      void cdfnor(int *which,double *p,double *q,double *x,double *mean,
+            double *sd,int *status,double *bound)
+
+               Cumulative Distribution Function
+               NORmal distribution
+
+
+                              Function
+
+
+     Solves for one quantity of the normal distribution when the
+     remaining ones are supplied.
+
+
+                              Arguments
+
+
+     WHICH  --> Selects the quantity to compute.  Legal range: 1..4
+               iwhich = 1 : Calculate P and Q from X,MEAN and SD
+               iwhich = 2 : Calculate X from P,Q,MEAN and SD
+               iwhich = 3 : Calculate MEAN from P,Q,X and SD
+               iwhich = 4 : Calculate SD from P,Q,X and MEAN
+
+     P <--> The integral from -infinity to X of the normal density.
+            Input range: (0,1].
+
+     Q <--> 1-P.
+            Input range: (0, 1].
+            P + Q = 1.0.
+
+     X <--> Upper limit of integration of the normal density.
+            Input range: ( -infinity, +infinity)
+
+     MEAN <--> The mean of the normal density.
+               Input range: (-infinity, +infinity)
+
+     SD <--> Standard deviation of the normal density.
+             Input range: (0, +infinity).
+
+     STATUS <-- 0 if calculation completed correctly
+               -I if input parameter number I is out of range
+                  (this routine returns -1 for WHICH, -2 for P,
+                  -3 for Q and -6 for SD)
+                1 if answer appears to be lower than lowest
+                  search bound
+                2 if answer appears to be higher than greatest
+                  search bound
+                3 if P + Q .ne. 1
+
+     BOUND <-- Undefined if STATUS is 0
+
+               Bound exceeded by parameter number I if STATUS
+               is negative.
+
+               Lower search bound if STATUS is 1.
+
+               Upper search bound if STATUS is 2.
+
+
+                              Method
+
+
+     A slightly modified version of ANORM from
+
+     Cody, W.D. (1993). "ALGORITHM 715: SPECFUN - A Portable FORTRAN
+     Package of Special Function Routines and Test Drivers"
+     acm Transactions on Mathematical Software. 19, 22-32.
+
+     is used to calculate the cumulative standard normal distribution.
+
+     The rational functions from pages  90-95  of Kennedy and Gentle,
+     Statistical  Computing,  Marcel  Dekker, NY,  1980 are  used  as
+     starting values to Newton's Iterations which compute the inverse
+     standard normal.  Therefore no  searches  are necessary for  any
+     parameter.
+
+     For X < -15, the asymptotic expansion for the normal is used  as
+     the starting value in finding the inverse standard normal.
+     This is formula 26.2.12 of Abramowitz and Stegun.
+
+
+                              Note
+
+
+      The normal density is proportional to
+      exp( - 0.5 * (( X - MEAN)/SD)**2)
+
+      The SD computation divides by the standard normal deviate
+      returned by dinvnr without testing it for zero.
+
+**********************************************************************/
+{
+static int eps_sel = 1;
+static double dev,total;
+
+    *status = 0;
+
+    /* WHICH must name one of the four solvable quantities. */
+    if(*which < 1 || *which > 4) {
+        *bound = (*which < 1) ? 1.0e0 : 4.0e0;
+        *status = -1;
+        return;
+    }
+
+    /* P and Q are inputs for every mode except 1: range-check each,
+       then require that they sum to 1 within 3*spmpar(1). */
+    if(*which != 1) {
+        if(*p <= 0.0e0 || *p > 1.0e0) {
+            *bound = (*p <= 0.0e0) ? 0.0e0 : 1.0e0;
+            *status = -2;
+            return;
+        }
+        if(*q <= 0.0e0 || *q > 1.0e0) {
+            *bound = (*q <= 0.0e0) ? 0.0e0 : 1.0e0;
+            *status = -3;
+            return;
+        }
+        total = *p+*q;
+        if(fabs(total-0.5e0-0.5e0) > 3.0e0*spmpar(&eps_sel)) {
+            *bound = (total < 0.0e0) ? 0.0e0 : 1.0e0;
+            *status = 3;
+            return;
+        }
+    }
+
+    /* SD is an input for every mode except 4 and must be positive. */
+    if(*which != 4 && *sd <= 0.0e0) {
+        *bound = 0.0e0;
+        *status = -6;
+        return;
+    }
+
+    /* Closed-form solve: dev is the standardized deviate (X-MEAN)/SD,
+       either formed from the inputs or obtained from dinvnr. */
+    switch(*which) {
+    case 1:
+        dev = (*x-*mean)/ *sd;
+        cumnor(&dev,p,q);
+        break;
+    case 2:
+        dev = dinvnr(p,q);
+        *x = *sd*dev+*mean;
+        break;
+    case 3:
+        dev = dinvnr(p,q);
+        *mean = *x-*sd*dev;
+        break;
+    case 4:
+        dev = dinvnr(p,q);
+        *sd = (*x-*mean)/dev;
+        break;
+    }
+    return;
+}
+void cdfpoi(int *which,double *p,double *q,double *s,double *xlam,
+	    int *status,double *bound)
+/**********************************************************************
+
+      void cdfpoi(int *which,double *p,double *q,double *s,double *xlam,
+            int *status,double *bound)
+
+               Cumulative Distribution Function
+               POIsson distribution
+
+
+                              Function
+
+
+     Calculates any one parameter of the Poisson
+     distribution given values for the others.
+
+
+                              Arguments
+
+
+     WHICH --> Integer indicating which  argument
+               value is to be calculated from the others.
+               Legal range: 1..3
+               iwhich = 1 : Calculate P and Q from S and XLAM
+               iwhich = 2 : Calculate A from P,Q and XLAM
+               iwhich = 3 : Calculate XLAM from P,Q and S
+
+        P <--> The cumulation from 0 to S of the poisson density.
+               Input range: [0,1].
+
+        Q <--> 1-P.
+               Input range: (0, 1].
+               P + Q = 1.0.
+
+        S <--> Upper limit of cumulation of the Poisson.
+               Input range: [0, +infinity).
+               Search range: [0,1E100]
+
+     XLAM <--> Mean of the Poisson distribution.
+               Input range: [0, +infinity).
+               Search range: [0,1E100]
+
+     STATUS <-- 0 if calculation completed correctly
+               -I if input parameter number I is out of range
+                1 if answer appears to be lower than lowest
+                  search bound
+                2 if answer appears to be higher than greatest
+                  search bound
+                3 if P + Q .ne. 1
+
+     BOUND <-- Undefined if STATUS is 0
+
+               Bound exceeded by parameter number I if STATUS
+               is negative.
+
+               Lower search bound if STATUS is 1.
+
+               Upper search bound if STATUS is 2.
+
+
+                              Method
+
+
+     Formula   26.4.21  of   Abramowitz  and   Stegun,   Handbook  of
+     Mathematical Functions (1966) is used  to reduce the computation
+     of  the cumulative distribution function to that  of computing a
+     chi-square, hence an incomplete gamma function.
+
+     Cumulative  distribution function  (P) is  calculated  directly.
+     Computation of other parameters involves a search for a value that
+     produces  the desired value of  P.   The  search relies  on  the
+     monotonicity of P with the other parameter.
+
+**********************************************************************/
+{
+#define tol 1.0e-8
+#define atol 1.0e-50
+#define inf 1.0e100
+static int K1 = 1;
+static double K2 = 0.0e0;
+static double K4 = 0.5e0;
+static double K5 = 5.0e0;
+static double fx,cum,ccum,pq;
+static unsigned long qhi,qleft,qporq;
+static double T3,T6,T7,T8,T9,T10;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+     Check arguments
+*/
+    if(!(*which < 1 || *which > 3)) goto S30;
+    if(!(*which < 1)) goto S10;
+    *bound = 1.0e0;
+    goto S20;
+S10:
+    *bound = 3.0e0;
+S20:
+    *status = -1;
+    return;
+S30:
+    if(*which == 1) goto S70;
+/*
+     P
+*/
+    if(!(*p < 0.0e0 || *p > 1.0e0)) goto S60;
+    if(!(*p < 0.0e0)) goto S40;
+    *bound = 0.0e0;
+    goto S50;
+S40:
+    *bound = 1.0e0;
+S50:
+    *status = -2;
+    return;
+S70:
+S60:
+    if(*which == 1) goto S110;
+/*
+     Q
+*/
+    if(!(*q <= 0.0e0 || *q > 1.0e0)) goto S100;
+    if(!(*q <= 0.0e0)) goto S80;
+    *bound = 0.0e0;
+    goto S90;
+S80:
+    *bound = 1.0e0;
+S90:
+    *status = -3;
+    return;
+S110:
+S100:
+    if(*which == 2) goto S130;
+/*
+     S
+*/
+    if(!(*s < 0.0e0)) goto S120;
+    *bound = 0.0e0;
+    *status = -4;
+    return;
+S130:
+S120:
+    if(*which == 3) goto S150;
+/*
+     XLAM
+*/
+    if(!(*xlam < 0.0e0)) goto S140;
+    *bound = 0.0e0;
+    *status = -5;
+    return;
+S150:
+S140:
+    if(*which == 1) goto S190;
+/*
+     P + Q
+*/
+    pq = *p+*q;
+    if(!(fabs(pq-0.5e0-0.5e0) > 3.0e0*spmpar(&K1))) goto S180;
+    if(!(pq < 0.0e0)) goto S160;
+    *bound = 0.0e0;
+    goto S170;
+S160:
+    *bound = 1.0e0;
+S170:
+    *status = 3;
+    return;
+S190:
+S180:
+    if(!(*which == 1)) qporq = *p <= *q;
+/*
+     Select the minimum of P or Q
+     Calculate ANSWERS
+*/
+    if(1 == *which) {
+/*
+     Calculating P
+*/
+        cumpoi(s,xlam,p,q);
+        *status = 0;
+    }
+    else if(2 == *which) {
+/*
+     Calculating S
+*/
+        *s = 5.0e0;
+        T3 = inf;
+        T6 = atol;
+        T7 = tol;
+        dstinv(&K2,&T3,&K4,&K4,&K5,&T6,&T7);
+        *status = 0;
+        dinvr(status,s,&fx,&qleft,&qhi);
+S200:
+        if(!(*status == 1)) goto S230;
+        cumpoi(s,xlam,&cum,&ccum);
+        if(!qporq) goto S210;
+        fx = cum-*p;
+        goto S220;
+S210:
+        fx = ccum-*q;
+S220:
+        dinvr(status,s,&fx,&qleft,&qhi);
+        goto S200;
+S230:
+        if(!(*status == -1)) goto S260;
+        if(!qleft) goto S240;
+        *status = 1;
+        *bound = 0.0e0;
+        goto S250;
+S240:
+        *status = 2;
+        *bound = inf;
+S260:
+S250:
+        ;
+    }
+    else if(3 == *which) {
+/*
+     Calculating XLAM
+*/
+        *xlam = 5.0e0;
+        T8 = inf;
+        T9 = atol;
+        T10 = tol;
+        dstinv(&K2,&T8,&K4,&K4,&K5,&T9,&T10);
+        *status = 0;
+        dinvr(status,xlam,&fx,&qleft,&qhi);
+S270:
+        if(!(*status == 1)) goto S300;
+        cumpoi(s,xlam,&cum,&ccum);
+        if(!qporq) goto S280;
+        fx = cum-*p;
+        goto S290;
+S280:
+        fx = ccum-*q;
+S290:
+        dinvr(status,xlam,&fx,&qleft,&qhi);
+        goto S270;
+S300:
+        if(!(*status == -1)) goto S330;
+        if(!qleft) goto S310;
+        *status = 1;
+        *bound = 0.0e0;
+        goto S320;
+S310:
+        *status = 2;
+        *bound = inf;
+S320:
+        ;
+    }
+S330:
+    return;
+#undef tol
+#undef atol
+#undef inf
+}
+void cdft(int *which,double *p,double *q,double *t,double *df,
+	  int *status,double *bound)
+/**********************************************************************
+
+      void cdft(int *which,double *p,double *q,double *t,double *df,
+          int *status,double *bound)
+
+               Cumulative Distribution Function
+                         T distribution
+
+
+                              Function
+
+
+     Calculates any one parameter of the t distribution given
+     values for the others.
+     Scratch variables are function-static and shared by all calls.
+
+
+                              Arguments
+
+
+     WHICH --> Integer indicating which argument
+               value is to be calculated from the others.
+               Legal range: 1..3
+               iwhich = 1 : Calculate P and Q from T and DF
+               iwhich = 2 : Calculate T from P,Q and DF
+               iwhich = 3 : Calculate DF from P,Q and T
+
+        P <--> The integral from -infinity to t of the t-density.
+               Input range: (0,1].
+
+        Q <--> 1-P.
+               Input range: (0, 1].
+               P + Q = 1.0.
+
+        T <--> Upper limit of integration of the t-density.
+               Input range: ( -infinity, +infinity); never checked.
+               Search range: [ -1E100, 1E100 ]
+
+        DF <--> Degrees of freedom of the t-distribution.
+                Input range: (0 , +infinity).
+                Search range: [1e-100, 1E10]
+
+     STATUS <-- 0 if calculation completed correctly
+               -I if input parameter number I is out of range
+                1 if answer appears to be lower than lowest
+                  search bound
+                2 if answer appears to be higher than greatest
+                  search bound
+                3 if P + Q .ne. 1
+
+     BOUND <-- Undefined if STATUS is 0
+
+               Bound exceeded by parameter number I if STATUS
+               is negative.
+
+               Lower search bound if STATUS is 1.
+
+               Upper search bound if STATUS is 2.
+               0.0 (when P + Q < 0) or 1.0 if STATUS is 3.
+
+                              Method
+
+
+     Formula  26.5.27  of   Abramowitz   and  Stegun,   Handbook   of
+     Mathematical Functions  (1966) is used to reduce the computation
+     of the cumulative distribution function to that of an incomplete
+     beta.
+
+     Computation of other parameters involves a search for a value that
+     produces  the desired  value  of P.   The search relies  on  the
+     monotonicity of P with the other parameter.
+
+**********************************************************************/
+{
+#define tol 1.0e-8 /* passed to dstinv as its 7th argument */
+#define atol 1.0e-50 /* passed to dstinv as its 6th argument */
+#define zero 1.0e-100 /* lower search bound for DF */
+#define inf 1.0e100 /* not referenced in this function */
+#define rtinf 1.0e100 /* T is searched in [-rtinf, rtinf] */
+#define maxdf 1.0e10 /* upper search bound for DF */
+static int K1 = 1;
+static double K4 = 0.5e0;
+static double K5 = 5.0e0;
+static double fx,cum,ccum,pq;
+static unsigned long qhi,qleft,qporq;
+static double T2,T3,T6,T7,T8,T9,T10,T11;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+     Check arguments: WHICH must be 1, 2 or 3 (else STATUS = -1)
+*/
+    if(!(*which < 1 || *which > 3)) goto S30;
+    if(!(*which < 1)) goto S10;
+    *bound = 1.0e0;
+    goto S20;
+S10:
+    *bound = 3.0e0;
+S20:
+    *status = -1;
+    return;
+S30:
+    if(*which == 1) goto S70;
+/*
+     P: must lie in (0,1] unless it is an output (WHICH = 1)
+*/
+    if(!(*p <= 0.0e0 || *p > 1.0e0)) goto S60;
+    if(!(*p <= 0.0e0)) goto S40;
+    *bound = 0.0e0;
+    goto S50;
+S40:
+    *bound = 1.0e0;
+S50:
+    *status = -2;
+    return;
+S70:
+S60:
+    if(*which == 1) goto S110;
+/*
+     Q: must lie in (0,1] unless it is an output (WHICH = 1)
+*/
+    if(!(*q <= 0.0e0 || *q > 1.0e0)) goto S100;
+    if(!(*q <= 0.0e0)) goto S80;
+    *bound = 0.0e0;
+    goto S90;
+S80:
+    *bound = 1.0e0;
+S90:
+    *status = -3;
+    return;
+S110:
+S100:
+    if(*which == 3) goto S130;
+/*
+     DF: must be positive unless it is being solved for (WHICH = 3)
+*/
+    if(!(*df <= 0.0e0)) goto S120;
+    *bound = 0.0e0;
+    *status = -5;
+    return;
+S130:
+S120:
+    if(*which == 1) goto S170;
+/*
+     P + Q: must equal 1 to within 3*spmpar(1), else STATUS = 3
+*/
+    pq = *p+*q;
+    if(!(fabs(pq-0.5e0-0.5e0) > 3.0e0*spmpar(&K1))) goto S160;
+    if(!(pq < 0.0e0)) goto S140;
+    *bound = 0.0e0;
+    goto S150;
+S140:
+    *bound = 1.0e0;
+S150:
+    *status = 3;
+    return;
+S170:
+S160:
+    if(!(*which == 1)) qporq = *p <= *q; /* nonzero when P is the smaller tail */
+/*
+     Select the minimum of P or Q
+     Calculate ANSWERS
+*/
+    if(1 == *which) {
+/*
+     Computing P and Q
+*/
+        cumt(t,df,p,q);
+        *status = 0;
+    }
+    else if(2 == *which) {
+/*
+     Computing T
+     .. Get initial approximation for T
+*/
+        *t = dt1(p,q,df);
+        T2 = -rtinf;
+        T3 = rtinf;
+        T6 = atol;
+        T7 = tol;
+        dstinv(&T2,&T3,&K4,&K4,&K5,&T6,&T7);
+        *status = 0;
+        dinvr(status,t,&fx,&qleft,&qhi);
+S180:
+        if(!(*status == 1)) goto S210; /* loop while dinvr leaves *status == 1 */
+        cumt(t,df,&cum,&ccum);
+        if(!qporq) goto S190;
+        fx = cum-*p; /* residual measured on the P tail */
+        goto S200;
+S190:
+        fx = ccum-*q; /* residual measured on the Q tail */
+S200:
+        dinvr(status,t,&fx,&qleft,&qhi);
+        goto S180;
+S210:
+        if(!(*status == -1)) goto S240; /* -1 is mapped to STATUS 1 or 2 below */
+        if(!qleft) goto S220;
+        *status = 1;
+        *bound = -rtinf;
+        goto S230;
+S220:
+        *status = 2;
+        *bound = rtinf;
+S240:
+S230:
+        ;
+    }
+    else if(3 == *which) {
+/*
+     Computing DF (search starts from DF = 5)
+*/
+        *df = 5.0e0;
+        T8 = zero;
+        T9 = maxdf;
+        T10 = atol;
+        T11 = tol;
+        dstinv(&T8,&T9,&K4,&K4,&K5,&T10,&T11);
+        *status = 0;
+        dinvr(status,df,&fx,&qleft,&qhi);
+S250:
+        if(!(*status == 1)) goto S280; /* loop while dinvr leaves *status == 1 */
+        cumt(t,df,&cum,&ccum);
+        if(!qporq) goto S260;
+        fx = cum-*p;
+        goto S270;
+S260:
+        fx = ccum-*q;
+S270:
+        dinvr(status,df,&fx,&qleft,&qhi);
+        goto S250;
+S280:
+        if(!(*status == -1)) goto S310; /* -1 is mapped to STATUS 1 or 2 below */
+        if(!qleft) goto S290;
+        *status = 1;
+        *bound = zero;
+        goto S300;
+S290:
+        *status = 2;
+        *bound = maxdf;
+S300:
+        ;
+    }
+S310:
+    return;
+#undef tol
+#undef atol
+#undef zero
+#undef inf
+#undef rtinf
+#undef maxdf
+}
+void cdftnc(int *which,double *p,double *q,double *t,double *df,
+            double *pnonc,int *status,double *bound)
+/**********************************************************************
+
+   void cdftnc(int *which,double *p,double *q,double *t,double *df,
+               double *pnonc,int *status,double *bound)
+
+                Cumulative Distribution Function
+                   Non-Central T distribution
+
+                                Function
+
+      Calculates any one parameter of the noncentral t distribution
+      given values for the others.
+
+                                Arguments
+
+      WHICH --> Integer indicating which argument
+                value is to be calculated from the others.
+                Legal range: 1..4
+                iwhich = 1 : Calculate P and Q from T,DF,PNONC
+                iwhich = 2 : Calculate T from P,DF,PNONC
+                iwhich = 3 : Calculate DF from P,T,PNONC
+                iwhich = 4 : Calculate PNONC from P,DF,T
+
+         P <--> The integral from -infinity to t of the noncentral
+                t-density.  Input range: [0, 1 - 1e-16].
+
+         Q <--> 1-P.  Output only (WHICH = 1); the searches use P
+                alone, and Q is neither read nor range-checked.
+                P + Q = 1 is not verified.
+
+         T <--> Upper limit of integration of the noncentral t-density.
+                Input range: ( -infinity, +infinity), not checked.
+                Search range: [ -1E100, 1E100 ]
+
+         DF <--> Degrees of freedom of the noncentral t-distribution.
+                 Input range: (0 , +infinity).
+                 Search range: [1e-100, 1E4]
+
+      PNONC <--> Noncentrality parameter of the noncentral
+                 t-distribution.  Input is not range-checked.
+                 Search range: [-1e4, 1E4]
+
+      STATUS <-- 0 if calculation completed correctly
+                -I if input parameter number I is out of range
+                 1 if answer appears to be lower than lowest
+                   search bound
+                 2 if answer appears to be higher than greatest
+                   search bound
+                (3 is never returned: P + Q is not checked here.)
+
+      BOUND <-- Undefined if STATUS is 0
+
+                Bound exceeded by parameter number I if STATUS
+                is negative.
+
+                Lower search bound if STATUS is 1.
+
+                Upper search bound if STATUS is 2.
+
+                                 Method
+
+      Upper tail of the cumulative noncentral t is calculated using
+      formulae from page 532 of Johnson, Kotz, Balakrishnan, Continuous
+      Univariate Distributions, Vol 2, 2nd Edition.  Wiley (1995)
+
+      Computation of other parameters involves a search for a value
+      that produces the desired value of P.  The search relies on the
+      monotonicity of P with the other parameter.
+
+**********************************************************************/
+{
+#define tent4 1.0e4 /* search limit for DF (upper) and PNONC (both ends) */
+#define tol 1.0e-8 /* passed to dstinv as its 7th argument */
+#define atol 1.0e-50 /* passed to dstinv as its 6th argument */
+#define zero 1.0e-100 /* lower search bound for DF */
+#define one ( 1.0e0 - 1.0e-16 ) /* largest P accepted as input */
+#define inf 1.0e100 /* T is searched in [-inf, inf] */
+static double K3 = 0.5e0;
+static double K4 = 5.0e0;
+static double ccum,cum,fx;
+static unsigned long qhi,qleft;
+static double T1,T2,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    if(!(*which < 1 || *which > 4)) goto S30;
+    if(!(*which < 1)) goto S10;
+    *bound = 1.0e0;
+    goto S20;
+S10:
+    *bound = 4.0e0; /* largest legal WHICH (was 5.0, outside the 1..4 range) */
+S20:
+    *status = -1;
+    return;
+S30:
+    if(*which == 1) goto S70; /* P is an output when WHICH = 1 */
+    if(!(*p < 0.0e0 || *p > one)) goto S60;
+    if(!(*p < 0.0e0)) goto S40;
+    *bound = 0.0e0;
+    goto S50;
+S40:
+    *bound = one;
+S50:
+    *status = -2;
+    return;
+S70:
+S60:
+    if(*which == 3) goto S90; /* DF is an output when WHICH = 3 */
+    if(!(*df <= 0.0e0)) goto S80;
+    *bound = 0.0e0;
+    *status = -5;
+    return;
+S90:
+S80:
+/*   T and PNONC may take any value, so neither is range-checked;  */
+/*   Q is not validated either.                                    */
+    if(1 == *which) {
+        cumtnc(t,df,pnonc,p,q);
+        *status = 0;
+    }
+    else if(2 == *which) {
+        *t = 5.0e0; /* starting point of the search */
+        T1 = -inf;
+        T2 = inf;
+        T5 = atol;
+        T6 = tol;
+        dstinv(&T1,&T2,&K3,&K3,&K4,&T5,&T6);
+        *status = 0;
+        dinvr(status,t,&fx,&qleft,&qhi);
+S110:
+        if(!(*status == 1)) goto S120; /* loop while dinvr leaves *status == 1 */
+        cumtnc(t,df,pnonc,&cum,&ccum);
+        fx = cum - *p;
+        dinvr(status,t,&fx,&qleft,&qhi);
+        goto S110;
+S120:
+        if(!(*status == -1)) goto S150; /* -1 is mapped to STATUS 1 or 2 below */
+        if(!qleft) goto S130;
+        *status = 1;
+        *bound = -inf;
+        goto S140;
+S130:
+        *status = 2;
+        *bound = inf;
+S150:
+S140:
+        ;
+    }
+    else if(3 == *which) {
+        *df = 5.0e0; /* starting point of the search */
+        T7 = zero;
+        T8 = tent4;
+        T9 = atol;
+        T10 = tol;
+        dstinv(&T7,&T8,&K3,&K3,&K4,&T9,&T10);
+        *status = 0;
+        dinvr(status,df,&fx,&qleft,&qhi);
+S160:
+        if(!(*status == 1)) goto S170; /* loop while dinvr leaves *status == 1 */
+        cumtnc(t,df,pnonc,&cum,&ccum);
+        fx = cum - *p;
+        dinvr(status,df,&fx,&qleft,&qhi);
+        goto S160;
+S170:
+        if(!(*status == -1)) goto S200; /* -1 is mapped to STATUS 1 or 2 below */
+        if(!qleft) goto S180;
+        *status = 1;
+        *bound = zero;
+        goto S190;
+S180:
+        *status = 2;
+        *bound = tent4; /* the upper limit actually given to dstinv (was inf) */
+S200:
+S190:
+        ;
+    }
+    else if(4 == *which) {
+        *pnonc = 5.0e0; /* starting point of the search */
+        T11 = -tent4;
+        T12 = tent4;
+        T13 = atol;
+        T14 = tol;
+        dstinv(&T11,&T12,&K3,&K3,&K4,&T13,&T14);
+        *status = 0;
+        dinvr(status,pnonc,&fx,&qleft,&qhi);
+S210:
+        if(!(*status == 1)) goto S220; /* loop while dinvr leaves *status == 1 */
+        cumtnc(t,df,pnonc,&cum,&ccum);
+        fx = cum - *p;
+        dinvr(status,pnonc,&fx,&qleft,&qhi);
+        goto S210;
+S220:
+        if(!(*status == -1)) goto S250; /* -1 is mapped to STATUS 1 or 2 below */
+        if(!qleft) goto S230;
+        *status = 1;
+        *bound = -tent4; /* the lower limit actually given to dstinv (was 0.0) */
+        goto S240;
+S230:
+        *status = 2;
+        *bound = tent4;
+S240:
+        ;
+    }
+S250:
+    return;
+#undef tent4
+#undef tol
+#undef atol
+#undef zero
+#undef one
+#undef inf
+}
+void cumbet(double *x,double *y,double *a,double *b,double *cum,
+	    double *ccum)
+/*
+**********************************************************************
+
+     void cumbet(double *x,double *y,double *a,double *b,double *cum,
+            double *ccum)
+
+          Double precision cUMulative incomplete BETa distribution
+
+
+                              Function
+
+
+     Evaluates the cumulative distribution function of the beta
+     distribution with parameters A and B at X, i.e. the integral
+     from 0 to X of (1/B(a,b)) * t**(a-1) * (1-t)**(b-1) dt,
+     together with its complement.
+
+
+                              Arguments
+
+
+     X --> Upper limit of integration.
+
+     Y --> 1 - X, supplied by the caller.
+
+     A --> First parameter of the beta distribution.
+
+     B --> Second parameter of the beta distribution.
+
+     CUM <-- Cumulative incomplete beta distribution.
+
+     CCUM <-- Complement of the cumulative incomplete beta
+              distribution.
+
+     All arguments are pointers to double.
+
+
+                              Method
+
+
+     X <= 0 yields CUM = 0, CCUM = 1 and Y <= 0 yields CUM = 1,
+     CCUM = 0 without further work.  Every other case is delegated
+     to BRATIO; the value BRATIO leaves in its final (ierr)
+     argument is stored but never examined.
+
+
+                              References
+
+
+     Didonato, Armido R. and Morris, Alfred H. Jr. (1992) Algorithm
+     708 Significant Digit Computation of the Incomplete Beta Function
+     Ratios. ACM ToMS, Vol.18, No. 3, Sept. 1992, 360-373.
+
+**********************************************************************
+*/
+{
+static int bratio_flag;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    if(*x <= 0.0e0) {
+/*   The range of integration from 0 to X is empty */
+        *cum = 0.0e0;
+        *ccum = 1.0e0;
+    }
+    else if(*y <= 0.0e0) {
+/*   Y, documented as 1 - X, is not positive: CUM is 1 */
+        *cum = 1.0e0;
+        *ccum = 0.0e0;
+    }
+    else bratio(a,b,x,y,cum,ccum,&bratio_flag);
+    return;
+}
+void cumbin(double *s,double *xn,double *pr,double *ompr,
+	    double *cum,double *ccum)
+/*
+**********************************************************************
+
+     void cumbin(double *s,double *xn,double *pr,double *ompr,
+            double *cum,double *ccum)
+
+                    CUmulative BINomial distribution
+
+
+                              Function
+
+
+     Returns the probability of 0 to S successes in XN binomial
+     trials, each of which has probability of success PR.
+
+
+                              Arguments
+
+
+     S --> The upper limit of cumulation of the binomial distribution.
+
+     XN --> The number of binomial trials.
+
+     PR --> The probability of success in each binomial trial.
+
+     OMPR --> 1 - PR
+
+     CUM <-- Cumulative binomial distribution.
+
+     CCUM <-- Complement of the cumulative binomial distribution.
+
+     All arguments are pointers to double.
+
+
+                              Method
+
+
+     Formula 26.5.24 of Abramowitz and Stegun, Handbook of
+     Mathematical Functions (1966) is used to reduce the binomial
+     distribution to the cumulative beta distribution: when S < XN
+     cumbet is evaluated at PR with parameters S+1 and XN-S and its
+     two outputs are exchanged.  Otherwise the result is CUM = 1,
+     CCUM = 0.
+
+**********************************************************************
+*/
+{
+static double beta_a,beta_b;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    if(*s < *xn) {
+        beta_a = *s+1.0e0;
+        beta_b = *xn-*s;
+/*   cum and ccum trade places on purpose: cumbet's CUM is our CCUM */
+        cumbet(pr,ompr,&beta_a,&beta_b,ccum,cum);
+    }
+    else {
+        *cum = 1.0e0;
+        *ccum = 0.0e0;
+    }
+    return;
+}
+void cumchi(double *x,double *df,double *cum,double *ccum)
+/*
+**********************************************************************
+
+     void cumchi(double *x,double *df,double *cum,double *ccum)
+             CUMulative of the CHi-square distribution
+
+
+                              Function
+
+
+     Calculates the cumulative chi-square distribution.
+
+
+                              Arguments
+
+
+     X       --> Upper limit of integration of the
+                 chi-square distribution.
+
+     DF      --> Degrees of freedom of the
+                 chi-square distribution.
+
+     CUM <-- Cumulative chi-square distribution.
+
+     CCUM <-- Complement of the cumulative chi-square distribution.
+
+     All arguments are pointers to double.
+
+
+                              Method
+
+
+     Halves both X and DF and passes them, as upper limit and
+     shape respectively, to the incomplete gamma routine CUMGAM,
+     which fills in CUM and CCUM.
+
+**********************************************************************
+*/
+{
+static double half_df,half_x;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    half_x = *x*0.5e0;
+    half_df = *df*0.5e0;
+    cumgam(&half_x,&half_df,cum,ccum);
+    return;
+}
+void cumchn(double *x,double *df,double *pnonc,double *cum,
+            double *ccum)
+/**********************************************************************
+
+     void cumchn(double *x,double *df,double *pnonc,double *cum,
+                 double *ccum)
+
+             CUMulative of the Non-central CHi-square distribution
+
+                               Function
+
+     Calculates     the       cumulative      non-central    chi-square
+     distribution, i.e.,  the probability   that  a   random   variable
+     which    follows  the  non-central chi-square  distribution,  with
+     non-centrality  parameter    PNONC  and   continuous  degrees   of
+     freedom DF, is less than or equal to X.
+
+                              Arguments
+
+     X       --> Upper limit of integration of the non-central
+                 chi-square distribution.
+
+     DF      --> Degrees of freedom of the non-central
+                 chi-square distribution.
+
+     PNONC   --> Non-centrality parameter of the non-central
+                 chi-square distribution.
+
+     CUM <-- Cumulative non-central chi-square distribution.
+
+     CCUM <-- Complement of the cumulative non-central chi-square
+              distribution, computed as 0.5 + (0.5 - CUM).
+     (X <= 0 gives CUM = 0, CCUM = 1; PNONC <= 1.0e-10 defers to cumchi.)
+
+                                Method
+
+     Uses  formula  26.4.25   of  Abramowitz  and  Stegun, Handbook  of
+     Mathematical    Functions,  US   NBS   (1966)    to calculate  the
+     non-central chi-square.
+
+                                Variables
+
+     EPS     --- Convergence criterion (1.0e-5).  A pass stops when
+                 a term is less than EPS*SUM or SUM < 1.0e-20.
+
+     ICENT   --- Index of the central term: fifidint(PNONC/2),
+                 with 0 replaced by 1.
+
+**********************************************************************/
+{
+#define dg(i) (*df + 2.0e0 * (double)(i)) /* df + 2*i */
+#define qsmall(xx) (int)(sum < 1.0e-20 || (xx) < eps * sum) /* reads sum, eps from scope */
+static double eps = 1.0e-5;
+static double adj,centaj,centwt,chid2,dfd2,lcntaj,lcntwt,lfact,pcent,pterm,sum,
+    sumadj,term,wt,xnonc;
+static int i,icent;
+static double T1,T2,T3;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    if(!(*x <= 0.0e0)) goto S10;
+    *cum = 0.0e0;
+    *ccum = 1.0e0;
+    return;
+S10:
+    if(!(*pnonc <= 1.0e-10 )) goto S20;
+/*
+     When non-centrality parameter is (essentially) zero,
+     use cumulative chi-square distribution
+*/
+    cumchi(x,df,cum,ccum);
+    return;
+S20:
+    xnonc = *pnonc / 2.0e0;
+/*
+***********************************************************************
+     The following code calculates the weight, chi-square, and
+     adjustment term for the central term in the infinite series.
+     The central term is the one in which the poisson weight is
+     greatest.  The adjustment term is the amount that must
+     be subtracted from the chi-square to move up two degrees
+     of freedom.
+***********************************************************************
+*/
+    icent = fifidint(xnonc);
+    if(icent == 0) icent = 1;
+    chid2 = *x / 2.0e0;
+/*
+     Calculate central weight term
+*/
+    T1 = (double)(icent + 1);
+    lfact = alngam(&T1);
+    lcntwt = -xnonc + (double)icent * log(xnonc) - lfact;
+    centwt = exp(lcntwt);
+/*
+     Calculate central chi-square
+*/
+    T2 = dg(icent);
+    cumchi(x,&T2,&pcent,ccum); /* *ccum is only scratch here; it is reassigned at S80 */
+/*
+     Calculate central adjustment term
+*/
+    dfd2 = dg(icent) / 2.0e0;
+    T3 = 1.0e0 + dfd2;
+    lfact = alngam(&T3);
+    lcntaj = dfd2 * log(chid2) - chid2 - lfact;
+    centaj = exp(lcntaj);
+    sum = centwt * pcent;
+/*
+***********************************************************************
+     Sum backwards from the central term towards zero.
+     Quit whenever either
+     (1) the zero term is reached, or
+     (2) the term gets small relative to the sum
+***********************************************************************
+*/
+    sumadj = 0.0e0;
+    adj = centaj;
+    wt = centwt;
+    i = icent;
+    goto S40; /* run the loop body once before the first exit test */
+S30:
+    if(qsmall(term) || i == 0) goto S50;
+S40:
+    dfd2 = dg(i) / 2.0e0;
+/*
+     Adjust chi-square for two fewer degrees of freedom.
+     The adjusted value ends up in PTERM.
+*/
+    adj = adj * dfd2 / chid2;
+    sumadj += adj;
+    pterm = pcent + sumadj;
+/*
+     Adjust poisson weight for J decreased by one
+*/
+    wt *= ((double)i / xnonc);
+    term = wt * pterm;
+    sum += term;
+    i -= 1;
+    goto S30;
+S50:
+/*
+***********************************************************************
+     Now sum forward from the central term towards infinity.
+     Quit when the term gets small relative to the sum
+     (the qsmall test at S60 is the only exit from this loop).
+***********************************************************************
+*/
+    sumadj = adj = centaj;
+    wt = centwt;
+    i = icent;
+    goto S70; /* run the loop body once before the first exit test */
+S60:
+    if(qsmall(term)) goto S80;
+S70:
+/*
+     Update weights for next higher J
+*/
+    wt *= (xnonc / (double)(i + 1));
+/*
+     Calculate PTERM and add term to sum
+*/
+    pterm = pcent - sumadj;
+    term = wt * pterm;
+    sum += term;
+/*
+     Update adjustment term for DF for next iteration
+*/
+    i += 1;
+    dfd2 = dg(i) / 2.0e0;
+    adj = adj * chid2 / dfd2;
+    sumadj += adj;
+    goto S60;
+S80:
+    *cum = sum;
+    *ccum = 0.5e0 + (0.5e0 - *cum);
+    return;
+#undef dg
+#undef qsmall
+}
+void cumf(double *f,double *dfn,double *dfd,double *cum,double *ccum)
+/*
+**********************************************************************
+
+     void cumf(double *f,double *dfn,double *dfd,double *cum,double *ccum)
+                    CUMulative F distribution
+
+
+                              Function
+
+
+     Computes  the  integral from  0  to  F of  the f-density  with DFN
+     and DFD degrees of freedom.
+
+
+                              Arguments
+
+
+     F --> Upper limit of integration of the f-density.
+
+     DFN --> Degrees of freedom of the numerator sum of squares.
+
+     DFD --> Degrees of freedom of the denominator sum of squares.
+
+     CUM <-- Cumulative f distribution.
+
+     CCUM <-- Complement of the cumulative f distribution.
+
+     All arguments are pointers to double.
+
+
+                              Method
+
+
+     Formula 26.5.28 of Abramowitz and Stegun is used to reduce
+     the cumulative F to a cumulative beta distribution, which is
+     evaluated by BRATIO.  The value BRATIO leaves in its final
+     (ierr) argument is stored but never examined.
+
+
+                              Note
+
+
+     If F is less than or equal to 0, CUM = 0 and CCUM = 1 are
+     returned without calling BRATIO.
+
+**********************************************************************
+*/
+{
+#define half 0.5e0
+#define done 1.0e0
+static double numer,denom,beta_x,beta_y;
+static int bratio_flag;
+static double half_dfd,half_dfn;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    if(*f <= 0.0e0) {
+        *cum = 0.0e0;
+        *ccum = 1.0e0;
+        return;
+    }
+/*
+     bratio is evaluated at beta_x = dfd/(dfd+dfn*f) with parameters
+     dfd/2 and dfn/2, so its first output is CCUM and its second is
+     CUM.  beta_y = 1-beta_x; the one that is at most one half comes
+     straight from the quotient, the other one by subtraction.
+*/
+    numer = *dfn**f;
+    denom = *dfd+numer;
+    beta_x = *dfd/denom;
+    if(beta_x > half) {
+        beta_y = numer/denom;
+        beta_x = done-beta_y;
+    }
+    else  beta_y = done-beta_x;
+    half_dfd = *dfd*half;
+    half_dfn = *dfn*half;
+    bratio(&half_dfd,&half_dfn,&beta_x,&beta_y,ccum,cum,&bratio_flag);
+    return;
+#undef half
+#undef done
+}
+void cumfnc(double *f,double *dfn,double *dfd,double *pnonc,
+	    double *cum,double *ccum)
+/*
+**********************************************************************
+
+               F -NON- -C-ENTRAL F DISTRIBUTION
+
+
+
+                              Function
+
+
+     COMPUTES NONCENTRAL F DISTRIBUTION WITH DFN AND DFD
+     DEGREES OF FREEDOM AND NONCENTRALITY PARAMETER PNONC
+
+
+                              Arguments
+
+
+     F --> UPPER LIMIT OF INTEGRATION OF NONCENTRAL F IN EQUATION
+
+     DFN --> DEGREES OF FREEDOM OF NUMERATOR
+
+     DFD -->  DEGREES OF FREEDOM OF DENOMINATOR
+
+     PNONC --> NONCENTRALITY PARAMETER.
+
+     CUM <-- CUMULATIVE NONCENTRAL F DISTRIBUTION
+
+     CCUM <-- COMPLEMENT OF CUMULATIVE, SET TO 0.5 + (0.5 - CUM)
+
+
+                              Method
+
+
+     USES FORMULA 26.6.20 OF REFERENCE FOR INFINITE SERIES.
+     SERIES IS CALCULATED BACKWARD AND FORWARD FROM J = LAMBDA/2
+     (THIS IS THE TERM WITH THE LARGEST POISSON WEIGHT) UNTIL
+     THE CONVERGENCE CRITERION IS MET.
+
+     FOR SPEED, THE INCOMPLETE BETA FUNCTIONS ARE EVALUATED
+     BY FORMULA 26.5.16.
+
+
+               REFERENCE
+
+
+     HANDBOOK OF MATHEMATICAL FUNCTIONS
+     EDITED BY MILTON ABRAMOWITZ AND IRENE A. STEGUN
+     NATIONAL BUREAU OF STANDARDS APPLIED MATHEMATICS SERIES - 55
+     MARCH 1965
+     P 947, EQUATIONS 26.6.17, 26.6.18
+
+
+                              Note
+
+
+     THE SUM CONTINUES UNTIL A SUCCEEDING TERM IS LESS THAN EPS
+     TIMES THE SUM (OR THE SUM IS LESS THAN 1.0E-20).  EPS IS
+     SET TO 1.0E-4 IN A DATA STATEMENT WHICH CAN BE CHANGED.
+     F <= 0 GIVES CUM = 0, CCUM = 1; PNONC < 1.0E-10 DEFERS TO CUMF.
+
+**********************************************************************
+*/
+{
+#define qsmall(x) (int)(sum < 1.0e-20 || (x) < eps*sum) /* reads sum, eps from scope */
+#define half 0.5e0
+#define done 1.0e0
+static double eps = 1.0e-4;
+static double dsum,dummy,prod,xx,yy,adn,aup,b,betdn,betup,centwt,dnterm,sum,
+    upterm,xmult,xnonc;
+static int i,icent,ierr;
+static double T1,T2,T3,T4,T5,T6;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    if(!(*f <= 0.0e0)) goto S10;
+    *cum = 0.0e0;
+    *ccum = 1.0e0;
+    return;
+S10:
+    if(!(*pnonc < 1.0e-10)) goto S20;
+/*
+     Handle case in which the non-centrality parameter is
+     (essentially) zero.
+*/
+    cumf(f,dfn,dfd,cum,ccum);
+    return;
+S20:
+    xnonc = *pnonc/2.0e0;
+/*
+     Calculate the central term of the poisson weighting factor.
+*/
+    icent = (long)(xnonc); /* NOTE(review): cast to long, stored in an int */
+    if(icent == 0) icent = 1;
+/*
+     Compute central weight term
+*/
+    T1 = (double)(icent+1);
+    centwt = exp(-xnonc+(double)icent*log(xnonc)-alngam(&T1));
+/*
+     Compute central incomplete beta term
+     Assure that minimum of arg to beta and 1 - arg is computed
+          accurately.
+*/
+    prod = *dfn**f;
+    dsum = *dfd+prod;
+    yy = *dfd/dsum;
+    if(yy > half) {
+        xx = prod/dsum;
+        yy = done-xx;
+    }
+    else  xx = done-yy;
+    T2 = *dfn*half+(double)icent;
+    T3 = *dfd*half;
+    bratio(&T2,&T3,&xx,&yy,&betdn,&dummy,&ierr); /* dummy and ierr are never read */
+    adn = *dfn/2.0e0+(double)icent;
+    aup = adn;
+    b = *dfd/2.0e0;
+    betup = betdn;
+    sum = centwt*betdn;
+/*
+     Now sum terms backward from icent until convergence or all done
+*/
+    xmult = centwt;
+    i = icent;
+    T4 = adn+b;
+    T5 = adn+1.0e0;
+    dnterm = exp(alngam(&T4)-alngam(&T5)-alngam(&b)+adn*log(xx)+b*log(yy));
+S30:
+    if(qsmall(xmult*betdn) || i <= 0) goto S40; /* converged, or index 0 reached */
+    xmult *= ((double)i/xnonc);
+    i -= 1;
+    adn -= 1.0;
+    dnterm = (adn+1.0)/((adn+b)*xx)*dnterm;
+    betdn += dnterm;
+    sum += (xmult*betdn);
+    goto S30;
+S40:
+    i = icent+1;
+/*
+     Now sum forwards until convergence
+*/
+    xmult = centwt;
+    if(aup-1.0+b == 0) upterm = exp(-alngam(&aup)-alngam(&b)+(aup-1.0)*log(xx)+
+      b*log(yy));
+    else  {
+        T6 = aup-1.0+b;
+        upterm = exp(alngam(&T6)-alngam(&aup)-alngam(&b)+(aup-1.0)*log(xx)+b*
+          log(yy));
+    }
+    goto S60; /* run the loop body once before the first exit test */
+S50:
+    if(qsmall(xmult*betup)) goto S70;
+S60:
+    xmult *= (xnonc/(double)i);
+    i += 1;
+    aup += 1.0;
+    upterm = (aup+b-2.0e0)*xx/(aup-1.0)*upterm;
+    betup -= upterm;
+    sum += (xmult*betup);
+    goto S50;
+S70:
+    *cum = sum;
+    *ccum = 0.5e0+(0.5e0-*cum);
+    return;
+#undef qsmall
+#undef half
+#undef done
+}
+void cumgam(double *x,double *a,double *cum,double *ccum)
+/*
+**********************************************************************
+ 
+     void cumgam(double *x,double *a,double *cum,double *ccum)
+           Double precision cUMulative incomplete GAMma distribution
+ 
+ 
+                              Function
+ 
+ 
+     Computes the cumulative distribution function of the incomplete
+     gamma distribution, i.e., the integral from 0 to X of
+          (1/GAM(A))*EXP(-T)*T**(A-1) DT
+     where GAM(A) is the complete gamma function of A, i.e.,
+          GAM(A) = integral from 0 to infinity of
+                    EXP(-T)*T**(A-1) DT
+ 
+ 
+                              Arguments
+ 
+ 
+     X --> The upper limit of integration of the incomplete gamma.
+                                                X is DOUBLE PRECISION
+ 
+     A --> The shape parameter of the incomplete gamma.
+                                                A is DOUBLE PRECISION
+ 
+     CUM <-- Cumulative incomplete gamma distribution.
+                                        CUM is DOUBLE PRECISION
+ 
+     CCUM <-- Complement of Cumulative incomplete gamma distribution.
+                                                CCUM is DOUBLE PRECISION
+ 
+ 
+                              Method
+ 
+ 
+     Calls the routine GRATIO.  For X <= 0 GRATIO is not called and
+     the routine returns CUM = 0, CCUM = 1.
+**********************************************************************
+*/
+{
+static int K1 = 0;
+/*
+     K1 is handed to gratio by address as its final argument.
+     Non-positive X: nothing to integrate, return CUM = 0, CCUM = 1.
+*/
+    if(!(*x <= 0.0e0)) goto S10;
+    *cum = 0.0e0;
+    *ccum = 1.0e0;
+    return;
+S10:
+    gratio(a,x,cum,ccum,&K1);
+/*
+     The gratio call above receives the CUM and CCUM pointers directly
+*/
+    return;
+}
+void cumnbn(double *s,double *xn,double *pr,double *ompr,
+	    double *cum,double *ccum)
+/*
+**********************************************************************
+ 
+     void cumnbn(double *s,double *xn,double *pr,double *ompr,
+            double *cum,double *ccum)
+
+                    CUmulative Negative BINomial distribution
+ 
+ 
+                              Function
+ 
+ 
+     Returns the probability that there will be S or fewer failures
+     before there are XN successes, with each binomial trial having
+     a probability of success PR.
+ 
+     Prob(# failures = S | XN successes, PR)  =
+                        ( XN + S - 1 )
+                        (            ) * PR^XN * (1-PR)^S
+                        (      S     )
+ 
+ 
+                              Arguments
+ 
+ 
+     S --> The number of failures
+                                                  S is DOUBLE PRECISION
+ 
+     XN --> The number of successes
+                                                  XN is DOUBLE PRECISION
+ 
+     PR --> The probability of success in each binomial trial.
+                                                  PR is DOUBLE PRECISION
+ 
+     OMPR --> 1 - PR
+                                                  OMPR is DOUBLE PRECISION
+ 
+     CUM <-- Cumulative negative binomial distribution.
+                                                  CUM is DOUBLE PRECISION
+ 
+     CCUM <-- Complement of Cumulative negative binomial distribution.
+                                                  CCUM is DOUBLE PRECISION
+ 
+ 
+                              Method
+ 
+ 
+     Formula  26.5.26    of   Abramowitz  and    Stegun,  Handbook   of
+     Mathematical   Functions (1966) is   used  to reduce the  negative
+     binomial distribution to the cumulative beta distribution.
+ 
+**********************************************************************
+*/
+{
+static double T1;
+/*
+     Delegate to cumbet, passing PR, OMPR, XN and T1 = S + 1,
+     with the CUM and CCUM pointers forwarded unchanged.
+*/
+    T1 = *s+1.e0;
+    cumbet(pr,ompr,xn,&T1,cum,ccum);
+    return;
+}
+void cumnor(double *arg,double *result,double *ccum)
+/*
+**********************************************************************
+ 
+     void cumnor(double *arg,double *result,double *ccum)
+ 
+ 
+                              Function
+ 
+ 
+     Computes the cumulative distribution function of the normal
+     distribution, i.e., the integral from -infinity to x of
+          (1/sqrt(2*pi)) exp(-u*u/2) du
+ 
+     ARG --> Upper limit of integration (the x of the formula above).
+                                        ARG is DOUBLE PRECISION
+ 
+     RESULT <-- Cumulative normal distribution.
+                                        RESULT is DOUBLE PRECISION
+ 
+     CCUM <-- Complement of Cumulative normal distribution.
+                                        CCUM is DOUBLE PRECISION
+ 
+     Renaming of function ANORM from:
+
+     Cody, W.J. (1993). "ALGORITHM 715: SPECFUN - A Portable FORTRAN
+     Package of Special Function Routines and Test Drivers"
+     ACM Transactions on Mathematical Software. 19, 22-32.
+
+     with slight modifications to return ccum and to deal with
+     machine constants.
+ 
+**********************************************************************
+  Original Comments:
+------------------------------------------------------------------
+ 
+ This function evaluates the normal distribution function:
+ 
+                              / x
+                     1       |       -t*t/2
+          P(x) = ----------- |      e       dt
+                 sqrt(2 pi)  |
+                             /-oo
+ 
+   The main computation evaluates near-minimax approximations
+   derived from those in "Rational Chebyshev approximations for
+   the error function" by W. J. Cody, Math. Comp., 1969, 631-637.
+   This transportable program uses rational functions that
+   theoretically approximate the normal distribution function to
+   at least 18 significant decimal digits.  The accuracy achieved
+   depends on the arithmetic system, the compiler, the intrinsic
+   functions, and proper selection of the machine-dependent
+   constants.
+ 
+*******************************************************************
+*******************************************************************
+ 
+ Explanation of machine-dependent constants.
+ 
+   MIN   = smallest machine representable number.
+ 
+   EPS   = argument below which anorm(x) may be represented by
+           0.5  and above which  x*x  will not underflow.
+           A conservative value is the largest machine number X
+           such that   1.0 + X = 1.0   to machine precision.
+*******************************************************************
+*******************************************************************
+ 
+ Error returns
+ 
+  The program returns  ANORM = 0     for  ARG .LE. XLOW.
+  (In this version RESULT and CCUM are each set to 0 when below MIN.)
+ 
+ Intrinsic functions required are:
+ 
+     ABS, AINT, EXP
+ 
+ 
+  Author: W. J. Cody
+          Mathematics and Computer Science Division
+          Argonne National Laboratory
+          Argonne, IL 60439
+ 
+  Latest modification: March 15, 1992
+ 
+------------------------------------------------------------------
+*/
+{
+static double a[5] = {
+    2.2352520354606839287e00,1.6102823106855587881e02,1.0676894854603709582e03,
+    1.8154981253343561249e04,6.5682337918207449113e-2
+};
+static double b[4] = {
+    4.7202581904688241870e01,9.7609855173777669322e02,1.0260932208618978205e04,
+    4.5507789335026729956e04
+};
+static double c[9] = {
+    3.9894151208813466764e-1,8.8831497943883759412e00,9.3506656132177855979e01,
+    5.9727027639480026226e02,2.4945375852903726711e03,6.8481904505362823326e03,
+    1.1602651437647350124e04,9.8427148383839780218e03,1.0765576773720192317e-8
+};
+static double d[8] = {
+    2.2266688044328115691e01,2.3538790178262499861e02,1.5193775994075548050e03,
+    6.4855582982667607550e03,1.8615571640885098091e04,3.4900952721145977266e04,
+    3.8912003286093271411e04,1.9685429676859990727e04
+};
+static double half = 0.5e0;
+static double p[6] = {
+    2.1589853405795699e-1,1.274011611602473639e-1,2.2235277870649807e-2,
+    1.421619193227893466e-3,2.9112874951168792e-5,2.307344176494017303e-2
+};
+static double one = 1.0e0;
+static double q[5] = {
+    1.28426009614491121e00,4.68238212480865118e-1,6.59881378689285515e-2,
+    3.78239633202758244e-3,7.29751555083966205e-5
+};
+static double sixten = 1.60e0;
+static double sqrpi = 3.9894228040143267794e-1;
+static double thrsh = 0.66291e0;
+static double root32 = 5.656854248e0;
+static double zero = 0.0e0;
+static int K1 = 1;
+static int K2 = 2;
+static int i;
+static double del,eps,temp,x,xden,xnum,y,xsq,min;
+/*
+------------------------------------------------------------------
+  Machine dependent constants (EPS and MIN are taken from spmpar)
+------------------------------------------------------------------
+*/
+    eps = spmpar(&K1)*0.5e0;
+    min = spmpar(&K2);
+    x = *arg;
+    y = fabs(x);
+    if(y <= thrsh) {
+/*
+------------------------------------------------------------------
+  Evaluate  anorm  for  |X| <= 0.66291
+------------------------------------------------------------------
+*/
+        xsq = zero;
+        if(y > eps) xsq = x*x;
+        xnum = a[4]*xsq;
+        xden = xsq;
+        for(i=0; i<3; i++) {
+            xnum = (xnum+a[i])*xsq;
+            xden = (xden+b[i])*xsq;
+        }
+        *result = x*(xnum+a[3])/(xden+b[3]);
+        temp = *result;
+        *result = half+temp;
+        *ccum = half-temp;
+    }
+/*
+------------------------------------------------------------------
+  Evaluate  anorm  for 0.66291 <= |X| <= sqrt(32)
+------------------------------------------------------------------
+*/
+    else if(y <= root32) {
+        xnum = c[8]*y;
+        xden = y;
+        for(i=0; i<7; i++) {
+            xnum = (xnum+c[i])*y;
+            xden = (xden+d[i])*y;
+        }
+        *result = (xnum+c[7])/(xden+d[7]);
+        xsq = fifdint(y*sixten)/sixten;
+        del = (y-xsq)*(y+xsq);
+        *result = exp(-(xsq*xsq*half))*exp(-(del*half))**result;
+        *ccum = one-*result;
+        if(x > zero) {
+            temp = *result;
+            *result = *ccum;
+            *ccum = temp;
+        }
+    }
+/*
+------------------------------------------------------------------
+  Evaluate  anorm  for |X| > sqrt(32)
+------------------------------------------------------------------
+*/
+    else  {
+        *result = zero;
+        xsq = one/(x*x);
+        xnum = p[5]*xsq;
+        xden = xsq;
+        for(i=0; i<4; i++) {
+            xnum = (xnum+p[i])*xsq;
+            xden = (xden+q[i])*xsq;
+        }
+        *result = xsq*(xnum+p[4])/(xden+q[4]);
+        *result = (sqrpi-*result)/y;
+        xsq = fifdint(x*sixten)/sixten;
+        del = (x-xsq)*(x+xsq);
+        *result = exp(-(xsq*xsq*half))*exp(-(del*half))**result;
+        *ccum = one-*result;
+        if(x > zero) {
+            temp = *result;
+            *result = *ccum;
+            *ccum = temp;
+        }
+    }
+    if(*result < min) *result = 0.0e0;
+/*
+------------------------------------------------------------------
+  Values below MIN are set to zero: RESULT (above) and CCUM (below)
+------------------------------------------------------------------
+----------Last card of ANORM ----------
+*/
+    if(*ccum < min) *ccum = 0.0e0;
+}
+void cumpoi(double *s,double *xlam,double *cum,double *ccum)
+/*
+**********************************************************************
+ 
+     void cumpoi(double *s,double *xlam,double *cum,double *ccum)
+                    CUMulative POIsson distribution
+ 
+ 
+                              Function
+ 
+ 
+     Returns the  probability  of  S   or  fewer events in  a   Poisson
+     distribution with mean XLAM.
+ 
+ 
+                              Arguments
+ 
+ 
+     S --> Upper limit of cumulation of the Poisson.
+                                                  S is DOUBLE PRECISION
+ 
+     XLAM --> Mean of the Poisson distribution.
+                                                  XLAM is DOUBLE PRECISION
+ 
+     CUM <-- Cumulative poisson distribution.
+                                        CUM is DOUBLE PRECISION
+ 
+     CCUM <-- Complement of Cumulative poisson distribution.
+                                                  CCUM is DOUBLE PRECISION
+ 
+ 
+                              Method
+ 
+ 
+     Uses formula  26.4.21   of   Abramowitz and  Stegun,  Handbook  of
+     Mathematical   Functions  to reduce   the   cumulative Poisson  to
+     the cumulative chi-square distribution.
+ 
+**********************************************************************
+*/
+{
+static double chi,df;
+/*
+     DF = 2*(S+1), CHI = 2*XLAM.  Note the swapped outputs below:
+     cumchi's first result is stored in CCUM and its second in CUM.
+*/
+    df = 2.0e0*(*s+1.0e0);
+    chi = 2.0e0**xlam;
+    cumchi(&chi,&df,ccum,cum);
+    return;
+}
+void cumt(double *t,double *df,double *cum,double *ccum)
+/*
+**********************************************************************
+ 
+     void cumt(double *t,double *df,double *cum,double *ccum)
+                    CUMulative T-distribution
+ 
+ 
+                              Function
+ 
+ 
+     Computes the integral from -infinity to T of the t-density.
+ 
+ 
+                              Arguments
+ 
+ 
+     T --> Upper limit of integration of the t-density.
+                                                  T is DOUBLE PRECISION
+ 
+     DF --> Degrees of freedom of the t-distribution.
+                                                  DF is DOUBLE PRECISION
+ 
+     CUM <-- Cumulative t-distribution.
+                                                  CUM is DOUBLE PRECISION
+ 
+     CCUM <-- Complement of Cumulative t-distribution.
+                                                  CCUM is DOUBLE PRECISION
+ 
+ 
+                              Method
+ 
+ 
+     Formula 26.5.27   of     Abramowitz  and   Stegun,    Handbook  of
+     Mathematical Functions  is   used   to  reduce the  t-distribution
+     to an incomplete beta.
+ 
+**********************************************************************
+*/
+{
+static double K2 = 0.5e0;
+static double xx,a,oma,tt,yy,dfptt,T1;
+/*
+     cumbet is evaluated at XX = DF/(DF+T*T), YY = T*T/(DF+T*T) with
+     parameters DF/2 and 1/2; T <= 0 gives CUM = A/2, else CCUM = A/2.
+*/
+    tt = *t**t;
+    dfptt = *df+tt;
+    xx = *df/dfptt;
+    yy = tt/dfptt;
+    T1 = 0.5e0**df;
+    cumbet(&xx,&yy,&T1,&K2,&a,&oma);
+    if(!(*t <= 0.0e0)) goto S10;
+    *cum = 0.5e0*a;
+    *ccum = oma+*cum;
+    goto S20;
+S10:
+    *ccum = 0.5e0*a;
+    *cum = oma+*ccum;
+S20:
+    return;
+}
+void cumtnc(double *t,double *df,double *pnonc,double *cum,
+            double *ccum)
+/**********************************************************************
+ 
+     void cumtnc(double *t,double *df,double *pnonc,double *cum,
+                 double *ccum)
+ 
+                  CUMulative Non-Central T-distribution
+ 
+ 
+                               Function
+ 
+ 
+      Computes the integral from -infinity to T of the non-central
+      t-density.
+ 
+ 
+                               Arguments
+ 
+ 
+      T --> Upper limit of integration of the non-central t-density.
+ 
+      DF --> Degrees of freedom of the non-central t-distribution.
+ 
+      PNONC --> Non-centrality parameter of the non-central t distribution.
+ 
+      CUM <-- Cumulative t-distribution.
+ 
+      CCUM <-- Complement of Cumulative t-distribution.
+ 
+ 
+                               Method
+ 
+      Upper tail    of  the  cumulative  noncentral t   using
+      formulae from page 532  of Johnson, Kotz,  Balakrishnan, Continuous
+      Univariate Distributions, Vol 2, 2nd Edition.  Wiley (1995)
+      Negative T is handled by negating T and PNONC and exchanging CUM and CCUM.
+      This implementation starts the calculation at i = lambda,
+      which is near the largest Di.  It then sums forward and backward.
+**********************************************************************/
+{
+#define one 1.0e0
+#define zero 0.0e0
+#define half 0.5e0
+#define two 2.0e0
+#define onep5 1.5e0
+#define conv 1.0e-7
+#define tiny 1.0e-10
+static double alghdf,b,bb,bbcent,bcent,cent,d,dcent,dpnonc,dum1,dum2,e,ecent,
+    halfdf,lambda,lnomx,lnx,omx,pnonc2,s,scent,ss,sscent,t2,term,tt,twoi,x,xi,
+    xlnd,xlne;
+static int ierr;
+static unsigned long qrevs;
+static double T1,T2,T3,T4,T5,T6,T7,T8,T9,T10;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+     Case pnonc essentially zero (|PNONC| <= tiny): delegate to cumt
+*/
+    if(fabs(*pnonc) <= tiny) {
+        cumt(t,df,cum,ccum);
+        return;
+    }
+    qrevs = *t < zero;
+    if(qrevs) {
+        tt = -*t;
+        dpnonc = -*pnonc;
+    }
+    else  {
+        tt = *t;
+        dpnonc = *pnonc;
+    }
+    pnonc2 = dpnonc * dpnonc;
+    t2 = tt * tt;
+    if(fabs(tt) <= tiny) {
+        T1 = -*pnonc;
+        cumnor(&T1,cum,ccum);
+        return;
+    }
+    lambda = half * pnonc2;
+    x = *df / (*df + t2);
+    omx = one - x;
+    lnx = log(x);
+    lnomx = log(omx);
+    halfdf = half * *df;
+    alghdf = gamln(&halfdf);
+/*
+     ******************** Case i = lambda (CENT is at least one)
+*/
+    cent = fifidint(lambda);
+    if(cent < one) cent = one;
+/*
+     Compute d=T(2i) in log space and offset by exp(-lambda)
+*/
+    T2 = cent + one;
+    xlnd = cent * log(lambda) - gamln(&T2) - lambda;
+    dcent = exp(xlnd);
+/*
+     Compute e=t(2i+1) in log space offset by exp(-lambda)
+*/
+    T3 = cent + onep5;
+    xlne = (cent + half) * log(lambda) - gamln(&T3) - lambda;
+    ecent = exp(xlne);
+    if(dpnonc < zero) ecent = -ecent;
+/*
+     Compute bcent=B(2*cent)
+*/
+    T4 = cent + half;
+    bratio(&halfdf,&T4,&x,&omx,&bcent,&dum1,&ierr);
+/*
+     compute bbcent=B(2*cent+1)
+*/
+    T5 = cent + one;
+    bratio(&halfdf,&T5,&x,&omx,&bbcent,&dum2,&ierr);
+/*
+     Case bcent and bbcent are essentially zero
+     Thus t is effectively infinite
+*/
+    if(bcent + bbcent < tiny) {
+        if(qrevs) {
+            *cum = zero;
+            *ccum = one;
+        }
+        else  {
+            *cum = one;
+            *ccum = zero;
+        }
+        return;
+    }
+/*
+     Case bcent and bbcent are essentially one
+     Thus t is effectively zero
+*/
+    if(dum1 + dum2 < tiny) {
+        T6 = -*pnonc;
+        cumnor(&T6,cum,ccum);
+        return;
+    }
+/*
+     First term in ccum is D*B + E*BB
+*/
+    *ccum = dcent * bcent + ecent * bbcent;
+/*
+     compute s(cent) = B(2*(cent+1)) - B(2*cent)
+*/
+    T7 = halfdf + cent + half;
+    T8 = cent + onep5;
+    scent = gamln(&T7) - gamln(&T8) - alghdf + halfdf * lnx + (cent + half) * 
+      lnomx;
+    scent = exp(scent);
+/*
+     compute ss(cent) = B(2*cent+3) - B(2*cent+1)
+*/
+    T9 = halfdf + cent + one;
+    T10 = cent + two;
+    sscent = gamln(&T9) - gamln(&T10) - alghdf + halfdf * lnx + (cent + one) * 
+      lnomx;
+    sscent = exp(sscent);
+/*
+     ******************** Sum Forward (until |term| <= conv * ccum)
+*/
+    xi = cent + one;
+    twoi = two * xi;
+    d = dcent;
+    e = ecent;
+    b = bcent;
+    bb = bbcent;
+    s = scent;
+    ss = sscent;
+S10:
+    b += s;
+    bb += ss;
+    d = lambda / xi * d;
+    e = lambda / (xi + half) * e;
+    term = d * b + e * bb;
+    *ccum += term;
+    s = s * omx * (*df + twoi - one) / (twoi + one);
+    ss = ss * omx * (*df + twoi) / (twoi + two);
+    xi += one;
+    twoi = two * xi;
+    if(fabs(term) > conv * *ccum) goto S10;
+/*
+     ******************** Sum Backward (stops early once xi < half)
+*/
+    xi = cent;
+    twoi = two * xi;
+    d = dcent;
+    e = ecent;
+    b = bcent;
+    bb = bbcent;
+    s = scent * (one + twoi) / ((*df + twoi - one) * omx);
+    ss = sscent * (two + twoi) / ((*df + twoi) * omx);
+S20:
+    b -= s;
+    bb -= ss;
+    d *= (xi / lambda);
+    e *= ((xi + half) / lambda);
+    term = d * b + e * bb;
+    *ccum += term;
+    xi -= one;
+    if(xi < half) goto S30;
+    twoi = two * xi;
+    s = s * (one + twoi) / ((*df + twoi - one) * omx);
+    ss = ss * (two + twoi) / ((*df + twoi) * omx);
+    if(fabs(term) > conv * *ccum) goto S20;
+S30:
+    if(qrevs) {
+        *cum = half * *ccum;
+        *ccum = one - *cum;
+    }
+    else  {
+        *ccum = half * *ccum;
+        *cum = one - *ccum;
+    }
+/*
+     Due to roundoff error the answer may not lie between zero and one
+     Force it to do so
+*/
+    *cum = fifdmax1(fifdmin1(*cum,one),zero);
+    *ccum = fifdmax1(fifdmin1(*ccum,one),zero);
+    return;
+#undef one
+#undef zero
+#undef half
+#undef two
+#undef onep5
+#undef conv
+#undef tiny
+}
+double devlpl(double a[],int *n,double *x)
+/*
+**********************************************************************
+ 
+     double devlpl(double a[],int *n,double *x)
+              Double precision EVALuate a PoLynomial at X
+ 
+ 
+                              Function
+ 
+ 
+     returns
+          A(1) + A(2)*X + ... + A(N)*X**(N-1)
+ 
+ 
+                              Arguments
+ 
+ 
+     A --> Array of coefficients of the polynomial.
+                                        A is DOUBLE PRECISION(N)
+ 
+     N --> Length of A, i.e. degree of the polynomial + 1.
+                                        N is INTEGER
+ 
+     X --> Point at which the polynomial is to be evaluated.
+                                        X is DOUBLE PRECISION
+ 
+**********************************************************************
+*/
+{
+static double devlpl,term;
+static int i;
+/*
+     Horner's scheme: start from A(N) and fold in one lower-order
+     coefficient per step.  N must be >= 1 (a[*n-1] is read first).
+*/
+    term = a[*n-1];
+    for(i= *n-1-1; i>=0; i--) term = a[i]+term**x;
+    devlpl = term;
+    return devlpl;
+}
+double dinvnr(double *p,double *q)
+/*
+**********************************************************************
+ 
+     double dinvnr(double *p,double *q)
+     Double precision NoRmal distribution INVerse
+ 
+ 
+                              Function
+ 
+ 
+     Returns X  such that CUMNOR(X)  =   P,  i.e., the  integral from -
+     infinity to X of (1/SQRT(2*PI)) EXP(-U*U/2) dU is P
+ 
+ 
+                              Arguments
+ 
+ 
+     P --> The probability whose normal deviate is sought.
+                    P is DOUBLE PRECISION
+ 
+     Q --> 1-P
+                    Q is DOUBLE PRECISION
+ 
+ 
+                              Method
+ 
+ 
+     The  rational   function   on  page 95    of Kennedy  and  Gentle,
+     Statistical Computing, Marcel Dekker, NY , 1980 is used as a start
+     value for the Newton method of finding roots.
+ 
+ 
+                              Note
+ 
+ 
+     If P or Q .lt. machine EPS returns +/- DINVNR(EPS)
+     If Newton has not converged after MAXIT steps, the (sign-adjusted)
+     start value is returned instead.
+**********************************************************************
+*/
+{
+#define maxit 100
+#define eps 1.0e-13
+#define r2pi 0.3989422804014326e0
+#define nhalf -0.5e0
+#define dennor(x) (r2pi*exp(nhalf*(x)*(x)))
+static double dinvnr,strtx,xcur,cum,ccum,pp,dx;
+static int i;
+static unsigned long qporq;
+/*
+     Work with PP = min(P,Q); QPORQ records whether PP came from P
+     so the sign of the answer can be restored before returning.
+*/
+/*
+     FIND MINIMUM OF P AND Q
+*/
+    qporq = *p <= *q;
+    if(!qporq) goto S10;
+    pp = *p;
+    goto S20;
+S10:
+    pp = *q;
+S20:
+/*
+     INITIALIZATION STEP
+*/
+    strtx = stvaln(&pp);
+    xcur = strtx;
+/*
+     NEWTON ITERATIONS
+*/
+    for(i=1; i<=maxit; i++) {
+        cumnor(&xcur,&cum,&ccum);
+        dx = (cum-pp)/dennor(xcur);
+        xcur -= dx;
+        if(fabs(dx/xcur) < eps) goto S40;
+    }
+    dinvnr = strtx;
+/*
+     IF WE GET HERE, NEWTON HAS FAILED
+*/
+    if(!qporq) dinvnr = -dinvnr;
+    return dinvnr;
+S40:
+/*
+     IF WE GET HERE, NEWTON HAS SUCCEEDED
+*/
+    dinvnr = xcur;
+    if(!qporq) dinvnr = -dinvnr;
+    return dinvnr;
+#undef maxit
+#undef eps
+#undef r2pi
+#undef nhalf
+#undef dennor
+}
+/* DEFINE DINVR: E0000 keeps the shared static state; IENTRY 0 enters at DINVR, IENTRY 1 at DSTINV */
+static void E0000(int IENTRY,int *status,double *x,double *fx,
+		  unsigned long *qleft,unsigned long *qhi,double *zabsst,
+		  double *zabsto,double *zbig,double *zrelst,
+		  double *zrelto,double *zsmall,double *zstpmu)
+{
+#define qxmon(zx,zy,zz) (int)((zx) <= (zy) && (zy) <= (zz))
+static double absstp,abstol,big,fbig,fsmall,relstp,reltol,small,step,stpmul,xhi,
+    xlb,xlo,xsave,xub,yy;
+static int i99999;
+static unsigned long qbdd,qcond,qdum1,qdum2,qincr,qlim,qok,qup;
+    switch(IENTRY){case 0: goto DINVR; case 1: goto DSTINV;}
+DINVR:
+    if(*status > 0) goto S310;
+    qcond = !qxmon(small,*x,big);
+    if(qcond) ftnstop(" SMALL, X, BIG not monotone in INVR");
+    xsave = *x;
+/*
+     See that SMALL and BIG bound the zero and set QINCR
+*/
+    *x = small;
+/*
+     GET-FUNCTION-VALUE (returns with STATUS 1; re-entry resumes at S10)
+*/
+    i99999 = 1;
+    goto S300;
+S10:
+    fsmall = *fx;
+    *x = big;
+/*
+     GET-FUNCTION-VALUE (returns with STATUS 1; re-entry resumes at S20)
+*/
+    i99999 = 2;
+    goto S300;
+S20:
+    fbig = *fx;
+    qincr = fbig > fsmall;
+    if(!qincr) goto S50;
+    if(fsmall <= 0.0e0) goto S30;
+    *status = -1;
+    *qleft = *qhi = 1;
+    return;
+S30:
+    if(fbig >= 0.0e0) goto S40;
+    *status = -1;
+    *qleft = *qhi = 0;
+    return;
+S40:
+    goto S80;
+S50:
+    if(fsmall >= 0.0e0) goto S60;
+    *status = -1;
+    *qleft = 1;
+    *qhi = 0;
+    return;
+S60:
+    if(fbig <= 0.0e0) goto S70;
+    *status = -1;
+    *qleft = 0;
+    *qhi = 1;
+    return;
+S80:
+S70:
+    *x = xsave;
+    step = fifdmax1(absstp,relstp*fabs(*x));
+/*
+      YY = F(X) - Y
+     GET-FUNCTION-VALUE (returns with STATUS 1; re-entry resumes at S90)
+*/
+    i99999 = 3;
+    goto S300;
+S90:
+    yy = *fx;
+    if(!(yy == 0.0e0)) goto S100;
+    *status = 0;
+    qok = 1;
+    return;
+S100:
+    qup = qincr && yy < 0.0e0 || !qincr && yy > 0.0e0;
+/*
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+     HANDLE CASE IN WHICH WE MUST STEP HIGHER
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+*/
+    if(!qup) goto S170;
+    xlb = xsave;
+    xub = fifdmin1(xlb+step,big);
+    goto S120;
+S110:
+    if(qcond) goto S150;
+S120:
+/*
+      YY = F(XUB) - Y
+*/
+    *x = xub;
+/*
+     GET-FUNCTION-VALUE (returns with STATUS 1; re-entry resumes at S130)
+*/
+    i99999 = 4;
+    goto S300;
+S130:
+    yy = *fx;
+    qbdd = qincr && yy >= 0.0e0 || !qincr && yy <= 0.0e0;
+    qlim = xub >= big;
+    qcond = qbdd || qlim;
+    if(qcond) goto S140;
+    step = stpmul*step;
+    xlb = xub;
+    xub = fifdmin1(xlb+step,big);
+S140:
+    goto S110;
+S150:
+    if(!(qlim && !qbdd)) goto S160;
+    *status = -1;
+    *qleft = 0;
+    *qhi = !qincr;
+    *x = big;
+    return;
+S160:
+    goto S240;
+S170:
+/*
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+     HANDLE CASE IN WHICH WE MUST STEP LOWER
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+*/
+    xub = xsave;
+    xlb = fifdmax1(xub-step,small);
+    goto S190;
+S180:
+    if(qcond) goto S220;
+S190:
+/*
+      YY = F(XLB) - Y
+*/
+    *x = xlb;
+/*
+     GET-FUNCTION-VALUE (returns with STATUS 1; re-entry resumes at S200)
+*/
+    i99999 = 5;
+    goto S300;
+S200:
+    yy = *fx;
+    qbdd = qincr && yy <= 0.0e0 || !qincr && yy >= 0.0e0;
+    qlim = xlb <= small;
+    qcond = qbdd || qlim;
+    if(qcond) goto S210;
+    step = stpmul*step;
+    xub = xlb;
+    xlb = fifdmax1(xub-step,small);
+S210:
+    goto S180;
+S220:
+    if(!(qlim && !qbdd)) goto S230;
+    *status = -1;
+    *qleft = 1;
+    *qhi = qincr;
+    *x = small;
+    return;
+S240:
+S230:
+    dstzr(&xlb,&xub,&abstol,&reltol);
+/*
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+     IF WE REACH HERE, XLB AND XUB BOUND THE ZERO OF F.
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+*/
+    *status = 0;
+    goto S260;
+S250:
+    if(!(*status == 1)) goto S290;
+S260:
+    dzror(status,x,fx,&xlo,&xhi,&qdum1,&qdum2);
+    if(!(*status == 1)) goto S280;
+/*
+     GET-FUNCTION-VALUE (returns with STATUS 1; re-entry resumes at S270)
+*/
+    i99999 = 6;
+    goto S300;
+S280:
+S270:
+    goto S250;
+S290:
+    *x = xlo;
+    *status = 0;
+    return;
+DSTINV:
+    small = *zsmall;
+    big = *zbig;
+    absstp = *zabsst;
+    relstp = *zrelst;
+    stpmul = *zstpmu;
+    abstol = *zabsto;
+    reltol = *zrelto;
+    return;
+S300:
+/*
+     TO GET-FUNCTION-VALUE: set STATUS to 1 and return to the caller
+*/
+    *status = 1;
+    return;
+S310:
+    switch((int)i99999){case 1: goto S10;case 2: goto S20;case 3: goto S90;case 
+      4: goto S130;case 5: goto S200;case 6: goto S270;default: break;}
+#undef qxmon
+}
+void dinvr(int *status,double *x,double *fx,
+	   unsigned long *qleft,unsigned long *qhi)
+/*
+**********************************************************************
+ 
+     void dinvr(int *status,double *x,double *fx,
+           unsigned long *qleft,unsigned long *qhi)
+
+          Double precision
+          bounds the zero of the function and invokes zror
+                    Reverse Communication
+ 
+ 
+                              Function
+ 
+ 
+     Bounds the    function  and  invokes  ZROR   to perform the   zero
+     finding.  STINVR  must  have   been  called  before this   routine
+     in order to set its parameters.
+     This C routine forwards to E0000 with IENTRY = 0 and NULL for the
+     arguments that only the DSTINV entry reads.
+                              Arguments
+ 
+ 
+     STATUS <--> At the beginning of a zero finding problem, STATUS
+                 should be set to 0 and INVR invoked.  (The value
+                 of parameters other than X will be ignored on this call.)
+ 
+                 When INVR needs the function evaluated, it will set
+                 STATUS to 1 and return.  The value of the function
+                 should be set in FX and INVR again called without
+                 changing any of its other parameters.
+ 
+                 When INVR has finished without error, it will return
+                 with STATUS 0.  In that case X is approximately a root
+                 of F(X).
+ 
+                 If INVR cannot bound the function, it returns status
+                 -1 and sets QLEFT and QHI.
+                         INTEGER STATUS
+ 
+     X <-- The value of X at which F(X) is to be evaluated.
+                         DOUBLE PRECISION X
+ 
+     FX --> The value of F(X) calculated when INVR returns with
+            STATUS = 1.
+                         DOUBLE PRECISION FX
+ 
+     QLEFT <-- Defined only if INVR returns STATUS -1.  In that
+          case it is .TRUE. If the stepping search terminated
+          unsuccessfully at SMALL.  If it is .FALSE. the search
+          terminated unsuccessfully at BIG.
+                    QLEFT is LOGICAL
+ 
+     QHI <-- Defined only if INVR returns STATUS -1.  In that
+          case it is .TRUE. if F(X) .GT. Y at the termination
+          of the search and .FALSE. if F(X) .LT. Y at the
+          termination of the search.
+                    QHI is LOGICAL
+ 
+**********************************************************************
+*/
+{
+    E0000(0,status,x,fx,qleft,qhi,NULL,NULL,NULL,NULL,NULL,NULL,NULL);
+}
+void dstinv(double *zsmall,double *zbig,double *zabsst,
+	    double *zrelst,double *zstpmu,double *zabsto,
+	    double *zrelto)
+/*
+**********************************************************************
+
+      void dstinv(double *zsmall,double *zbig,double *zabsst,
+            double *zrelst,double *zstpmu,double *zabsto,
+            double *zrelto)
+
+      Double Precision - SeT INverse finder - Reverse Communication
+
+
+                              Function
+
+
+     Hands the search interval, step sizes and tolerances to the
+     reverse-communication inverse finder INVR.  This routine is a
+     thin entry point: it calls E0000 with entry selector 1, passes
+     the seven configuration arguments and NULL for the remaining
+     pointer arguments.
+
+     The problem INVR solves: given a monotone function F, find X
+     such that F(X) = Y.  F must be monotone, otherwise the results
+     of QMFINV are undefined.  QINCR must be .TRUE. if F is
+     non-decreasing and .FALSE. if F is non-increasing.
+
+     QMFINV returns .TRUE. if and only if F(SMALL) and F(BIG)
+     bracket Y, i.e.,
+          QINCR is .TRUE.  and F(SMALL) .LE. Y .LE. F(BIG)   or
+          QINCR is .FALSE. and F(BIG)   .LE. Y .LE. F(SMALL)
+
+     If QMFINV returns .TRUE., the X returned satisfies the
+     following condition.  Let
+               TOL(X) = MAX(ABSTOL,RELTOL*ABS(X))
+     then if QINCR is .TRUE.,
+          F(X-TOL(X)) .LE. Y .LE. F(X+TOL(X))
+     and if QINCR is .FALSE.,
+          F(X-TOL(X)) .GE. Y .GE. F(X+TOL(X))
+
+
+                              Arguments
+
+
+     ZSMALL --> The left endpoint of the interval to be searched
+                for a solution.
+                    ZSMALL is DOUBLE PRECISION
+
+     ZBIG --> The right endpoint of the interval to be searched
+              for a solution.
+                    ZBIG is DOUBLE PRECISION
+
+     ZABSST, ZRELST --> Absolute and relative step.  The initial
+              step size in the search is
+              MAX(ABSSTP,RELSTP*ABS(X)).  See Method.
+                    ZABSST is DOUBLE PRECISION
+                    ZRELST is DOUBLE PRECISION
+
+     ZSTPMU --> When a step doesn't bound the zero, the step size
+              is multiplied by STPMUL and another step taken.  A
+              popular value is 2.0.
+                    ZSTPMU is DOUBLE PRECISION
+
+     ZABSTO, ZRELTO --> Two numbers that determine the accuracy of
+              the solution.  See Function (TOL(X)) for a precise
+              definition.
+                    ZABSTO is DOUBLE PRECISION
+                    ZRELTO is DOUBLE PRECISION
+
+
+                              Method
+
+
+     Compares F(X) with Y for the input value of X, then uses QINCR
+     to determine whether to step left or right to bound the
+     desired X.  The initial step size is
+          MAX(ABSSTP,RELSTP*ABS(X)) for the input value of X.
+     Iteratively steps right or left until it bounds X; at each
+     step which doesn't bound X, the step size is enlarged (see
+     ZSTPMU).  The routine is careful never to step beyond SMALL
+     or BIG.  If it hasn't bounded X at SMALL or BIG, QMFINV
+     returns .FALSE. after setting QLEFT and QHI.
+
+     If X is successfully bounded then Algorithm R of the paper
+     'Two Efficient Algorithms with Guaranteed Convergence for
+     Finding a Zero of a Function' by J. C. P. Bus and
+     T. J. Dekker in ACM Transactions on Mathematical
+     Software, Volume 1, No. 4 page 330 (DEC. '75) is employed
+     to find the zero of the function F(X)-Y.  This is routine
+     QRZERO.
+
+**********************************************************************
+*/
+{
+    /* Entry 1 of E0000: the five leading pointer slots are NULL. */
+    E0000(1,
+          NULL,NULL,NULL,NULL,NULL,
+          zabsst,zabsto,zbig,zrelst,zrelto,zsmall,zstpmu);
+}
+double dt1(double *p,double *q,double *df)
+/*
+**********************************************************************
+ 
+     double dt1(double *p,double *q,double *df)
+     Double precision Initialize Approximation to
+           INVerse of the cumulative T distribution
+ 
+ 
+                              Function
+ 
+ 
+     Returns an initial approximation to the inverse of the
+     cumulative T distribution function for probability P and DF
+     degrees of freedom.
+ 
+     The magnitude is built from ABS(DINVNR(P,Q)) plus four
+     correction terms; term I is
+          DEVLPL(COEF(I),IDEG(I),X*X)*X / (DF**I * DENOM(I))
+     The result is negated when P is not .GE. 0.5.
+ 
+ 
+                              Arguments
+ 
+ 
+     P --> The p-value whose inverse from the T distribution is
+          desired.
+                    P is DOUBLE PRECISION
+ 
+     Q --> 1-P.
+                    Q is DOUBLE PRECISION
+ 
+     DF --> Degrees of freedom of the T distribution.
+                    DF is DOUBLE PRECISION
+ 
+ 
+                              Note
+ 
+ 
+     Only the constant tables are static.  The working variables
+     are automatic so that this routine keeps no scratch state of
+     its own between calls.
+ 
+**********************************************************************
+*/
+{
+/* Row i holds ideg[i] coefficients handed to devlpl together with x*x. */
+static double coef[4][5] = {
+    {1.0e0,1.0e0,0.0e0,0.0e0,0.0e0},
+    {3.0e0,16.0e0,5.0e0,0.0e0,0.0e0},
+    {-15.0e0,17.0e0,19.0e0,3.0e0,0.0e0},
+    {-945.0e0,-1920.0e0,1482.0e0,776.0e0,79.0e0}
+};
+static double denom[4] = {
+    4.0e0,96.0e0,384.0e0,92160.0e0
+};
+static int ideg[4] = {
+    2,3,4,5
+};
+    double denpow,sum,term,x,xx;
+    int i;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    x = fabs(dinvnr(p,q));
+    xx = x*x;
+    sum = x;
+    denpow = 1.0e0;                 /* running power of *df */
+    for(i=0; i<4; i++) {
+        term = devlpl(&coef[i][0],&ideg[i],&xx)*x;
+        denpow *= *df;
+        sum += (term/(denpow*denom[i]));
+    }
+    /* sum is a magnitude; the sign follows which side of 0.5 p lies on. */
+    return (*p >= 0.5e0) ? sum : -sum;
+}
+/* DEFINE DZROR */
+/*
+     E0001 -- shared body of dzror (IENTRY 0) and dstzr (IENTRY 1).
+
+     IENTRY 1 (label DSTZR) copies *zxlo, *zxhi, *zabstl and *zreltl
+     into the static variables xxlo, xxhi, abstol and reltol and
+     returns.  None of the other pointer arguments is touched.
+
+     IENTRY 0 (label DZROR) advances the zero search.  Whenever a
+     function value is needed the abscissa is stored in *x, the resume
+     point is remembered in i99999, *status is set to 1 and the routine
+     returns.  The next call with *status > 0 jumps back to the
+     remembered point and reads the value from *fx.  The search ends
+     with *status 0 or -1.
+
+     Every working variable is static so that the search state
+     survives between calls; the same storage is therefore shared by
+     all callers of this routine.
+*/
+static void E0001(int IENTRY,int *status,double *x,double *fx,
+		  double *xlo,double *xhi,unsigned long *qleft,
+		  unsigned long *qhi,double *zabstl,double *zreltl,
+		  double *zxhi,double *zxlo)
+{
+/* Half of MAX(abstol,reltol*|zx|); compared against half the bracket width. */
+#define ftol(zx) (0.5e0*fifdmax1(abstol,reltol*fabs((zx))))
+static double a,abstol,b,c,d,fa,fb,fc,fd,fda,fdb,m,mb,p,q,reltol,tol,w,xxhi,xxlo;
+static int ext,i99999;
+static unsigned long first,qrzero;
+/*
+     Any IENTRY other than 0 or 1 falls out of the switch and runs the
+     DZROR entry as well.
+*/
+    switch(IENTRY){case 0: goto DZROR; case 1: goto DSTZR;}
+DZROR:
+/*
+     A positive status means this is a re-entry carrying a function
+     value in *fx; resume where the previous call left off.
+*/
+    if(*status > 0) goto S280;
+/*
+     Fresh search: load the interval stored by DSTZR and ask for F at
+     its left end.
+*/
+    *xlo = xxlo;
+    *xhi = xxhi;
+    b = *x = *xlo;
+/*
+     GET-FUNCTION-VALUE
+*/
+    i99999 = 1;
+    goto S270;
+S10:
+/*
+     F at the left end is now in *fx; ask for F at the right end.
+*/
+    fb = *fx;
+    *xlo = *xhi;
+    a = *x = *xlo;
+/*
+     GET-FUNCTION-VALUE
+*/
+    i99999 = 2;
+    goto S270;
+S20:
+/*
+     Check that F(ZXLO) < 0 < F(ZXHI)  or
+                F(ZXLO) > 0 > F(ZXHI)
+     Here fb holds F at the left end and *fx holds F at the right end.
+     If both are strictly negative (or both strictly positive) the
+     search fails with status -1; *qhi records the common sign (0 for
+     negative, 1 for positive) and *qleft the result of the comparison
+     between the two values.
+*/
+    if(!(fb < 0.0e0)) goto S40;
+    if(!(*fx < 0.0e0)) goto S30;
+    *status = -1;
+    *qleft = *fx < fb;
+    *qhi = 0;
+    return;
+S40:
+S30:
+    if(!(fb > 0.0e0)) goto S60;
+    if(!(*fx > 0.0e0)) goto S50;
+    *status = -1;
+    *qleft = *fx > fb;
+    *qhi = 1;
+    return;
+S60:
+S50:
+    fa = *fx;
+    first = 1;
+S70:
+/*
+     (Re)start the bookkeeping: c/fc take over a/fa and the step
+     counter ext is cleared.
+*/
+    c = a;
+    fc = fa;
+    ext = 0;
+S80:
+/*
+     Keep b as the point with the smaller |F|: when |fc| < |fb| the
+     roles of b and c are exchanged (saving a/fa in d/fd first if c
+     differs from a).
+*/
+    if(!(fabs(fc) < fabs(fb))) goto S100;
+    if(!(c != a)) goto S90;
+    d = a;
+    fd = fa;
+S90:
+    a = b;
+    fa = fb;
+    *xlo = c;
+    b = *xlo;
+    fb = fc;
+    c = a;
+    fc = fa;
+S100:
+/*
+     m is the midpoint of b and c.  The iteration stops (S240) once
+     |m-b| is no larger than tol.
+*/
+    tol = ftol(*xlo);
+    m = (c+b)*.5e0;
+    mb = m-b;
+    if(!(fabs(mb) > tol)) goto S240;
+/*
+     After more than three consecutive steps counted in ext, step
+     straight to the midpoint.
+*/
+    if(!(ext > 3)) goto S110;
+    w = mb;
+    goto S190;
+S110:
+/*
+     Build the candidate step w = p/q.  The first pass uses a, b only;
+     later passes also use the older point d through the divided
+     differences fda and fdb.
+*/
+    tol = fifdsign(tol,mb);
+    p = (b-a)*fb;
+    if(!first) goto S120;
+    q = fa-fb;
+    first = 0;
+    goto S130;
+S120:
+    fdb = (fd-fb)/(d-b);
+    fda = (fd-fa)/(d-a);
+    p = fda*p;
+    q = fdb*fa-fda*fb;
+S130:
+/*
+     Normalise so that p is non-negative.
+*/
+    if(!(p < 0.0e0)) goto S140;
+    p = -p;
+    q = -q;
+S140:
+    if(ext == 3) p *= 2.0e0;
+/*
+     Choose the step: tol when p/q would be too small, p/q when it
+     stays short of the midpoint, otherwise the midpoint step mb.
+*/
+    if(!(p*1.0e0 == 0.0e0 || p <= q*tol)) goto S150;
+    w = tol;
+    goto S180;
+S150:
+    if(!(p < mb*q)) goto S160;
+    w = p/q;
+    goto S170;
+S160:
+    w = mb;
+S190:
+S180:
+S170:
+/*
+     Shift the history (d <- a <- b), move b by w and ask for F there.
+*/
+    d = a;
+    fd = fa;
+    a = b;
+    fa = fb;
+    b += w;
+    *xlo = b;
+    *x = *xlo;
+/*
+     GET-FUNCTION-VALUE
+*/
+    i99999 = 3;
+    goto S270;
+S200:
+/*
+     New value at b.  If fb and fc no longer have opposite signs,
+     restart from S70; otherwise update ext (cleared after a midpoint
+     step, incremented after any other) and continue at S80.
+*/
+    fb = *fx;
+    if(!(fc*fb >= 0.0e0)) goto S210;
+    goto S70;
+S210:
+    if(!(w == mb)) goto S220;
+    ext = 0;
+    goto S230;
+S220:
+    ext += 1;
+S230:
+    goto S80;
+S240:
+/*
+     Finished: report c in *xhi.  Status is 0 when fb and fc straddle
+     zero (either may equal zero on the appropriate side), -1
+     otherwise.
+*/
+    *xhi = c;
+    qrzero = fc >= 0.0e0 && fb <= 0.0e0 || fc < 0.0e0 && fb >= 0.0e0;
+    if(!qrzero) goto S250;
+    *status = 0;
+    goto S260;
+S250:
+    *status = -1;
+S260:
+    return;
+DSTZR:
+/*
+     Configuration entry: remember the interval and tolerances.
+*/
+    xxlo = *zxlo;
+    xxhi = *zxhi;
+    abstol = *zabstl;
+    reltol = *zreltl;
+    return;
+S270:
+/*
+     TO GET-FUNCTION-VALUE
+     Ask the caller to evaluate F at *x and call again.
+*/
+    *status = 1;
+    return;
+S280:
+/*
+     Resume after a function evaluation.  An i99999 value other than
+     1, 2 or 3 simply returns.
+*/
+    switch((int)i99999){case 1: goto S10;case 2: goto S20;case 3: goto S200;
+      default: break;}
+#undef ftol
+}
+void dzror(int *status,double *x,double *fx,double *xlo,
+	   double *xhi,unsigned long *qleft,unsigned long *qhi)
+/*
+**********************************************************************
+ 
+     void dzror(int *status,double *x,double *fx,double *xlo,
+           double *xhi,unsigned long *qleft,unsigned long *qhi)
+
+     Double precision ZeRo of a function -- Reverse Communication
+ 
+ 
+                              Function
+ 
+ 
+     Performs the zero finding.  DSTZR must have been called before
+     this routine in order to set its parameters.  The work is done
+     by E0001, which this routine enters with selector 0.
+ 
+ 
+                              Arguments
+ 
+ 
+     STATUS <--> At the beginning of a zero finding problem, STATUS
+                 should be set to 0 and ZROR invoked.  (The value
+                 of other parameters will be ignored on this call.)
+ 
+                 When ZROR needs the function evaluated, it will set
+                 STATUS to 1 and return.  The value of the function
+                 should be set in FX and ZROR again called without
+                 changing any of its other parameters.
+ 
+                 When ZROR has finished without error, it will return
+                 with STATUS 0.  In that case (XLO,XHI) bound the
+                 answer.
+ 
+                 If ZROR finds an error (which implies that F(XLO)-Y
+                 and F(XHI)-Y have the same sign), it returns
+                 STATUS -1.  In this case, XLO and XHI are undefined.
+                         INTEGER STATUS
+ 
+     X <-- The value of X at which F(X) is to be evaluated.
+                         DOUBLE PRECISION X
+ 
+     FX --> The value of F(X) calculated when ZROR returns with
+            STATUS = 1.
+                         DOUBLE PRECISION FX
+ 
+     XLO <-- When ZROR returns with STATUS = 0, XLO bounds the
+             interval in X containing the solution below.
+                         DOUBLE PRECISION XLO
+ 
+     XHI <-- When ZROR returns with STATUS = 0, XHI bounds the
+             interval in X containing the solution above.
+                         DOUBLE PRECISION XHI
+ 
+     QLEFT <-- .TRUE. if the stepping search terminated
+                unsuccessfully at XLO.  If it is .FALSE. the search
+                terminated unsuccessfully at XHI.
+                    QLEFT is LOGICAL
+ 
+     QHI <-- .TRUE. if F(X) .GT. Y at the termination of the
+              search and .FALSE. if F(X) .LT. Y at the
+              termination of the search.
+                    QHI is LOGICAL
+ 
+**********************************************************************
+*/
+{
+    /* Entry 0 of E0001: the four configuration slots are NULL. */
+    E0001(0,
+          status,x,fx,xlo,xhi,qleft,qhi,
+          NULL,NULL,NULL,NULL);
+}
+void dstzr(double *zxlo,double *zxhi,double *zabstl,double *zreltl)
+/*
+**********************************************************************
+
+     void dstzr(double *zxlo,double *zxhi,double *zabstl,double *zreltl)
+     Double precision SeT ZeRo finder - Reverse communication version
+
+
+                              Function
+
+
+     Sets quantities needed by ZROR.  The values are passed to E0001
+     through its entry 1; every other pointer argument of E0001 is
+     given as NULL.  The function of ZROR and the quantities set are
+     described here.
+
+     Concise Description - Given a function F
+     find XLO such that F(XLO) = 0.
+
+          More Precise Description -
+
+     Input condition.  F is a double precision function of a single
+     double precision argument and XLO and XHI are such that
+          F(XLO)*F(XHI)  .LE.  0.0
+
+     If the input condition is met, QRZERO returns .TRUE.
+     and output values of XLO and XHI satisfy the following
+          F(XLO)*F(XHI)  .LE. 0.
+          ABS(F(XLO))  .LE. ABS(F(XHI))
+          ABS(XLO-XHI)  .LE. TOL(X)
+     where
+          TOL(X) = MAX(ABSTOL,RELTOL*ABS(X))
+
+     If this algorithm does not find XLO and XHI satisfying
+     these conditions then QRZERO returns .FALSE.  This
+     implies that the input condition was not met.
+
+
+                              Arguments
+
+
+     ZXLO --> The left endpoint of the interval to be
+           searched for a solution.
+                    ZXLO is DOUBLE PRECISION
+
+     ZXHI --> The right endpoint of the interval to be
+           searched for a solution.
+                    ZXHI is DOUBLE PRECISION
+
+     ZABSTL, ZRELTL --> Two numbers (ABSTOL, RELTOL) that determine
+                      the accuracy of the solution.  See Function
+                      (TOL(X)) for a precise definition.
+                    ZABSTL is DOUBLE PRECISION
+                    ZRELTL is DOUBLE PRECISION
+
+
+                              Method
+
+
+     Algorithm R of the paper 'Two Efficient Algorithms with
+     Guaranteed Convergence for Finding a Zero of a Function'
+     by J. C. P. Bus and T. J. Dekker in ACM Transactions on
+     Mathematical Software, Volume 1, no. 4 page 330
+     (Dec. '75) is employed to find the zero of F(X)-Y.
+
+**********************************************************************
+*/
+{
+    /* Entry 1 of E0001: the seven search-time slots are NULL. */
+    E0001(1,
+          NULL,NULL,NULL,NULL,NULL,NULL,NULL,
+          zabstl,zreltl,zxhi,zxlo);
+}
+double erf1(double *x)
+/*
+-----------------------------------------------------------------------
+             EVALUATION OF THE REAL ERROR FUNCTION
+
+     Returns the error function of *X.  Four ranges of ABS(X) are
+     treated separately:
+
+          ABS(X) .LE. 0.5           X*(TOP/BOT), TOP and BOT being
+                                    polynomials in X*X built from A, B
+          0.5 .LT. ABS(X) .LE. 4    1 - EXP(-X*X)*P(ABS(X))/Q(ABS(X))
+          4 .LT. ABS(X) .LT. 5.8    1 - EXP(-X*X)*(C - R(T)/(X*X*S(T)))
+                                    /ABS(X)   WITH T = 1/(X*X)
+          ABS(X) .GE. 5.8           SIGN(1,X)
+
+     In the two middle ranges the value is computed for ABS(X) and
+     negated afterwards when X is negative.
+
+     Only the coefficient tables are static; the working variables
+     are automatic so that this routine keeps no scratch state of its
+     own between calls.
+-----------------------------------------------------------------------
+*/
+{
+static const double c = .564189583547756e0;
+static const double a[5] = {
+    .771058495001320e-04,-.133733772997339e-02,.323076579225834e-01,
+    .479137145607681e-01,.128379167095513e+00
+};
+static const double b[3] = {
+    .301048631703895e-02,.538971687740286e-01,.375795757275549e+00
+};
+static const double p[8] = {
+    -1.36864857382717e-07,5.64195517478974e-01,7.21175825088309e+00,
+    4.31622272220567e+01,1.52989285046940e+02,3.39320816734344e+02,
+    4.51918953711873e+02,3.00459261020162e+02
+};
+static const double q[8] = {
+    1.00000000000000e+00,1.27827273196294e+01,7.70001529352295e+01,
+    2.77585444743988e+02,6.38980264465631e+02,9.31354094850610e+02,
+    7.90950925327898e+02,3.00459260956983e+02
+};
+static const double r[5] = {
+    2.10144126479064e+00,2.62370141675169e+01,2.13688200555087e+01,
+    4.65807828718470e+00,2.82094791773523e-01
+};
+static const double s[4] = {
+    9.41537750555460e+01,1.87114811799590e+02,9.90191814623914e+01,
+    1.80124575948747e+01
+};
+    double ax,bot,result,t,top,x2;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    ax = fabs(*x);
+/*
+                     ABS(X) .LE. 0.5
+     The tests are written as !(ax > limit) so that a NaN argument
+     takes this first branch.
+*/
+    if(!(ax > 0.5e0)) {
+        t = *x**x;
+        top = (((a[0]*t+a[1])*t+a[2])*t+a[3])*t+a[4]+1.0e0;
+        bot = ((b[0]*t+b[1])*t+b[2])*t+1.0e0;
+        return *x*(top/bot);
+    }
+    if(!(ax > 4.0e0)) {
+/*
+                  0.5 .LT. ABS(X) .LE. 4
+*/
+        top = ((((((p[0]*ax+p[1])*ax+p[2])*ax+p[3])*ax+p[4])*ax+p[5])*ax+p[6])
+          *ax+p[7];
+        bot = ((((((q[0]*ax+q[1])*ax+q[2])*ax+q[3])*ax+q[4])*ax+q[5])*ax+q[6])
+          *ax+q[7];
+        result = 0.5e0+(0.5e0-exp(-(*x**x))*top/bot);
+    }
+    else if(!(ax >= 5.8e0)) {
+/*
+                  4 .LT. ABS(X) .LT. 5.8
+*/
+        x2 = *x**x;
+        t = 1.0e0/x2;
+        top = (((r[0]*t+r[1])*t+r[2])*t+r[3])*t+r[4];
+        bot = (((s[0]*t+s[1])*t+s[2])*t+s[3])*t+1.0e0;
+        result = (c-top/(x2*bot))/ax;
+        result = 0.5e0+(0.5e0-exp(-x2)*result);
+    }
+    else {
+/*
+                     ABS(X) .GE. 5.8
+*/
+        return fifdsign(1.0e0,*x);
+    }
+    /* The middle ranges were evaluated for ABS(X); restore the sign. */
+    if(*x < 0.0e0) result = -result;
+    return result;
+}
+double erfc1(int *ind,double *x)
+/*
+-----------------------------------------------------------------------
+         EVALUATION OF THE COMPLEMENTARY ERROR FUNCTION
+ 
+          ERFC1(IND,X) = ERFC(X)            IF IND = 0
+          ERFC1(IND,X) = EXP(X*X)*ERFC(X)   OTHERWISE
+
+     Three ranges of ABS(X) are treated separately (.LE. 0.5,
+     .LE. 4, .GT. 4).  For ABS(X) .GT. 4 two limits are returned
+     directly:
+          X .LE. -5.6                 2        (2*EXP(X*X) IF IND .NE. 0)
+          IND = 0 AND (X .GT. 100 OR
+          X*X .GT. -EXPARG(1))        0
+
+     Only the coefficient tables and the EXPARG selector are static;
+     the working variables are automatic so that this routine keeps
+     no scratch state of its own between calls.
+-----------------------------------------------------------------------
+*/
+{
+static const double c = .564189583547756e0;
+static const double a[5] = {
+    .771058495001320e-04,-.133733772997339e-02,.323076579225834e-01,
+    .479137145607681e-01,.128379167095513e+00
+};
+static const double b[3] = {
+    .301048631703895e-02,.538971687740286e-01,.375795757275549e+00
+};
+static const double p[8] = {
+    -1.36864857382717e-07,5.64195517478974e-01,7.21175825088309e+00,
+    4.31622272220567e+01,1.52989285046940e+02,3.39320816734344e+02,
+    4.51918953711873e+02,3.00459261020162e+02
+};
+static const double q[8] = {
+    1.00000000000000e+00,1.27827273196294e+01,7.70001529352295e+01,
+    2.77585444743988e+02,6.38980264465631e+02,9.31354094850610e+02,
+    7.90950925327898e+02,3.00459260956983e+02
+};
+static const double r[5] = {
+    2.10144126479064e+00,2.62370141675169e+01,2.13688200555087e+01,
+    4.65807828718470e+00,2.82094791773523e-01
+};
+static const double s[4] = {
+    9.41537750555460e+01,1.87114811799590e+02,9.90191814623914e+01,
+    1.80124575948747e+01
+};
+static int K1 = 1;
+    double ax,bot,result,t,top,w;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    ax = fabs(*x);
+/*
+                     ABS(X) .LE. 0.5
+     Written as !(ax > 0.5) so that a NaN argument takes this branch.
+*/
+    if(!(ax > 0.5e0)) {
+        t = *x**x;
+        top = (((a[0]*t+a[1])*t+a[2])*t+a[3])*t+a[4]+1.0e0;
+        bot = ((b[0]*t+b[1])*t+b[2])*t+1.0e0;
+        result = 0.5e0+(0.5e0-*x*(top/bot));
+        if(*ind != 0) result = exp(t)*result;
+        return result;
+    }
+    if(!(ax > 4.0e0)) {
+/*
+                  0.5 .LT. ABS(X) .LE. 4
+*/
+        top = ((((((p[0]*ax+p[1])*ax+p[2])*ax+p[3])*ax+p[4])*ax+p[5])*ax+p[6])
+          *ax+p[7];
+        bot = ((((((q[0]*ax+q[1])*ax+q[2])*ax+q[3])*ax+q[4])*ax+q[5])*ax+q[6])
+          *ax+q[7];
+        result = top/bot;
+    }
+    else {
+/*
+                      ABS(X) .GT. 4
+*/
+        if(*x <= -5.6e0) {
+/*
+             LIMIT VALUE FOR LARGE NEGATIVE X
+*/
+            result = 2.0e0;
+            if(*ind != 0) result = 2.0e0*exp(*x**x);
+            return result;
+        }
+        if(*ind == 0) {
+/*
+             LIMIT VALUE FOR LARGE POSITIVE X
+                       WHEN IND = 0
+*/
+            if(*x > 100.0e0) return 0.0e0;
+            if(*x**x > -exparg(&K1)) return 0.0e0;
+        }
+        t = pow(1.0e0/ *x,2.0);
+        top = (((r[0]*t+r[1])*t+r[2])*t+r[3])*t+r[4];
+        bot = (((s[0]*t+s[1])*t+s[2])*t+s[3])*t+1.0e0;
+        result = (c-t*top/bot)/ax;
+    }
+/*
+                      FINAL ASSEMBLY
+     At this point result holds EXP(X*X)*ERFC(ABS(X)).
+*/
+    if(*ind != 0) {
+        if(*x < 0.0e0) result = 2.0e0*exp(*x**x)-result;
+        return result;
+    }
+/*
+     The earlier code split X*X into T and a remainder E = W-T and
+     scaled by (0.5+(0.5-E)).  With W and T both double that remainder
+     is exactly zero and the scale factor exactly one, so the product
+     below is the same value.
+*/
+    w = *x**x;
+    result = exp(-w)*result;
+    if(*x < 0.0e0) result = 2.0e0-result;
+    return result;
+}
+double esum(int *mu,double *x)
+/*
+-----------------------------------------------------------------------
+                    EVALUATION OF EXP(MU + X)
+
+     The sum MU + X is exponentiated directly only when
+          X .GT. 0,  MU .LE. 0  AND  MU + X .GE. 0,   OR
+          X .LE. 0,  MU .GE. 0  AND  MU + X .LE. 0.
+     In every other case the result is formed as EXP(MU)*EXP(X).
+
+     The working variable is automatic, so this routine keeps no
+     state between calls.
+-----------------------------------------------------------------------
+*/
+{
+    double w;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    if(*x > 0.0e0) {
+        if(!(*mu > 0)) {
+            w = (double)*mu+*x;
+            if(!(w < 0.0e0)) return exp(w);
+        }
+    }
+    else {
+        if(!(*mu < 0)) {
+            w = (double)*mu+*x;
+            if(!(w > 0.0e0)) return exp(w);
+        }
+    }
+    /* Fall-back: exponentiate the two parts separately. */
+    w = *mu;
+    return exp(w)*exp(*x);
+}
+double exparg(int *l)
+/*
+--------------------------------------------------------------------
+     IF L = 0 THEN  EXPARG(L) = THE LARGEST POSITIVE W FOR WHICH
+     EXP(W) CAN BE COMPUTED.
+ 
+     IF L IS NONZERO THEN  EXPARG(L) = THE LARGEST NEGATIVE W FOR
+     WHICH THE COMPUTED VALUE OF EXP(W) IS NONZERO.
+ 
+     NOTE... ONLY AN APPROXIMATE VALUE FOR EXPARG(L) IS NEEDED.
+
+     THE VALUE RETURNED IS 0.99999*M*LN(B) WHERE B = IPMPAR(4) AND
+     M = IPMPAR(10) FOR L = 0, M = IPMPAR(9)-1 OTHERWISE.
+     (PRESUMABLY B IS THE FLOATING POINT BASE AND IPMPAR(9),
+     IPMPAR(10) THE EXPONENT LIMITS -- CONFIRM AGAINST IPMPAR.)
+
+     THE WORKING VARIABLES ARE AUTOMATIC, SO THIS ROUTINE KEEPS NO
+     SCRATCH STATE OF ITS OWN BETWEEN CALLS.
+--------------------------------------------------------------------
+*/
+{
+static int K1 = 4;
+static int K2 = 9;
+static int K3 = 10;
+    double lnb;
+    int b,m;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    b = ipmpar(&K1);
+    /* Tabulated logarithms for bases 2, 8 and 16; log() otherwise. */
+    switch(b) {
+    case 2:
+        lnb = .69314718055995e0;
+        break;
+    case 8:
+        lnb = 2.0794415416798e0;
+        break;
+    case 16:
+        lnb = 2.7725887222398e0;
+        break;
+    default:
+        lnb = log((double)b);
+        break;
+    }
+    if(*l == 0) m = ipmpar(&K3);
+    else m = ipmpar(&K2)-1;
+    return 0.99999e0*((double)m*lnb);
+}
+double fpser(double *a,double *b,double *x,double *eps)
+/*
+-----------------------------------------------------------------------
+ 
+                 EVALUATION OF I (A,B)
+                                X
+ 
+          FOR B .LT. MIN(EPS,EPS*A) AND X .LE. 0.5.
+ 
+     THE RESULT IS  (B/A) * X**A * (1 + A*S)  WHERE S IS THE SUM OF
+     X**N/(A+N) FOR N = 1,2,...  SUMMED UNTIL A TERM IS NO LARGER IN
+     MAGNITUDE THAN EPS/A.  X**A IS TAKEN AS 1 WHEN A .LE. 1.E-3*EPS,
+     AND 0 IS RETURNED WHEN A*LN(X) .LT. EXPARG(1).
+
+     THE WORKING VARIABLES ARE AUTOMATIC, SO THIS ROUTINE KEEPS NO
+     SCRATCH STATE OF ITS OWN BETWEEN CALLS.
+-----------------------------------------------------------------------
+*/
+{
+static int K1 = 1;
+    double an,c,result,s,t,tol;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+                  SET  FPSER = X**A
+*/
+    result = 1.0e0;
+    if(!(*a <= 1.e-3**eps)) {
+        t = *a*log(*x);
+        /* X**A would underflow: the whole result is zero. */
+        if(t < exparg(&K1)) return 0.0e0;
+        result = exp(t);
+    }
+/*
+                NOTE THAT 1/B(A,B) = B
+*/
+    result = *b/ *a*result;
+    tol = *eps/ *a;
+    an = *a+1.0e0;
+    t = *x;
+    s = t/an;
+    /* Accumulate X**N/(A+N) until the newest term drops to tol. */
+    do {
+        an += 1.0e0;
+        t = *x*t;
+        c = t/an;
+        s += c;
+    } while(fabs(c) > tol);
+    result *= (1.0e0+*a*s);
+    return result;
+}
+double gam1(double *a)
+/*
+     ------------------------------------------------------------------
+     COMPUTATION OF 1/GAMMA(A+1) - 1  FOR -0.5 .LE. A .LE. 1.5
+
+     THE ARGUMENT IS REDUCED TO  T = A  WHEN A .LE. 0.5  AND
+     T = A - 1  OTHERWISE.  T = 0 GIVES 0; NEGATIVE T USES THE
+     RATIONAL FUNCTION BUILT FROM R, S1, S2 AND POSITIVE T THE ONE
+     BUILT FROM P AND Q.
+
+     THE WORKING VARIABLES ARE AUTOMATIC, SO THIS ROUTINE KEEPS NO
+     STATE BETWEEN CALLS.
+     ------------------------------------------------------------------
+*/
+{
+static const double s1 = .273076135303957e+00;
+static const double s2 = .559398236957378e-01;
+static const double p[7] = {
+    .577215664901533e+00,-.409078193005776e+00,-.230975380857675e+00,
+    .597275330452234e-01,.766968181649490e-02,-.514889771323592e-02,
+    .589597428611429e-03
+};
+static const double q[5] = {
+    .100000000000000e+01,.427569613095214e+00,.158451672430138e+00,
+    .261132021441447e-01,.423244297896961e-02
+};
+static const double r[9] = {
+    -.422784335098468e+00,-.771330383816272e+00,-.244757765222226e+00,
+    .118378989872749e+00,.930357293360349e-03,-.118290993445146e-01,
+    .223047661158249e-02,.266505979058923e-03,-.132674909766242e-03
+};
+    double bot,d,t,top,w;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    t = *a;
+    d = *a-0.5e0;
+    if(d > 0.0e0) t = d-0.5e0;
+    if(t < 0.0e0) {
+        top = (((((((r[8]*t+r[7])*t+r[6])*t+r[5])*t+r[4])*t+r[3])*t+r[2])*t
+          +r[1])*t+r[0];
+        bot = (s2*t+s1)*t+1.0e0;
+        w = top/bot;
+        if(d > 0.0e0) return t*w/ *a;
+        return *a*(w+0.5e0+0.5e0);
+    }
+    if(t == 0.0e0) return 0.0e0;
+    /* Remaining case: t positive (a NaN argument also lands here). */
+    top = (((((p[6]*t+p[5])*t+p[4])*t+p[3])*t+p[2])*t+p[1])*t+p[0];
+    bot = (((q[4]*t+q[3])*t+q[2])*t+q[1])*t+1.0e0;
+    w = top/bot;
+    if(d > 0.0e0) return t/ *a*(w-0.5e0-0.5e0);
+    return *a*w;
+}
+void gaminv(double *a,double *x,double *x0,double *p,double *q,
+	    int *ierr)
+/*
+ ----------------------------------------------------------------------
+            INVERSE INCOMPLETE GAMMA RATIO FUNCTION
+ 
+     GIVEN POSITIVE A, AND NONNEGATIVE P AND Q WHERE P + Q = 1.
+     THEN X IS COMPUTED WHERE P(A,X) = P AND Q(A,X) = Q. SCHRODER
+     ITERATION IS EMPLOYED. THE ROUTINE ATTEMPTS TO COMPUTE X
+     TO 10 SIGNIFICANT DIGITS IF THIS IS POSSIBLE FOR THE
+     PARTICULAR COMPUTER ARITHMETIC BEING USED.
+ 
+                      ------------
+ 
+     X IS A VARIABLE. IF P = 0 THEN X IS ASSIGNED THE VALUE 0,
+     AND IF Q = 0 THEN X IS SET TO THE LARGEST FLOATING POINT
+     NUMBER AVAILABLE. OTHERWISE, GAMINV ATTEMPTS TO OBTAIN
+     A SOLUTION FOR P(A,X) = P AND Q(A,X) = Q. IF THE ROUTINE
+     IS SUCCESSFUL THEN THE SOLUTION IS STORED IN X.
+ 
+     X0 IS AN OPTIONAL INITIAL APPROXIMATION FOR X. IF THE USER
+     DOES NOT WISH TO SUPPLY AN INITIAL APPROXIMATION, THEN SET
+     X0 .LE. 0.
+ 
+     IERR IS A VARIABLE THAT REPORTS THE STATUS OF THE RESULTS.
+     WHEN THE ROUTINE TERMINATES, IERR HAS ONE OF THE FOLLOWING
+     VALUES ...
+ 
+       IERR =  0    THE SOLUTION WAS OBTAINED. ITERATION WAS
+                    NOT USED.
+       IERR.GT.0    THE SOLUTION WAS OBTAINED. IERR ITERATIONS
+                    WERE PERFORMED.
+       IERR = -2    (INPUT ERROR) A .LE. 0
+       IERR = -3    NO SOLUTION WAS OBTAINED. THE RATIO Q/A
+                    IS TOO LARGE.
+       IERR = -4    (INPUT ERROR) P + Q .NE. 1
+       IERR = -6    20 ITERATIONS WERE PERFORMED. THE MOST
+                    RECENT VALUE OBTAINED FOR X IS GIVEN.
+                    THIS CANNOT OCCUR IF X0 .LE. 0.
+       IERR = -7    ITERATION FAILED. NO VALUE IS GIVEN FOR X.
+                    THIS MAY OCCUR WHEN X IS APPROXIMATELY 0.
+       IERR = -8    A VALUE FOR X HAS BEEN OBTAINED, BUT THE
+                    ROUTINE IS NOT CERTAIN OF ITS ACCURACY.
+                    ITERATION CANNOT BE PERFORMED IN THIS
+                    CASE. IF X0 .LE. 0, THIS CAN OCCUR ONLY
+                    WHEN P OR Q IS APPROXIMATELY 0. IF X0 IS
+                    POSITIVE THEN THIS CAN OCCUR WHEN A IS
+                    EXCEEDINGLY CLOSE TO X AND A IS EXTREMELY
+                    LARGE (SAY A .GE. 1.E20).
+
+                      ------------
+
+     IMPLEMENTATION NOTES
+
+     X IS SET TO 0 BEFORE THE INPUT CHECKS, SO THE INPUT ERROR
+     RETURNS (IERR = -2, -4) LEAVE X = 0.
+
+     IERR DOUBLES AS THE ITERATION COUNTER: IT IS SET TO 0 AFTER
+     THE INPUT CHECKS AND INCREMENTED ONCE PER SCHRODER STEP.
+
+     ALL WORKING VARIABLES ARE DECLARED STATIC, SO THEIR STORAGE
+     IS SHARED BY EVERY CALL OF THIS ROUTINE.
+ ----------------------------------------------------------------------
+     WRITTEN BY ALFRED H. MORRIS, JR.
+        NAVAL SURFACE WEAPONS CENTER
+        DAHLGREN, VIRGINIA
+     -------------------
+*/
+{
+/* a0..a3 and b1..b4 are the coefficients of the rational term at S100. */
+static double a0 = 3.31125922108741e0;
+static double a1 = 11.6616720288968e0;
+static double a2 = 4.28342155967104e0;
+static double a3 = .213623493715853e0;
+static double b1 = 6.61053765625462e0;
+static double b2 = 6.40691597760039e0;
+static double b3 = 1.27364489782223e0;
+static double b4 = .036117081018842e0;
+static double c = .577215664901533e0;
+static double ln10 = 2.302585e0;
+static double tol = 1.e-5;
+/* Two-entry tables indexed by iop-1 (see the choice of iop below). */
+static double amin[2] = {
+    500.0e0,100.0e0
+};
+static double bmin[2] = {
+    1.e-28,1.e-13
+};
+static double dmin[2] = {
+    1.e-06,1.e-04
+};
+static double emin[2] = {
+    2.e-03,6.e-03
+};
+static double eps0[2] = {
+    1.e-10,1.e-08
+};
+static int K1 = 1;
+static int K2 = 2;
+static int K3 = 3;
+static int K8 = 0;
+static double am1,amax,ap1,ap2,ap3,apn,b,c1,c2,c3,c4,c5,d,e,e2,eps,g,h,pn,qg,qn,
+    r,rta,s,s2,sum,t,u,w,xmax,xmin,xn,y,z;
+static int iop;
+static double T4,T5,T6,T7,T9;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+     ****** E, XMIN, AND XMAX ARE MACHINE DEPENDENT CONSTANTS.
+            E IS THE SMALLEST NUMBER FOR WHICH 1.0 + E .GT. 1.0.
+            XMIN IS THE SMALLEST POSITIVE NUMBER AND XMAX IS THE
+            LARGEST POSITIVE NUMBER.
+*/
+    e = spmpar(&K1);
+    xmin = spmpar(&K2);
+    xmax = spmpar(&K3);
+    *x = 0.0e0;
+/*
+     INPUT CHECKS: A MUST BE POSITIVE AND P + Q MUST EQUAL 1 TO
+     WITHIN E.
+*/
+    if(*a <= 0.0e0) goto S300;
+    t = *p+*q-1.e0;
+    if(fabs(t) > e) goto S320;
+    *ierr = 0;
+/*
+     TRIVIAL CASES: P = 0 RETURNS X = 0, Q = 0 RETURNS XMAX, AND
+     A = 1 HAS A CLOSED FORM (S280).
+*/
+    if(*p == 0.0e0) return;
+    if(*q == 0.0e0) goto S270;
+    if(*a == 1.0e0) goto S280;
+    e2 = 2.0e0*e;
+    amax = 0.4e-10/(e*e);
+/*
+     IOP SELECTS THE TABLE ENTRY: 1 WHEN E .LE. 1.E-10, ELSE 2.
+*/
+    iop = 1;
+    if(e > 1.e-10) iop = 2;
+    eps = eps0[iop-1];
+/*
+     A POSITIVE X0 IS USED AS THE STARTING VALUE AS IS.
+*/
+    xn = *x0;
+    if(*x0 > 0.0e0) goto S160;
+/*
+        SELECTION OF THE INITIAL APPROXIMATION XN OF X
+                       WHEN A .LT. 1
+*/
+    if(*a > 1.0e0) goto S80;
+    T4 = *a+1.0e0;
+    g = Xgamm(&T4);
+    qg = *q*g;
+    if(qg == 0.0e0) goto S360;
+    b = qg/ *a;
+    if(qg > 0.6e0**a) goto S40;
+    if(*a >= 0.30e0 || b < 0.35e0) goto S10;
+    t = exp(-(b+c));
+    u = t*exp(t);
+    xn = t*exp(u);
+    goto S160;
+S10:
+    if(b >= 0.45e0) goto S40;
+    if(b == 0.0e0) goto S360;
+    y = -log(b);
+    s = 0.5e0+(0.5e0-*a);
+    z = log(y);
+    t = y-s*z;
+    if(b < 0.15e0) goto S20;
+    xn = y-s*log(t)-log(1.0e0+s/(t+1.0e0));
+    goto S220;
+S20:
+    if(b <= 0.01e0) goto S30;
+    u = ((t+2.0e0*(3.0e0-*a))*t+(2.0e0-*a)*(3.0e0-*a))/((t+(5.0e0-*a))*t+2.0e0);
+    xn = y-s*log(t)-log(u);
+    goto S220;
+S30:
+/*
+     EXPANSION IN POWERS OF 1/Y.  THIS LABEL IS ALSO ENTERED FROM
+     THE A .GT. 1 BRANCH (AFTER S110) WITH S, Y AND Z ALREADY SET.
+*/
+    c1 = -(s*z);
+    c2 = -(s*(1.0e0+c1));
+    c3 = s*((0.5e0*c1+(2.0e0-*a))*c1+(2.5e0-1.5e0**a));
+    c4 = -(s*(((c1/3.0e0+(2.5e0-1.5e0**a))*c1+((*a-6.0e0)**a+7.0e0))*c1+(
+      (11.0e0**a-46.0)**a+47.0e0)/6.0e0));
+    c5 = -(s*((((-(c1/4.0e0)+(11.0e0**a-17.0e0)/6.0e0)*c1+((-(3.0e0**a)+13.0e0)*
+      *a-13.0e0))*c1+0.5e0*(((2.0e0**a-25.0e0)**a+72.0e0)**a-61.0e0))*c1+((
+      (25.0e0**a-195.0e0)**a+477.0e0)**a-379.0e0)/12.0e0));
+    xn = (((c5/y+c4)/y+c3)/y+c2)/y+c1+y;
+    if(*a > 1.0e0) goto S220;
+/*
+     FOR A .LE. 1 AND B NO LARGER THAN BMIN THE APPROXIMATION IS
+     RETURNED WITHOUT ITERATION (IERR STAYS 0).
+*/
+    if(b > bmin[iop-1]) goto S220;
+    *x = xn;
+    return;
+S40:
+    if(b**q > 1.e-8) goto S50;
+    xn = exp(-(*q/ *a+c));
+    goto S70;
+S50:
+    if(*p <= 0.9e0) goto S60;
+    T5 = -*q;
+    xn = exp((alnrel(&T5)+gamln1(a))/ *a);
+    goto S70;
+S60:
+    xn = exp(log(*p*g)/ *a);
+S70:
+    if(xn == 0.0e0) goto S310;
+    t = 0.5e0+(0.5e0-xn/(*a+1.0e0));
+    xn /= t;
+    goto S160;
+S80:
+/*
+        SELECTION OF THE INITIAL APPROXIMATION XN OF X
+                       WHEN A .GT. 1
+*/
+/*
+     W IS THE LOG OF THE SMALLER OF P AND Q.
+*/
+    if(*q <= 0.5e0) goto S90;
+    w = log(*p);
+    goto S100;
+S90:
+    w = log(*q);
+S100:
+    t = sqrt(-(2.0e0*w));
+    s = t-(((a3*t+a2)*t+a1)*t+a0)/((((b4*t+b3)*t+b2)*t+b1)*t+1.0e0);
+    if(*q > 0.5e0) s = -s;
+    rta = sqrt(*a);
+    s2 = s*s;
+    xn = *a+s*rta+(s2-1.0e0)/3.0e0+s*(s2-7.0e0)/(36.0e0*rta)-((3.0e0*s2+7.0e0)*
+      s2-16.0e0)/(810.0e0**a)+s*((9.0e0*s2+256.0e0)*s2-433.0e0)/(38880.0e0**a*
+      rta);
+    xn = fifdmax1(xn,0.0e0);
+/*
+     FOR A .GE. AMIN THE APPROXIMATION IS ACCEPTED OUTRIGHT WHEN
+     ABS(1 - X/A) .LE. DMIN.
+*/
+    if(*a < amin[iop-1]) goto S110;
+    *x = xn;
+    d = 0.5e0+(0.5e0-*x/ *a);
+    if(fabs(d) <= dmin[iop-1]) return;
+S110:
+    if(*p <= 0.5e0) goto S130;
+    if(xn < 3.0e0**a) goto S220;
+    y = -(w+gamln(a));
+    d = fifdmax1(2.0e0,*a*(*a-1.0e0));
+    if(y < ln10*d) goto S120;
+    s = 1.0e0-*a;
+    z = log(y);
+    goto S30;
+S120:
+    t = *a-1.0e0;
+    T6 = -(t/(xn+1.0e0));
+    xn = y+t*log(xn)-alnrel(&T6);
+    T7 = -(t/(xn+1.0e0));
+    xn = y+t*log(xn)-alnrel(&T7);
+    goto S220;
+S130:
+    ap1 = *a+1.0e0;
+    if(xn > 0.70e0*ap1) goto S170;
+    w += gamln(&ap1);
+    if(xn > 0.15e0*ap1) goto S140;
+    ap2 = *a+2.0e0;
+    ap3 = *a+3.0e0;
+/*
+     NOTE(REVIEW): THE FIRST UPDATE BELOW STARTS FROM THE CURRENT
+     *X (0, OR XN IF A .GE. AMIN), NOT FROM XN.
+*/
+    *x = exp((w+*x)/ *a);
+    *x = exp((w+*x-log(1.0e0+*x/ap1*(1.0e0+*x/ap2)))/ *a);
+    *x = exp((w+*x-log(1.0e0+*x/ap1*(1.0e0+*x/ap2)))/ *a);
+    *x = exp((w+*x-log(1.0e0+*x/ap1*(1.0e0+*x/ap2*(1.0e0+*x/ap3))))/ *a);
+    xn = *x;
+    if(xn > 1.e-2*ap1) goto S140;
+    if(xn <= emin[iop-1]*ap1) return;
+    goto S170;
+S140:
+    apn = ap1;
+    t = xn/apn;
+    sum = 1.0e0+t;
+S150:
+/*
+     ACCUMULATE THE SERIES UNTIL THE NEWEST TERM IS .LE. 1.E-4.
+*/
+    apn += 1.0e0;
+    t *= (xn/apn);
+    sum += t;
+    if(t > 1.e-4) goto S150;
+    t = w-log(sum);
+    xn = exp((xn+t)/ *a);
+    xn *= (1.0e0-(*a*log(xn)-xn-t)/(*a-xn));
+    goto S170;
+S160:
+/*
+                 SCHRODER ITERATION USING P
+*/
+    if(*p > 0.5e0) goto S220;
+S170:
+    if(*p <= 1.e10*xmin) goto S350;
+    am1 = *a-0.5e0-0.5e0;
+S180:
+/*
+     FOR VERY LARGE A (ABOVE AMAX) GIVE UP WITH IERR = -8 WHEN XN
+     IS WITHIN 2*E (RELATIVE) OF A.
+*/
+    if(*a <= amax) goto S190;
+    d = 0.5e0+(0.5e0-xn/ *a);
+    if(fabs(d) <= e2) goto S350;
+S190:
+/*
+     AT MOST 20 STEPS; IERR COUNTS THEM.
+*/
+    if(*ierr >= 20) goto S330;
+    *ierr += 1;
+    gratio(a,&xn,&pn,&qn,&K8);
+    if(pn == 0.0e0 || qn == 0.0e0) goto S350;
+    r = rcomp(a,&xn);
+    if(r == 0.0e0) goto S350;
+    t = (pn-*p)/r;
+    w = 0.5e0*(am1-xn);
+/*
+     THE SECOND-ORDER CORRECTION (S200) IS USED ONLY WHEN BOTH T
+     AND W*T ARE SMALL; OTHERWISE A FIRST-ORDER STEP IS TAKEN.
+*/
+    if(fabs(t) <= 0.1e0 && fabs(w*t) <= 0.1e0) goto S200;
+    *x = xn*(1.0e0-t);
+    if(*x <= 0.0e0) goto S340;
+    d = fabs(t);
+    goto S210;
+S200:
+    h = t*(1.0e0+w*t);
+    *x = xn*(1.0e0-h);
+    if(*x <= 0.0e0) goto S340;
+    if(fabs(w) >= 1.0e0 && fabs(w)*t*t <= eps) return;
+    d = fabs(h);
+S210:
+/*
+     D IS THE RELATIVE CHANGE OF THIS STEP.  STOP WHEN D .LE. EPS,
+     OR WHEN D .LE. TOL AND PN MATCHES P TO WITHIN TOL*P.
+*/
+    xn = *x;
+    if(d > tol) goto S180;
+    if(d <= eps) return;
+    if(fabs(*p-pn) <= tol**p) return;
+    goto S180;
+S220:
+/*
+                 SCHRODER ITERATION USING Q
+*/
+    if(*q <= 1.e10*xmin) goto S350;
+    am1 = *a-0.5e0-0.5e0;
+S230:
+    if(*a <= amax) goto S240;
+    d = 0.5e0+(0.5e0-xn/ *a);
+    if(fabs(d) <= e2) goto S350;
+S240:
+    if(*ierr >= 20) goto S330;
+    *ierr += 1;
+    gratio(a,&xn,&pn,&qn,&K8);
+    if(pn == 0.0e0 || qn == 0.0e0) goto S350;
+    r = rcomp(a,&xn);
+    if(r == 0.0e0) goto S350;
+    t = (*q-qn)/r;
+    w = 0.5e0*(am1-xn);
+    if(fabs(t) <= 0.1e0 && fabs(w*t) <= 0.1e0) goto S250;
+    *x = xn*(1.0e0-t);
+    if(*x <= 0.0e0) goto S340;
+    d = fabs(t);
+    goto S260;
+S250:
+    h = t*(1.0e0+w*t);
+    *x = xn*(1.0e0-h);
+    if(*x <= 0.0e0) goto S340;
+    if(fabs(w) >= 1.0e0 && fabs(w)*t*t <= eps) return;
+    d = fabs(h);
+S260:
+    xn = *x;
+    if(d > tol) goto S230;
+    if(d <= eps) return;
+    if(fabs(*q-qn) <= tol**q) return;
+    goto S230;
+S270:
+/*
+                       SPECIAL CASES
+*/
+/*
+     Q = 0
+*/
+    *x = xmax;
+    return;
+S280:
+/*
+     A = 1:  X = -LN(Q), COMPUTED AS -ALNREL(-P) WHEN Q .GE. 0.9.
+*/
+    if(*q < 0.9e0) goto S290;
+    T9 = -*p;
+    *x = -alnrel(&T9);
+    return;
+S290:
+    *x = -log(*q);
+    return;
+S300:
+/*
+                       ERROR RETURN
+*/
+    *ierr = -2;
+    return;
+S310:
+    *ierr = -3;
+    return;
+S320:
+    *ierr = -4;
+    return;
+S330:
+    *ierr = -6;
+    return;
+S340:
+/*
+     THE LAST STEP PRODUCED A NON-POSITIVE *X, WHICH IS LEFT IN PLACE.
+*/
+    *ierr = -7;
+    return;
+S350:
+/*
+     RETURN THE CURRENT ITERATE WITH THE ACCURACY WARNING.
+*/
+    *x = xn;
+    *ierr = -8;
+    return;
+S360:
+    *x = xmax;
+    *ierr = -8;
+    return;
+}
+double gamln(double *a)
+/*
+-----------------------------------------------------------------------
+            EVALUATION OF LN(GAMMA(A)) FOR POSITIVE A
+
+     A .LE. 0.8            GAMLN1(A) - LN(A)
+     0.8 .LT. A .LE. 2.25  GAMLN1(A - 1)
+     2.25 .LT. A .LT. 10   A IS STEPPED DOWN N = INT(A - 1.25) TIMES,
+                           COLLECTING THE PRODUCT W OF THE REDUCED
+                           VALUES; RESULT IS GAMLN1(T - 1) + LN(W)
+     A .GE. 10             D + W + (A - 0.5)*(LN(A) - 1) WITH W A
+                           POLYNOMIAL IN 1/A**2 DIVIDED BY A
+
+     THE WORKING VARIABLES ARE AUTOMATIC, SO THIS ROUTINE KEEPS NO
+     SCRATCH STATE OF ITS OWN BETWEEN CALLS.
+-----------------------------------------------------------------------
+     WRITTEN BY ALFRED H. MORRIS
+          NAVAL SURFACE WARFARE CENTER
+          DAHLGREN, VIRGINIA
+--------------------------
+     D = 0.5*(LN(2*PI) - 1)
+--------------------------
+*/
+{
+static const double c0 = .833333333333333e-01;
+static const double c1 = -.277777777760991e-02;
+static const double c2 = .793650666825390e-03;
+static const double c3 = -.595202931351870e-03;
+static const double c4 = .837308034031215e-03;
+static const double c5 = -.165322962780713e-02;
+static const double d = .418938533204673e0;
+    double shifted,t,w;
+    int i,n;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    /* Written as !(a > limit) so that a NaN argument takes this branch. */
+    if(!(*a > 0.8e0)) return gamln1(a)-log(*a);
+    if(!(*a > 2.25e0)) {
+        t = *a-0.5e0-0.5e0;
+        return gamln1(&t);
+    }
+    if(!(*a >= 10.0e0)) {
+        n = (int)(*a - 1.25e0);
+        t = *a;
+        w = 1.0e0;
+        for(i=1; i<=n; i++) {
+            t -= 1.0e0;
+            w = t*w;
+        }
+        shifted = t-1.0e0;
+        return gamln1(&shifted)+log(w);
+    }
+    t = pow(1.0e0/ *a,2.0);
+    w = (((((c5*t+c4)*t+c3)*t+c2)*t+c1)*t+c0)/ *a;
+    return d+w+(*a-0.5e0)*(log(*a)-1.0e0);
+}
+double gamln1(double *a)
+/*
+-----------------------------------------------------------------------
+     LN(GAMMA(1 + A)), INTENDED FOR -0.2 .LE. A .LE. 1.25
+-----------------------------------------------------------------------
+     A ....... POINTER TO THE ARGUMENT.  IT IS ONLY READ AND ITS RANGE
+               IS NOT CHECKED.
+     RETURNS . FOR A .GE. 0.6 :  V*R(V)/S(V)  WITH  V = A - 1
+               OTHERWISE      :  -A*P(A)/Q(A)
+               WHERE P, Q, R, S ARE THE POLYNOMIALS WHOSE COEFFICIENTS
+               ARE TABULATED BELOW.  Q AND S HAVE CONSTANT TERM 1.
+-----------------------------------------------------------------------
+*/
+{
+static double p0 = .577215664901533e+00;
+static double p1 = .844203922187225e+00;
+static double p2 = -.168860593646662e+00;
+static double p3 = -.780427615533591e+00;
+static double p4 = -.402055799310489e+00;
+static double p5 = -.673562214325671e-01;
+static double p6 = -.271935708322958e-02;
+static double q1 = .288743195473681e+01;
+static double q2 = .312755088914843e+01;
+static double q3 = .156875193295039e+01;
+static double q4 = .361951990101499e+00;
+static double q5 = .325038868253937e-01;
+static double q6 = .667465618796164e-03;
+static double r0 = .422784335098467e+00;
+static double r1 = .848044614534529e+00;
+static double r2 = .565221050691933e+00;
+static double r3 = .156513060486551e+00;
+static double r4 = .170502484022650e-01;
+static double r5 = .497958207639485e-03;
+static double s1 = .124313399877507e+01;
+static double s2 = .548042109832463e+00;
+static double s3 = .101552187439830e+00;
+static double s4 = .713309612391000e-02;
+static double s5 = .116165475989616e-03;
+    double v,numer,denom;
+
+    if(*a >= 0.6e0) {
+/*
+     UPPER PART OF THE RANGE, EXPANDED ABOUT A = 1
+*/
+        v = *a-0.5e0-0.5e0;
+        numer = ((((r5*v+r4)*v+r3)*v+r2)*v+r1)*v+r0;
+        denom = ((((s5*v+s4)*v+s3)*v+s2)*v+s1)*v+1.0e0;
+        return v*(numer/denom);
+    }
+/*
+     LOWER PART OF THE RANGE, EXPANDED ABOUT A = 0
+*/
+    v = *a;
+    numer = (((((p6*v+p5)*v+p4)*v+p3)*v+p2)*v+p1)*v+p0;
+    denom = (((((q6*v+q5)*v+q4)*v+q3)*v+q2)*v+q1)*v+1.0e0;
+    return -(v*(numer/denom));
+}
+double Xgamm(double *a)
+/*
+-----------------------------------------------------------------------
+ 
+         EVALUATION OF THE GAMMA FUNCTION FOR REAL ARGUMENTS
+ 
+                           -----------
+ 
+     GAMMA(A) IS ASSIGNED THE VALUE 0 WHEN THE GAMMA FUNCTION CANNOT
+     BE COMPUTED.
+ 
+     ARGUMENT
+        A --> POINTER TO THE ARGUMENT.  *A IS ONLY READ.
+ 
+     RETURN VALUE
+        GAMMA(*A), OR 0 IN ANY OF THE FOLLOWING CASES
+          - ABS(A) .GE. 1000
+          - ABS(A) .LT. 15, A .LE. 0, AND THE PRODUCT T ACCUMULATED
+            FROM A, A+1, ... IS EXACTLY 0
+          - ABS(A) .LT. 15, A .LT. 1, ABS(T) .LT. 1.E-30 AND
+            1/T WOULD OVERFLOW
+          - A .LE. -15 AND THE SINE FACTOR S IS EXACTLY 0
+          - ABS(A) .GE. 15 AND THE LOGARITHM OF THE RESULT EXCEEDS
+            0.99999*EXPARG(0)
+        WHEN ABS(A) .LT. 15, A .LT. 1, ABS(T) .LT. 1.E-30 AND 1/T
+        DOES NOT OVERFLOW, THE VALUE RETURNED IS SIMPLY 1/T.
+ 
+     ALL WORKING VARIABLES ARE DECLARED STATIC, SO THEY ARE SHARED
+     BETWEEN CALLS AND THE ROUTINE IS NOT REENTRANT.
+ 
+-----------------------------------------------------------------------
+     WRITTEN BY ALFRED H. MORRIS, JR.
+          NAVAL SURFACE WEAPONS CENTER
+          DAHLGREN, VIRGINIA
+-----------------------------------------------------------------------
+*/
+{
+static double d = .41893853320467274178e0;
+static double pi = 3.1415926535898e0;
+static double r1 = .820756370353826e-03;
+static double r2 = -.595156336428591e-03;
+static double r3 = .793650663183693e-03;
+static double r4 = -.277777777770481e-02;
+static double r5 = .833333333333333e-01;
+static double p[7] = {
+    .539637273585445e-03,.261939260042690e-02,.204493667594920e-01,
+    .730981088720487e-01,.279648642639792e+00,.553413866010467e+00,1.0e0
+};
+static double q[7] = {
+    -.832979206704073e-03,.470059485860584e-02,.225211131035340e-01,
+    -.170458969313360e+00,-.567902761974940e-01,.113062953091122e+01,1.0e0
+};
+static int K2 = 3; /* selector for spmpar(): 3 requests the largest magnitude */
+static int K3 = 0; /* argument handed to exparg(), which is defined elsewhere */
+static double Xgamm,bot,g,lnx,s,t,top,w,x,z;
+static int i,j,m,n,T1;
+/*
+     ..
+     .. Executable Statements ..
+*/
+    Xgamm = 0.0e0; /* default result; returned unchanged on every failure path */
+    x = *a;
+    if(fabs(*a) >= 15.0e0) goto S110;
+/*
+-----------------------------------------------------------------------
+            EVALUATION OF GAMMA(A) FOR ABS(A) .LT. 15
+-----------------------------------------------------------------------
+*/
+    t = 1.0e0;
+    m = fifidint(*a)-1; /* fifidint() is defined elsewhere; presumably truncates to an integer - confirm */
+/*
+     LET T BE THE PRODUCT OF A-J WHEN A .GE. 2
+*/
+    T1 = m;
+    if(T1 < 0) goto S40;
+    else if(T1 == 0) goto S30;
+    else  goto S10;
+S10:
+    for(j=1; j<=m; j++) {
+        x -= 1.0e0;
+        t = x*t;
+    }
+S30:
+    x -= 1.0e0; /* reached for M .GE. 0, both directly and by falling out of the loop above */
+    goto S80;
+S40:
+/*
+     LET T BE THE PRODUCT OF A+J WHEN A .LT. 1
+*/
+    t = *a;
+    if(*a > 0.0e0) goto S70;
+    m = -m-1;
+    if(m == 0) goto S60;
+    for(j=1; j<=m; j++) {
+        x += 1.0e0;
+        t = x*t;
+    }
+S60:
+    x += (0.5e0+0.5e0);
+    t = x*t;
+    if(t == 0.0e0) return Xgamm; /* Xgamm is still 0 here */
+S70:
+/*
+     THE FOLLOWING CODE CHECKS IF 1/T CAN OVERFLOW. THIS
+     CODE MAY BE OMITTED IF DESIRED.
+     IF ABS(T) .LT. 1.E-30 THE ROUTINE RETURNS HERE, EITHER WITH 0
+     (OVERFLOW) OR WITH 1/T; THE RATIONAL APPROXIMATION BELOW IS
+     THEN NOT EVALUATED.
+*/
+    if(fabs(t) >= 1.e-30) goto S80;
+    if(fabs(t)*spmpar(&K2) <= 1.0001e0) return Xgamm;
+    Xgamm = 1.0e0/t;
+    return Xgamm;
+S80:
+/*
+     COMPUTE GAMMA(1 + X) FOR  0 .LE. X .LT. 1
+     P AND Q ARE EVALUATED BY HORNER'S RULE; P[0] AND Q[0] ARE THE
+     COEFFICIENTS OF THE HIGHEST POWER OF X.
+*/
+    top = p[0];
+    bot = q[0];
+    for(i=1; i<7; i++) {
+        top = p[i]+x*top;
+        bot = q[i]+x*bot;
+    }
+    Xgamm = top/bot;
+/*
+     TERMINATION
+     MULTIPLY BY T WHEN A .GE. 1, DIVIDE BY T WHEN A .LT. 1.
+*/
+    if(*a < 1.0e0) goto S100;
+    Xgamm *= t;
+    return Xgamm;
+S100:
+    Xgamm /= t;
+    return Xgamm;
+S110:
+/*
+-----------------------------------------------------------------------
+            EVALUATION OF GAMMA(A) FOR ABS(A) .GE. 15
+-----------------------------------------------------------------------
+*/
+    if(fabs(*a) >= 1.e3) return Xgamm;
+    if(*a > 0.0e0) goto S120;
+    x = -*a; /* from here on X = ABS(A); S is used by the final division at the end */
+    n = (long)(x);
+    t = x-(double)n;
+    if(t > 0.9e0) t = 1.0e0-t;
+    s = sin(pi*t)/pi;
+    if(fifmod(n,2) == 0) s = -s; /* fifmod() is defined elsewhere; presumably the integer remainder - confirm */
+    if(s == 0.0e0) return Xgamm;
+S120:
+/*
+     COMPUTE THE MODIFIED ASYMPTOTIC SUM
+*/
+    t = 1.0e0/(x*x);
+    g = ((((r1*t+r2)*t+r3)*t+r4)*t+r5)/x;
+/*
+     ONE MAY REPLACE THE NEXT STATEMENT WITH  LNX = ALOG(X)
+     BUT LESS ACCURACY WILL NORMALLY BE OBTAINED.
+*/
+    lnx = log(x);
+/*
+     FINAL ASSEMBLY
+     W AND G ARE BOTH DOUBLE HERE, SO T = G-W IS 0 FOR FINITE G AND
+     THE FACTOR (1+T) BELOW IS THEN 1.
+*/
+    z = x;
+    g = d+g+(z-0.5e0)*(lnx-1.e0);
+    w = g;
+    t = g-w;
+    if(w > 0.99999e0*exparg(&K3)) return Xgamm;
+    Xgamm = exp(w)*(1.0e0+t);
+    if(*a < 0.0e0) Xgamm = 1.0e0/(Xgamm*s)/x;
+    return Xgamm;
+}
+void grat1(double *a,double *x,double *r,double *p,double *q,
+	   double *eps)
+{
+static int K2 = 0; /* first argument of erfc1(), which is defined elsewhere */
+static double a2n,a2nm1,am0,an,an0,b2n,b2nm1,c,cma,g,h,j,l,sum,t,tol,w,z,T1,T3;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+-----------------------------------------------------------------------
+        EVALUATION OF THE INCOMPLETE GAMMA RATIO FUNCTIONS
+                      P(A,X) AND Q(A,X)
+     IT IS ASSUMED THAT A .LE. 1.  EPS IS THE TOLERANCE TO BE USED.
+     THE INPUT ARGUMENT R HAS THE VALUE E**(-X)*X**A/GAMMA(A).
+ 
+     ARGUMENTS (ALL PASSED BY POINTER)
+        A, X, R, EPS --> INPUT.  THEY ARE ONLY READ.  R IS USED ONLY
+                         BY THE CONTINUED FRACTION BRANCH.
+        P, Q         <-- OUTPUT.  BOTH ARE WRITTEN ON EVERY RETURN.
+ 
+     BRANCHES
+        A*X = 0 ........ P = 0, Q = 1 IF X .LE. A, ELSE P = 1, Q = 0
+        A = 0.5 ........ ERF1/ERFC1 OF SQRT(X)
+        X .LT. 1.1 ..... TAYLOR SERIES
+        OTHERWISE ...... CONTINUED FRACTION
+ 
+     ALL WORKING VARIABLES ARE STATIC, SO THE ROUTINE IS NOT
+     REENTRANT.
+-----------------------------------------------------------------------
+*/
+    if(*a**x == 0.0e0) goto S120;
+    if(*a == 0.5e0) goto S100;
+    if(*x < 1.1e0) goto S10;
+    goto S60;
+S10:
+/*
+             TAYLOR SERIES FOR P(A,X)/X**A
+             TERMS ARE ADDED UNTIL ONE IS NO LARGER THAN TOL IN
+             MAGNITUDE.
+*/
+    an = 3.0e0;
+    c = *x;
+    sum = *x/(*a+3.0e0);
+    tol = 0.1e0**eps/(*a+1.0e0);
+S20:
+    an += 1.0e0;
+    c = -(c*(*x/an));
+    t = c/(*a+an);
+    sum += t;
+    if(fabs(t) > tol) goto S20;
+    j = *a**x*((sum/6.0e0-0.5e0/(*a+2.0e0))**x+1.0e0/(*a+1.0e0));
+    z = *a*log(*x);
+    h = gam1(a);
+    g = 1.0e0+h;
+    if(*x < 0.25e0) goto S30;
+    if(*a < *x/2.59e0) goto S50;
+    goto S40;
+S30:
+    if(z > -.13394e0) goto S50;
+S40:
+    w = exp(z); /* this branch forms P first and obtains Q as 1-P */
+    *p = w*g*(0.5e0+(0.5e0-j));
+    *q = 0.5e0+(0.5e0-*p);
+    return;
+S50:
+    l = rexp(&z); /* this branch forms Q first and obtains P as 1-Q */
+    w = 0.5e0+(0.5e0+l);
+    *q = (w*j-l)*g-h;
+    if(*q < 0.0e0) goto S90; /* a negative Q is replaced by P = 1, Q = 0 */
+    *p = 0.5e0+(0.5e0-*q);
+    return;
+S60:
+/*
+              CONTINUED FRACTION EXPANSION
+              ITERATES UNTIL TWO SUCCESSIVE VALUES AM0 AND AN0 DIFFER
+              BY LESS THAN EPS*AN0.
+*/
+    a2nm1 = a2n = 1.0e0;
+    b2nm1 = *x;
+    b2n = *x+(1.0e0-*a);
+    c = 1.0e0;
+S70:
+    a2nm1 = *x*a2n+c*a2nm1;
+    b2nm1 = *x*b2n+c*b2nm1;
+    am0 = a2nm1/b2nm1;
+    c += 1.0e0;
+    cma = c-*a;
+    a2n = a2nm1+cma*a2n;
+    b2n = b2nm1+cma*b2n;
+    an0 = a2n/b2n;
+    if(fabs(an0-am0) >= *eps*an0) goto S70;
+    *q = *r*an0;
+    *p = 0.5e0+(0.5e0-*q);
+    return;
+S80:
+/*
+                SPECIAL CASES
+*/
+    *p = 0.0e0;
+    *q = 1.0e0;
+    return;
+S90:
+    *p = 1.0e0;
+    *q = 0.0e0;
+    return;
+S100:
+    if(*x >= 0.25e0) goto S110;
+    T1 = sqrt(*x);
+    *p = erf1(&T1);
+    *q = 0.5e0+(0.5e0-*p);
+    return;
+S110:
+    T3 = sqrt(*x);
+    *q = erfc1(&K2,&T3);
+    *p = 0.5e0+(0.5e0-*q);
+    return;
+S120:
+    if(*x <= *a) goto S80;
+    goto S90;
+}
+void gratio(double *a,double *x,double *ans,double *qans,int *ind)
+/*
+ ----------------------------------------------------------------------
+        EVALUATION OF THE INCOMPLETE GAMMA RATIO FUNCTIONS
+                      P(A,X) AND Q(A,X)
+ 
+                        ----------
+ 
+     IT IS ASSUMED THAT A AND X ARE NONNEGATIVE, WHERE A AND X
+     ARE NOT BOTH 0.
+ 
+     ANS AND QANS ARE VARIABLES. GRATIO ASSIGNS ANS THE VALUE
+     P(A,X) AND QANS THE VALUE Q(A,X). IND MAY BE ANY INTEGER.
+     IF IND = 0 THEN THE USER IS REQUESTING AS MUCH ACCURACY AS
+     POSSIBLE (UP TO 14 SIGNIFICANT DIGITS). OTHERWISE, IF
+     IND = 1 THEN ACCURACY IS REQUESTED TO WITHIN 1 UNIT OF THE
+     6-TH SIGNIFICANT DIGIT, AND IF IND .NE. 0,1 THEN ACCURACY
+     IS REQUESTED TO WITHIN 1 UNIT OF THE 3RD SIGNIFICANT DIGIT.
+ 
+     ERROR RETURN ...
+        ANS IS ASSIGNED THE VALUE 2 WHEN A OR X IS NEGATIVE,
+     WHEN A*X = 0, OR WHEN P(A,X) AND Q(A,X) ARE INDETERMINATE.
+     P(A,X) AND Q(A,X) ARE COMPUTATIONALLY INDETERMINATE WHEN
+     X IS EXCEEDINGLY CLOSE TO A AND A IS EXTREMELY LARGE.
+ 
+     NOTES ON THIS IMPLEMENTATION ...
+        A, X AND IND ARE PASSED BY POINTER AND ARE ONLY READ.
+        ON THE ERROR RETURN ONLY ANS IS WRITTEN; QANS IS LEFT
+     UNCHANGED.
+        OF THE A*X = 0 CASES ONLY A = X = 0 TAKES THE ERROR RETURN.
+     WHEN A*X = 0 AND A, X ARE NOT BOTH 0 THE CODE RETURNS
+     ANS = 0, QANS = 1 IF X .LE. A AND ANS = 1, QANS = 0 OTHERWISE.
+        IOP = IND + 1 (OR 3 WHEN IND IS NEITHER 0 NOR 1) AND ROW
+     IOP-1 OF THE TABLES ACC0, BIG, E00 AND X00 IS USED.
+        ALL WORKING VARIABLES ARE STATIC, SO THE ROUTINE IS NOT
+     REENTRANT.
+ ----------------------------------------------------------------------
+     WRITTEN BY ALFRED H. MORRIS, JR.
+        NAVAL SURFACE WEAPONS CENTER
+        DAHLGREN, VIRGINIA
+     --------------------
+*/
+{
+static double alog10 = 2.30258509299405e0;
+static double d10 = -.185185185185185e-02;
+static double d20 = .413359788359788e-02;
+static double d30 = .649434156378601e-03;
+static double d40 = -.861888290916712e-03;
+static double d50 = -.336798553366358e-03;
+static double d60 = .531307936463992e-03;
+static double d70 = .344367606892378e-03;
+static double rt2pin = .398942280401433e0;
+static double rtpi = 1.77245385090552e0;
+static double third = .333333333333333e0;
+static double acc0[3] = {
+    5.e-15,5.e-7,5.e-4
+};
+static double big[3] = {
+    20.0e0,14.0e0,10.0e0
+};
+static double d0[13] = {
+    .833333333333333e-01,-.148148148148148e-01,.115740740740741e-02,
+    .352733686067019e-03,-.178755144032922e-03,.391926317852244e-04,
+    -.218544851067999e-05,-.185406221071516e-05,.829671134095309e-06,
+    -.176659527368261e-06,.670785354340150e-08,.102618097842403e-07,
+    -.438203601845335e-08
+};
+static double d1[12] = {
+    -.347222222222222e-02,.264550264550265e-02,-.990226337448560e-03,
+    .205761316872428e-03,-.401877572016461e-06,-.180985503344900e-04,
+    .764916091608111e-05,-.161209008945634e-05,.464712780280743e-08,
+    .137863344691572e-06,-.575254560351770e-07,.119516285997781e-07
+};
+static double d2[10] = {
+    -.268132716049383e-02,.771604938271605e-03,.200938786008230e-05,
+    -.107366532263652e-03,.529234488291201e-04,-.127606351886187e-04,
+    .342357873409614e-07,.137219573090629e-05,-.629899213838006e-06,
+    .142806142060642e-06
+};
+static double d3[8] = {
+    .229472093621399e-03,-.469189494395256e-03,.267720632062839e-03,
+    -.756180167188398e-04,-.239650511386730e-06,.110826541153473e-04,
+    -.567495282699160e-05,.142309007324359e-05
+};
+static double d4[6] = {
+    .784039221720067e-03,-.299072480303190e-03,-.146384525788434e-05,
+    .664149821546512e-04,-.396836504717943e-04,.113757269706784e-04
+};
+static double d5[4] = {
+    -.697281375836586e-04,.277275324495939e-03,-.199325705161888e-03,
+    .679778047793721e-04
+};
+static double d6[2] = {
+    -.592166437353694e-03,.270878209671804e-03
+};
+static double e00[3] = {
+    .25e-3,.25e-1,.14e0
+};
+static double x00[3] = {
+    31.0e0,17.0e0,9.7e0
+};
+static int K1 = 1; /* passed to spmpar() (1 = machine precision) and to erfc1() */
+static int K2 = 0; /* passed to erfc1(), which is defined elsewhere */
+static double a2n,a2nm1,acc,am0,amn,an,an0,apn,b2n,b2nm1,c,c0,c1,c2,c3,c4,c5,c6,
+    cma,e,e0,g,h,j,l,r,rta,rtx,s,sum,t,t1,tol,twoa,u,w,x0,y,z;
+static int i,iop,m,max,n;
+static double wk[20],T3;
+static int T4,T5;
+static double T6,T7;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+     --------------------
+     ****** E IS A MACHINE DEPENDENT CONSTANT. E IS THE SMALLEST
+            FLOATING POINT NUMBER FOR WHICH 1.0 + E .GT. 1.0 .
+*/
+    e = spmpar(&K1);
+    if(*a < 0.0e0 || *x < 0.0e0) goto S430;
+    if(*a == 0.0e0 && *x == 0.0e0) goto S430;
+    if(*a**x == 0.0e0) goto S420; /* not an error: S420 returns 0/1 or 1/0 */
+    iop = *ind+1;
+    if(iop != 1 && iop != 2) iop = 3;
+    acc = fifdmax1(acc0[iop-1],e); /* fifdmax1() is defined elsewhere; presumably the larger of its two arguments - confirm */
+    e0 = e00[iop-1];
+    x0 = x00[iop-1];
+/*
+            SELECT THE APPROPRIATE ALGORITHM
+*/
+    if(*a >= 1.0e0) goto S10;
+    if(*a == 0.5e0) goto S390;
+    if(*x < 1.1e0) goto S160;
+    t1 = *a*log(*x)-*x;
+    u = *a*exp(t1);
+    if(u == 0.0e0) goto S380;
+    r = u*(1.0e0+gam1(a));
+    goto S250;
+S10:
+    if(*a >= big[iop-1]) goto S30;
+    if(*a > *x || *x >= x0) goto S20;
+    twoa = *a+*a;
+    m = fifidint(twoa);
+    if(twoa != (double)m) goto S20; /* the finite sums need 2*A to be an integer */
+    i = m/2;
+    if(*a == (double)i) goto S210; /* A itself is an integer */
+    goto S220; /* A is a half-integer */
+S20:
+    t1 = *a*log(*x)-*x;
+    r = exp(t1)/Xgamm(a);
+    goto S40;
+S30:
+    l = *x/ *a;
+    if(l == 0.0e0) goto S370;
+    s = 0.5e0+(0.5e0-l);
+    z = rlog(&l);
+    if(z >= 700.0e0/ *a) goto S410;
+    y = *a*z;
+    rta = sqrt(*a);
+    if(fabs(s) <= e0/rta) goto S330;
+    if(fabs(s) <= 0.4e0) goto S270;
+    t = pow(1.0e0/ *a,2.0);
+    t1 = (((0.75e0*t-1.0e0)*t+3.5e0)*t-105.0e0)/(*a*1260.0e0);
+    t1 -= y;
+    r = rt2pin*rta*exp(t1);
+S40:
+    if(r == 0.0e0) goto S420;
+    if(*x <= fifdmax1(*a,alog10)) goto S50;
+    if(*x < x0) goto S250;
+    goto S100;
+S50:
+/*
+                 TAYLOR SERIES FOR P/R
+                 UP TO 20 LEADING TERMS ARE STORED IN WK AND ADDED
+                 LAST, IN REVERSE ORDER, AFTER THE TAIL OF THE
+                 SERIES HAS BEEN SUMMED.
+*/
+    apn = *a+1.0e0;
+    t = *x/apn;
+    wk[0] = t;
+    for(n=2; n<=20; n++) {
+        apn += 1.0e0;
+        t *= (*x/apn);
+        if(t <= 1.e-3) goto S70;
+        wk[n-1] = t;
+    }
+    n = 20;
+S70:
+    sum = t;
+    tol = 0.5e0*acc;
+S80:
+    apn += 1.0e0;
+    t *= (*x/apn);
+    sum += t;
+    if(t > tol) goto S80;
+    max = n-1;
+    for(m=1; m<=max; m++) {
+        n -= 1;
+        sum += wk[n-1];
+    }
+    *ans = r/ *a*(1.0e0+sum);
+    *qans = 0.5e0+(0.5e0-*ans);
+    return;
+S100:
+/*
+                 ASYMPTOTIC EXPANSION
+                 WK IS USED AS IN THE TAYLOR SERIES ABOVE.
+*/
+    amn = *a-1.0e0;
+    t = amn/ *x;
+    wk[0] = t;
+    for(n=2; n<=20; n++) {
+        amn -= 1.0e0;
+        t *= (amn/ *x);
+        if(fabs(t) <= 1.e-3) goto S120;
+        wk[n-1] = t;
+    }
+    n = 20;
+S120:
+    sum = t;
+S130:
+    if(fabs(t) <= acc) goto S140;
+    amn -= 1.0e0;
+    t *= (amn/ *x);
+    sum += t;
+    goto S130;
+S140:
+    max = n-1;
+    for(m=1; m<=max; m++) {
+        n -= 1;
+        sum += wk[n-1];
+    }
+    *qans = r/ *x*(1.0e0+sum);
+    *ans = 0.5e0+(0.5e0-*qans);
+    return;
+S160:
+/*
+             TAYLOR SERIES FOR P(A,X)/X**A
+*/
+    an = 3.0e0;
+    c = *x;
+    sum = *x/(*a+3.0e0);
+    tol = 3.0e0*acc/(*a+1.0e0);
+S170:
+    an += 1.0e0;
+    c = -(c*(*x/an));
+    t = c/(*a+an);
+    sum += t;
+    if(fabs(t) > tol) goto S170;
+    j = *a**x*((sum/6.0e0-0.5e0/(*a+2.0e0))**x+1.0e0/(*a+1.0e0));
+    z = *a*log(*x);
+    h = gam1(a);
+    g = 1.0e0+h;
+    if(*x < 0.25e0) goto S180;
+    if(*a < *x/2.59e0) goto S200;
+    goto S190;
+S180:
+    if(z > -.13394e0) goto S200;
+S190:
+    w = exp(z); /* this branch forms ANS first and obtains QANS as 1-ANS */
+    *ans = w*g*(0.5e0+(0.5e0-j));
+    *qans = 0.5e0+(0.5e0-*ans);
+    return;
+S200:
+    l = rexp(&z); /* this branch forms QANS first and obtains ANS as 1-QANS */
+    w = 0.5e0+(0.5e0+l);
+    *qans = (w*j-l)*g-h;
+    if(*qans < 0.0e0) goto S380; /* a negative QANS is replaced by ANS = 1, QANS = 0 */
+    *ans = 0.5e0+(0.5e0-*qans);
+    return;
+S210:
+/*
+             FINITE SUMS FOR Q WHEN A .GE. 1
+                 AND 2*A IS AN INTEGER
+*/
+    sum = exp(-*x);
+    t = sum;
+    n = 1;
+    c = 0.0e0;
+    goto S230;
+S220:
+    rtx = sqrt(*x);
+    sum = erfc1(&K2,&rtx);
+    t = exp(-*x)/(rtpi*rtx);
+    n = 0;
+    c = -0.5e0;
+S230:
+    if(n == i) goto S240; /* I = INT(2*A)/2 was set at S10 */
+    n += 1;
+    c += 1.0e0;
+    t = *x*t/c;
+    sum += t;
+    goto S230;
+S240:
+    *qans = sum;
+    *ans = 0.5e0+(0.5e0-*qans);
+    return;
+S250:
+/*
+              CONTINUED FRACTION EXPANSION
+              ITERATES UNTIL TWO SUCCESSIVE VALUES AM0 AND AN0 DIFFER
+              BY LESS THAN TOL*AN0.
+*/
+    tol = fifdmax1(5.0e0*e,acc);
+    a2nm1 = a2n = 1.0e0;
+    b2nm1 = *x;
+    b2n = *x+(1.0e0-*a);
+    c = 1.0e0;
+S260:
+    a2nm1 = *x*a2n+c*a2nm1;
+    b2nm1 = *x*b2n+c*b2nm1;
+    am0 = a2nm1/b2nm1;
+    c += 1.0e0;
+    cma = c-*a;
+    a2n = a2nm1+cma*a2n;
+    b2n = b2nm1+cma*b2n;
+    an0 = a2n/b2n;
+    if(fabs(an0-am0) >= tol*an0) goto S260;
+    *qans = r*an0;
+    *ans = 0.5e0+(0.5e0-*qans);
+    return;
+S270:
+/*
+                GENERAL TEMME EXPANSION
+                THE POLYNOMIALS USED DEPEND ON IOP: S280 FOR IOP = 1,
+                S290 FOR IOP = 2, S300 FOR IOP = 3.
+*/
+    if(fabs(s) <= 2.0e0*e && *a*e*e > 3.28e-3) goto S430;
+    c = exp(-y);
+    T3 = sqrt(y);
+    w = 0.5e0*erfc1(&K1,&T3); /* NOTE(review): a nonzero first argument presumably requests a scaled erfc - confirm against erfc1 */
+    u = 1.0e0/ *a;
+    z = sqrt(z+z);
+    if(l < 1.0e0) z = -z;
+    T4 = iop-2;
+    if(T4 < 0) goto S280;
+    else if(T4 == 0) goto S290;
+    else  goto S300;
+S280:
+    if(fabs(s) <= 1.e-3) goto S340; /* reuses the shorter polynomials of the L = 1 case; C and W keep the values set at S270 */
+    c0 = ((((((((((((d0[12]*z+d0[11])*z+d0[10])*z+d0[9])*z+d0[8])*z+d0[7])*z+d0[
+      6])*z+d0[5])*z+d0[4])*z+d0[3])*z+d0[2])*z+d0[1])*z+d0[0])*z-third;
+    c1 = (((((((((((d1[11]*z+d1[10])*z+d1[9])*z+d1[8])*z+d1[7])*z+d1[6])*z+d1[5]
+      )*z+d1[4])*z+d1[3])*z+d1[2])*z+d1[1])*z+d1[0])*z+d10;
+    c2 = (((((((((d2[9]*z+d2[8])*z+d2[7])*z+d2[6])*z+d2[5])*z+d2[4])*z+d2[3])*z+
+      d2[2])*z+d2[1])*z+d2[0])*z+d20;
+    c3 = (((((((d3[7]*z+d3[6])*z+d3[5])*z+d3[4])*z+d3[3])*z+d3[2])*z+d3[1])*z+
+      d3[0])*z+d30;
+    c4 = (((((d4[5]*z+d4[4])*z+d4[3])*z+d4[2])*z+d4[1])*z+d4[0])*z+d40;
+    c5 = (((d5[3]*z+d5[2])*z+d5[1])*z+d5[0])*z+d50;
+    c6 = (d6[1]*z+d6[0])*z+d60;
+    t = ((((((d70*u+c6)*u+c5)*u+c4)*u+c3)*u+c2)*u+c1)*u+c0;
+    goto S310;
+S290:
+    c0 = (((((d0[5]*z+d0[4])*z+d0[3])*z+d0[2])*z+d0[1])*z+d0[0])*z-third;
+    c1 = (((d1[3]*z+d1[2])*z+d1[1])*z+d1[0])*z+d10;
+    c2 = d2[0]*z+d20;
+    t = (c2*u+c1)*u+c0;
+    goto S310;
+S300:
+    t = ((d0[2]*z+d0[1])*z+d0[0])*z-third;
+S310:
+    if(l < 1.0e0) goto S320; /* L = X/A; for L .LT. 1 ANS is formed first, otherwise QANS */
+    *qans = c*(w+rt2pin*t/rta);
+    *ans = 0.5e0+(0.5e0-*qans);
+    return;
+S320:
+    *ans = c*(w-rt2pin*t/rta);
+    *qans = 0.5e0+(0.5e0-*ans);
+    return;
+S330:
+/*
+               TEMME EXPANSION FOR L = 1
+               REACHED WHEN ABS(S) .LE. E0/SQRT(A), I.E. X/A IS CLOSE
+               TO 1.
+*/
+    if(*a*e*e > 3.28e-3) goto S430;
+    c = 0.5e0+(0.5e0-y);
+    w = (0.5e0-sqrt(y)*(0.5e0+(0.5e0-y/3.0e0))/rtpi)/c;
+    u = 1.0e0/ *a;
+    z = sqrt(z+z);
+    if(l < 1.0e0) z = -z;
+    T5 = iop-2;
+    if(T5 < 0) goto S340;
+    else if(T5 == 0) goto S350;
+    else  goto S360;
+S340:
+    c0 = ((((((d0[6]*z+d0[5])*z+d0[4])*z+d0[3])*z+d0[2])*z+d0[1])*z+d0[0])*z-
+      third;
+    c1 = (((((d1[5]*z+d1[4])*z+d1[3])*z+d1[2])*z+d1[1])*z+d1[0])*z+d10;
+    c2 = ((((d2[4]*z+d2[3])*z+d2[2])*z+d2[1])*z+d2[0])*z+d20;
+    c3 = (((d3[3]*z+d3[2])*z+d3[1])*z+d3[0])*z+d30;
+    c4 = (d4[1]*z+d4[0])*z+d40;
+    c5 = (d5[1]*z+d5[0])*z+d50;
+    c6 = d6[0]*z+d60;
+    t = ((((((d70*u+c6)*u+c5)*u+c4)*u+c3)*u+c2)*u+c1)*u+c0;
+    goto S310;
+S350:
+    c0 = (d0[1]*z+d0[0])*z-third;
+    c1 = d1[0]*z+d10;
+    t = (d20*u+c1)*u+c0;
+    goto S310;
+S360:
+    t = d0[0]*z-third;
+    goto S310;
+S370:
+/*
+                     SPECIAL CASES
+*/
+    *ans = 0.0e0;
+    *qans = 1.0e0;
+    return;
+S380:
+    *ans = 1.0e0;
+    *qans = 0.0e0;
+    return;
+S390:
+    if(*x >= 0.25e0) goto S400;
+    T6 = sqrt(*x);
+    *ans = erf1(&T6);
+    *qans = 0.5e0+(0.5e0-*ans);
+    return;
+S400:
+    T7 = sqrt(*x);
+    *qans = erfc1(&K2,&T7);
+    *ans = 0.5e0+(0.5e0-*qans);
+    return;
+S410:
+    if(fabs(s) <= 2.0e0*e) goto S430;
+S420:
+    if(*x <= *a) goto S370;
+    goto S380;
+S430:
+/*
+                     ERROR RETURN
+                     ONLY ANS IS SET; QANS IS NOT TOUCHED.
+*/
+    *ans = 2.0e0;
+    return;
+}
+double gsumln(double *a,double *b)
+/*
+-----------------------------------------------------------------------
+     LN(GAMMA(A + B)) FOR 1 .LE. A .LE. 2  AND  1 .LE. B .LE. 2
+-----------------------------------------------------------------------
+     A, B .... POINTERS TO THE ARGUMENTS.  THEY ARE ONLY READ AND
+               THEIR RANGE IS NOT CHECKED.
+     RETURNS . WITH  X = A + B - 2 :
+                  X .GT. 1.25 ... GAMLN1(X - 1) + LN(X*(1 + X))
+                  X .GT. 0.25 ... GAMLN1(X) + ALNREL(X)
+                  OTHERWISE ..... GAMLN1(1 + X)
+-----------------------------------------------------------------------
+*/
+{
+    double excess,shifted;
+
+    excess = *a+*b-2.e0;
+    if(excess > 1.25e0) {
+        shifted = excess-1.0e0;
+        return gamln1(&shifted)+log(excess*(1.0e0+excess));
+    }
+    if(excess > 0.25e0) {
+        return gamln1(&excess)+alnrel(&excess);
+    }
+    shifted = 1.0e0+excess;
+    return gamln1(&shifted);
+}
+double psi(double *xx)
+/*
+---------------------------------------------------------------------
+ 
+                 EVALUATION OF THE DIGAMMA FUNCTION
+ 
+                           -----------
+ 
+     PSI(XX) IS ASSIGNED THE VALUE 0 WHEN THE DIGAMMA FUNCTION CANNOT
+     BE COMPUTED.
+ 
+     THE MAIN COMPUTATION INVOLVES EVALUATION OF RATIONAL CHEBYSHEV
+     APPROXIMATIONS PUBLISHED IN MATH. COMP. 27, 123-127(1973) BY
+     CODY, STRECOK AND THACHER.
+ 
+     ARGUMENT
+        XX --> POINTER TO THE ARGUMENT.  *XX IS ONLY READ.
+ 
+     THE ERROR VALUE 0 IS RETURNED WHEN
+        - XX = 0
+        - ABS(XX) .GT. XSMALL, XX .LT. 0.5 AND ABS(XX) .GE. XMAX1
+        - THE REDUCED COTANGENT ARGUMENT Z IS EXACTLY 0 (THE
+          "CHECK FOR SINGULARITY" TEST BELOW)
+ 
+     ALL WORKING VARIABLES ARE STATIC, SO THE ROUTINE IS NOT
+     REENTRANT.
+ 
+---------------------------------------------------------------------
+     PSI WAS WRITTEN AT ARGONNE NATIONAL LABORATORY FOR THE FUNPACK
+     PACKAGE OF SPECIAL FUNCTION SUBROUTINES. PSI WAS MODIFIED BY
+     A.H. MORRIS (NSWC).
+---------------------------------------------------------------------
+*/
+{
+static double dx0 = 1.461632144968362341262659542325721325e0;
+static double piov4 = .785398163397448e0;
+static double p1[7] = {
+    .895385022981970e-02,.477762828042627e+01,.142441585084029e+03,
+    .118645200713425e+04,.363351846806499e+04,.413810161269013e+04,
+    .130560269827897e+04
+};
+static double p2[4] = {
+    -.212940445131011e+01,-.701677227766759e+01,-.448616543918019e+01,
+    -.648157123766197e+00
+};
+static double q1[6] = {
+    .448452573429826e+02,.520752771467162e+03,.221000799247830e+04,
+    .364127349079381e+04,.190831076596300e+04,.691091682714533e-05
+};
+static double q2[4] = {
+    .322703493791143e+02,.892920700481861e+02,.546117738103215e+02,
+    .777788548522962e+01
+};
+static int K1 = 3; /* selector passed to ipmpar(), which is defined elsewhere */
+static int K2 = 1; /* selector passed to spmpar(): 1 requests the machine precision */
+static double psi,aug,den,sgn,upper,w,x,xmax1,xmx0,xsmall,z;
+static int i,m,n,nq;
+/*
+     ..
+     .. Executable Statements ..
+*/
+/*
+---------------------------------------------------------------------
+     MACHINE DEPENDENT CONSTANTS ...
+        XMAX1  = THE SMALLEST POSITIVE FLOATING POINT CONSTANT
+                 WITH ENTIRELY INTEGER REPRESENTATION.  ALSO USED
+                 AS NEGATIVE OF LOWER BOUND ON ACCEPTABLE NEGATIVE
+                 ARGUMENTS AND AS THE POSITIVE ARGUMENT BEYOND WHICH
+                 PSI MAY BE REPRESENTED AS ALOG(X).
+        XSMALL = ABSOLUTE ARGUMENT BELOW WHICH PI*COTAN(PI*X)
+                 MAY BE REPRESENTED BY 1/X.
+     HERE XMAX1 IS TAKEN AS THE SMALLER OF IPMPAR(3) AND
+     1/SPMPAR(1), AND XSMALL IS FIXED AT 1.E-9.
+---------------------------------------------------------------------
+*/
+    xmax1 = ipmpar(&K1);
+    xmax1 = fifdmin1(xmax1,1.0e0/spmpar(&K2)); /* fifdmin1() is defined elsewhere; presumably the smaller of its two arguments - confirm */
+    xsmall = 1.e-9;
+    x = *xx;
+    aug = 0.0e0;
+    if(x >= 0.5e0) goto S50;
+/*
+---------------------------------------------------------------------
+     X .LT. 0.5,  USE REFLECTION FORMULA
+     PSI(1-X) = PSI(X) + PI * COTAN(PI*X)
+---------------------------------------------------------------------
+*/
+    if(fabs(x) > xsmall) goto S10;
+    if(x == 0.0e0) goto S100;
+/*
+---------------------------------------------------------------------
+     0 .LT. ABS(X) .LE. XSMALL.  USE 1/X AS A SUBSTITUTE
+     FOR  PI*COTAN(PI*X)
+---------------------------------------------------------------------
+*/
+    aug = -(1.0e0/x);
+    goto S40;
+S10:
+/*
+---------------------------------------------------------------------
+     REDUCTION OF ARGUMENT FOR COTAN
+     W BECOMES ABS(X); SGN CARRIES THE SIGN TOGETHER WITH THE
+     FACTOR PI/4.
+---------------------------------------------------------------------
+*/
+    w = -x;
+    sgn = piov4;
+    if(w > 0.0e0) goto S20;
+    w = -w;
+    sgn = -sgn;
+S20:
+/*
+---------------------------------------------------------------------
+     MAKE AN ERROR EXIT IF X .LE. -XMAX1
+---------------------------------------------------------------------
+*/
+    if(w >= xmax1) goto S100;
+    nq = fifidint(w); /* fifidint() is defined elsewhere; presumably truncates to an integer - confirm */
+    w -= (double)nq;
+    nq = fifidint(w*4.0e0);
+    w = 4.0e0*(w-(double)nq*.25e0);
+/*
+---------------------------------------------------------------------
+     W IS NOW RELATED TO THE FRACTIONAL PART OF  4.0 * X.
+     ADJUST ARGUMENT TO CORRESPOND TO VALUES IN FIRST
+     QUADRANT AND DETERMINE SIGN
+---------------------------------------------------------------------
+*/
+    n = nq/2;
+    if(n+n != nq) w = 1.0e0-w; /* NQ is odd */
+    z = piov4*w;
+    m = n/2;
+    if(m+m != n) sgn = -sgn; /* N is odd */
+/*
+---------------------------------------------------------------------
+     DETERMINE FINAL VALUE FOR  -PI*COTAN(PI*X)
+---------------------------------------------------------------------
+*/
+    n = (nq+1)/2;
+    m = n/2;
+    m += m;
+    if(m != n) goto S30; /* N odd: use the tangent form at S30 */
+/*
+---------------------------------------------------------------------
+     CHECK FOR SINGULARITY
+---------------------------------------------------------------------
+*/
+    if(z == 0.0e0) goto S100;
+/*
+---------------------------------------------------------------------
+     USE COS/SIN AS A SUBSTITUTE FOR COTAN, AND
+     SIN/COS AS A SUBSTITUTE FOR TAN
+---------------------------------------------------------------------
+*/
+    aug = sgn*(cos(z)/sin(z)*4.0e0);
+    goto S40;
+S30:
+    aug = sgn*(sin(z)/cos(z)*4.0e0);
+S40:
+    x = 1.0e0-x; /* continue with the reflected argument; AUG holds the correction term */
+S50:
+    if(x > 3.0e0) goto S70;
+/*
+---------------------------------------------------------------------
+     0.5 .LE. X .LE. 3.0
+     RATIONAL APPROXIMATION BUILT FROM P1 AND Q1, MULTIPLIED BY
+     (X - DX0).
+---------------------------------------------------------------------
+*/
+    den = x;
+    upper = p1[0]*x;
+    for(i=1; i<=5; i++) {
+        den = (den+q1[i-1])*x;
+        upper = (upper+p1[i+1-1])*x; /* index i+1-1 is simply i */
+    }
+    den = (upper+p1[6])/(den+q1[5]); /* DEN is reused to hold the quotient */
+    xmx0 = x-dx0;
+    psi = den*xmx0+aug;
+    return psi;
+S70:
+/*
+---------------------------------------------------------------------
+     IF X .GE. XMAX1, PSI = LN(X)
+---------------------------------------------------------------------
+*/
+    if(x >= xmax1) goto S90;
+/*
+---------------------------------------------------------------------
+     3.0 .LT. X .LT. XMAX1
+     RATIONAL APPROXIMATION IN W = 1/X**2 BUILT FROM P2 AND Q2.
+---------------------------------------------------------------------
+*/
+    w = 1.0e0/(x*x);
+    den = w;
+    upper = p2[0]*w;
+    for(i=1; i<=3; i++) {
+        den = (den+q2[i-1])*w;
+        upper = (upper+p2[i+1-1])*w; /* index i+1-1 is simply i */
+    }
+    aug = upper/(den+q2[3])-0.5e0/x+aug;
+S90:
+    psi = aug+log(x);
+    return psi;
+S100:
+/*
+---------------------------------------------------------------------
+     ERROR RETURN
+---------------------------------------------------------------------
+*/
+    psi = 0.0e0;
+    return psi;
+}
+double rcomp(double *a,double *x)
+/*
+     -------------------
+     EXP(-X)*X**A/GAMMA(A)
+     -------------------
+     A, X .... POINTERS TO THE ARGUMENTS.  THEY ARE ONLY READ.
+     RETURNS . A .GE. 20 ...... RT2PIN*SQRT(A)*EXP(T1), WHERE T1 IS
+                                A SHORT SERIES IN 1/A**2 MINUS
+                                A*RLOG(X/A); 0 IF X/A IS EXACTLY 0
+               1 .LE. A .LT. 20 EXP(A*LN(X) - X)/XGAMM(A)
+               OTHERWISE ...... A*EXP(A*LN(X) - X)*(1 + GAM1(A))
+     -------------------
+     RT2PIN = 1/SQRT(2*PI)
+     -------------------
+*/
+{
+static double rt2pin = .398942280401433e0;
+    double expo,ratio,rsq;
+
+    if(*a >= 20.0e0) {
+        ratio = *x/ *a;
+        if(ratio == 0.0e0) return 0.0e0;
+        rsq = pow(1.0e0/ *a,2.0);
+        expo = (((0.75e0*rsq-1.0e0)*rsq+3.5e0)*rsq-105.0e0)/(*a*1260.0e0);
+        expo -= (*a*rlog(&ratio));
+        return rt2pin*sqrt(*a)*exp(expo);
+    }
+    expo = *a*log(*x)-*x;
+    if(*a >= 1.0e0) {
+        return exp(expo)/Xgamm(a);
+    }
+    return *a*exp(expo)*(1.0e0+gam1(a));
+}
+double rexp(double *x)
+/*
+-----------------------------------------------------------------------
+                    THE FUNCTION EXP(X) - 1
+-----------------------------------------------------------------------
+     X ....... POINTER TO THE ARGUMENT.  IT IS ONLY READ.
+     RETURNS . ABS(X) .GT. 0.15 AND X .GT. 0 ... W*(1 - 1/W)
+               ABS(X) .GT. 0.15 OTHERWISE ...... W - 1
+               (W = EXP(X); THE 1 IS APPLIED AS TWO HALVES)
+               ABS(X) NOT GREATER THAN 0.15 .... X TIMES A RATIONAL
+               FUNCTION OF X WITH THE COEFFICIENTS P1,P2,Q1,...,Q4
+-----------------------------------------------------------------------
+*/
+{
+static double p1 = .914041914819518e-09;
+static double p2 = .238082361044469e-01;
+static double q1 = -.499999999085958e+00;
+static double q2 = .107141568980644e+00;
+static double q3 = -.119041179760821e-01;
+static double q4 = .595130811860248e-03;
+    double v,ex;
+
+    v = *x;
+    if(fabs(v) > 0.15e0) {
+        ex = exp(v);
+        if(v > 0.0e0) {
+            return ex*(0.5e0+(0.5e0-1.0e0/ex));
+        }
+        return ex-0.5e0-0.5e0;
+    }
+    return v*(((p2*v+p1)*v+1.0e0)/((((q4*v+q3)*v+q2)*v+q1)*v+1.0e0));
+}
+double rlog(double *x)
+/*
+     -------------------
+     THE FUNCTION  X - 1 - LN(X)
+     -------------------
+     X ....... POINTER TO THE ARGUMENT.  IT IS ONLY READ.
+     RETURNS . FOR X .LT. 0.61 OR X .GT. 1.57 THE VALUE IS FORMED
+               DIRECTLY AS (X - 1) - LN(X).  OTHERWISE X IS MAPPED
+               TO A SMALL SHIFT U ABOUT ONE OF THREE CENTRES
+               (X .LT. 0.82, X .GT. 1.18, OR IN BETWEEN), A SERIES
+               IN R = U/(U + 2) IS EVALUATED AND THE OFFSET W1
+               (A - 0.3*U, B + U/3, OR 0) IS ADDED.  WHEN U = 0 THE
+               RESULT IS EXACTLY THAT OFFSET.
+     -------------------
+*/
+{
+static double a = .566749439387324e-01;
+static double b = .456512608815524e-01;
+static double p0 = .333333333333333e+00;
+static double p1 = -.224696413112536e+00;
+static double p2 = .620886815375787e-02;
+static double q1 = -.127408923933623e+01;
+static double q2 = .354508718369557e+00;
+    double shift,offset,ratio,rsq,series;
+
+    if(*x < 0.61e0 || *x > 1.57e0) {
+/*
+              OUTSIDE THE SERIES RANGE
+*/
+        ratio = *x-0.5e0-0.5e0;
+        return ratio-log(*x);
+    }
+/*
+              ARGUMENT REDUCTION
+*/
+    if(*x < 0.82e0) {
+        shift = *x-0.7e0;
+        shift /= 0.7e0;
+        offset = a-shift*0.3e0;
+    }
+    else if(*x > 1.18e0) {
+        shift = 0.75e0**x-1.e0;
+        offset = b+shift/3.0e0;
+    }
+    else {
+        shift = *x-0.5e0-0.5e0;
+        offset = 0.0e0;
+    }
+/*
+               SERIES EXPANSION
+*/
+    ratio = shift/(shift+2.0e0);
+    rsq = ratio*ratio;
+    series = ((p2*rsq+p1)*rsq+p0)/((q2*rsq+q1)*rsq+1.0e0);
+    return 2.0e0*rsq*(1.0e0/(1.0e0-ratio)-ratio*series)+offset;
+}
+double rlog1(double *x)
+/*
+-----------------------------------------------------------------------
+                  THE FUNCTION  X - LN(1 + X)
+-----------------------------------------------------------------------
+     X ....... POINTER TO THE ARGUMENT.  IT IS ONLY READ.
+     RETURNS . FOR X .LT. -0.39 OR X .GT. 0.57 THE VALUE IS FORMED
+               DIRECTLY AS X - LN(X + 1).  OTHERWISE X IS MAPPED TO
+               A SMALL SHIFT H ABOUT ONE OF THREE CENTRES
+               (X .LT. -0.18, X .GT. 0.18, OR IN BETWEEN), A SERIES
+               IN R = H/(H + 2) IS EVALUATED AND THE OFFSET W1
+               (A - 0.3*H, B + H/3, OR 0) IS ADDED.
+-----------------------------------------------------------------------
+*/
+{
+static double a = .566749439387324e-01;
+static double b = .456512608815524e-01;
+static double p0 = .333333333333333e+00;
+static double p1 = -.224696413112536e+00;
+static double p2 = .620886815375787e-02;
+static double q1 = -.127408923933623e+01;
+static double q2 = .354508718369557e+00;
+    double shift,offset,ratio,rsq,series;
+
+    if(*x < -0.39e0 || *x > 0.57e0) {
+/*
+              OUTSIDE THE SERIES RANGE
+*/
+        series = *x+0.5e0+0.5e0;
+        return *x-log(series);
+    }
+/*
+              ARGUMENT REDUCTION
+*/
+    if(*x < -0.18e0) {
+        shift = *x+0.3e0;
+        shift /= 0.7e0;
+        offset = a-shift*0.3e0;
+    }
+    else if(*x > 0.18e0) {
+        shift = 0.75e0**x-0.25e0;
+        offset = b+shift/3.0e0;
+    }
+    else {
+        shift = *x;
+        offset = 0.0e0;
+    }
+/*
+               SERIES EXPANSION
+*/
+    ratio = shift/(shift+2.0e0);
+    rsq = ratio*ratio;
+    series = ((p2*rsq+p1)*rsq+p0)/((q2*rsq+q1)*rsq+1.0e0);
+    return 2.0e0*rsq*(1.0e0/(1.0e0-ratio)-ratio*series)+offset;
+}
+double spmpar(int *i)
+/*
+-----------------------------------------------------------------------
+     MACHINE CONSTANTS OF THE FLOATING POINT ARITHMETIC IN USE.
+
+     WITH M BASE B DIGITS AND SMALLEST AND LARGEST EXPONENTS EMIN
+     AND EMAX, THE VALUE RETURNED IS
+
+        *I .LE. 1 ...  B**(1 - M), THE MACHINE PRECISION,
+        *I  =   2 ...  B**(EMIN - 1), THE SMALLEST MAGNITUDE,
+        *I .GT. 2 ...  B**EMAX*(1 - B**(-M)), THE LARGEST MAGNITUDE.
+
+     I IS ONLY READ.  B, M, EMIN AND EMAX ARE OBTAINED FROM IPMPAR
+     WITH THE SELECTORS 4, 8, 9 AND 10 RESPECTIVELY.
+-----------------------------------------------------------------------
+     ORIGINAL SINGLE PRECISION ROUTINE BY
+        ALFRED H. MORRIS, JR.
+        NAVAL SURFACE WARFARE CENTER
+        DAHLGREN VIRGINIA
+     MODIFIED BY BARRY W. BROWN TO RETURN THE DOUBLE PRECISION
+     CONSTANTS, AS PART OF CONVERTING BRATIO TO DOUBLE PRECISION.
+-----------------------------------------------------------------------
+*/
+{
+static int sel_base = 4;
+static int sel_digits = 8;
+static int sel_emin = 9;
+static int sel_emax = 10;
+    double base,binv,bm1,frac,scale;
+    int digits,expo,ibase;
+
+    if(*i <= 1) {
+/*
+     MACHINE PRECISION
+*/
+        base = ipmpar(&sel_base);
+        digits = ipmpar(&sel_digits);
+        return pow(base,(double)(1-digits));
+    }
+    if(*i <= 2) {
+/*
+     SMALLEST MAGNITUDE.  THE POWER IS TAKEN AT EMIN + 2 AND THEN
+     MULTIPLIED BY 1/B THREE TIMES.
+*/
+        base = ipmpar(&sel_base);
+        expo = ipmpar(&sel_emin);
+        binv = 1.0/base;
+        scale = pow(base,(double)(expo+2));
+        return scale*binv*binv*binv;
+    }
+/*
+     LARGEST MAGNITUDE.  THE POWER IS TAKEN AT EMAX - 2 AND THEN
+     MULTIPLIED BY B TWICE.
+*/
+    ibase = ipmpar(&sel_base);
+    digits = ipmpar(&sel_digits);
+    expo = ipmpar(&sel_emax);
+    base = ibase;
+    bm1 = ibase-1;
+    scale = pow(base,(double)(digits-1));
+    frac = ((scale-1.0)*base+bm1)/(base*scale);
+    scale = pow(base,(double)(expo-2));
+    return frac*scale*base*base;
+}
+double stvaln(double *p)
+/*
+**********************************************************************
+ 
+     double stvaln(double *p)
+                    STarting VALue for Newton-Raphson
+                calculation of Normal distribution Inverse
+ 
+ 
+                              Function
+ 
+ 
+     Returns an approximate X such that CUMNOR(X) = P, i.e., the
+     integral from -infinity to X of (1/SQRT(2*PI)) EXP(-U*U/2) dU is P
+ 
+ 
+                              Arguments
+ 
+ 
+     P --> The probability whose normal deviate is sought.
+                    P is DOUBLE PRECISION
+ 
+ 
+                              Method
+ 
+ 
+     The  rational   function   on  page 95    of Kennedy  and  Gentle,
+     Statistical Computing, Marcel Dekker, NY , 1980.
+ 
+**********************************************************************
+*/
+{
+static double xden[5] = {
+    0.993484626060e-1,0.588581570495e0,0.531103462366e0,0.103537752850e0,
+    0.38560700634e-2
+};
+static double xnum[5] = {
+    -0.322232431088e0,-1.000000000000e0,-0.342242088547e0,-0.204231210245e-1,
+    -0.453642210148e-4
+};
+static int K1 = 5;
+static double approx,sign,tail,y;
+/*
+     Work with the tail probability (the smaller of p and 1-p) and
+     restore the sign of the deviate at the end.
+*/
+    if(*p <= 0.5e0) {
+        sign = -1.0e0;
+        tail = *p;
+    }
+    else {
+        sign = 1.0e0;
+        tail = 1.0e0-*p;
+    }
+    /* the rational function is evaluated at y = sqrt(-2*log(tail)) */
+    y = sqrt(-(2.0e0*log(tail)));
+    approx = y+devlpl(xnum,&K1,&y)/devlpl(xden,&K1,&y);
+    return sign*approx;
+}
+/************************************************************************
+FIFDINT:
+Truncates a double precision number to an integer and returns the
+value in a double.
+************************************************************************/
+double fifdint(double a)
+/* a     -     number to be truncated toward zero */
+{
+  /* floor/ceil avoid the old cast through long, which is undefined for */
+  /* NaN or |a| > LONG_MAX; inputs in (-1,0) now give -0.0 (== 0.0).    */
+  return (a < 0.0) ? ceil(a) : floor(a);
+}
+/************************************************************************
+FIFDMAX1:
+returns the maximum of two numbers a and b
+************************************************************************/
+double fifdmax1(double a,double b)
+/* a     -      first operand */
+/* b     -      second operand, returned only when a < b holds */
+{
+  /* Ties, and comparisons that are false because of a NaN, yield a. */
+  return (a < b) ? b : a;
+}
+/************************************************************************
+FIFDMIN1:
+returns the minimum of two numbers a and b
+************************************************************************/
+double fifdmin1(double a,double b)
+/* a     -     first operand, returned only when a < b holds */
+/* b     -     second operand */
+{
+  /* Ties, and comparisons that are false because of a NaN, yield b. */
+  return (a < b) ? a : b;
+}
+/************************************************************************
+FIFDSIGN:
+transfers the sign of the variable "sign" to the variable "mag"
+************************************************************************/
+double fifdsign(double mag,double sign)
+/* mag     -     value whose magnitude is kept */
+/* sign    -     value whose sign is transferred; only sign < 0 negates */
+{
+  /* Plain comparisons (not copysign), so a sign argument of -0.0 or NaN */
+  /* counts as non-negative.                                            */
+  const double magnitude = (mag < 0) ? -mag : mag;
+  return (sign < 0) ? -magnitude : magnitude;
+}
+/************************************************************************
+FIFIDINT:
+Truncates a double precision number to a long integer
+************************************************************************/
+long fifidint(double a)
+/* a - value to truncate toward zero; the result must fit in a long */
+{
+  return (long)a;
+}
+/************************************************************************
+FIFMOD:
+returns the modulo of a and b
+************************************************************************/
+long fifmod(long a,long b)
+/* a - dividend */
+/* b - divisor; must be non-zero */
+{
+  return a - (a / b) * b;
+}
+/************************************************************************
+FTNSTOP:
+Prints msg to standard error and then exits
+************************************************************************/
+void ftnstop(char* msg)
+/* msg - diagnostic printed on its own line before exiting; NULL prints nothing */
+{
+  if (msg) { fputs(msg,stderr); fputc('\n',stderr); }
+  exit(EXIT_FAILURE); /* always ends the process with a failure status */
+}
diff --git a/src/cdflib90/ipmpar.c b/src/cdflib90/ipmpar.c
new file mode 100644
index 0000000..c5705d3
--- /dev/null
+++ b/src/cdflib90/ipmpar.c
@@ -0,0 +1,426 @@
+int ipmpar(int*);
+/*
+-----------------------------------------------------------------------
+ 
+     IPMPAR PROVIDES THE INTEGER MACHINE CONSTANTS FOR THE COMPUTER
+     THAT IS USED. IT IS ASSUMED THAT THE ARGUMENT I IS AN INTEGER
+     HAVING ONE OF THE VALUES 1-10. IPMPAR(I) HAS THE VALUE ...
+ 
+  INTEGERS.
+ 
+     ASSUME INTEGERS ARE REPRESENTED IN THE N-DIGIT, BASE-A FORM
+ 
+               SIGN ( X(N-1)*A**(N-1) + ... + X(1)*A + X(0) )
+ 
+               WHERE 0 .LE. X(I) .LT. A FOR I=0,...,N-1.
+ 
+     IPMPAR(1) = A, THE BASE.
+ 
+     IPMPAR(2) = N, THE NUMBER OF BASE-A DIGITS.
+ 
+     IPMPAR(3) = A**N - 1, THE LARGEST MAGNITUDE.
+ 
+  FLOATING-POINT NUMBERS.
+ 
+     IT IS ASSUMED THAT THE SINGLE AND DOUBLE PRECISION FLOATING
+     POINT ARITHMETICS HAVE THE SAME BASE, SAY B, AND THAT THE
+     NONZERO NUMBERS ARE REPRESENTED IN THE FORM
+ 
+               SIGN (B**E) * (X(1)/B + ... + X(M)/B**M)
+ 
+               WHERE X(I) = 0,1,...,B-1 FOR I=1,...,M,
+               X(1) .GE. 1, AND EMIN .LE. E .LE. EMAX.
+ 
+     IPMPAR(4) = B, THE BASE.
+ 
+  SINGLE-PRECISION
+ 
+     IPMPAR(5) = M, THE NUMBER OF BASE-B DIGITS.
+ 
+     IPMPAR(6) = EMIN, THE SMALLEST EXPONENT E.
+ 
+     IPMPAR(7) = EMAX, THE LARGEST EXPONENT E.
+ 
+  DOUBLE-PRECISION
+ 
+     IPMPAR(8) = M, THE NUMBER OF BASE-B DIGITS.
+ 
+     IPMPAR(9) = EMIN, THE SMALLEST EXPONENT E.
+ 
+     IPMPAR(10) = EMAX, THE LARGEST EXPONENT E.
+ 
+-----------------------------------------------------------------------
+ 
+     TO DEFINE THIS FUNCTION FOR THE COMPUTER BEING USED REMOVE
+     THE COMMENT DELIMITERS FROM THE DEFINITIONS DIRECTLY BELOW THE NAME
+     OF THE MACHINE
+ 
+-----------------------------------------------------------------------
+ 
+     IPMPAR IS AN ADAPTATION OF THE FUNCTION I1MACH, WRITTEN BY
+     P.A. FOX, A.D. HALL, AND N.L. SCHRYER (BELL LABORATORIES).
+     IPMPAR WAS FORMED BY A.H. MORRIS (NSWC). THE CONSTANTS ARE
+     FROM BELL LABORATORIES, NSWC, AND OTHER SOURCES.
+ 
+-----------------------------------------------------------------------
+     .. Scalar Arguments ..
+*/
+int ipmpar(int *i)
+{
+static int imach[11];  /* slots 1-10 hold the constants; slot 0 is never assigned */
+static int ipmpar;     /* holds the value to return */
+/*     MACHINE CONSTANTS FOR AMDAHL MACHINES. */
+/*
+   imach[1] = 2;
+   imach[2] = 31;
+   imach[3] = 2147483647;
+   imach[4] = 16;
+   imach[5] = 6;
+   imach[6] = -64;
+   imach[7] = 63;
+   imach[8] = 14;
+   imach[9] = -64;
+   imach[10] = 63;
+*/
+/*     MACHINE CONSTANTS FOR THE AT&T 3B SERIES, AT&T
+       PC 7300, AND AT&T 6300. */
+/*
+   imach[1] = 2;
+   imach[2] = 31;
+   imach[3] = 2147483647;
+   imach[4] = 2;
+   imach[5] = 24;
+   imach[6] = -125;
+   imach[7] = 128;
+   imach[8] = 53;
+   imach[9] = -1021;
+   imach[10] = 1024;
+*/
+/*     MACHINE CONSTANTS FOR THE BURROUGHS 1700 SYSTEM. */
+/*
+   imach[1] = 2;
+   imach[2] = 33;
+   imach[3] = 8589934591;
+   imach[4] = 2;
+   imach[5] = 24;
+   imach[6] = -256;
+   imach[7] = 255;
+   imach[8] = 60;
+   imach[9] = -256;
+   imach[10] = 255;
+*/
+/*     MACHINE CONSTANTS FOR THE BURROUGHS 5700 SYSTEM. */
+/*
+   imach[1] = 2;
+   imach[2] = 39;
+   imach[3] = 549755813887;
+   imach[4] = 8;
+   imach[5] = 13;
+   imach[6] = -50;
+   imach[7] = 76;
+   imach[8] = 26;
+   imach[9] = -50;
+   imach[10] = 76;
+*/
+/*     MACHINE CONSTANTS FOR THE BURROUGHS 6700/7700 SYSTEMS. */
+/*
+   imach[1] = 2;
+   imach[2] = 39;
+   imach[3] = 549755813887;
+   imach[4] = 8;
+   imach[5] = 13;
+   imach[6] = -50;
+   imach[7] = 76;
+   imach[8] = 26;
+   imach[9] = -32754;
+   imach[10] = 32780;
+*/
+/*     MACHINE CONSTANTS FOR THE CDC 6000/7000 SERIES
+       60 BIT ARITHMETIC, AND THE CDC CYBER 995 64 BIT
+       ARITHMETIC (NOS OPERATING SYSTEM). */
+/*
+   imach[1] = 2;
+   imach[2] = 48;
+   imach[3] = 281474976710655;
+   imach[4] = 2;
+   imach[5] = 48;
+   imach[6] = -974;
+   imach[7] = 1070;
+   imach[8] = 95;
+   imach[9] = -926;
+   imach[10] = 1070;
+*/
+/*     MACHINE CONSTANTS FOR THE CDC CYBER 995 64 BIT
+       ARITHMETIC (NOS/VE OPERATING SYSTEM). */
+/*
+   imach[1] = 2;
+   imach[2] = 63;
+   imach[3] = 9223372036854775807;
+   imach[4] = 2;
+   imach[5] = 48;
+   imach[6] = -4096;
+   imach[7] = 4095;
+   imach[8] = 96;
+   imach[9] = -4096;
+   imach[10] = 4095;
+*/
+/*     MACHINE CONSTANTS FOR THE CRAY 1, XMP, 2, AND 3. */
+/*
+   imach[1] = 2;
+   imach[2] = 63;
+   imach[3] = 9223372036854775807;
+   imach[4] = 2;
+   imach[5] = 47;
+   imach[6] = -8189;
+   imach[7] = 8190;
+   imach[8] = 94;
+   imach[9] = -8099;
+   imach[10] = 8190;
+*/
+/*     MACHINE CONSTANTS FOR THE DATA GENERAL ECLIPSE S/200. */
+/*
+   imach[1] = 2;
+   imach[2] = 15;
+   imach[3] = 32767;
+   imach[4] = 16;
+   imach[5] = 6;
+   imach[6] = -64;
+   imach[7] = 63;
+   imach[8] = 14;
+   imach[9] = -64;
+   imach[10] = 63;
+*/
+/*     MACHINE CONSTANTS FOR THE HARRIS 220. */
+/*
+   imach[1] = 2;
+   imach[2] = 23;
+   imach[3] = 8388607;
+   imach[4] = 2;
+   imach[5] = 23;
+   imach[6] = -127;
+   imach[7] = 127;
+   imach[8] = 38;
+   imach[9] = -127;
+   imach[10] = 127;
+*/
+/*     MACHINE CONSTANTS FOR THE HONEYWELL 600/6000
+       AND DPS 8/70 SERIES. */
+/*
+   imach[1] = 2;
+   imach[2] = 35;
+   imach[3] = 34359738367;
+   imach[4] = 2;
+   imach[5] = 27;
+   imach[6] = -127;
+   imach[7] = 127;
+   imach[8] = 63;
+   imach[9] = -127;
+   imach[10] = 127;
+*/
+/*     MACHINE CONSTANTS FOR THE HP 2100
+       3 WORD DOUBLE PRECISION OPTION WITH FTN4 */
+/*
+   imach[1] = 2;
+   imach[2] = 15;
+   imach[3] = 32767;
+   imach[4] = 2;
+   imach[5] = 23;
+   imach[6] = -128;
+   imach[7] = 127;
+   imach[8] = 39;
+   imach[9] = -128;
+   imach[10] = 127;
+*/
+/*     MACHINE CONSTANTS FOR THE HP 2100
+       4 WORD DOUBLE PRECISION OPTION WITH FTN4 */
+/*
+   imach[1] = 2;
+   imach[2] = 15;
+   imach[3] = 32767;
+   imach[4] = 2;
+   imach[5] = 23;
+   imach[6] = -128;
+   imach[7] = 127;
+   imach[8] = 55;
+   imach[9] = -128;
+   imach[10] = 127;
+*/
+/*     MACHINE CONSTANTS FOR THE HP 9000. */
+/*
+   imach[1] = 2;
+   imach[2] = 31;
+   imach[3] = 2147483647;
+   imach[4] = 2;
+   imach[5] = 24;
+   imach[6] = -126;
+   imach[7] = 128;
+   imach[8] = 53;
+   imach[9] = -1021;
+   imach[10] = 1024;
+*/
+/*     MACHINE CONSTANTS FOR THE IBM 360/370 SERIES,
+       THE ICL 2900, THE ITEL AS/6, THE XEROX SIGMA
+       5/7/9 AND THE SEL SYSTEMS 85/86. */
+/*
+   imach[1] = 2;
+   imach[2] = 31;
+   imach[3] = 2147483647;
+   imach[4] = 16;
+   imach[5] = 6;
+   imach[6] = -64;
+   imach[7] = 63;
+   imach[8] = 14;
+   imach[9] = -64;
+   imach[10] = 63;
+*/
+/*     MACHINE CONSTANTS FOR THE IBM PC. */
+/*
+   imach[1] = 2;
+   imach[2] = 31;
+   imach[3] = 2147483647;
+   imach[4] = 2;
+   imach[5] = 24;
+   imach[6] = -125;
+   imach[7] = 128;
+   imach[8] = 53;
+   imach[9] = -1021;
+   imach[10] = 1024;
+*/
+/*     MACHINE CONSTANTS FOR THE MACINTOSH II - ABSOFT
+       MACFORTRAN II. */
+/*
+   imach[1] = 2;
+   imach[2] = 31;
+   imach[3] = 2147483647;
+   imach[4] = 2;
+   imach[5] = 24;
+   imach[6] = -125;
+   imach[7] = 128;
+   imach[8] = 53;
+   imach[9] = -1021;
+   imach[10] = 1024;
+*/
+/*     MACHINE CONSTANTS FOR THE MICROVAX - VMS FORTRAN. */
+/*
+   imach[1] = 2;
+   imach[2] = 31;
+   imach[3] = 2147483647;
+   imach[4] = 2;
+   imach[5] = 24;
+   imach[6] = -127;
+   imach[7] = 127;
+   imach[8] = 56;
+   imach[9] = -127;
+   imach[10] = 127;
+*/
+/*     MACHINE CONSTANTS FOR THE PDP-10 (KA PROCESSOR). */
+/*
+   imach[1] = 2;
+   imach[2] = 35;
+   imach[3] = 34359738367;
+   imach[4] = 2;
+   imach[5] = 27;
+   imach[6] = -128;
+   imach[7] = 127;
+   imach[8] = 54;
+   imach[9] = -101;
+   imach[10] = 127;
+*/
+/*     MACHINE CONSTANTS FOR THE PDP-10 (KI PROCESSOR). */
+/*
+   imach[1] = 2;
+   imach[2] = 35;
+   imach[3] = 34359738367;
+   imach[4] = 2;
+   imach[5] = 27;
+   imach[6] = -128;
+   imach[7] = 127;
+   imach[8] = 62;
+   imach[9] = -128;
+   imach[10] = 127;
+*/
+/*     MACHINE CONSTANTS FOR THE PDP-11 FORTRAN SUPPORTING
+       32-BIT INTEGER ARITHMETIC. */
+/*
+   imach[1] = 2;
+   imach[2] = 31;
+   imach[3] = 2147483647;
+   imach[4] = 2;
+   imach[5] = 24;
+   imach[6] = -127;
+   imach[7] = 127;
+   imach[8] = 56;
+   imach[9] = -127;
+   imach[10] = 127;
+*/
+/*     MACHINE CONSTANTS FOR THE SEQUENT BALANCE 8000. */
+/*
+   imach[1] = 2;
+   imach[2] = 31;
+   imach[3] = 2147483647;
+   imach[4] = 2;
+   imach[5] = 24;
+   imach[6] = -125;
+   imach[7] = 128;
+   imach[8] = 53;
+   imach[9] = -1021;
+   imach[10] = 1024;
+*/
+/*     MACHINE CONSTANTS FOR THE SILICON GRAPHICS IRIS-4D
+       SERIES (MIPS R3000 PROCESSOR). */
+/*
+   imach[1] = 2;
+   imach[2] = 31;
+   imach[3] = 2147483647;
+   imach[4] = 2;
+   imach[5] = 24;
+   imach[6] = -125;
+   imach[7] = 128;
+   imach[8] = 53;
+   imach[9] = -1021;
+   imach[10] = 1024;
+*/
+/*     MACHINE CONSTANTS FOR IEEE ARITHMETIC MACHINES, SUCH AS THE AT&T
+       3B SERIES, MOTOROLA 68000 BASED MACHINES (E.G. SUN 3 AND AT&T
+       PC 7300), AND 8087 BASED MICROS (E.G. IBM PC AND AT&T 6300). */
+/*     THIS IS THE ACTIVE TABLE: THE ONLY ONE NOT COMMENTED OUT. */
+   imach[1] = 2;            /* integer base A */
+   imach[2] = 31;           /* number of base-A integer digits N */
+   imach[3] = 2147483647;   /* largest integer magnitude, A**N - 1 */
+   imach[4] = 2;            /* floating-point base B */
+   imach[5] = 24;           /* single precision: number of base-B digits M */
+   imach[6] = -125;         /* single precision: smallest exponent EMIN */
+   imach[7] = 128;          /* single precision: largest exponent EMAX */
+   imach[8] = 53;           /* double precision: number of base-B digits M */
+   imach[9] = -1021;        /* double precision: smallest exponent EMIN */
+   imach[10] = 1024;        /* double precision: largest exponent EMAX */
+
+/*     MACHINE CONSTANTS FOR THE UNIVAC 1100 SERIES. */
+/*
+   imach[1] = 2;
+   imach[2] = 35;
+   imach[3] = 34359738367;
+   imach[4] = 2;
+   imach[5] = 27;
+   imach[6] = -128;
+   imach[7] = 127;
+   imach[8] = 60;
+   imach[9] = -1024;
+   imach[10] = 1023;
+*/
+/*     MACHINE CONSTANTS FOR THE VAX 11/780. */
+/*
+   imach[1] = 2;
+   imach[2] = 31;
+   imach[3] = 2147483647;
+   imach[4] = 2;
+   imach[5] = 24;
+   imach[6] = -127;
+   imach[7] = 127;
+   imach[8] = 56;
+   imach[9] = -127;
+   imach[10] = 127;
+*/
+    ipmpar = imach[*i];  /* no range check here: *i must be 1..10 */
+    return ipmpar;
+}
diff --git a/src/cdflib90/readme b/src/cdflib90/readme
new file mode 100644
index 0000000..4dbf23b
--- /dev/null
+++ b/src/cdflib90/readme
@@ -0,0 +1,384 @@
+
+
+
+
+
+
+
+
+
+
+
+                                    DCDFLIB
+
+               Library of C Routines for Cumulative Distribution
+                 Functions, Inverses, and Other Parameters
+
+                                  Version 1.1
+
+                                (November, 1997)
+
+
+
+
+
+
+                    Summary Documentation of Each Routine
+
+
+
+
+
+
+
+
+                            Compiled and Written by:
+
+                                 Barry W. Brown
+                                  James Lovato
+                                  Kathy Russell
+
+
+
+
+
+
+
+                     Department of Biomathematics, Box 237
+                     The University of Texas, M.D. Anderson Cancer Center
+                     1515 Holcombe Boulevard
+                     Houston, TX      77030
+
+
+ This work was supported by grant CA-16672 from the National Cancer Institute.
+

+
+                          SUMMARY OF DCDFLIB
+
+This  library  contains routines  to compute  cumulative  distribution
+functions, inverses, and    parameters  of the  distribution  for  the
+following set of statistical distributions:
+
+    (1) Beta
+    (2) Binomial
+    (3) Chi-square
+    (4) Noncentral Chi-square
+    (5) F
+    (6) Noncentral F
+    (7) Gamma
+    (8) Negative Binomial
+    (9) Normal
+    (10) Poisson
+    (11) Student's t
+    (12) Noncentral t
+
+Given values of all but one parameter of a distribution, the other is
+computed. These calculations are done with C pointers to Doubles.
+
+          -------------------- WARNINGS --------------------
+
+The F and Noncentral F  distributions are not necessarily monotone  in
+either degrees of freedom argument.  Consequently, there  may be  two
+degrees of freedom values that satisfy the specified condition.  An
+arbitrary one of these will be found by the cdf routines.
+
+The  amount of computation  required for  the noncentral chisquare and
+noncentral F  distributions   is proportional  to  the  value  of  the
+noncentrality   parameter.  Very large values  of   this parameter can
+require an immense amount of computation.   Consequently,  when  the
+noncentrality parameter is to  be calculated, the upper limit searched
+is 10,000.  For the noncentral t, the computation time is proportional
+to the noncentrality parameter so the upper limit searched is 10,000.
+
+        -------------------- END WARNINGS --------------------
+
+
+                 COMMENTS ON THE C VERSION OF DCDFLIB
+
+The C version was obtained by converting the original  Fortran DCDFLIB
+to C using PROMULA.FORTRAN  and performing  some hand  crafting of the
+result.  Information on PROMULA.FORTRAN can be obtained from
+
+                   PROMULA Development Corporation
+                    3620 N. High Street, Suite 301
+                         Columbus, Ohio 43214
+                            (614) 263-5454
+
+DCDFLIB.C was tested  using the xlc  compiler under AIX  3.1 on an IBM
+RS/6000.  The code  was  also examined  with lint  on the same system.
+DCDFLIB  was  also  successfully tested using  the gcc compiler  (see
+below) on a Solbourne.
+
+DCDFLIB.C  can  be obtained by anonymous  ftp  to odin.mda.uth.tmc.edu
+(129.106.3.17) where it is available as
+                        /pub/unix/dcdflib.c.tar.Z
+
+The Fortran version of DCDFLIB is available as
+                        /pub/unix/dcdflib.f.tar.Z
+on the same machine.
+^L
+
+
+
+
+                                 CAVEAT
+
+DCDFLIB.C is written in ANSI C and makes heavy use  of prototypes.  It
+will not compile under old style (KR) C compilers (such as the default
+Sun cc compiler).
+
+I don't  recommend conversion to an  obsolete C dialect.  Instead, get
+the  Free  Software Foundation's  excellent  ANSI C compiler,  gcc. It
+compiles KR C as well as  ANSI C. A  version of gcc that  runs on many
+varieties of Unix is available by anonymous ftp as
+                        /pub/gnu/gcc-1.40.tar.Z
+at prep.ai.mit.edu  (18.71.0.38).   A Vax version  is also  present on
+/pub/gnu.  The compilers  are also available  on tape.  Write the Free
+Software Foundation at:
+
+                    Free Software Foundation, Inc.
+                       675 Massachusetts Avenue
+                         Cambridge, MA  02139
+                        Phone: (617) 876-3296
+
+A MSDOS port of gcc, performed by DJ Delorie is also available by ftp.
+
+File location:
+
+    host:      grape.ecs.clarkson.edu
+    login:     ftp
+    password:  send your e-mail address
+    directory: ~ftp/pub/msdos/djgcc
+
+File in .ZIP format - djgpp.zip - one 2.2M file, contains everything.
+
+A version of DCDFLIB which compiles under old style C can be obtained
+by anonymous ftp to odin.mda.uth.tmc.edu  (129.106.3.17)  where it is
+available as
+                        /pub/unix/dcdflib.kr.c.tar.Z
+
+
+                            DOCUMENTATION
+
+This  file  contains an  overview  of the library   and is the primary
+documentation.
+
+Other documentation  is  in  directory 'doc'  on  the  distribution as
+character  (ASCII) files.  A summary  of all of the available routines
+is contained in dcdflib.chs (chs is an abbreviation of 'cheat sheet').
+The  'chs'  file will  probably  be the  primary reference.  The file,
+dcdflib.fdoc,  contains  the comments for  each  routine intended  for
+direct use.  The file, dcdflib.h, contains prototypes for each routine
+intended for direct use.
+
+                             INSTALLATION
+
+Directory src contains the C source.  The files ipmpar.c and dcdflib.c
+constitute DCDFLIB.  The file cdflib.h is included in dcdflib.c.
+
+A  few  routines use   machine  dependent  constants.  Lists  of  such
+constants for different machines are found in ipmpar.c.  Uncomment the
+ones  appropriate to your  machine.  The distributed  version uses the
+IEEE arithmetic that is used by  the IBM PC,  Macintosh, and most Unix
+workstations.  If you need to change the distribution version you must
+comment out the definitions for  IEEE arithmetic as  well as uncomment
+the ones appropriate to your machine.
+
+NOTE: dcdflib should be linked to the C math library.
+
+NOTE: Ignore compiler warnings of the type "statement not reached".
+
+                               SOURCES
+
+The following   routines, written  by   others, are  incorporated into
+DCDFLIB.
+
+                          Beta Distribution
+
+DiDonato, A.  R. and Morris, A.  H.   Algorithm 708: Significant Digit
+Computation of the Incomplete Beta  Function Ratios.  ACM Trans. Math.
+Softw. 18 (1993), 360-373.
+
+                 Gamma Distribution and Its Inverse
+
+DiDonato, A. R. and Morris, A.  H. Computation of the Incomplete Gamma
+Function  Ratios and  their  Inverse.   ACM  Trans.  Math.   Softw. 12
+(1986), 377-393.
+
+                         Normal Distribution
+
+Kennedy and  Gentle, Statistical Computing,  Marcel  Dekker, NY, 1980.
+The rational function approximations  from pages 90-95 are used during
+the calculation of the inverse normal.
+
+Cody, W.J.  (1993).  "ALGORITHM  715:  SPECFUN  -  A Portable  FORTRAN
+Package   of  Special  Function   Routines   and  Test  Drivers",  ACM
+Transactions on Mathematical Software. 19, 22-32.  A slightly modified
+version of Cody's function  anorm  is used for the cumulative normal.
+
+                             Zero Finder
+
+J.   C. P.   Bus and  T.  J.  Dekker.   Two Efficient  Algorithms with
+Guaranteed Convergence  for Finding a  Zero of a Function.  ACM Trans.
+Math. Softw. 4 (1975), 330.
+
+We transliterated Algorithm R of this paper from Algol to Fortran.
+
+                          General Reference
+
+Abramowitz,  M. and Stegun,  I. A.  Handbook of Mathematical Functions
+With  Formulas, Graphs,  and   Mathematical Tables.   (1964)  National
+Bureau of Standards.
+
+This book has been reprinted by Dover and others.
+
+
+                              LEGALITIES
+
+Code that appeared  in an    ACM  publication  is subject  to    their
+algorithms policy:
+
+     Submittal of  an  algorithm    for publication  in   one of   the  ACM
+     Transactions implies that unrestricted use  of the algorithm within  a
+     computer is permissible.   General permission  to copy and  distribute
+     the algorithm without fee is granted provided that the copies  are not
+     made  or   distributed for  direct   commercial  advantage.    The ACM
+     copyright notice and the title of the publication and its date appear,
+     and  notice is given that copying  is by permission of the Association
+     for Computing Machinery.  To copy otherwise, or to republish, requires
+     a fee and/or specific permission.
+
+     Krogh, F.  Algorithms  Policy.  ACM  Tran.   Math.  Softw.   13(1987),
+     183-186.
+
+We place the DCDFLIB code that we have written in the public domain.  
+
+                                 NO WARRANTY
+     
+     WE PROVIDE ABSOLUTELY  NO WARRANTY  OF ANY  KIND  EITHER  EXPRESSED OR
+     IMPLIED,  INCLUDING BUT   NOT LIMITED TO,  THE  IMPLIED  WARRANTIES OF
+     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK
+     AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS  WITH YOU.  SHOULD
+     THIS PROGRAM PROVE  DEFECTIVE, YOU ASSUME  THE COST  OF  ALL NECESSARY
+     SERVICING, REPAIR OR CORRECTION.
+     
+     IN NO  EVENT  SHALL THE UNIVERSITY  OF TEXAS OR  ANY  OF ITS COMPONENT
+     INSTITUTIONS INCLUDING M. D.   ANDERSON HOSPITAL BE LIABLE  TO YOU FOR
+     DAMAGES, INCLUDING ANY  LOST PROFITS, LOST MONIES,   OR OTHER SPECIAL,
+     INCIDENTAL   OR  CONSEQUENTIAL DAMAGES   ARISING   OUT  OF  THE USE OR
+     INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA OR
+     ITS ANALYSIS BEING  RENDERED INACCURATE OR  LOSSES SUSTAINED  BY THIRD
+     PARTIES) THE PROGRAM.
+     
+     (Above NO WARRANTY modified from the GNU NO WARRANTY statement.)
+

+                    HOW TO USE THE ROUTINES
+
+The calling sequence for each routine is of the form:
+
+   void cdf<name>(int *which,double *p,double *q,double *x,
+               double *<parameters>,int *status,double *bound)
+
+WHICH  and  STATUS  are  pointers  to  int;   all other arguments  are
+pointers to double.
+
+<name> is a one to  three character name identifying the distribution.
+which  is an input integer value  that identifies what parameter value
+is to be calculated from the values of the other parameters.
+
+P is always the cdf evaluated at X, Q is always the complement of the
+cdf evaluated at X, i.e.  1-P, and X is always the value at which the
+cdf  is evaluated.   The auxiliary parameters,  <parameters>,  of the
+distribution differ by distribution.
+
+If WHICH is 1, P  and Q are to be calculated, i.e., the cdf; if WHICH
+is 2, X is to be calculated, i.e., the inverse cdf.  The value of one
+auxiliary parameter in <parameters> can also be the value calculated.
+
+STATUS returns 0 if the calculation completes correctly.
+
+           --------------------WARNING--------------------
+
+If STATUS is not 0, no meaningful answer is returned.
+
+        -------------------- END WARNING --------------------
+
+STATUS returns  -I if the I'th  input parameter was  not  in the legal
+range (see below).  Parameters are counted  with which being the first
+in these return values.
+
+A STATUS  value of 1 indicates that  the desired answer was apparently
+lower than the lower bound on the search interval.  A return code of 2
+indicates that  the answer was  apparently higher than the upper bound
+on the search interval.  A return code of 3 indicates that P and Q did
+not sum to 1. Other positive codes are routine specific.
+
+BOUND is not  set if status is returned  as 0.  If  STATUS is -I  then
+BOUND is   the bound illegally  exceeded by  input  parameter I, where
+WHICH  is  counted as 1,  P as 2,  Q as 3,  X as 4, etc.  If STATUS is 
+returned as 1 or 2 then bound  is returned as the lower or upper bound
+on the search interval respectively.
+
+
+                                BOUNDS
+
+Below are  the rules that we used  in determining bounds on quantities
+to be  calculated.   Those who don't care   can find a summary  of the
+bounds in  dcdflib.chs.   Input bounds  are  checked for  legality  of
+input.  The search  range  is  the range   of values searched  for  an
+answer.
+
+                             Input Bounds
+
+Bounds on input parameters are  checked by the  cdf* routines.   These
+bounds were set according to the following rules.
+
+P: If the  domain of the cdf (X) extends to  -infinity  then P must be
+greater than 0 otherwise P must be greater than or equal to 0.  P must
+always be less than or equal to 1.
+
+Q: If the  domain of the cdf (X) extends to  +infinity  then Q must be
+greater than 0 otherwise Q must be greater than or equal to 0.  Q must
+always be less than or equal to 1.
+
+Further, P and Q must sum to 1. The smaller of the two P and Q will be
+used in calculations to increase accuracy.
+
+X:  If  the  domain is infinite  in   either the positive  or negative
+direction, no check  is performed in  that direction.  If the left end
+of the domain is 0, then X is checked to assure non-negativity.
+
+DF, SD, etc.:  Some auxiliary parameters must  be positive. The lowest
+input value accepted for these parameters is 1E-100.
+
+
+                                Search Bounds
+
+These are the  ranges searched for an  answer.   If the domain  of the
+parameter in the cdf  is closed at  some  finite value, e.g., 0,  then
+this value is the same endpoint of the search range.  If the domain is
+open  at  some finite   endpoint (which only  occurs   for  0 --  some
+parameters must be strictly positive) then  the endpoint is 1E-100. If
+the  domain is infinite in either  direction then +/- 1E100 is used as
+the endpoint of the search range.
+
+                        HOW THE ROUTINES WORK
+
+The cumulative  distribution   functions are computed  directly.   The
+normal, gamma,  and  beta functions use the  code  from the references
+cited.  Other  cdfs are calculated  by relating them  to one  of these
+distributions.  For example, the  binomial and negative binomial  cdfs
+can be converted  to a beta cdf.   This is how fractional observations
+are handled.  The  formula from Abramowitz  and Stegun  for converting
+the cdfs is cited  in the fdoc file.    (We think the formula  for the
+negative binomial in A&S is wrong, but there is a correct one which we
+used.)
+
+The inverse normal and gamma are also taken  from the references.  For
+all other parameters, a search is made for the value that provides the
+desired P.  Initial  values are chosen crudely  for the search  (e.g.,
+5).  If the domain  of the cdf for the  parameter being calculated  is
+infinite, a step doubling strategy is  used to bound the desired value
+then the  zero  finder is  employed  to refine the answer.    The zero
+finder attempts to obtain the answer accurately to about eight decimal
+places.
diff --git a/src/lofreq/.gitignore b/src/lofreq/.gitignore
new file mode 100644
index 0000000..9d4fd01
--- /dev/null
+++ b/src/lofreq/.gitignore
@@ -0,0 +1,19 @@
+# backup files
+*~
+*#
+
+# objects
+*.o
+*.a
+*.lo
+*.la
+lofreq
+lofreq2
+
+Makefile
+Makefile.log
+.deps
+.libs
+clang_output*
+
+schmock*
\ No newline at end of file
diff --git a/src/lofreq/Makefile.am b/src/lofreq/Makefile.am
new file mode 100644
index 0000000..edbadde
--- /dev/null
+++ b/src/lofreq/Makefile.am
@@ -0,0 +1,32 @@
+AM_CFLAGS = -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -Wall -I../cdflib90/ -I../uthash -I@HTSLIB@ -I@SAMTOOLS@ @AM_CFLAGS@
+bin_PROGRAMS = lofreq
+lofreq_SOURCES = bam_md_ext.c bam_md_ext.h \
+bedidx.c bam_index.c \
+binom.c binom.h \
+defaults.h \
+fet.c fet.h \
+kprobaln_ext.c kprobaln_ext.h \
+log.c log.h \
+lofreq_alnqual.c lofreq_alnqual.h \
+lofreq_index.c lofreq_index.h \
+lofreq_uniq.h lofreq_uniq.c \
+lofreq_checkref.h lofreq_checkref.c \
+lofreq_indelqual.h lofreq_indelqual.c \
+lofreq_main.c \
+lofreq_viterbi.c lofreq_viterbi.h \
+lofreq_vcfset.c lofreq_vcfset.h \
+lofreq_filter.c lofreq_filter.h  \
+lofreq_call.c lofreq_call.h \
+multtest.c multtest.h \
+plp.c plp.h \
+samutils.h samutils.c \
+snpcaller.h snpcaller.c \
+utils.c utils.h \
+vcf.c vcf.h \
+viterbi.c viterbi.h
+#lofreq_bamstats.h lofreq_bamstats.c
+
+
+# note: order matters
+#lofreq_LDADD = @htslib_dir@/libhts.a @samtools_dir@/libbam.a
+lofreq_LDADD = @HTSLIB@/libhts.a @SAMTOOLS@/libbam.a ../cdflib90/libcdf.a
diff --git a/src/lofreq/bam_index.c b/src/lofreq/bam_index.c
new file mode 100644
index 0000000..b0654bc
--- /dev/null
+++ b/src/lofreq/bam_index.c
@@ -0,0 +1,110 @@
+/*  bam_index.c -- index and idxstats subcommands.
+
+    Copyright (C) 2008-2011, 2013, 2014 Genome Research Ltd.
+    Portions copyright (C) 2010 Broad Institute.
+    Portions copyright (C) 2013 Peter Cock, The James Hutton Institute.
+
+    Author: Heng Li <lh3 at sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notices and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#include <htslib/hts.h>
+#include <htslib/sam.h>
+#include <htslib/khash.h>
+#include <stdlib.h>
+#include <stdio.h>
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+#include <unistd.h>
+
+#define BAM_LIDX_SHIFT    14
+/* Placeholder: prints a "not yet implemented" message to stderr and abort()s; neither argument is used and the function never returns. */
+int bam_index_build2(const char *fn, const char *_fnidx)
+{
+    fprintf(stderr, "Samtools-htslib-API: bam_index_build2() not yet implemented\n");
+    abort();
+}
+/* Write the usage text of the index subcommand to fp; the default printed for -m is BAM_LIDX_SHIFT. */
+static void index_usage(FILE *fp)
+{
+    fprintf(fp,
+"Usage: samtools index [-bc] [-m INT] <in.bam> [out.index]\n"
+"Options:\n"
+"  -b       Generate BAI-format index for BAM files [default]\n"
+"  -c       Generate CSI-format index for BAM files\n"
+"  -m INT   Set minimum interval size for CSI indices to 2^INT [%d]\n", BAM_LIDX_SHIFT);
+}
+/* "index" subcommand: parses -b/-c/-m, then builds the index for argv[optind]. Returns 0 on success and 1 on a usage error or when the index could not be built. */
+int bam_index(int argc, char *argv[])
+{
+    int csi = 0;
+    int min_shift = BAM_LIDX_SHIFT;
+    int c;
+
+    while ((c = getopt(argc, argv, "bcm:")) >= 0)
+        switch (c) {
+        case 'b': csi = 0; break;
+        case 'c': csi = 1; break;
+        case 'm': csi = 1; min_shift = atoi(optarg); break; /* -m implies CSI */
+        default:
+            index_usage(stderr);
+            return 1;
+        }
+
+    if (optind == argc) {
+        index_usage(stdout);
+        return 1;
+    }
+    if (argc - optind > 1) bam_index_build2(argv[optind], argv[optind+1]); /* explicit output name: currently a stub that aborts */
+    else if (bam_index_build(argv[optind], csi? min_shift : 0) < 0) { fprintf(stderr, "[%s] fail to build the index.\n", __func__); return 1; } /* min_shift 0 selects BAI; do not report success on failure */
+    return 0;
+}
+/* "idxstats" subcommand: for every target in the header of argv[1] print name, length and the two counters returned by hts_idx_get_stat(), then a "*" line with hts_idx_get_n_no_coor(). Returns 0 on success, 1 on any error; everything opened so far is released on the error paths. */
+int bam_idxstats(int argc, char *argv[])
+{
+    hts_idx_t* idx;
+    bam_hdr_t* header;
+    samFile* fp;
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: samtools idxstats <in.bam>\n");
+        return 1;
+    }
+    fp = sam_open(argv[1], "r");
+    if (fp == NULL) { fprintf(stderr, "[%s] fail to open BAM.\n", __func__); return 1; }
+    header = sam_hdr_read(fp);
+    if (header == NULL) { fprintf(stderr, "[%s] fail to read the header.\n", __func__); sam_close(fp); return 1; }
+    idx = sam_index_load(fp, argv[1]);
+    if (idx == NULL) { fprintf(stderr, "[%s] fail to load the index.\n", __func__); bam_hdr_destroy(header); sam_close(fp); return 1; }
+    int i;
+    for (i = 0; i < header->n_targets; ++i) {
+        // Print out contig name and length
+        printf("%s\t%d", header->target_name[i], header->target_len[i]);
+        // Now fetch info about it from the meta bin
+        uint64_t u, v;
+        hts_idx_get_stat(idx, i, &u, &v);
+        printf("\t%" PRIu64 "\t%" PRIu64 "\n", u, v);
+    }
+    // Dump information about unmapped reads
+    printf("*\t0\t0\t%" PRIu64 "\n", hts_idx_get_n_no_coor(idx));
+    bam_hdr_destroy(header);
+    hts_idx_destroy(idx);
+    sam_close(fp);
+    return 0;
+}
diff --git a/src/lofreq/bam_index.c.LICENSE b/src/lofreq/bam_index.c.LICENSE
new file mode 100644
index 0000000..f22c0c0
--- /dev/null
+++ b/src/lofreq/bam_index.c.LICENSE
@@ -0,0 +1,37 @@
+Unchanged copy of corresponding file in samtools 1.1.
+
+---
+
+The MIT/Expat License
+
+Copyright (C) 2008-2014 Genome Research Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
+
+[The use of a range of years within a copyright notice in this distribution
+should be interpreted as being equivalent to a list of years including the
+first and last year specified and all consecutive years between them.
+
+For example, a copyright notice that reads "Copyright (C) 2005, 2007-2009,
+2011-2012" should be interpreted as being identical to a notice that reads
+"Copyright (C) 2005, 2007, 2008, 2009, 2011, 2012" and a copyright notice
+that reads "Copyright (C) 2005-2012" should be interpreted as being identical
+to a notice that reads "Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010,
+2011, 2012".]
diff --git a/src/lofreq/bam_index.c.README b/src/lofreq/bam_index.c.README
new file mode 100644
index 0000000..de17f59
--- /dev/null
+++ b/src/lofreq/bam_index.c.README
@@ -0,0 +1,2 @@
+copied from samtools-1.1
+needed because idxstats is not part of the samtools API yet
diff --git a/src/lofreq/bam_md_ext.c b/src/lofreq/bam_md_ext.c
new file mode 100644
index 0000000..1eaea5c
--- /dev/null
+++ b/src/lofreq/bam_md_ext.c
@@ -0,0 +1,495 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*
+  This is part of LoFreq Star and largely based on samtools' bam_md.c
+  (0.1.19) which was originally published under the MIT License.
+  
+  Copyright (c) 2003-2006, 2008-2010, by Heng Li <lh3lh3 at live.co.uk>
+  
+  Permission is hereby granted, free of charge, to any person obtaining
+  a copy of this software and associated documentation files (the
+  "Software"), to deal in the Software without restriction, including
+  without limitation the rights to use, copy, modify, merge, publish,
+  distribute, sublicense, and/or sell copies of the Software, and to
+  permit persons to whom the Software is furnished to do so, subject to
+  the following conditions:
+  
+  The above copyright notice and this permission notice shall be
+  included in all copies or substantial portions of the Software.
+  
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+  SOFTWARE.
+*/
+
+#include <unistd.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <ctype.h>
+#include <math.h>
+#include <float.h>
+
+#include "htslib/sam.h"
+#include "bam.h"
+#include "htslib/faidx.h"
+#include "htslib/kstring.h"
+
+#include "kprobaln_ext.h"
+#include "samutils.h"
+#include "defaults.h"
+#include "bam_md_ext.h"
+
+#ifdef PACBIO_REALN
+static int pacbio_msg_printed = 0;
+#endif
+
+
+
+/* bam_md.c */
+const char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 };
+
+void idaq(bam1_t *b, const char *ref, double **pd, int xe, int xb, int bw);
+
+#define set_u(u, b, i, k) { int x=(i)-(b); x=x>0?x:0; (u)=((k)-x+1)*3; }
+#define prob_to_sangerq(p) (p < 0.0 + DBL_EPSILON ? 126+1 : ((int)(-10 * log10(p))+33))
+#define encode_q(q) (uint8_t)(q < 33 ? '!' : (q > 126 ? '~' : q))
+
+
+/* Band-limit check for a cell index u in the fw and bck matrices of kprob.
+   With bw2 = bw * 2 + 1 those matrices have an alloc limit of bw2 * 3 + 6,
+   and in addition the original BAQ code skips every u for which
+   u < 3 || u >= bw2*3+3. Returns 1 when u passes that test and 0 otherwise.
+*/
+int u_within_limits(int u, int bw) {
+     const int band = bw * 2 + 1;
+     const int upper = band*3 + 3; /* exclusive upper bound */
+
+     /* accept only 3 <= u < upper */
+     if (u < 3) return 0;
+     return u < upper;
+}
+/* Compute indel alignment qualities for read b: walk the CIGAR and, for each insertion/deletion of at most 16 bases that is not at query position 0, sum the pd[][] entries of all equivalent placements in a repeat, turn 1 - sum into a Sanger quality stored at iaq/daq[qpos-1], then append AI_TAG (if any insertion was counted) and AD_TAG (if any deletion was counted). pd is presumably the posterior matrix filled by kpa_ext_glocal() - confirm against the caller. */
+void idaq(bam1_t *b, const char *ref, double **pd, int xe, int xb, int bw)
+{
+    uint32_t *cigar = bam1_cigar(b);
+    bam1_core_t *c = &b->core;
+    // count the number of indels and compute posterior probability
+    uint8_t *iaq = 0, *daq = 0;
+    int n_ins = 0, n_del = 0;
+    int k, x, y, z;
+
+#if 0
+    fprintf(stderr, "Running idaq on %s with cigar %s\n", bam1_qname(b), cigar_str_from_bam(b));
+#endif
+
+    iaq = calloc(c->l_qseq + 1, 1);
+    daq = calloc(c->l_qseq + 1, 1);
+
+    /* init to highest possible value */
+    for (k = 0; k < c->l_qseq; k++) {
+         iaq[k] = daq[k] = '~';
+    }
+    iaq[k] = daq[k] = '\0';
+
+    /* equivalent indels may occur in repetitive regions. In such
+     * cases, we estimate the alignment probability of an indel event
+     * as the sum of the alignment probability of all equivalent indel
+     * events. see del_rep and ins_rep handling below
+     */
+    for (k = 0, x = c->pos, y = 0, z = 0; k < c->n_cigar; ++k) {
+         int j, op = cigar[k]&0xf, oplen = cigar[k]>>4;
+         // this could be merged into the later block
+         if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
+              for (j = 0; j < oplen; j++) {
+                   x++; // coordinate on reference
+                   y++; // coordinate on query
+                   z++; // coordinate on query w/o softclip
+              }
+         } else if (op == BAM_CDEL) {
+              char *del_seq;
+              int rpos = x;
+              int qpos = y;
+              int ref_i;
+              int del_rep = 0;/* if in repetitive region */
+              int rep_i = 0;
+              double ap = 0;
+
+              if (qpos == 0) { x += oplen; continue; } /* skipped, but the deletion still consumes reference */
+              if (oplen > 16) { x += oplen; continue; } /*FIXME why */
+              n_del += 1;
+              del_seq = malloc((oplen+1)*sizeof(char));
+              for (j = 0; j < oplen; j++) {
+                   del_seq[j] = ref[x];
+                   x++;
+              }
+              del_seq[j] = '\0';
+              ref_i = x;
+              while (ref_i < xe) {
+                   if (ref[ref_i] != del_seq[rep_i]) {
+                        break;
+                   }
+                   del_rep += 1;
+                   ref_i += 1;
+                   rep_i += 1;
+                   if (rep_i >= oplen) {
+                        rep_i = 0;
+                   }
+              }
+              for (j = 0; j < del_rep+1; j++) {
+                   if (qpos+j > c->l_qseq) break;
+                   double *pdi = pd[qpos+j];
+                   int u;
+
+                   set_u(u, bw, qpos+j, rpos-xb+1+j);
+                   /* FIXME happens for long reads, i.e. pacbio. why? see corresponding bit for ins_rep
+                    */
+                   if (! u_within_limits(u, bw)) {
+#if 0
+                        fprintf(stderr, "WARNING u of %d not within limits for %s\n", u, bam1_qname(b));
+#endif
+                        continue;
+                   }
+                   ap += pdi[u+2];
+#if 0
+                   fprintf(stderr, "probability to add comes from pd[%d+%d + %d+%d = %d]. qseq+1 is %d\n",
+                           qpos,j,u,2, qpos+j+u+2, c->l_qseq+1);
+                   fprintf(stderr, "probability to add is (%d:%d:%d) %lg\n",
+                           qpos+j, rpos-xb+1+j, u, pdi[u+2]);
+                   fflush(stderr);
+
+#endif
+              }
+              ap = 1 - ap;
+              daq[qpos-1] = encode_q(prob_to_sangerq(ap));
+              /*fprintf(stderr, "DAQ %d: %c %g\n", qpos-1, daq[qpos-1], ap);*/
+#ifdef DEBUG
+              fprintf(stderr, "DEL %s %d %lg %c %s\n",
+                      del_seq, del_rep+1, ap, daq[qpos-1], bam1_qname(b));
+#endif
+              free(del_seq); /* only after the debug output above has used it */
+         } else if (op == BAM_CINS) {
+              char *ins_seq;
+              int rpos = x;
+              int qpos = y;
+              int ins_rep = 0; /* if in repetitive region */
+              int ref_i = x;
+              int rep_i = 0;
+              double ap = 0;
+
+              if (oplen > 16) { y += oplen; z += oplen; continue; } /*FIXME why; skipped, but the insertion still consumes query */
+              n_ins += 1;
+              if (qpos == 0) { y += oplen; z += oplen; continue; }
+              ins_seq = malloc((oplen+1)*sizeof(char));
+              for (j = 0; j < oplen; j++) {
+                   ins_seq[j] = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), y)];
+                   y++;
+                   z++;
+              }
+              ins_seq[j] = '\0';
+              ref_i = x;
+              while (ref_i < xe) {
+                   if (ref[ref_i] != ins_seq[rep_i]) {
+                        break;
+                   }
+                   ins_rep += 1;
+                   ref_i += 1;
+                   rep_i += 1;
+                   if (rep_i >= oplen) {
+                        rep_i = 0;
+                   }
+              }
+              for (j = 0; j < ins_rep+1; j++) {
+                   if (qpos+j+1 > c->l_qseq) break;
+                   double *pdi = pd[qpos+j+1];
+                   int u;
+
+                   set_u(u, bw, qpos+j+1, rpos-xb+j);
+                   /* FIXME happens for long reads, i.e. pacbio. why? see corresponding bit for del_rep
+                    */
+                   if (! u_within_limits(u, bw)) {
+#if 0
+                        fprintf(stderr, "WARNING u of %d not within limits for %s\n", u, bam1_qname(b));
+#endif
+                        continue;
+                   }
+                   ap += pdi[u+1];
+#if 0
+                   fprintf(stderr, "probability to add comes from pd[%d+%d+%d + %d+%d = %d]. qseq+1 is %d\n",
+                           qpos,j,1,u,1, qpos+j+1+u+1, c->l_qseq+1);
+                   fprintf(stderr, "probability to add is (%d:%d:%d) %lg\n",
+                           qpos+j+1, rpos-xb+j, u, pdi[u+1]);
+                   fflush(stderr);
+#endif
+              }
+              ap = 1 - ap; // probability of alignment error
+              iaq[qpos-1] = encode_q(prob_to_sangerq(ap));
+              /*fprintf(stderr, "IAQ %d: %c %g\n", qpos-1, iaq[qpos-1], ap);*/
+#ifdef DEBUG
+              fprintf(stderr, "INS %s %d %lg %c %s\n",
+                      ins_seq, ins_rep+1, ap, iaq[qpos-1], bam1_qname(b));
+#endif
+              free(ins_seq); /* only after the debug output above has used it */
+         } else if (op == BAM_CSOFT_CLIP) {
+              y += oplen; /* soft clips consume query only */
+         } else if (op == BAM_CREF_SKIP) {
+              x += oplen; /* reference skips consume reference only, like deletions */
+         }
+    }
+
+    /*fprintf(stderr, "%s:%s:%d n_ins=%d n_del=%d\n", __FILE__, __FUNCTION__, __LINE__, n_ins, n_del);*/
+    if (n_ins) {
+         bam_aux_append(b, AI_TAG, 'Z', c->l_qseq+1, iaq);
+    }
+    if (n_del)  {
+         bam_aux_append(b, AD_TAG, 'Z', c->l_qseq+1, daq);
+    }
+
+    free(iaq); free(daq);
+}
+
+
+
+/* this is lofreq's target function which was heavily modified to accommodate our needs:
+ * 1. compute indel alignment qualities on top of base alignment qualities
+ * 2. keep base alignment qualities separate, i.e. don't mix with base-qualities
+ *
+ * baq_flag: 0 off, 1 on, 2 redo (an existing BAQ_TAG is deleted and recomputed)
+ * idaq_flag: 0 off, 1 on, 2 redo (same for AI_TAG / AD_TAG). Always returns 0.
+ */
+int bam_prob_realn_core_ext(bam1_t *b, const char *ref,
+                            int baq_flag, int baq_extended,
+                            int idaq_flag)
+{
+/*#define ORIG_BAQ 1*/
+     int k, i, bw, x, y, yb, ye, xb, xe;
+     uint32_t *cigar = bam1_cigar(b);
+     bam1_core_t *c = &b->core;
+#ifdef PACBIO_REALN
+     kpa_ext_par_t conf = kpa_ext_par_lofreq_pacbio;
+     if (! pacbio_msg_printed) {
+          fprintf(stderr, "WARN(%s|%s): Using pacbio viterbi params\n", __FILE__, __FUNCTION__);
+          pacbio_msg_printed = 1;
+     }
+#else
+     kpa_ext_par_t conf = kpa_ext_par_lofreq_illumina;
+#endif
+     /*uint8_t *bq = 0, *zq = 0, *qual = bam1_qual(b);*/
+     uint8_t *qual = bam1_qual(b);
+     uint8_t *prec_ai, *prec_ad, *prec_baq;
+     int has_ins = 0, has_del = 0;
+     double **pd = 0;
+
+     /* nothing to do ? */
+     if (! baq_flag && ! idaq_flag) {
+          return 0;
+     }
+
+     /*fprintf(stderr, "FIXME baq_flag=%d idaq_flag=%d\n", baq_flag, idaq_flag);*/
+
+     /* no alignment? */
+     if ((c->flag & BAM_FUNMAP) || b->core.l_qseq == 0) {
+          return 0;
+     }
+
+     /* get existing tags. delete if existing and redo is on
+      */
+     if ((prec_baq = bam_aux_get(b, BAQ_TAG)) != 0 && *prec_baq == 'Z') {
+          if (baq_flag==2) {
+               bam_aux_del(b, prec_baq);
+               prec_baq = NULL;
+          }
+     }
+     if ((prec_ai = bam_aux_get(b, AI_TAG)) != 0 && *prec_ai == 'Z') {
+          if (idaq_flag==2) {
+               bam_aux_del(b, prec_ai);
+               prec_ai = NULL;
+          }
+     }
+     if ((prec_ad = bam_aux_get(b, AD_TAG)) != 0 && *prec_ad == 'Z') {
+          if (idaq_flag==2) {
+               bam_aux_del(b, prec_ad);
+               prec_ad = NULL;
+          }
+     }
+
+     /* find the start and end of the alignment */
+     x = c->pos, y = 0, yb = ye = xb = xe = -1;
+     for (k = 0; k < c->n_cigar; ++k) {
+          int op, l;
+          op = cigar[k]&0xf; l = cigar[k]>>4;
+          if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
+               if (yb < 0) yb = y;
+               if (xb < 0) xb = x;
+               ye = y + l; xe = x + l;
+               x += l; y += l;
+          } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) {
+               y += l;
+               if (op == BAM_CINS) {
+                    has_ins = 1;
+               }
+          } else if (op == BAM_CDEL) {
+               has_del = 1;
+               x += l;
+          }
+          else if (op == BAM_CREF_SKIP) {
+#if 0
+               return 0; /* do nothing if there is a reference skip */
+#else
+               /* returning would mean give up and compute no BAQ.
+                  behaviour now modelled after calc_read_alnerrprof(),
+                  where CDEL and CREF_SKIP behave the same */
+               x += l;
+#endif
+          }
+     }
+
+#if 0
+    fprintf(stderr, "%s with cigar %s: baq_flag=%d prec_baq=%p has_del=%d prec_ad=%p has_ins=%d prec_ai=%p, idaq_flag=%d\n",
+            bam1_qname(b), cigar_str_from_bam(b),  baq_flag, prec_baq, has_del, prec_ad, has_ins, prec_ai, idaq_flag);
+#endif
+     /* don't do anything if everything's there already */
+     if (baq_flag==0 || prec_baq) {
+          int skip = 1;
+          if (has_del && ! prec_ad) {
+               skip = 0;
+          }
+          if (has_ins && ! prec_ai) {
+               skip = 0;
+          }
+          if (skip) {
+#if 0
+               fprintf(stderr, "Reusing all alignment quality values for read %s!\n", bam1_qname(b));
+#endif
+               return 0;
+          }
+     }
+
+     if (has_ins || has_del) {
+          pd = calloc(c->l_qseq+1, sizeof(double*));
+     }
+
+     /* either need to compute BAQ or IDAQ
+      */
+
+     /* set bandwidth and the start and the end */
+     bw = 7;
+     if (abs((xe - xb) - (ye - yb)) > bw)
+          bw = abs((xe - xb) - (ye - yb)) + 3;
+     conf.bw = bw;
+     xb -= yb + bw/2; if (xb < 0) xb = 0;
+     xe += c->l_qseq - ye + bw/2;
+     if (xe - xb - c->l_qseq > bw)
+          xb += (xe - xb - c->l_qseq - bw) / 2, xe -= (xe - xb - c->l_qseq - bw) / 2;
+
+
+     { /* glocal */
+          uint8_t *s, *r, *q, *seq = bam1_seq(b), *bq;
+          int *state;
+          int bw; /* shadows the outer bw: passed by address to kpa_ext_glocal() and then handed to idaq() */
+
+          bq = calloc(c->l_qseq + 1, 1);
+          memcpy(bq, qual, c->l_qseq);
+          s = calloc(c->l_qseq, 1);
+          for (i = 0; i < c->l_qseq; ++i) s[i] = bam_nt16_nt4_table[bam1_seqi(seq, i)];
+          r = calloc(xe - xb, 1);
+          for (i = xb; i < xe; ++i) {
+               if (ref[i] == 0) { xe = i; break; } /* reference ends early: shrink the window */
+               r[i-xb] = bam_nt16_nt4_table[bam_nt16_table[(int)ref[i]]];
+          }
+          state = calloc(c->l_qseq, sizeof(int));
+          q = calloc(c->l_qseq, 1);
+
+
+#ifdef DEBUG
+          fprintf(stderr, "processing read %s\n", bam1_qname(b));
+#endif
+          kpa_ext_glocal(r, xe-xb, s, c->l_qseq, qual, &conf, state, q, pd, &bw);
+
+          if (baq_flag && ! prec_baq) {
+               if (! baq_extended) { // with ORIG_BAQ bq[] is capped by base quality qual[]; otherwise it is the uncapped q[]
+                    for (k = 0, x = c->pos, y = 0; k < c->n_cigar; ++k) {
+                         int op = cigar[k]&0xf, l = cigar[k]>>4;
+                         if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
+                              for (i = y; i < y + l; ++i) {
+                                   if ((state[i]&3) != 0 || state[i]>>2 != x - xb + (i - y)) bq[i] = 0;
+#ifdef ORIG_BAQ
+                                   else bq[i] = bq[i] < q[i]? bq[i] : q[i];
+#else
+                                   /* keep the actual values and don't cap by base quality; has to be an else or the 0 set above is overwritten */
+                                   else bq[i] = q[i];
+#endif
+                              }
+                              x += l; y += l;
+                         } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
+                         else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l; /* both consume reference, as in the first CIGAR loop */
+                    }
+#ifdef ORIG_BAQ
+                    for (i = 0; i < c->l_qseq; ++i) bq[i] = qual[i] - bq[i] + 64; // finalize BQ
+#endif
+
+               } else { // in this block, bq[] is BAQ that can be larger than qual[] (different from the above!)
+                    uint8_t *left, *rght;
+                    left = calloc(c->l_qseq, 1); rght = calloc(c->l_qseq, 1);
+                    for (k = 0, x = c->pos, y = 0; k < c->n_cigar; ++k) {
+                         int op = cigar[k]&0xf, l = cigar[k]>>4;
+                         if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
+                              for (i = y; i < y + l; ++i)
+                                   bq[i] = ((state[i]&3) != 0 || state[i]>>2 != x - xb + (i - y))? 0 : q[i];
+                              for (left[y] = bq[y], i = y + 1; i < y + l; ++i)
+                                   left[i] = bq[i] > left[i-1]? bq[i] : left[i-1];
+                              for (rght[y+l-1] = bq[y+l-1], i = y + l - 2; i >= y; --i)
+                                   rght[i] = bq[i] > rght[i+1]? bq[i] : rght[i+1];
+                              for (i = y; i < y + l; ++i)
+                                   bq[i] = left[i] < rght[i]? left[i] : rght[i];
+                              x += l; y += l;
+                         } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
+                         else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l; /* both consume reference, as in the first CIGAR loop */
+                    }
+#ifdef ORIG_BAQ
+                    for (i = 0; i < c->l_qseq; ++i) bq[i] = 64 + (qual[i] <= bq[i]? 0 : qual[i] - bq[i]); // finalize BQ
+#endif
+                    free(left); free(rght);
+               }
+
+#ifndef ORIG_BAQ
+               /* need to cap to phred max to be able to store it */
+               for (i = 0; i < c->l_qseq; ++i) {
+                    if (bq[i] > SANGER_PHRED_MAX) {
+                         bq[i] = SANGER_PHRED_MAX;
+                    }
+                    bq[i] += 33;
+               }
+#endif
+
+/*#undef ORIG_BAQ*/
+#ifdef ORIG_BAQ
+               if (apply_baq) {
+                    for (i = 0; i < c->l_qseq; ++i) qual[i] -= bq[i] - 64; // modify qual
+                    bam_aux_append(b, "ZQ", 'Z', c->l_qseq + 1, bq);
+               } else bam_aux_append(b, "BQ", 'Z', c->l_qseq + 1, bq);
+#else
+               bam_aux_append(b, BAQ_TAG, 'Z', c->l_qseq + 1, bq);
+#endif
+          }
+          /* no baq */
+
+
+          if (idaq_flag && pd) {/* pd served as previous check to see if ai or ad actually need to be computed */
+               idaq(b, ref, pd, xe, xb, bw);
+          }
+
+          if (pd) {
+               for (i = 0; i<=c->l_qseq; ++i) free(pd[i]);
+               free(pd);
+          }
+          free(bq); free(s); free(r); free(q); free(state);
+     }
+
+     return 0;
+}
+
diff --git a/src/lofreq/bam_md_ext.h b/src/lofreq/bam_md_ext.h
new file mode 100644
index 0000000..2c8d90c
--- /dev/null
+++ b/src/lofreq/bam_md_ext.h
@@ -0,0 +1,34 @@
+/* The MIT License
+
+   Copyright (c) 2003-2006, 2008, 2009 by Heng Li <lh3 at live.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+#ifndef BAM_MD_EXT_H
+#define BAM_MD_EXT_H
+
+
+int bam_prob_realn_core_ext(bam1_t *b, const char *ref, 
+                            int baq_flag, int ext_baq, int idaq_flag);
+
+
+#endif
diff --git a/src/lofreq/bedidx.c b/src/lofreq/bedidx.c
new file mode 100644
index 0000000..627783e
--- /dev/null
+++ b/src/lofreq/bedidx.c
@@ -0,0 +1,258 @@
+/*  bedidx.c -- BED file indexing.
+
+    Copyright (C) 2011 Broad Institute.
+    Copyright (C) 2014 Genome Research Ltd.
+
+    Author: Heng Li <lh3 at sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.  */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <zlib.h>
+
+#ifdef _WIN32
+#define drand48() ((double)rand() / RAND_MAX)
+#endif
+
+#include "htslib/ksort.h"
+KSORT_INIT_GENERIC(uint64_t)
+
+#include "htslib/kseq.h"
+KSTREAM_INIT(gzFile, gzread, 8192)
+
+typedef struct {
+    int n, m;
+    uint64_t *a;
+    int *idx;
+} bed_reglist_t;
+
+#include "htslib/khash.h"
+KHASH_MAP_INIT_STR(reg, bed_reglist_t)
+
+#define LIDX_SHIFT 13
+
+typedef kh_reg_t reghash_t;
+
+void bed_destroy(void *_h);
+
+/* Build a linear index over the n packed intervals in a: slot w receives the position in a of the first interval touching window w (windows are 2^LIDX_SHIFT wide), -1 if none. *n_idx is set to (last processed interval's end window) + 1; the caller owns the returned array. */
+int *bed_index_core(int n, uint64_t *a, int *n_idx)
+{
+    int *idx = 0;
+    int cap = 0;   /* number of slots currently allocated in idx */
+    int i, j;
+
+    *n_idx = 0;
+    for (i = 0; i < n; ++i) {
+        /* window numbers taken from the high and the low 32 bits of the entry */
+        int first = a[i]>>32 >> LIDX_SHIFT;
+        int last = ((uint32_t)a[i]) >> LIDX_SHIFT;
+        if (cap < last + 1) {
+            int filled = cap;
+            cap = last + 1;
+            kroundup32(cap);
+            idx = realloc(idx, cap * sizeof(int));
+            while (filled < cap) idx[filled++] = -1;   /* new slots start out unset */
+        }
+        for (j = first; j <= last; ++j)   /* also handles first == last */
+            if (idx[j] < 0) idx[j] = i;
+        *n_idx = last + 1;
+    }
+    return idx;
+}
+/* (Re)build the linear index of every region list in the hash _h: sort the packed intervals, then store the result of bed_index_core() in idx and its length in m. */
+void bed_index(void *_h)
+{
+    reghash_t *hash = (reghash_t*)_h;
+    khint_t slot;
+    for (slot = 0; slot < kh_end(hash); ++slot) {
+        bed_reglist_t *regs;
+        if (!kh_exist(hash, slot)) continue;   /* bucket holds no entry */
+        regs = &kh_val(hash, slot);
+        free(regs->idx);                       /* drop a previous index; free(NULL) is a no-op */
+        ks_introsort(uint64_t, regs->n, regs->a);
+        regs->idx = bed_index_core(regs->n, regs->a, &regs->m);
+    }
+}
+/* Return 1 if [beg,end) overlaps any interval of the indexed list p, else 0. Relies on bed_index() having run, so that p->idx holds p->m slots; the index must be bounded by p->m (its length), not by p->n (the number of intervals). */
+int bed_overlap_core(const bed_reglist_t *p, int beg, int end)
+{
+    int i, min_off;
+    if (p->n == 0) return 0;
+    min_off = (beg>>LIDX_SHIFT >= p->m)? p->idx[p->m-1] : p->idx[beg>>LIDX_SHIFT];
+    if (min_off < 0) { // TODO: this block can be improved, but speed should not matter too much here
+        int n = beg>>LIDX_SHIFT;
+        if (n > p->m) n = p->m;
+        for (i = n - 1; i >= 0; --i)   // nearest earlier window that has an entry
+            if (p->idx[i] >= 0) break;
+        min_off = i >= 0? p->idx[i] : 0;
+    }
+    for (i = min_off; i < p->n; ++i) {
+        if ((int)(p->a[i]>>32) >= end) break; // out of range; no need to proceed
+        if ((int32_t)p->a[i] > beg && (int32_t)(p->a[i]>>32) < end)
+            return 1; // find the overlap; return
+    }
+    return 0;
+}
+/* Return the bed_overlap_core() result for [beg,end) on chr; 0 when _h is NULL or chr is not a key of the hash. */
+int bed_overlap(const void *_h, const char *chr, int beg, int end)
+{
+    const reghash_t *hash = (const reghash_t*)_h;
+    khint_t slot;
+    if (hash == NULL) return 0;
+    slot = kh_get(reg, hash, chr);
+    if (slot != kh_end(hash)) return bed_overlap_core(&kh_val(hash, slot), beg, end);
+    return 0;   /* chr has no regions */
+}
+
+/* "BED" file reader, which actually reads two different formats.
+
+   BED files contain between three and nine fields per line, of which
+   only the first three (reference, start, end) are of interest to us.
+   BED counts positions from base 0, and the end is the base after the
+   region of interest.  While not properly documented in the specification,
+   it is also possible to have 'browser' and 'track' lines in BED files that
+   do not follow the standard format and should be ignored.  Examination
+   of the BED file reading code in
+   http://genome-source.cse.ucsc.edu/gitweb/?p=kent.git shows that BED
+   files can also have comment lines starting with '#', leading whitespace
+   is stripped, and that fields are separated by one or more consecutive
+   whitespace characters.
+
+   The alternative format was originally for reading positions in VCF
+   format.  This expects two columns, which indicate the reference and
+   a position.  The position corresponds to a single base, and unlike
+   BED counts from 1.
+
+   Which format is in use is determined based on whether one or two
+   numbers can be decoded on the line.  As this choice is made line-by-line
+   in this implementation, it is possible (but probably a bad idea) to mix
+   both formats in the same file.  If trying to read a VCF file by this
+   method, it would be important to ensure that the third column (ID) does
+   not contain any entries that start with a digit, to avoid the line
+   erroneously being parsed as a BED file entry.
+
+   The BED specification is at http://www.genome.ucsc.edu/FAQ/FAQformat.html
+   The VCF specification is at https://github.com/samtools/hts-specs
+ */
+/* Read fn ("-" means stdin; opened with gzopen/gzdopen) line by line into a new region hash and index it. Returns the hash (release with bed_destroy()) or NULL on failure, in which case everything allocated here is freed. */
+void *bed_read(const char *fn)
+{
+    reghash_t *h = kh_init(reg);
+    gzFile fp;
+    kstream_t *ks = NULL;
+    int dret;
+    unsigned int line = 0;
+    kstring_t str = { 0, 0, NULL };
+
+    if (NULL == h) return NULL;
+    // read the list
+    fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
+    if (fp == 0) { bed_destroy(h); return 0; }  // do not leak the empty hash
+    ks = ks_init(fp);
+    if (NULL == ks) goto fail;  // In case ks_init ever gets error checking...
+    while (ks_getuntil(ks, KS_SEP_LINE, &str, &dret) > 0) { // read a line
+        char *ref = str.s, *ref_end;
+        unsigned int beg = 0, end = 0;
+        int num = 0;
+        khint_t k;
+        bed_reglist_t *p;
+
+        line++;
+        while (*ref && isspace((unsigned char)*ref)) ref++;  // cast: isspace() is undefined for negative char values
+        if ('\0' == *ref) continue;  // Skip blank lines
+        if ('#'  == *ref) continue;  // Skip BED file comments
+        ref_end = ref;   // look for the end of the reference name
+        while (*ref_end && !isspace((unsigned char)*ref_end)) ref_end++;
+        if ('\0' != *ref_end) {
+            *ref_end = '\0';  // terminate ref and look for start, end
+            num = sscanf(ref_end + 1, "%u %u", &beg, &end);
+        }
+        if (1 == num) {  // VCF-style format
+            end = beg--; // Counts from 1 instead of 0 for BED files
+        }
+        if (num < 1 || end < beg) {
+            // These two are special lines that can occur in BED files.
+            // Check for them here instead of earlier in case someone really
+            // has called their reference "browser" or "track".
+            if (0 == strcmp(ref, "browser")) continue;
+            if (0 == strcmp(ref, "track")) continue;
+            fprintf(stderr, "[bed_read] Parse error reading %s at line %u\n",
+                    fn, line);
+            goto fail_no_msg;
+        }
+
+        // Put reg in the hash table if not already there
+        k = kh_get(reg, h, ref);
+        if (k == kh_end(h)) { // absent from the hash table
+            int ret;
+            char *s = strdup(ref);
+            if (NULL == s) goto fail;
+            k = kh_put(reg, h, s, &ret);
+            if (-1 == ret) {
+                free(s);
+                goto fail;
+            }
+            memset(&kh_val(h, k), 0, sizeof(bed_reglist_t));
+        }
+        p = &kh_val(h, k);
+
+        // Add begin,end to the list
+        if (p->n == p->m) {
+            uint64_t *grown = realloc(p->a, (p->m? p->m<<1 : 4) * sizeof(uint64_t));
+            if (NULL == grown) goto fail;  // p->a is still owned by the hash and freed by bed_destroy()
+            p->a = grown; p->m = p->m? p->m<<1 : 4;
+        }
+        p->a[p->n++] = (uint64_t)beg<<32 | end;
+    }
+    // FIXME: Need to check for errors in ks_getuntil.  At the moment it
+    // doesn't look like it can return one.  Possibly use gzgets instead?
+
+    ks_destroy(ks);
+    gzclose(fp);
+    free(str.s);
+    bed_index(h);
+    return h;
+ fail:
+    fprintf(stderr, "[bed_read] Error reading %s : %s\n", fn, strerror(errno));
+ fail_no_msg:
+    if (ks) ks_destroy(ks);
+    if (fp) gzclose(fp);
+    free(str.s);
+    bed_destroy(h);
+    return NULL;
+}
+/* Free a region hash: the interval array, the linear index and the key string of every entry, then the hash itself. A NULL handle is accepted and ignored. */
+void bed_destroy(void *_h)
+{
+    reghash_t *h = (reghash_t*)_h;
+    khint_t k;
+    if (NULL == h) return;  // nothing to free; bed_overlap() treats NULL as "no regions" too
+    for (k = 0; k < kh_end(h); ++k) {
+        if (!kh_exist(h, k)) continue;
+        free(kh_val(h, k).a);
+        free(kh_val(h, k).idx);
+        free((char*)kh_key(h, k));
+    }
+    kh_destroy(reg, h);
+}
diff --git a/src/lofreq/bedidx.c.LICENSE b/src/lofreq/bedidx.c.LICENSE
new file mode 100644
index 0000000..f22c0c0
--- /dev/null
+++ b/src/lofreq/bedidx.c.LICENSE
@@ -0,0 +1,37 @@
+Unchanged copy of corresponding file in samtools 1.1.
+
+---
+
+The MIT/Expat License
+
+Copyright (C) 2008-2014 Genome Research Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
+
+[The use of a range of years within a copyright notice in this distribution
+should be interpreted as being equivalent to a list of years including the
+first and last year specified and all consecutive years between them.
+
+For example, a copyright notice that reads "Copyright (C) 2005, 2007-2009,
+2011-2012" should be interpreted as being identical to a notice that reads
+"Copyright (C) 2005, 2007, 2008, 2009, 2011, 2012" and a copyright notice
+that reads "Copyright (C) 2005-2012" should be interpreted as being identical
+to a notice that reads "Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010,
+2011, 2012".]
diff --git a/src/lofreq/binom.c b/src/lofreq/binom.c
new file mode 100644
index 0000000..97ed8a3
--- /dev/null
+++ b/src/lofreq/binom.c
@@ -0,0 +1,135 @@
+/* -*- mode: c; tab-width: 4; c-basic-offset: 4;  indent-tabs-mode: nil -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "cdflib.h"
+#include "binom.h"
+
+
+
+#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
+
+
+
+/**
+ * @brief Compute the binomial cdf and sf through cdflib's cdfbin()
+ *
+ * On return *p holds the cdf evaluated at num_success and *q holds the
+ * complement of the cdf, i.e. 1-P (AKA sf). Either pointer may be NULL;
+ * the corresponding result is then written to a local scratch variable
+ * and dropped.
+ *
+ * Returns the status reported by cdfbin(), which is non-zero on failure
+ *
+ */
+int binom(double *p, double *q,
+          int num_trials, int num_success, double prob_success)
+{
+    double cdf_scratch, sf_scratch; /* sinks used when p resp. q is NULL */
+    double *cdf_out = p ? p : &cdf_scratch;
+    double *sf_out = q ? q : &sf_scratch;
+
+    /* cdfbin() takes all of its arguments by pointer, so make local copies */
+    double successes = (double)num_success;
+    double trials = (double)num_trials;
+    double prob = (double)prob_success;
+    double one_minus_prob = 1.0 - prob_success;
+    double bound;
+
+    int mode = 1; /* cdfbin's "which" selector; NOTE(review): presumably 1 means "compute P and Q" - confirm against the cdflib readme */
+    int status = 1; /* error by default */
+
+    (void) cdfbin(&mode, cdf_out, sf_out,
+                  &successes, &trials, &prob, &one_minus_prob,
+                  &status, &bound);
+
+#ifdef DEBUG
+
+    fprintf(stderr, "DEBUG(%s:%s:%d): in num_success = %d\n",
+            __FILE__, __FUNCTION__, __LINE__, num_success);
+    fprintf(stderr, "DEBUG(%s:%s:%d): in num_trials = %d\n",
+            __FILE__, __FUNCTION__, __LINE__, num_trials);
+    fprintf(stderr, "DEBUG(%s:%s:%d): in pr = %g\n",
+            __FILE__, __FUNCTION__, __LINE__, prob_success);
+    fprintf(stderr, "DEBUG(%s:%s:%d): out p=%g\n",
+            __FILE__, __FUNCTION__, __LINE__, *cdf_out);
+    fprintf(stderr, "DEBUG(%s:%s:%d): out q=%g\n",
+            __FILE__, __FUNCTION__, __LINE__, *sf_out);
+    fprintf(stderr, "DEBUG(%s:%s:%d): out status=%d\n",
+            __FILE__, __FUNCTION__, __LINE__, status);
+    fprintf(stderr, "DEBUG(%s:%s:%d): out bound=%g\n",
+            __FILE__, __FUNCTION__, __LINE__, bound);
+#endif
+
+    return status;
+}
+/* end of binom */
+
+
+
+#ifdef BINOM_MAIN
+
+
+/* 
+gcc -pedantic -Wall -g -std=gnu99 -O2 -DBINOM_MAIN -I../cdflib90/ -o binom binom.c utils.c log.c ../cdflib90/libcdf.a -lm
+*/
+#include <stdlib.h>
+#include "log.h"
+
+/*
+ * Stand-alone test driver, only built with -DBINOM_MAIN.
+ *
+ * Expects three arguments: num_success num_trials prob_success. Prints the
+ * sf and cdf computed by binom() together with the scipy calls they can be
+ * compared against. Exits with EXIT_FAILURE on missing arguments or if
+ * binom() reports an error.
+ */
+int main(int argc, char *argv[]) {
+     int num_success;
+     int num_trials;
+     double prob_success;
+     double sf_pvalue;
+     double cdf_pvalue;
+
+     if (argc<4) {
+         /* terminate the message with a newline so it does not run into the shell prompt */
+         fprintf(stderr, "need num_success num_trials and prob_success as args\n");
+         return EXIT_FAILURE;
+     }
+
+     /* atoi/atof do no validation: non-numeric input silently becomes 0 */
+     num_success = atoi(argv[1]);
+     num_trials = atoi(argv[2]);
+     prob_success = atof(argv[3]);
+
+
+     fprintf(stdout, "num_success=%d num_trials=%d prob_success=%f\n", num_success, num_trials, prob_success);
+     /* binom() writes the cdf through its first pointer and the sf through its second */
+     if (0 != binom(&cdf_pvalue, &sf_pvalue, num_trials, num_success, prob_success)) {
+         fprintf(stderr, "%s\n", "binom() failed");
+         return EXIT_FAILURE;
+     }
+     
+     printf("sf: %g\tcdf: %g\n", sf_pvalue, cdf_pvalue);
+
+     printf("sf should be identical to scipy.stats.binom.sf(%d, %d, %f)\n", num_success, num_trials, prob_success);
+     printf("cdf should be identical to scipy.stats.binom.cdf(%d, %d, %f)\n", num_success, num_trials, prob_success);
+     return EXIT_SUCCESS;
+}
+#endif
diff --git a/src/lofreq/binom.h b/src/lofreq/binom.h
new file mode 100644
index 0000000..495446f
--- /dev/null
+++ b/src/lofreq/binom.h
@@ -0,0 +1,35 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef BINOM_H
+#define BINOM_H
+
+/*
+ * Compute the binomial cdf and its complement (sf).
+ *
+ * p: receives the cdf evaluated at num_success (may be NULL)
+ * q: receives the complement of the cdf, i.e. 1-P (may be NULL)
+ *
+ * Returns non-zero on failure. Parameter names follow the definition in
+ * binom.c, where the first pointer is the cdf and the second the sf.
+ */
+int binom(double *p, double *q,
+          int num_trials, int num_success, double prob_success);
+
+#endif
diff --git a/src/lofreq/defaults.h b/src/lofreq/defaults.h
new file mode 100644
index 0000000..e34af23
--- /dev/null
+++ b/src/lofreq/defaults.h
@@ -0,0 +1,111 @@
+#ifndef LOFREQ_DEFAULTS_H
+#define LOFREQ_DEFAULTS_H
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+
+#define SANGER_PHRED_MAX 93
+
+/* mapping quality filters: applied to all reads. don't set too high as
+ * this is a mapper dependent value
+ * in case of BWA it's also dependent on the alignment command used.
+ */
+#define DEFAULT_MIN_MQ 0
+#define DEFAULT_MAX_MQ 255
+
+/* minimum base quality of any base below which they are skipped.
+   note: GATK doesn't recalibrate BQ <=5 */
+#define DEFAULT_MIN_BQ 6
+/* minimum base quality for alt bases: below and they are skipped */
+#define DEFAULT_MIN_ALT_BQ 6
+#define DEFAULT_DEF_ALT_BQ 0
+/* -1: ref median, 0: keep original, >0: replace with this value */
+
+#define DEFAULT_MIN_JQ 0
+/* minimum merged quality for alt bases  */
+#define DEFAULT_MIN_ALT_JQ 0
+#define DEFAULT_DEF_ALT_JQ 0
+/* -1: ref median, 0: keep original, >0: replace with this value */
+
+/* non match quality for source qual */
+#define DEFAULT_DEF_NM_QUAL -1
+ 
+/* coverage thresholds */
+#define DEFAULT_MIN_COV 1
+#define DEFAULT_MAX_PLP_DEPTH 1000000
+
+#define DEFAULT_BAQ_ON 1
+
+/* make lofreq blind to anything below this value */
+#define DEFAULT_MIN_PLP_BQ 3
+#define DEFAULT_MIN_PLP_IDQ 0
+
+#define DEFAULT_SIG 0.01
+
+/* ---------------------------------------------------------------------- */
+
+/* Four nucleotides, with one consensus, makes three
+   non-consensus bases */
+#define NUM_NONCONS_BASES 3
+
+#define VARCALL_USE_BAQ     1
+#define VARCALL_USE_MQ      2
+#define VARCALL_USE_SQ      4
+/* indel alignment quality */
+#define VARCALL_USE_IDAQ      8
+
+
+/* private tag for actual baq values: "l"ofreq "b"ase-alignment */
+#define BAQ_TAG "lb"
+
+
+#ifndef MIN
+#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
+#endif
+#ifndef MAX
+#define MAX(X,Y) ((X) > (Y) ? (X) : (Y))
+#endif
+
+
+#define AI_TAG "ai"
+#define AD_TAG "ad"
+
+/* base insertion and deletion qualities. GATK uses BI and BD. 
+ * GATKs BI & BD: "are per-base quantities which estimate
+ * the probability that the next base in the read was
+ * mis-incorporated or mis-deleted (due to slippage, for
+ * example)". See
+ * http://www.broadinstitute.org/gatk/guide/article?id=44
+ * and
+ * http://gatkforums.broadinstitute.org/discussion/1619/baserecalibratorprintreads-bd-and-bi-flags
+ *
+ */
+#define BI_TAG "BI"
+#define BD_TAG "BD"
+
+#endif
diff --git a/src/lofreq/fet.c b/src/lofreq/fet.c
new file mode 100644
index 0000000..acdb10d
--- /dev/null
+++ b/src/lofreq/fet.c
@@ -0,0 +1,116 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/* Taken from samtools 0.1.18 (r982:295) */
+
+#include <math.h>
+#include <stdlib.h>
+
+/* This program is implemented with ideas from this web page:
+ *
+ *   http://www.langsrud.com/fisher.htm
+ */
+
+/* Natural log of the binomial coefficient: log\binom{n}{k},
+ * evaluated through lgamma(). The two trivial cases k == 0 and k == n
+ * (coefficient 1) short-circuit to 0. */
+static double lbinom(int n, int k)
+{
+	double log_n_fact, log_k_fact, log_nk_fact;
+
+	if (0 == k || k == n) {
+		return 0;
+	}
+	log_n_fact = lgamma(n+1);
+	log_k_fact = lgamma(k+1);
+	log_nk_fact = lgamma(n-k+1);
+	return log_n_fact - log_k_fact - log_nk_fact;
+}
+
+/* n11  n12  | n1_
+   n21  n22  | n2_
+   -----------+----
+   n_1  n_2  | n
+*/
+
+/* Hypergeometric probability of observing n11 in the top-left cell given
+ * the row total n1_, the column total n_1 and the grand total n.
+ * Computed in log space via lbinom() and exponentiated at the end. */
+static double hypergeo(int n11, int n1_, int n_1, int n)
+{
+	double log_row1 = lbinom(n1_, n11);
+	double log_row2 = lbinom(n-n1_, n_1-n11);
+	double log_total = lbinom(n, n_1);
+
+	return exp(log_row1 + log_row2 - log_total);
+}
+
+/* State carried between successive hypergeo_acc() calls: the table
+ * (n11 plus the margins n1_, n_1 and the total n) that was evaluated
+ * last, and the probability p computed for it. */
+typedef struct {
+	int n11, n1_, n_1, n;
+	double p;
+} hgacc_t;
+
+/* Incremental version of the hypergeometric distribution.
+ *
+ * Called with any of n1_, n_1, n non-zero: stores the full table in aux
+ * and evaluates it from scratch with hypergeo().
+ * Called with n1_ == n_1 == n == 0: only n11 changed; the margins are taken
+ * from aux. If n11 is one above or one below the previously evaluated
+ * value, aux->p is updated by a ratio instead of being recomputed. The
+ * shortcut is skipped whenever n11 is a multiple of 11 or when
+ * n11 + n - n1_ - n_1 is zero, in which case hypergeo() is used.
+ *
+ * Returns the probability, which is also left in aux->p. */
+static double hypergeo_acc(int n11, int n1_, int n_1, int n, hgacc_t *aux)
+{
+	if (n1_ != 0 || n_1 != 0 || n != 0) {
+		/* new table: remember everything and compute from scratch */
+		aux->n11 = n11;
+		aux->n1_ = n1_;
+		aux->n_1 = n_1;
+		aux->n = n;
+		aux->p = hypergeo(aux->n11, aux->n1_, aux->n_1, aux->n);
+		return aux->p;
+	}
+
+	/* only n11 changed; the rest fixed */
+	if ((n11 % 11) != 0 && (n11 + aux->n - aux->n1_ - aux->n_1) != 0) {
+		const int prev_n11 = aux->n11;
+
+		if (n11 == prev_n11 + 1) { /* one step up */
+			aux->p *= (double)(aux->n1_ - prev_n11) / n11
+				* (aux->n_1 - prev_n11) / (n11 + aux->n - aux->n1_ - aux->n_1);
+			aux->n11 = n11;
+			return aux->p;
+		}
+		if (n11 == prev_n11 - 1) { /* one step down */
+			aux->p *= (double)prev_n11 / (aux->n1_ - n11)
+				* (prev_n11 + aux->n - aux->n1_ - aux->n_1) / (aux->n_1 - n11);
+			aux->n11 = n11;
+			return aux->p;
+		}
+	}
+
+	/* no shortcut applicable: full evaluation with the stored margins */
+	aux->n11 = n11;
+	aux->p = hypergeo(aux->n11, aux->n1_, aux->n_1, aux->n);
+	return aux->p;
+}
+
+/*
+ * Fisher's exact test on the 2x2 table
+ *
+ *    n11  n12
+ *    n21  n22
+ *
+ * _left, _right, two: receive the left-tail, right-tail and two-tail
+ *                     p-values; all three must be valid pointers.
+ *
+ * Returns the probability of the given table. If the margins allow only a
+ * single value of n11 (min == max) no test is done: the function returns 1
+ * and all three p-values are set to 1.
+ *
+ * The tail loops are bounded by [min, max], the only values n11 can take
+ * for the given margins, so the distribution is never evaluated outside
+ * its support even if rounding keeps p below the threshold.
+ */
+double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two)
+{
+	int i, j, max, min;
+	double p, q, left, right;
+	hgacc_t aux;
+	int n1_, n_1, n;
+
+	n1_ = n11 + n12; n_1 = n11 + n21; n = n11 + n12 + n21 + n22; /* calculate n1_, n_1 and n */
+	max = (n_1 < n1_) ? n_1 : n1_; /* max n11, for right tail */
+	min = n1_ + n_1 - n;
+	if (min < 0) min = 0; /* min n11, for left tail */
+	*two = *_left = *_right = 1.;
+	if (min == max) return 1.; /* no need to do test */
+	q = hypergeo_acc(n11, n1_, n_1, n, &aux); /* the probability of the current table */
+	/* left tail: sum tables from min upwards while they are less likely than the observed one */
+	p = hypergeo_acc(min, 0, 0, 0, &aux);
+	for (left = 0., i = min + 1; p < 0.99999999 * q && i <= max; ++i) /* loop until underflow */
+		left += p, p = hypergeo_acc(i, 0, 0, 0, &aux);
+	--i;
+	if (p < 1.00000001 * q) left += p; /* table at i is (numerically) as likely as the observed one */
+	else --i;
+	/* right tail: same, walking down from max */
+	p = hypergeo_acc(max, 0, 0, 0, &aux);
+	for (right = 0., j = max - 1; p < 0.99999999 * q && j >= min; --j) /* loop until underflow */
+		right += p, p = hypergeo_acc(j, 0, 0, 0, &aux);
+	++j;
+	if (p < 1.00000001 * q) right += p;
+	else ++j;
+	/* two-tail */
+	*two = left + right;
+	if (*two > 1.) *two = 1.;
+	/* adjust left and right: the tail whose end is further from n11 is derived from the other one */
+	if (abs(i - n11) < abs(j - n11)) right = 1. - left + q;
+	else left = 1.0 - right + q;
+	*_left = left; *_right = right;
+	return q;
+}
+
+#ifdef FET_MAIN
+#include <stdio.h>
+
+/*
+ * Stand-alone test driver, only built with -DFET_MAIN.
+ *
+ * Reads records of the form "id n11 n12 n21 n22" from stdin until a record
+ * fails to parse, and prints each one followed by the table probability and
+ * the left, right and two-tail p-values.
+ */
+int main(int argc, char *argv[])
+{
+	char id[1024];
+	int n11, n12, n21, n22;
+	double left, right, twotail, prob;
+
+	/* %1023s: cap the id at sizeof(id)-1 characters so overlong input cannot overflow the buffer */
+	while (scanf("%1023s%d%d%d%d", id, &n11, &n12, &n21, &n22) == 5) {
+		prob = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &twotail);
+		printf("%s\t%d\t%d\t%d\t%d\t%.6g\t%.6g\t%.6g\t%.6g\n", id, n11, n12, n21, n22,
+				prob, left, right, twotail);
+	}
+	return 0;
+}
+#endif
diff --git a/src/lofreq/fet.c.LICENSE b/src/lofreq/fet.c.LICENSE
new file mode 100644
index 0000000..1c8585f
--- /dev/null
+++ b/src/lofreq/fet.c.LICENSE
@@ -0,0 +1,25 @@
+Copied from samtools 0.1.18 (r982:295)
+
+---
+
+The MIT License
+
+Copyright (c) 2008-2009 Genome Research Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
diff --git a/src/lofreq/fet.h b/src/lofreq/fet.h
new file mode 100644
index 0000000..f30cf92
--- /dev/null
+++ b/src/lofreq/fet.h
@@ -0,0 +1,6 @@
+#ifndef FET_H
+#define FET_H
+
+/* Fisher's exact test on the 2x2 table [n11 n12; n21 n22].
+ * Writes the left-tail, right-tail and two-tail p-values to *_left,
+ * *_right and *two and returns the probability of the given table. */
+double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two);
+
+#endif
diff --git a/src/lofreq/kprobaln_ext.c b/src/lofreq/kprobaln_ext.c
new file mode 100644
index 0000000..729eafa
--- /dev/null
+++ b/src/lofreq/kprobaln_ext.c
@@ -0,0 +1,307 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*
+  This is part of LoFreq Star and largely based on samtools'
+  kprobaln_ext.c (0.1.19) which was originally published under the MIT
+  License:
+  
+  Copyright (c) 2003-2006, 2008-2010, by Heng Li <lh3lh3 at live.co.uk>
+  
+  Permission is hereby granted, free of charge, to any person obtaining
+  a copy of this software and associated documentation files (the
+  "Software"), to deal in the Software without restriction, including
+  without limitation the rights to use, copy, modify, merge, publish,
+  distribute, sublicense, and/or sell copies of the Software, and to
+  permit persons to whom the Software is furnished to do so, subject to
+  the following conditions:
+  
+  The above copyright notice and this permission notice shall be
+  included in all copies or substantial portions of the Software.
+  
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+  SOFTWARE.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <math.h>
+#include "kprobaln_ext.h"
+
+/*****************************************
+ * Probabilistic banded glocal alignment *
+ *****************************************/
+
+/* factor applied to every insertion-state term in kpa_ext_glocal() */
+#define EI .25
+/* factor applied to the base error probability when query and reference base differ */
+#define EM .33333333333
+
+/* lookup table quality -> error probability (10^(-q/10)); filled lazily on the first kpa_ext_glocal() call */
+static float g_qual2prob[256];
+
+/* Sets u to the offset of cell (i,k) inside the banded row i, for band
+ * width b. Each cell occupies three consecutive doubles, which
+ * kpa_ext_glocal() addresses as u+0, u+1 and u+2. */
+#define set_u(u, b, i, k) { int x=(i)-(b); x=x>0?x:0; (u)=((k)-x+1)*3; }
+
+/* parameter presets; fields are { d, e, bw } as declared in kpa_ext_par_t */
+kpa_ext_par_t kpa_ext_par_def = { 0.001, 0.1, 10 };
+kpa_ext_par_t kpa_ext_par_alt = { 0.0001, 0.01, 10 };
+kpa_ext_par_t kpa_ext_par_lofreq_illumina = { 0.00001, 0.4, 10};
+kpa_ext_par_t kpa_ext_par_lofreq_pacbio = { 0.1, 0.4, 10};
+
+/*
+  The topology of the profile HMM:
+
+           /\             /\        /\             /\
+           I[1]           I[k-1]    I[k]           I[L]
+            ^   \      \    ^    \   ^   \      \   ^
+            |    \      \   |     \  |    \      \  |
+    M[0]   M[1] -> ... -> M[k-1] -> M[k] -> ... -> M[L]   M[L+1]
+                \      \/        \/      \/      /
+                 \     /\        /\      /\     /
+                       -> D[k-1] -> D[k] ->
+
+   M[0] points to every {M,I}[k] and every {M,I}[k] points M[L+1].
+
+   On input, _ref is the reference sequence and _query is the query
+   sequence. Both are sequences of 0/1/2/3/4 where 4 stands for an
+   ambiguous residue. iqual is the base quality. c sets the gap open
+   probability, gap extension probability and band width.
+
+   On output, state and q are arrays of length l_query. The higher 30
+   bits give the reference position the query base is matched to and the
+   lower two bits can be 0 (an alignment match) or 1 (an
+   insertion). q[i] gives the phred scaled posterior probability of
+   state[i] being wrong.
+
+   LoFreq extension not used if pd == NULL
+ */
+/*
+ * Returns Pr, the likelihood accumulated from the per-row scaling factors
+ * and expressed as a rounded -4.343*log() score, or 0 if either sequence
+ * is empty.
+ *
+ * If pd is non-NULL, pd[0..l_query] are calloc'ed here, filled during the
+ * MAP step and NOT freed by this function, and *ret_bw receives the band
+ * width actually used (ret_bw is dereferenced only when pd is non-NULL).
+ * None of the calloc() results is checked.
+ */
+int kpa_ext_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_query, 
+     const uint8_t *iqual, const kpa_ext_par_t *c, int *state, uint8_t *q, double **pd,
+     int *ret_bw)
+{
+	double **f, **b = 0, *s, m[9], sI, sM, bI, bM, pb;
+	float *qual, *_qual;
+	const uint8_t *ref, *query;
+	int bw, bw2, i, k, /* is_diff = 0, */ is_backward = 1, Pr;
+
+    if ( l_ref<=0 || l_query<=0 ) return 0; // FIXME: this may not be an ideal fix, just prevents segfault
+
+	/*** initialization ***/
+    // the backward pass is only needed when the caller wants state+q or the pd matrix
+    is_backward = state && q? 1 : 0;
+    if (pd) {
+         is_backward = 1;
+    }
+	ref = _ref - 1; query = _query - 1; // change to 1-based coordinate
+	// band width: at most c->bw, but never narrower than the length difference
+	bw = l_ref > l_query? l_ref : l_query;
+	if (bw > c->bw) bw = c->bw;
+	if (bw < abs(l_ref - l_query)) bw = abs(l_ref - l_query);
+    if (pd) {
+         *ret_bw = bw;
+    }
+     bw2 = bw * 2 + 1;
+	// allocate the forward and backward matrices f[][] and b[][] and the scaling array s[]
+	f = calloc(l_query+1, sizeof(double*));
+    if (is_backward) b = calloc(l_query+1, sizeof(double*));
+	for (i = 0; i <= l_query; ++i) {    // FIXME: this will lead in segfault for l_query==0 (l_query<=0 now returns early above)
+		f[i] = calloc(bw2 * 3 + 6, sizeof(double)); // FIXME: this is over-allocated for very short seqs
+        if (is_backward) b[i] = calloc(bw2 * 3 + 6, sizeof(double));
+        if (pd) pd[i] = calloc(bw2 * 3 + 6, sizeof(double));
+#if 0
+        if (pd && i==0) fprintf(stderr, "pd[i=%d] allocated to bw2*3+6=%d\n", i, bw2 * 3 + 6);
+#endif
+	}
+	s = calloc(l_query+2, sizeof(double)); // s[] is the scaling factor to avoid underflow
+	// initialize qual
+	_qual = calloc(l_query, sizeof(float));
+	// fill the shared quality->probability table on first use (g_qual2prob[0] is 1 once filled)
+	if (g_qual2prob[0] == 0)
+		for (i = 0; i < 256; ++i)
+			g_qual2prob[i] = pow(10, -i/10.);
+	// without iqual every base is treated as quality 30
+	for (i = 0; i < l_query; ++i) _qual[i] = g_qual2prob[iqual? iqual[i] : 30];
+	qual = _qual - 1;
+	// initialize transition probability
+	sM = sI = 1. / (2 * l_query + 2); // the value here seems not to affect results; FIXME: need proof
+	m[0*3+0] = (1 - c->d - c->d) * (1 - sM); m[0*3+1] = m[0*3+2] = c->d * (1 - sM);
+	m[1*3+0] = (1 - c->e) * (1 - sI); m[1*3+1] = c->e * (1 - sI); m[1*3+2] = 0.;
+	m[2*3+0] = 1 - c->e; m[2*3+1] = 0.; m[2*3+2] = c->e;
+	bM = (1 - c->d) / l_ref; bI = c->d / l_ref; // (bM+bI)*l_ref==1
+	/*** forward ***/
+	// f[0]
+	set_u(k, bw, 0, 0);
+	f[0][k] = s[0] = 1.;
+	{ // f[1]
+		double *fi = f[1], sum;
+		int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1, _beg, _end;
+		for (k = beg, sum = 0.; k <= end; ++k) {
+			int u;
+			// emission: 1 if either base is ambiguous (>3), 1-err on match, err*EM on mismatch
+			double e = (ref[k] > 3 || query[1] > 3)? 1. : ref[k] == query[1]? 1. - qual[1] : qual[1] * EM;
+			set_u(u, bw, 1, k);
+			fi[u+0] = e * bM; fi[u+1] = EI * bI;
+			sum += fi[u] + fi[u+1];
+		}
+		// rescale
+		s[1] = sum;
+		set_u(_beg, bw, 1, beg); set_u(_end, bw, 1, end); _end += 2;
+		for (k = _beg; k <= _end; ++k) fi[k] /= sum;
+	}
+	// f[2..l_query]
+	for (i = 2; i <= l_query; ++i) {
+		double *fi = f[i], *fi1 = f[i-1], sum, qli = qual[i];
+		int beg = 1, end = l_ref, x, _beg, _end;
+		uint8_t qyi = query[i];
+		x = i - bw; beg = beg > x? beg : x; // band start
+		x = i + bw; end = end < x? end : x; // band end
+		for (k = beg, sum = 0.; k <= end; ++k) {
+			int u, v11, v01, v10;
+			double e;
+			e = (ref[k] > 3 || qyi > 3)? 1. : ref[k] == qyi? 1. - qli : qli * EM;
+			// u: this cell; v11: (i-1,k-1); v10: (i-1,k); v01: (i,k-1)
+			set_u(u, bw, i, k); set_u(v11, bw, i-1, k-1); set_u(v10, bw, i-1, k); set_u(v01, bw, i, k-1);
+			fi[u+0] = e * (m[0] * fi1[v11+0] + m[3] * fi1[v11+1] + m[6] * fi1[v11+2]);
+			fi[u+1] = EI * (m[1] * fi1[v10+0] + m[4] * fi1[v10+1]);
+			fi[u+2] = m[2] * fi[v01+0] + m[8] * fi[v01+2];
+			sum += fi[u] + fi[u+1] + fi[u+2];
+//			fprintf(stderr, "F (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, fi[u], fi[u+1], fi[u+2]); // DEBUG
+		}
+		// rescale
+		s[i] = sum;
+		set_u(_beg, bw, i, beg); set_u(_end, bw, i, end); _end += 2;
+		for (k = _beg, sum = 1./sum; k <= _end; ++k) fi[k] *= sum;
+	}
+	{ // f[l_query+1]
+		double sum;
+		for (k = 1, sum = 0.; k <= l_ref; ++k) {
+			int u;
+			set_u(u, bw, l_query, k);
+			if (u < 3 || u >= bw2*3+3) continue; // skip reference positions outside the band of the last row
+		    sum += f[l_query][u+0] * sM + f[l_query][u+1] * sI;
+		}
+		s[l_query+1] = sum; // the last scaling factor
+	}
+	{ // compute likelihood
+		double p = 1., Pr1 = 0.;
+		// multiply the scaling factors, moving the product into Pr1 whenever it gets too small
+		for (i = 0; i <= l_query + 1; ++i) {
+			p *= s[i];
+			if (p < 1e-100) Pr1 += -4.343 * log(p), p = 1.;
+		}
+		Pr1 += -4.343 * log(p * l_ref * l_query);
+		Pr = (int)(Pr1 + .499);
+        if (!is_backward) { // skip backward and MAP
+             for (i = 0; i <= l_query; ++i) free(f[i]);
+             free(f); free(s); free(_qual);
+             return Pr;
+        }
+	}
+	/*** backward ***/
+	// b[l_query] (b[l_query+1][0]=1 and thus \tilde{b}[][]=1/s[l_query+1]; this is where s[l_query+1] comes from)
+	for (k = 1; k <= l_ref; ++k) {
+		int u;
+		double *bi = b[l_query];
+		set_u(u, bw, l_query, k);
+		if (u < 3 || u >= bw2*3+3) continue;
+		bi[u+0] = sM / s[l_query] / s[l_query+1]; bi[u+1] = sI / s[l_query] / s[l_query+1];
+	}
+	// b[l_query-1..1]
+	for (i = l_query - 1; i >= 1; --i) {
+		int beg = 1, end = l_ref, x, _beg, _end;
+		// y zeroes the third (u+2) component in row 1
+		double *bi = b[i], *bi1 = b[i+1], y = (i > 1), qli1 = qual[i+1];
+		uint8_t qyi1 = query[i+1];
+		x = i - bw; beg = beg > x? beg : x;
+		x = i + bw; end = end < x? end : x;
+		for (k = end; k >= beg; --k) {
+			int u, v11, v01, v10;
+			double e;
+			set_u(u, bw, i, k); set_u(v11, bw, i+1, k+1); set_u(v10, bw, i+1, k); set_u(v01, bw, i, k+1);
+			e = (k >= l_ref? 0 : (ref[k+1] > 3 || qyi1 > 3)? 1. : ref[k+1] == qyi1? 1. - qli1 : qli1 * EM) * bi1[v11];
+			bi[u+0] = e * m[0] + EI * m[1] * bi1[v10+1] + m[2] * bi[v01+2]; // bi1[v11] has been folded into e.
+			bi[u+1] = e * m[3] + EI * m[4] * bi1[v10+1];
+			bi[u+2] = (e * m[6] + m[8] * bi[v01+2]) * y;
+//			fprintf(stderr, "B (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, bi[u], bi[u+1], bi[u+2]); // DEBUG
+		}
+		// rescale
+		set_u(_beg, bw, i, beg); set_u(_end, bw, i, end); _end += 2;
+		for (k = _beg, y = 1./s[i]; k <= _end; ++k) bi[k] *= y;
+	}
+	{ // b[0]
+		int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1;
+		double sum = 0.;
+		for (k = end; k >= beg; --k) {
+			int u;
+			double e = (ref[k] > 3 || query[1] > 3)? 1. : ref[k] == query[1]? 1. - qual[1] : qual[1] * EM;
+			set_u(u, bw, 1, k);
+			if (u < 3 || u >= bw2*3+3) continue;
+		    sum += e * b[1][u+0] * bM + EI * b[1][u+1] * bI;
+		}
+		set_u(k, bw, 0, 0);
+		pb = b[0][k] = sum / s[0]; // if everything works as is expected, pb == 1.0
+	}
+	/* never used? is_diff = fabs(pb - 1.) > 1e-7? 1 : 0; */
+	/*** MAP ***/
+	for (i = 1; i <= l_query; ++i) {
+		double sum = 0., *fi = f[i], *bi = b[i], max = 0.;
+		int beg = 1, end = l_ref, x, max_k = -1;
+        double *pdi = NULL;
+        if (pd) pdi = pd[i];
+		x = i - bw; beg = beg > x? beg : x;
+		x = i + bw; end = end < x? end : x;
+		for (k = beg; k <= end; ++k) {
+			int u;
+			double z;
+			set_u(u, bw, i, k);
+			// track the best match (low bits 0) or insertion (low bits 1) state; position is stored 0-based
+			z = fi[u+0] * bi[u+0]; if (z > max) max = z, max_k = (k-1)<<2 | 0; sum += z;
+			z = fi[u+1] * bi[u+1]; if (z > max) max = z, max_k = (k-1)<<2 | 1; sum += z;
+            if (pd) {
+               // LoFreq extension: keep the forward*backward product of all three components, rescaled by s[i]
+               pdi[u+0] = fi[u+0] * bi[u+0] * s[i];
+               pdi[u+1] = fi[u+1] * bi[u+1] * s[i];
+               pdi[u+2] = fi[u+2] * bi[u+2] * s[i];
+               //fprintf(stderr, "(%d,%d,%d) %lg %lg %lg\n", i, k, u, pdi[u+0], pdi[u+1], pdi[u+2]);
+            }
+		}
+		max /= sum; sum *= s[i]; // if everything works as is expected, sum == 1.0
+		if (state) state[i-1] = max_k;
+		// quality of the chosen state; values above 100 are stored as 99
+		if (q) k = (int)(-4.343 * log(1. - max) + .499), q[i-1] = k > 100? 99 : k;
+#ifdef _MAIN
+		fprintf(stderr, "(%.10lg,%.10lg) (%d,%d:%c,%c:%d) %lg\n", pb, sum, i-1, max_k>>2,
+				"ACGT"[query[i]], "ACGT"[ref[(max_k>>2)+1]], max_k&3, max); // DEBUG
+#endif
+	}
+	/*** free ***/
+	// pd (if any) is left allocated for the caller
+	for (i = 0; i <= l_query; ++i) {
+		free(f[i]); free(b[i]); 
+	}
+	free(f); free(b); free(s); free(_qual);
+	return Pr;
+}
+
+#ifdef _MAIN
+#include <unistd.h>
+/*
+ * Stand-alone test driver, only built when _MAIN is defined.
+ *
+ * Usage: prog [-q qual] [-b bandwidth] <ref> <query>
+ * Both sequences are converted in place to 0..3 codes (anything other than
+ * A/C/G/T becomes 4), every query base gets quality q, and the score
+ * returned by kpa_ext_glocal() is printed to stderr.
+ */
+int main(int argc, char *argv[])
+{
+	uint8_t conv[256], *iqual, *ref, *query;
+	int c, l_ref, l_query, i, q = 30, b = 10, P;
+	while ((c = getopt(argc, argv, "b:q:")) >= 0) {
+		switch (c) {
+		case 'b': b = atoi(optarg); break;
+		case 'q': q = atoi(optarg); break;
+		}
+	}
+	if (optind + 2 > argc) {
+		fprintf(stderr, "Usage: %s [-q %d] [-b %d] <ref> <query>\n", argv[0], q, b); // example: acttc attc
+		return 1;
+	}
+	memset(conv, 4, 256);
+	conv['a'] = conv['A'] = 0; conv['c'] = conv['C'] = 1;
+	conv['g'] = conv['G'] = 2; conv['t'] = conv['T'] = 3;
+	ref = (uint8_t*)argv[optind]; query = (uint8_t*)argv[optind+1];
+	l_ref = strlen((char*)ref); l_query = strlen((char*)query);
+	for (i = 0; i < l_ref; ++i) ref[i] = conv[ref[i]];
+	for (i = 0; i < l_query; ++i) query[i] = conv[query[i]];
+	iqual = malloc(l_query);
+	memset(iqual, q, l_query);
+	// NOTE(review): -b is stored in kpa_ext_par_def but the call below uses kpa_ext_par_alt,
+	// so the option currently has no effect on the alignment - confirm which preset is intended
+	kpa_ext_par_def.bw = b;
+	// state, q, pd and ret_bw are all NULL: only the forward pass runs and only the score is returned
+	P = kpa_ext_glocal(ref, l_ref, query, l_query, iqual, &kpa_ext_par_alt, NULL, NULL, NULL, NULL);
+	fprintf(stderr, "%d\n", P);
+	free(iqual);
+	return 0;
+}
+#endif
diff --git a/src/lofreq/kprobaln_ext.h b/src/lofreq/kprobaln_ext.h
new file mode 100644
index 0000000..0511c75
--- /dev/null
+++ b/src/lofreq/kprobaln_ext.h
@@ -0,0 +1,50 @@
+/* The MIT License
+
+   Copyright (c) 2003-2006, 2008, 2009 by Heng Li <lh3 at live.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+#ifndef LH3_KPROBALN_EXT_H_
+#define LH3_KPROBALN_EXT_H_
+
+#include <stdint.h>
+
+/* Alignment parameters for kpa_ext_glocal(): d is the gap open
+ * probability, e the gap extension probability and bw the band width. */
+typedef struct {
+	float d, e;
+	int bw;
+} kpa_ext_par_t;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+	/* Probabilistic banded glocal alignment of _query (length l_query)
+	 * against _ref (length l_ref); iqual holds the query base qualities
+	 * and c the alignment parameters. state and q, if given, receive one
+	 * entry per query base. pd and ret_bw belong to the LoFreq extension,
+	 * which is not used if pd is NULL. */
+	int kpa_ext_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_query, 
+    const uint8_t *iqual, const kpa_ext_par_t *c, int *state, uint8_t *q, double **pd, 
+    int *ret_bw);
+
+#ifdef __cplusplus
+}
+#endif
+
+/* ready-made parameter presets, defined in kprobaln_ext.c */
+extern kpa_ext_par_t kpa_ext_par_def, kpa_ext_par_alt, kpa_ext_par_lofreq_illumina, kpa_ext_par_lofreq_pacbio;
+
+#endif
diff --git a/src/lofreq/lofreq_alnqual.c b/src/lofreq/lofreq_alnqual.c
new file mode 100644
index 0000000..35dc80d
--- /dev/null
+++ b/src/lofreq/lofreq_alnqual.c
@@ -0,0 +1,169 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*
+  This is part of LoFreq Star and largely based on samtools' bam_md.c
+  (0.1.19) which was originally published under the MIT License.
+  
+  Copyright (c) 2003-2006, 2008-2010, by Heng Li <lh3lh3 at live.co.uk>
+  
+  Permission is hereby granted, free of charge, to any person obtaining
+  a copy of this software and associated documentation files (the
+  "Software"), to deal in the Software without restriction, including
+  without limitation the rights to use, copy, modify, merge, publish,
+  distribute, sublicense, and/or sell copies of the Software, and to
+  permit persons to whom the Software is furnished to do so, subject to
+  the following conditions:
+  
+  The above copyright notice and this permission notice shall be
+  included in all copies or substantial portions of the Software.
+  
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+  SOFTWARE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "htslib/faidx.h"
+#include "sam.h"
+
+#include "utils.h"
+#include "bam_md_ext.h"
+#include "defaults.h"
+
+extern const char bam_nt16_nt4_table[];
+
+#define USE_EQUAL 1
+#define DROP_TAG  2
+#define BIN_QUAL  4
+#define UPDATE_NM 8
+#define UPDATE_MD 16
+#define HASH_QNM  32
+
+#define MYNAME "lofreq alnqual"		
+
+
+/* Print the help text of 'lofreq alnqual' to stderr. */
+static void usage(void)
+{
+     fprintf(stderr, "%s: add base- and indel-alignment qualities (BAQ, IDAQ) to BAM file\n\n", MYNAME);
+     fprintf(stderr, "Usage:   %s [options] <aln.bam> <ref.fasta>\n", MYNAME);
+     fprintf(stderr, "Options:\n");
+     fprintf(stderr, "         -b       BAM output (instead of SAM)\n");
+     fprintf(stderr, "         -u       Uncompressed BAM output (for piping)\n");
+     fprintf(stderr, "         -S       The input is SAM with header\n");
+     fprintf(stderr, "         -e       Use default instead of extended BAQ (the latter gives better sensitivity but lower specificity)\n");
+     fprintf(stderr, "         -B       Don't compute base alignment qualities\n");
+     fprintf(stderr, "         -A       Don't compute indel alignment qualities\n");
+     fprintf(stderr, "         -r       Recompute i.e. overwrite existing values\n");
+     fprintf(stderr, "- Output BAM will be written to stdout.\n");
+     fprintf(stderr, "- Only reads containing indels will contain indel-alignment qualities (tags: %s and %s).\n", AI_TAG, AD_TAG);
+     fprintf(stderr, "- Do not change the alignment after running this, i.e. use this as last postprocessing step!\n");
+     fprintf(stderr, "- This program is based on samtools. BAQ was introduced by Heng Li PMID:21320865\n\n");
+}
+
+
+/* Entry point of 'lofreq alnqual': reads alignments from <aln.bam>, adds
+ * BAQ/IDAQ via bam_prob_realn_core_ext() and writes them to stdout.
+ * Returns 0 on success and 1 on bad usage, I/O failure or truncated input. */
+int main_alnqual(int argc, char *argv[])
+{
+     int c, tid = -2, ret, len, is_bam_out, is_sam_in, is_uncompressed;
+     int rc = 1; /* only cleared once all reads were processed */
+     samfile_t *fp = 0, *fpout = 0;
+     faidx_t *fai = 0;
+     char *ref = 0, mode_w[8], mode_r[8];
+     bam1_t *b = 0;
+     int baq_flag = 1, idaq_flag = 1;
+     int ext_baq = 1, redo = 0;
+
+     is_bam_out = is_sam_in = is_uncompressed = 0;
+     strcpy(mode_r, "r"); strcpy(mode_w, "w");
+
+     while ((c = getopt(argc, argv, "buSeBAr")) >= 0) {
+          switch (c) {
+          case 'b': is_bam_out = 1; break;
+          case 'u': is_uncompressed = is_bam_out = 1; break;
+          case 'S': is_sam_in = 1; break;
+          case 'e': ext_baq = 0; break;
+          case 'B': baq_flag = 0; break;
+          case 'A': idaq_flag = 0; break;
+          case 'r': redo = 1; break;
+          case '?':
+               fprintf(stderr, "FATAL: unrecognized arguments found. Exiting...\n");
+               return 1;
+          default:
+               break;
+          }
+     }
+     if (optind + 1 >= argc) {
+          usage();
+          return 1;
+     }
+
+     if (!is_sam_in) strcat(mode_r, "b");
+     strcat(mode_w, is_bam_out ? "b" : "h");
+     if (is_uncompressed) strcat(mode_w, "u");
+
+     if (redo) { /* -r: an enabled quality is passed on as 2 instead of 1 */
+          if (baq_flag) baq_flag = 2;
+          if (idaq_flag) idaq_flag = 2;
+     }
+
+     if (! baq_flag && ! idaq_flag) {
+          fprintf(stderr, "FATAL: %s: Nothing to do: BAQ and IDAQ off\n", MYNAME);
+          return 1;
+     }
+
+     fp = samopen(argv[optind], mode_r, 0);
+     if (fp == 0) return 1;
+     if (is_sam_in && (fp->header == 0 || fp->header->n_targets == 0)) {
+          fprintf(stderr, "FATAL: %s: input SAM does not have header\n", MYNAME);
+          goto cleanup;
+     }
+     fpout = samopen("-", mode_w, fp->header);
+     if (fpout == 0) {
+          fprintf(stderr, "FATAL: %s: failed to open stdout for writing\n", MYNAME);
+          goto cleanup;
+     }
+
+     fai = fai_load(argv[optind+1]);
+     if (! fai) {
+          fprintf(stderr, "FATAL: %s: failed to load fai index\n", MYNAME);
+          goto cleanup;
+     }
+
+     b = bam_init1();
+     while ((ret = samread(fp, b)) >= 0) {
+          if (b->core.tid >= 0) {
+               if (tid != b->core.tid) {
+                    free(ref);
+                    ref = fai_fetch(fai, fp->header->target_name[b->core.tid], &len);
+                    tid = b->core.tid;
+                    if (ref == 0) { /* must be checked before ref is touched below */
+                         fprintf(stderr, "FATAL: %s failed to find sequence '%s' in the reference.\n",
+                                   MYNAME, fp->header->target_name[tid]);
+                         goto cleanup;
+                    }
+                    strtoupper(ref);/* safeguard */
+               }
+
+               bam_prob_realn_core_ext(b, ref, baq_flag, ext_baq, idaq_flag);
+          }
+          samwrite(fpout, b);
+     }
+     if (ret < -1) fprintf(stderr, "FATAL: %s: input file is truncated\n", MYNAME);
+     else rc = 0;
+
+cleanup:
+     if (b) bam_destroy1(b);
+     free(ref);
+     if (fai) fai_destroy(fai);
+     samclose(fp);
+     if (fpout) samclose(fpout);
+     return rc;
+}
diff --git a/src/lofreq/lofreq_alnqual.h b/src/lofreq/lofreq_alnqual.h
new file mode 100644
index 0000000..9f44203
--- /dev/null
+++ b/src/lofreq/lofreq_alnqual.h
@@ -0,0 +1,35 @@
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+#ifndef LOFREQ_ALNQUAL_H
+#define LOFREQ_ALNQUAL_H
+
+/* function prototypes here */
+int main_alnqual(int argc, char *argv[]); /* 'lofreq alnqual' entry point; returns 0 on success, 1 on error */
+
+#endif
diff --git a/src/lofreq/lofreq_bamstats.c b/src/lofreq/lofreq_bamstats.c
new file mode 100644
index 0000000..5dd3210
--- /dev/null
+++ b/src/lofreq/lofreq_bamstats.c
@@ -0,0 +1,491 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+/* loosely based on 0.1.18 sam_view.c */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <assert.h>
+
+/* samtools includes */
+#include "sam.h"
+#include "faidx.h"
+/* from bedidx.c */
+void *bed_read(const char *fn);
+void bed_destroy(void *_h);
+int bed_overlap(const void *_h, const char *chr, int beg, int end);
+
+/* lofreq includes */
+#include "log.h"
+#include "utils.h"
+#include "samutils.h"
+#include "defaults.h"
+
+#if 1
+#define MYNAME "lofreq bamstats"
+#else
+#define MYNAME PACKAGE
+#endif
+
+#define TYPE_MAPERRPROF 0
+#define TYPE_OPCAT 1
+
+typedef struct { /* run-time settings of 'lofreq bamstats', filled in by main_bamstats() */
+     int min_mq; /* reads with a mapping quality below this are skipped */
+     int min_bq; /* --min-bq value; NOTE(review): in the code visible here it is only printed by usage() -- confirm it is applied */
+     char *fa; /* reference fasta file name (copy of the -f argument) */
+     faidx_t *fai; /* index of fa as returned by fai_load() */
+     void *bed; /* regions from bed_read() or NULL if no bed file was given */
+     int samflags_on; /* reads lacking any of these flag bits are skipped */
+     int samflags_off; /* reads with any of these flag bits set are skipped */
+     FILE *out; /* stream the report is written to (stdout by default) */
+     int type; /* TYPE_OPCAT or TYPE_MAPERRPROF */
+} bamstats_conf_t;
+
+#ifdef USE_ALNERRPROF
+
+#define WRITE_STATS  if (ref) { \
+          fprintf(bamstats_conf->out, "# Reads ignored for counting (due to bed/mq filtering): %lu\n", num_ign_reads); \
+          fprintf(bamstats_conf->out, "# Reads used for counting: %lu\n", num_good_reads); \
+          if (bamstats_conf->type == TYPE_OPCAT) { \
+               fprintf(bamstats_conf->out, "# Reads with zero matches (after bq filtering): %lu\n", num_zero_matches); \
+               write_cat_stats(target_name, read_cat_counts, num_good_reads, bamstats_conf->out); \
+          } else { \
+               write_alnerrprof_stats(target_name, alnerrprof_usedpos, alnerrprof, max_obs_read_len, bamstats_conf->out); \
+          } \
+          free(ref); \
+     }
+#else
+
+#define WRITE_STATS  if (ref) { \
+          fprintf(bamstats_conf->out, "# Reads ignored for counting (due to bed/mq filtering): %lu\n", num_ign_reads); \
+          fprintf(bamstats_conf->out, "# Reads used for counting: %lu\n", num_good_reads); \
+          if (bamstats_conf->type == TYPE_OPCAT) { \
+               fprintf(bamstats_conf->out, "# Reads with zero matches (after bq filtering): %lu\n", num_zero_matches); \
+               write_cat_stats(target_name, read_cat_counts, num_good_reads, bamstats_conf->out); \
+          } \
+          free(ref); \
+     }
+#endif
+
+
+/* Decide whether alignment b is to be ignored (adopted from
+ * sam_view.c:__g_skip_aln). Returns 0 to keep the read, otherwise the reason:
+ * 1 = no overlap with the bed regions, 2 = mapping quality below min_mq,
+ * 3 = a flag_on bit is missing or a flag_off bit is set. */
+static inline int
+skip_aln(const bam_header_t *h, const bam1_t *b,
+         const int min_mq, const int flag_on, const int flag_off, void *bed)
+{
+     const int has_all_on = (b->core.flag & flag_on) == flag_on;
+     const int has_any_off = (b->core.flag & flag_off) != 0;
+
+     if (bed && b->core.tid >= 0) {
+          const int aln_end = bam_calend(&b->core, bam1_cigar(b));
+          if (!bed_overlap(bed, h->target_name[b->core.tid], b->core.pos, aln_end)) {
+               return 1;
+          }
+     }
+
+     if (b->core.qual < min_mq) return 2;
+     return (has_all_on && !has_any_off) ? 0 : 3;
+}
+
+
+
+/* Print the command line help to stderr; the numeric defaults shown are taken from bamstats_conf. */
+static void
+usage(bamstats_conf_t *bamstats_conf)
+{
+     fprintf(stderr, "%s: Compiles statistics from BAM files\n\n", MYNAME);
+     fprintf(stderr, "Usage: %s [options] -f reffa in.bam\n\n", MYNAME);
+     fputs("Options:\n", stderr);
+     fputs("       --verbose        Be verbose\n", stderr);
+     fputs("       --debug          Enable debugging\n", stderr);
+     fputs("  -l | --bed FILE       List of positions (chr pos) or regions (BED) [null]\n", stderr);
+     fputs("  -f | --reffa FILE     Indexed reference fasta file (gzip supported) [null]\n", stderr);
+     fputs("  -o | --out FILE       Write stats to this output file [- = stdout]\n", stderr);
+     fprintf(stderr, "  -q | --min-bq INT     Ignore any base with baseQ smaller than INT [%d]\n", bamstats_conf->min_bq);
+     fprintf(stderr, "  -m | --min-mq INT     Ignore reads with mapQ smaller than INT [%d]\n", bamstats_conf->min_mq);
+#ifdef USE_ALNERRPROF
+     fputs("       --opcat          Report cigar OP categories instead of error profile\n", stderr);
+#endif
+}
+/* usage() */
+
+
+
+/* Print, for each op category and each op count observed on chromosome
+ * target_name, the proportion of the num_reads reads showing that count.
+ * Categories whose counts do not sum up to num_reads are flagged via LOG_FIXME. */
+void
+write_cat_stats(char *target_name, unsigned long int **read_cat_counts,
+           unsigned long int num_reads, FILE *out)
+{
+     int cat, n_ops;
+
+     fputs("# Listing of proportions of reads with certain number of BAM operations (op)\n", out);
+     fputs("# proportions are in scientific notation or missing altogether if no reads for that count were found\n", out);
+     fputs("# chrom\top-category\top-count\tread-proportion\n", out);
+     for (cat = 0; cat < NUM_OP_CATS; cat++) {
+          const unsigned long int *row = read_cat_counts[cat];
+          unsigned long int total = 0;
+          for (n_ops = 0; n_ops < MAX_READ_LEN; n_ops++) {
+               if (0 == row[n_ops]) {
+                    continue; /* counts never seen are not listed */
+               }
+               fprintf(out, "%s\t%s\t%d\t%g\n", target_name, op_cat_str[cat],
+                       n_ops, row[n_ops]/(double)num_reads);
+               total += row[n_ops];
+          }
+          if (total != num_reads) {
+               LOG_FIXME("fail cat_sum=%lu != num_reads=%lu\n", total, num_reads);
+          }
+     }
+}
+
+
+/* Compile statistics for all alignments in sam; a report is written to
+ * bamstats_conf->out (WRITE_STATS) per chromosome, i.e. on each chromosome
+ * change and at end of input. Returns 0 on success, 1 on error (over-long
+ * read, unavailable reference sequence or truncated BAM).
+ * NOTE(review): min_bq is never read here; count_cigar_ops() gets min_mq -- confirm. */
+int
+bamstats(samfile_t *sam, bamstats_conf_t *bamstats_conf)
+{
+     char *target_name = NULL; /* chrom name */
+     char *ref = NULL; /* reference sequence */
+     unsigned long int **read_cat_counts;
+     unsigned long int num_good_reads = 0;
+     unsigned long int num_ign_reads = 0;
+     unsigned long int num_zero_matches = 0;
+
+#ifdef USE_ALNERRPROF
+     double alnerrprof[MAX_READ_LEN];
+     unsigned long int alnerrprof_usedpos[MAX_READ_LEN];
+#endif
+
+     int max_obs_read_len = 0;
+     int r, i, rc = 1; /* rc is only cleared once the whole input was read */
+     bam1_t *b = bam_init1();
+
+     if (bamstats_conf->type == TYPE_OPCAT) {
+         /* count_cigar_ops/read_cat_counts assume roughly equal read length */
+         LOG_WARN("%s\n", "cigar op counts not using base qualities and assuming (roughly) equal read length");/* (which could be easily implemented for matches");*/
+     }
+
+#ifdef USE_ALNERRPROF
+     memset(alnerrprof_usedpos, 0, MAX_READ_LEN * sizeof(unsigned long int));
+     memset(alnerrprof, 0, MAX_READ_LEN * sizeof(double));
+#endif
+
+     read_cat_counts = calloc(NUM_OP_CATS, sizeof(unsigned long int *));
+     for (i=0; i<NUM_OP_CATS; i++) {
+          read_cat_counts[i] = calloc(MAX_READ_LEN, sizeof(unsigned long int));
+     }
+
+     while ((r = samread(sam, b)) >= 0) { /* read one alignment from `in' */
+          int counts[NUM_OP_CATS];
+          int ref_len = -1;
+          /* a negative tid would index target_name[] out of bounds below */
+          if (b->core.tid < 0 || skip_aln(sam->header, b, bamstats_conf->min_mq,
+                       bamstats_conf->samflags_on, bamstats_conf->samflags_off,
+                       bamstats_conf->bed)) {
+               num_ign_reads += 1;
+               continue;
+          }
+          if (b->core.l_qseq >= MAX_READ_LEN) {
+               LOG_FATAL("%s\n", "Reached maximum read length");
+               goto free_and_exit;
+          }
+          /* load ref only if necessary. also triggers output of stats per
+           * chrom. has to happen before the current read is counted, as it
+           * would otherwise be added to the stats of the previous chrom */
+          if (ref == NULL || strcmp(target_name, sam->header->target_name[b->core.tid]) != 0) {
+               /* write report. use macro to avoid code duplication with below */
+               WRITE_STATS;
+               /* reset everything for next chrom... */
+               for (i=0; i<NUM_OP_CATS; i++) {
+                    memset(read_cat_counts[i], 0, MAX_READ_LEN * sizeof(unsigned long int));
+               }
+#ifdef USE_ALNERRPROF
+               memset(alnerrprof_usedpos, 0, MAX_READ_LEN * sizeof(unsigned long int));
+               memset(alnerrprof, 0, MAX_READ_LEN * sizeof(double));
+#endif
+               max_obs_read_len = 0;
+               num_good_reads = num_ign_reads = num_zero_matches = 0;
+               target_name = sam->header->target_name[b->core.tid];
+               ref = faidx_fetch_seq(bamstats_conf->fai, target_name,
+                                     0, 0x7fffffff, &ref_len);
+               if (ref == NULL) {
+                    LOG_FATAL("Failed to fetch sequence '%s' from the reference\n", target_name);
+                    goto free_and_exit;
+               }
+               strtoupper(ref);/* safeguard */
+          }
+
+          num_good_reads += 1;
+          if (b->core.l_qseq > max_obs_read_len) {
+               max_obs_read_len = b->core.l_qseq;
+          }
+          if (0 == (num_good_reads+num_ign_reads)%1000000) {
+               LOG_VERBOSE("Still alive and happily crunching away on read number %lu\n", (num_good_reads+num_ign_reads));
+          }
+
+          if (bamstats_conf->type == TYPE_OPCAT) {
+               if (-1 == count_cigar_ops(counts, NULL, b, ref, bamstats_conf->min_mq, sam->header->target_name[b->core.tid])) {
+                    LOG_WARN("%s\n", "count_cigar_ops failed on read. ignoring"); /* FIXME print read */
+                    continue;
+               }
+               for (i=0; i<NUM_OP_CATS; i++) {
+                    assert(counts[i]<MAX_READ_LEN);
+                    read_cat_counts[i][counts[i]] += 1;
+               }
+               if (0 == counts[OP_MATCH]) {
+                    /* quals are raw bytes, no C string, hence not printed. NOTE(review): check who frees the cigar_str_from_bam() result */
+                    LOG_DEBUG("Got read with zero matches after filtering with min_mq %d: name:%s cigar:%s\n",
+                              bamstats_conf->min_mq, bam1_qname(b), cigar_str_from_bam(b));
+                    num_zero_matches += 1;
+               }
+          } else {
+#ifdef USE_ALNERRPROF
+               calc_read_alnerrprof(alnerrprof, alnerrprof_usedpos, b, ref);
+#endif
+          }
+     }
+
+     /* don't forget to output last seen chrom. use macro to avoid code duplication with above */
+     WRITE_STATS;
+     ref = NULL; /* already released by WRITE_STATS; avoids a double free below */
+
+     if (r < -1) {
+          LOG_FATAL("%s\n", "BAM file is truncated.\n");
+     } else {
+          rc = 0;
+     }
+
+free_and_exit:
+     free(ref);
+     for (i=0; i<NUM_OP_CATS; i++) {
+          free(read_cat_counts[i]);
+     }
+     free(read_cat_counts);
+     bam_destroy1(b);
+     return rc;
+}
+
+
+/* Entry point of 'lofreq bamstats': parses the command line, loads the
+ * reference index (-f, mandatory) and the optional bed file (-l), then runs
+ * bamstats() on the single BAM file given as last argument.
+ * Returns 0 on success (also after --help) and 1 on any error. */
+int
+main_bamstats(int argc, char *argv[])
+{
+     char *bamfile = NULL;
+     char *bedfile = NULL;
+     samfile_t *sam =  NULL;
+     int rc = 0;
+     bamstats_conf_t bamstats_conf;
+#ifdef USE_ALNERRPROF
+     static int report_opcat = 0;
+#else
+     static int report_opcat = 1;
+#endif
+
+     memset(&bamstats_conf, 0, sizeof(bamstats_conf_t));
+     bamstats_conf.out = stdout;
+     bamstats_conf.min_mq = DEFAULT_MIN_MQ;
+     bamstats_conf.min_bq = DEFAULT_MIN_BQ;
+     /* will skip read if any of the following is set (field is 0 after the memset above) */
+     bamstats_conf.samflags_off |= 0x4; /* segment unmapped */
+     bamstats_conf.samflags_off |= 0x100; /* secondary alignment */
+     bamstats_conf.samflags_off |= 0x200; /* not passing quality controls */
+     bamstats_conf.samflags_off |= 0x400; /* PCR or optical duplicate */
+     bamstats_conf.samflags_off |= 0x800; /* supplementary alignment */
+
+     /* FIXME enable BAQ on request ? */
+
+     /* keep in sync with long_opts_str and usage 
+     *
+     * getopt is a pain in the whole when it comes to syncing of long
+     * and short args and usage. check out gopt, libcfu
+     */
+     while (1) {
+          int c;
+          static struct option long_opts[] = {
+               /* see usage sync */
+               {"bed", required_argument, NULL, 'l'},
+               {"reffa", required_argument, NULL, 'f'},
+               {"out", required_argument, NULL, 'o'},
+               {"min-bq", required_argument, NULL, 'q'},
+               {"min-mq", required_argument, NULL, 'm'},
+
+               {"help", no_argument, NULL, 'h'},
+               {"verbose", no_argument, &verbose, 1},
+               {"debug", no_argument, &debug, 1},
+#ifdef USE_ALNERRPROF
+               {"opcat", no_argument, &report_opcat, 1},
+#endif
+               {0, 0, 0, 0} /* sentinel */
+          };
+
+          /* keep in sync with long_opts and usage */
+          static const char *long_opts_str = "hl:f:o:q:m:";
+
+          /* getopt_long stores the option index here. */
+          int long_opts_index = 0;
+          c = getopt_long(argc-1, argv+1, /* skipping 'lofreq', just leaving 'command', i.e. call */
+                          long_opts_str, long_opts, & long_opts_index);
+          if (c == -1) {
+               break;
+          }
+          switch (c) {
+               /* keep in sync with long_opts etc */
+          case 'h':
+               usage(&bamstats_conf);
+               rc = 0;
+               goto free_and_exit;
+
+          case 'l':
+               bedfile = strdup(optarg);
+               break;
+
+          case 'f':
+               bamstats_conf.fa = strdup(optarg);
+               bamstats_conf.fai = fai_load(optarg);
+               if (bamstats_conf.fai == 0)  {
+                    rc = 1;
+                    goto free_and_exit;
+               }
+               break;
+
+          case 'o':
+               if (0 != strcmp(optarg, "-")) {
+                    if (file_exists(optarg)) {
+                         LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg);
+                         rc = 1;
+                         goto free_and_exit;
+                    }
+                    bamstats_conf.out = fopen(optarg, "w");
+                    if (NULL == bamstats_conf.out) { /* would otherwise crash on the first write */
+                         LOG_FATAL("Failed to open '%s' for writing.\n", optarg);
+                         rc = 1;
+                         goto free_and_exit;
+                    }
+               } else {
+                    bamstats_conf.out = stdout;
+               }
+               break;
+
+          case 'q':
+               bamstats_conf.min_bq = atoi(optarg);
+               break;
+
+          case 'm':
+               bamstats_conf.min_mq = atoi(optarg);
+               break;
+
+          case '?':
+               LOG_FATAL("%s\n", "Unrecognized arguments found. Exiting...\n");
+               rc = 1;
+               goto free_and_exit;
+
+          default:
+               break;
+          }
+     }
+     bamstats_conf.type = report_opcat;
+
+     if (argc == 2) {
+          fprintf(stderr, "\n");
+          usage(&bamstats_conf);
+          rc = 1;
+          goto free_and_exit;
+     }
+
+     if (1 != argc - optind - 1) {
+          LOG_FATAL("%s\n\n", "Need exactly one BAM file as last argument");
+          rc = 1;
+          goto free_and_exit;
+     }
+     bamfile = (argv + optind + 1)[0];
+     if (!file_exists(bamfile)) {
+          LOG_FATAL("BAM file %s does not exist.\n\n", bamfile);
+          rc = 1;
+          goto free_and_exit;
+     }
+
+     if (NULL == bamstats_conf.fa) {
+          LOG_FATAL("%s\n\n", "ERROR: Missing reference fasta argument");
+          usage(&bamstats_conf);
+          rc = 1;
+          goto free_and_exit;
+     }
+
+     if (bedfile) {
+          LOG_VERBOSE("%s\n", "NOTE: bed routines don't make use of indexing and are therefore as slow as reading the whole BAM file."); /* FIXME */
+          bamstats_conf.bed = bed_read(bedfile);
+          if (! bamstats_conf.bed) {
+               LOG_FATAL("Failed to read BED file %s.\n\n", bedfile);
+               rc = 1;
+               goto free_and_exit;
+          }
+     }
+
+     if ((sam = samopen(bamfile, "rb", NULL)) == 0) {
+          LOG_FATAL("Failed to open \"%s\" for reading.\n", bamfile);
+          rc = 1;
+     } else {
+          rc = bamstats(sam, &bamstats_conf);
+          samclose(sam);
+     }
+
+free_and_exit:
+     if (bamstats_conf.out && bamstats_conf.out != stdout) { /* out is NULL if fopen() failed */
+          fclose(bamstats_conf.out);
+     }
+     free(bamstats_conf.fa);
+     if (bamstats_conf.fai) {
+         fai_destroy(bamstats_conf.fai);
+     }
+
+     free(bedfile);
+     if (bamstats_conf.bed) {
+          bed_destroy(bamstats_conf.bed);
+     }
+
+     if (0==rc) {
+          LOG_VERBOSE("%s\n", "Successful exit.");
+     }
+     return rc;
+}
+/* main_bamstats */
diff --git a/src/lofreq/lofreq_bamstats.h b/src/lofreq/lofreq_bamstats.h
new file mode 100644
index 0000000..e8955ad
--- /dev/null
+++ b/src/lofreq/lofreq_bamstats.h
@@ -0,0 +1,35 @@
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+#ifndef LOFREQ_BAMSTATS_H
+#define LOFREQ_BAMSTATS_H
+
+int main_bamstats(int argc, char *argv[]); /* 'lofreq bamstats' entry point; returns 0 on success, 1 on error */
+
+
+#endif
diff --git a/src/lofreq/lofreq_call.c b/src/lofreq/lofreq_call.c
new file mode 100644
index 0000000..f9f8db2
--- /dev/null
+++ b/src/lofreq/lofreq_call.c
@@ -0,0 +1,1558 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+/*
+ * This file is partially based on samtools' bam_plcmd.c. Parts of
+ * code that look like they were written by an other-worldly wizard are
+ * Heng Li's.
+ *
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <limits.h>
+#include <float.h>
+#include <getopt.h>
+#include <stdlib.h>
+
+/* libbam includes */
+#include "htslib/faidx.h"
+#include "sam.h"
+#include "htslib/kstring.h"
+
+/* from bedidx.c */
+void *bed_read(const char *fn);
+void bed_destroy(void *_h);
+int bed_overlap(const void *_h, const char *chr, int beg, int end);
+
+
+
+/* lofreq includes */
+#include "snpcaller.h"
+#include "vcf.h"
+#include "fet.h"
+#include "utils.h"
+#include "log.h"
+#include "plp.h"
+#include "defaults.h"
+
+#if 1
+#define MYNAME "lofreq call"
+#else
+#define MYNAME PACKAGE
+#endif
+
+
+/* Size constant: 1<<16 = 65536. The replacement list is parenthesised
+ * so that operators at the expansion site cannot re-associate with the
+ * shift (without parentheses BUF_SIZE + 1 would expand to 1<<16 + 1,
+ * i.e. 1<<17). */
+#define BUF_SIZE (1<<16)
+
+
+/* number of tests performed (CONSVAR doesn't count). for downstream
+ * multiple testing correction. corresponds to bonf if bonf_dynamic is
+ * true. */
+long long int num_snv_tests = 0; /* grows by NUM_NONCONS_BASES for every column tested in call_snvs() */
+long long int num_indel_tests = 0; /* grows by one for every indel event tested in call_indels() */
+/* FIXME extend to keep some more stats, e.g. num_pos_with_cov etc */
+
+/* number of indel variants passed to report_var() for pileup columns
+ * whose has_indel_aqs flag was not set */
+long int indel_calls_wo_idaq = 0;
+
+
+/* Variant reporter to be used for all types (SNVs and indels).
+ *
+ * Creates a new var_t for pileup column p, fills in chromosome,
+ * position, ref and alt alleles and (if qual > -1) the quality,
+ * computes a strand-bias quality from the dp4 counts, formats the INFO
+ * field via vcf_var_sprintf_info(), writes the variant to vcf_file and
+ * frees it again.
+ *
+ * vcf_file:   output handle passed on to vcf_write_var()
+ * p:          pileup column the variant was called on
+ * ref, alt:   allele strings; copied, so the caller keeps ownership
+ * af:         allele frequency, passed through to the INFO field
+ * qual:       variant quality; values <= -1 leave var->qual untouched
+ * is_indel:   non-zero for indels. The reported depth is then
+ *             coverage_plp - num_tails instead of coverage_plp, and
+ *             indel_calls_wo_idaq is incremented if p->has_indel_aqs
+ *             is not set
+ * is_consvar: passed through to vcf_var_sprintf_info()
+ * dp4:        ref/alt forward/reverse counts used for the strand-bias
+ *             test and the INFO field
+ *
+ * Exits with status 1 if copying one of the strings fails.
+ */
+void
+report_var(vcf_file_t *vcf_file, const plp_col_t *p, const char *ref,
+           const char *alt, const float af, const int qual,
+           const int is_indel, const int is_consvar,
+           const dp4_counts_t *dp4)
+{
+     var_t *var;
+     double sb_left_pv, sb_right_pv, sb_two_pv;
+     int sb_qual;
+
+     vcf_new_var(&var);
+     var->chrom = strdup(p->target);
+     var->pos = p->pos;
+
+     if (is_indel && ! p->has_indel_aqs) {
+          indel_calls_wo_idaq += 1;
+     }
+     /* var->id = NA */
+     var->ref = strdup(ref);
+     var->alt = strdup(alt);
+     /* strdup() returns NULL if it cannot allocate memory; handle that
+      * the same way as the other allocation failures in this file
+      * instead of handing NULL strings to the vcf writer */
+     if (var->chrom==NULL || var->ref==NULL || var->alt==NULL) {
+          LOG_FATAL("%s\n", "memory allocation failed");
+          exit(1);
+     }
+     if (qual>-1) {
+          var->qual = qual;
+     }
+     /* var->filter = NA */
+
+     /* strand bias
+      */
+     /* special case: if ref is entirely missing and we have alts on 
+        only one strand fisher's exact test will return 0, which is
+        most certainly not what we want */
+     if ((dp4->ref_fw + dp4->ref_rv)==0  && (dp4->alt_fw==0 || dp4->alt_rv==0)) {
+          sb_qual = INT_MAX;
+     } else {
+          /* double sb_prob = kt... Assignment removed to shut up clang static analyzer */
+          (void) kt_fisher_exact(dp4->ref_fw, dp4->ref_rv, dp4->alt_fw, dp4->alt_rv,
+                                 &sb_left_pv, &sb_right_pv, &sb_two_pv);
+          /* only the two-sided p-value is used */
+          sb_qual = PROB_TO_PHREDQUAL_SAFE(sb_two_pv);
+     }
+     vcf_var_sprintf_info(var, is_indel? p->coverage_plp - p->num_tails : p->coverage_plp,
+                          af, sb_qual, dp4, is_indel, p->hrun, is_consvar);
+
+     vcf_write_var(vcf_file, var);
+     vcf_free_var(&var);
+}
+/* report_var() */
+
+
+#if 0
+/* Report consensus substitution.
+ *
+ * Currently compiled out by the enclosing #if 0.
+ *
+ * Reports p->ref_base > p->cons_base as a consensus variant through
+ * report_var(). The allele frequency is the count of the first
+ * consensus character divided by the column coverage; the dp4 counts
+ * are taken from the forward/reverse counts of the ref and consensus
+ * nucleotides. qual is -1, which makes report_var() leave the variant
+ * quality unset.
+ */
+void
+report_cons_sub(const plp_col_t *p, varcall_conf_t *conf){
+
+     const int is_indel = 0;
+     const int is_consvar = 1;
+     const int qual = -1;
+     char report_ref[2];
+     int ref_nt4;
+     int alt_nt4;
+     dp4_counts_t dp4;
+     float af = base_count(p, p->cons_base[0]) / (float)p->coverage_plp;
+     
+     /* single-character, NUL-terminated ref allele */
+     report_ref[0] = p->ref_base;
+     report_ref[1] = '\0';
+     /* map both alleles to their index into the per-nucleotide count arrays */
+     ref_nt4 = bam_nt4_table[(int)report_ref[0]];
+     alt_nt4 = bam_nt4_table[(int)p->cons_base[0]];
+
+     dp4.ref_fw = p->fw_counts[ref_nt4];
+     dp4.ref_rv = p->rv_counts[ref_nt4];
+     dp4.alt_fw = p->fw_counts[alt_nt4];
+     dp4.alt_rv = p->rv_counts[alt_nt4];
+
+
+     LOG_DEBUG("cons var snp: %s %d %c>%s\n",
+               p->target, p->pos+1, p->ref_base, p->cons_base);
+     report_var(& conf->vcf_out, p, report_ref, p->cons_base,
+                af, qual, is_indel, is_consvar, &dp4);
+}
+
+/* Report consensus insertion.
+ *
+ * Currently compiled out by the enclosing #if 0.
+ *
+ * Takes the inserted sequence from p->cons_base (skipping its first
+ * character), looks up the matching insertion event and reports
+ * ref_base > ref_base+inserted sequence as a consensus variant through
+ * report_var(). qual is -1, which makes report_var() leave the variant
+ * quality unset.
+ *
+ * NOTE(review): should this code ever be re-enabled, check the
+ * following first:
+ *  - cons_ins_key is uninitialised and strncpy() does not terminate
+ *    it if the source has MAX_INDELSIZE-1 or more characters
+ *  - the copy loop runs up to and including ins_length, i.e. it
+ *    already copies the terminating NUL; the assignment after the loop
+ *    then writes a second NUL one position further, which lies
+ *    outside report_ins_alt for keys of length MAX_INDELSIZE-1
+ *  - it_ins is dereferenced without a NULL check (presumably
+ *    find_ins_sequence() can fail to find the key; confirm)
+ */
+void
+report_cons_ins(const plp_col_t *p, varcall_conf_t *conf) {
+
+     const int is_indel = 1;
+     const int is_consvar = 1;
+     const int qual = -1;
+     char cons_ins_key[MAX_INDELSIZE];
+     ins_event *it_ins = NULL;
+     char report_ins_ref[2];
+     char report_ins_alt[MAX_INDELSIZE];
+     int ins_length;
+     int j;
+     float af;
+     dp4_counts_t dp4;
+
+     strncpy(cons_ins_key, p->cons_base+1, MAX_INDELSIZE-1);
+     it_ins = find_ins_sequence(&p->ins_event_counts, cons_ins_key);
+
+     ins_length = strlen(cons_ins_key);
+     report_ins_ref[0] = report_ins_alt[0] = p->ref_base;
+     for (j = 0; j <= ins_length; ++j) {
+          report_ins_alt[j+1] = cons_ins_key[j];
+     }
+     report_ins_ref[1] = report_ins_alt[j+1] = '\0';
+
+     /* tails are excluded from the denominator */
+     af = it_ins->count / ((float)p->coverage_plp-p->num_tails);
+
+     dp4.ref_fw = p->non_ins_fw_rv[0];
+     dp4.ref_rv = p->non_ins_fw_rv[1];
+     dp4.alt_fw = it_ins->fw_rv[0];
+     dp4.alt_rv = it_ins->fw_rv[1];
+
+     LOG_DEBUG("Consensus insertion: %s %d %s>%s\n",
+               p->target, p->pos+1, report_ins_ref, report_ins_alt);
+     report_var(& conf->vcf_out, p, report_ins_ref, report_ins_alt,
+                af, qual, is_indel, is_consvar, &dp4);
+     return;
+}
+
+/* Report consensus deletion.
+ *
+ * Currently compiled out by the enclosing #if 0.
+ *
+ * Takes the deleted sequence from p->cons_base (skipping its first
+ * character), looks up the matching deletion event and reports
+ * ref_base+deleted sequence > ref_base as a consensus variant through
+ * report_var(). qual is -1, which makes report_var() leave the variant
+ * quality unset.
+ *
+ * NOTE(review): should this code ever be re-enabled, check the
+ * following first:
+ *  - cons_del_key is uninitialised and strncpy() does not terminate
+ *    it if the source has MAX_INDELSIZE-1 or more characters
+ *  - the copy loop runs up to and including del_length, i.e. it
+ *    already copies the terminating NUL; the assignment after the loop
+ *    then writes a second NUL one position further, which lies
+ *    outside report_del_ref for keys of length MAX_INDELSIZE-1
+ *  - it_del is dereferenced without a NULL check (presumably
+ *    find_del_sequence() can fail to find the key; confirm)
+ */
+void
+report_cons_del(const plp_col_t *p, varcall_conf_t *conf) {
+
+     const int is_indel = 1;
+     const int is_consvar = 1;
+     const int qual = -1;
+     char report_del_ref[MAX_INDELSIZE];
+     char report_del_alt[2];
+     int j;
+     char cons_del_key[MAX_INDELSIZE];
+     del_event *it_del = NULL;
+     int del_length;
+     dp4_counts_t dp4;
+     float af;
+
+     strncpy(cons_del_key, p->cons_base+1, MAX_INDELSIZE-1);
+     it_del = find_del_sequence(&p->del_event_counts, cons_del_key);
+
+     del_length = strlen(cons_del_key);
+     report_del_ref[0] = report_del_alt[0] = p->ref_base;
+     for (j = 0; j <= del_length; ++j) {
+          report_del_ref[j+1] = cons_del_key[j];
+     }
+     report_del_ref[j+1] = report_del_alt[1] = '\0';
+
+     /* tails are excluded from the denominator */
+     af = it_del->count / ((float)p->coverage_plp - p->num_tails);
+
+     dp4.ref_fw = p->non_del_fw_rv[0];
+     dp4.ref_rv = p->non_del_fw_rv[1];
+     dp4.alt_fw = it_del->fw_rv[0];
+     dp4.alt_rv = it_del->fw_rv[1];
+
+     LOG_DEBUG("Consensus deletion: %s %d %s>%s\n",
+               p->target, p->pos+1, report_del_ref, report_del_alt);
+     report_var(&conf->vcf_out, p, report_del_ref, report_del_alt,
+                af, qual, is_indel, is_consvar, &dp4);
+
+}
+#endif
+
+
+/* Builds the ref and alt allele strings for a deletion event.
+ *
+ * The ref string is refbase followed by the event key (it->key); the
+ * alt string consists of refbase only. Both strings are allocated
+ * here and have to be freed by the caller. Exits with status 1 if
+ * memory cannot be allocated.
+ */
+void
+del_to_str(const del_event *it, const char refbase,
+           char **refstr, char **altstr)
+{
+     int key_len = strlen(it->key);
+
+     /* refbase + key + terminating NUL */
+     *refstr = malloc((key_len+2) * sizeof(char));
+     if (NULL == *refstr) {
+          LOG_FATAL("%s\n", "memory allocation failed");
+          exit(1);
+     }
+     /* refbase + terminating NUL */
+     *altstr = malloc(2 * sizeof(char));
+     if (NULL == *altstr) {
+          LOG_FATAL("%s\n", "memory allocation failed");
+          exit(1);
+     }
+
+     (*refstr)[0] = refbase;
+     memcpy((*refstr)+1, it->key, key_len);
+     (*refstr)[key_len+1] = '\0';
+
+     (*altstr)[0] = refbase;
+     (*altstr)[1] = '\0';
+}
+
+
+/* Builds the ref and alt allele strings for an insertion event.
+ *
+ * The ref string consists of refbase only; the alt string is refbase
+ * followed by the event key (it->key). Both strings are allocated
+ * here and have to be freed by the caller. Exits with status 1 if
+ * memory cannot be allocated.
+ */
+void
+ins_to_str(const ins_event *it, const char refbase,
+           char **refstr, char **altstr)
+{
+     int key_len = strlen(it->key);
+
+     /* refbase + terminating NUL */
+     *refstr = malloc(2 * sizeof(char));
+     if (NULL == *refstr) {
+          LOG_FATAL("%s\n", "memory allocation failed");
+          exit(1);
+     }
+     /* refbase + key + terminating NUL */
+     *altstr = malloc((key_len+2) * sizeof(char));
+     if (NULL == *altstr) {
+          LOG_FATAL("%s\n", "memory allocation failed");
+          exit(1);
+     }
+
+     (*refstr)[0] = refbase;
+     (*refstr)[1] = '\0';
+
+     (*altstr)[0] = refbase;
+     memcpy((*altstr)+1, it->key, key_len);
+     (*altstr)[key_len+1] = '\0';
+}
+
+/* Tests one insertion event for significance and reports it if it
+ * passes.
+ *
+ * The event count and the given error probabilities are handed to
+ * snpcaller(). If the resulting p-value multiplied by
+ * conf->bonf_indel is below conf->sig the insertion is written to
+ * conf->vcf_out through report_var().
+ *
+ * Returns 1 if snpcaller() failed and 0 otherwise (no matter whether
+ * a variant was reported or not).
+ */
+int
+call_alt_ins(const plp_col_t *p, double *bi_err_probs, int bi_num_err_probs,
+             varcall_conf_t *conf, ins_event *it) {
+
+     const int is_indel = 1;
+     const int is_consvar = 0;
+     int event_counts[3] = {0, 0, 0};
+     long double pvals[3];
+     long double ins_pv;
+     char *ref_str;
+     char *alt_str;
+     dp4_counts_t strand_counts;
+     int phred;
+     float ins_af;
+
+     /* snpcaller() takes three counts; only the first one is used for
+      * the event under test, the others stay zero */
+     event_counts[0] = it->count;
+     LOG_DEBUG("%s %d: passing down %d quals with noncons_ins_counts"
+               "(%d, %d, %d) to snpcaller()\n", p->target, p->pos+1,
+               bi_num_err_probs, event_counts[0], event_counts[1], event_counts[2]);
+
+     /* compute p-value for insertion */
+     if (0 != snpcaller(pvals, bi_err_probs, bi_num_err_probs, event_counts,
+                        conf->bonf_indel, conf->sig)) {
+          fprintf(stderr, "FATAL: snpcaller() failed at %s:%s():%d\n",
+                  __FILE__, __FUNCTION__, __LINE__);
+          return 1;
+     }
+
+     /* nothing to report unless the corrected p-value is significant */
+     ins_pv = pvals[0];
+     if (! (ins_pv*conf->bonf_indel < conf->sig)) {
+#if 0
+          if (debug) {
+               ins_to_str(it, p->ref_base, &ref_str, &alt_str);
+               LOG_DEBUG("insignificant ins: %s %d %s>%s pv-prob:%Lg;pv-qual:%d\n", p->target, p->pos+1, ref_str, alt_str, ins_pv, PROB_TO_PHREDQUAL(ins_pv));
+          }
+#endif
+          return 0;
+     }
+
+     phred = PROB_TO_PHREDQUAL(ins_pv);
+     /* tails are excluded from the denominator */
+     ins_af = it->count / ((float)p->coverage_plp - p->num_tails);
+
+     strand_counts.ref_fw = p->non_ins_fw_rv[0];
+     strand_counts.ref_rv = p->non_ins_fw_rv[1];
+     strand_counts.alt_fw = it->fw_rv[0];
+     strand_counts.alt_rv = it->fw_rv[1];
+
+     ins_to_str(it, p->ref_base, &ref_str, &alt_str);
+
+     LOG_DEBUG("Low freq insertion: %s %d %s>%s pv-prob:%Lg;pv-qual:%d\n",
+               p->target, p->pos+1, ref_str, alt_str,
+               ins_pv, phred);
+     report_var(&conf->vcf_out, p, ref_str, alt_str,
+                ins_af, phred, is_indel, is_consvar, &strand_counts);
+
+     free(ref_str);
+     free(alt_str);
+     return 0;
+}
+
+/* Tests one deletion event for significance and reports it if it
+ * passes.
+ *
+ * The event count and the given error probabilities are handed to
+ * snpcaller(). If the resulting p-value multiplied by
+ * conf->bonf_indel is below conf->sig the deletion is written to
+ * conf->vcf_out through report_var().
+ *
+ * Returns 1 if snpcaller() failed and 0 otherwise (no matter whether
+ * a variant was reported or not).
+ */
+int call_alt_del(const plp_col_t *p, double *bd_err_probs, int bd_num_err_probs,
+                 varcall_conf_t *conf, del_event *it) {
+
+     const int is_indel = 1;
+     const int is_consvar = 0;
+     int event_counts[3] = {0, 0, 0};
+     long double pvals[3];
+     long double del_pv;
+     char *ref_str;
+     char *alt_str;
+     dp4_counts_t strand_counts;
+     int phred;
+     float del_af;
+
+     /* snpcaller() takes three counts; only the first one is used for
+      * the event under test, the others stay zero */
+     event_counts[0] = it->count;
+
+#if 0
+     {
+          int k;
+          for (k = 0; k < bd_num_err_probs; k++) {
+               LOG_DEBUG("bd_err_prob: %lg\n", bd_err_probs[k]);
+          }
+     }
+#endif
+
+     LOG_DEBUG("%s %d: passing down %d quals with noncons_del_counts"
+               "(%d, %d, %d) to snpcaller()\n", p->target, p->pos+1,
+               bd_num_err_probs, event_counts[0], event_counts[1], event_counts[2]);
+
+     /* snpcaller for deletion */
+     if (0 != snpcaller(pvals, bd_err_probs, bd_num_err_probs, event_counts,
+                        conf->bonf_indel, conf->sig)) {
+          fprintf(stderr, "FATAL: snpcaller() failed at %s:%s():%d\n",
+                  __FILE__, __FUNCTION__, __LINE__);
+          return 1;
+     }
+
+     /* nothing to report unless the corrected p-value is significant */
+     del_pv = pvals[0];
+     if (! (del_pv*conf->bonf_indel < conf->sig)) {
+#if 0
+          if (debug) {
+               del_to_str(it, p->ref_base, &ref_str, &alt_str);
+               LOG_DEBUG("delignificant del: %s %d %s>%s pv-prob:%Lg;pv-qual:%d\n", p->target, p->pos+1, ref_str, alt_str, del_pv, PROB_TO_PHREDQUAL(del_pv));
+          }
+#endif
+          return 0;
+     }
+
+     phred = PROB_TO_PHREDQUAL(del_pv);
+     /* tails are excluded from the denominator */
+     del_af = it->count / ((float)p->coverage_plp - p->num_tails);
+
+     /* FIXME decision to use ref or cons made elsewhere or do we have to check again? */
+     del_to_str(it, p->ref_base, &ref_str, &alt_str);
+
+     strand_counts.ref_fw = p->non_del_fw_rv[0];
+     strand_counts.ref_rv = p->non_del_fw_rv[1];
+     strand_counts.alt_fw = it->fw_rv[0];
+     strand_counts.alt_rv = it->fw_rv[1];
+
+     LOG_DEBUG("Low freq deletion: %s %d %s>%s pv-prob:%Lg;pv-qual:%d\n",
+               p->target, p->pos+1, ref_str, alt_str,
+               del_pv, phred);
+     report_var(&conf->vcf_out, p, ref_str, alt_str,
+                del_af, phred, is_indel, is_consvar, &strand_counts);
+
+     free(ref_str);
+     free(alt_str);
+     return 0;
+}
+
+/* Prints a textual summary of one pileup column to stdout.
+ *
+ * Output, in order:
+ *  - one header line: target, pos+1, reference base, consensus,
+ *    forward/reverse counts per nucleotide, number of heads and tails,
+ *    number of insertions and deletions and hrun
+ *  - for every nucleotide that has at least one base quality: one row
+ *    per quality type (BQ, BAQ, MQ and, only if VARCALL_USE_SQ is set
+ *    in conf->flag, SQ)
+ *  - for insertions ('+') and then deletions ('-'): the IDQ and MQ
+ *    rows of the non-indel qualities (labelled "+0"/"-0"), followed by
+ *    the quality rows of every recorded indel event
+ *  - a terminating empty line
+ *
+ * plp_col: the pileup column to print
+ * confp:   must point to a varcall_conf_t; only conf->flag is read.
+ *          Presumably void* so that the function fits the same
+ *          callback type as call_vars() (TODO confirm)
+ *
+ * NOTE(review): the comment that used to sit here described a
+ * different function (allocation of bc_err_probs and the alt_*
+ * arrays); nothing of that happens in plp_summary().
+ */
+void
+plp_summary(const plp_col_t *plp_col, void* confp)
+{
+     FILE* stream = stdout;
+     varcall_conf_t *conf = (varcall_conf_t *)confp;
+     /* row labels, indexed by x in the per-nucleotide loop below */
+     static const char* title[] = {"BQ", "BAQ", "MQ", "SQ"};
+     int i, x;
+
+     fprintf(stream, "%s\t%d\t%c\t%s", plp_col->target, plp_col->pos+1,
+             plp_col->ref_base, plp_col->cons_base);
+     for (i=0; i<NUM_NT4; i++) {
+          fprintf(stream, "\t%c:%lu/%lu",
+                  bam_nt4_rev_table[i],
+                  plp_col->fw_counts[i],
+                  plp_col->rv_counts[i]);
+     }
+
+     fprintf(stream, "\theads:%d\ttails:%d", plp_col->num_heads,
+             plp_col->num_tails);
+     fprintf(stream, "\tins:%d\tdels:%d", plp_col->num_ins,
+             plp_col->num_dels);
+     fprintf(stream, "\thrun:%d", plp_col->hrun);
+     fprintf(stream, "\n");
+     for (i=0; i<NUM_NT4; i++) {
+          int X = 3;/* bq, baq, mq */
+          if (conf->flag & VARCALL_USE_SQ) {
+               X = 4;/* bq, baq, mq, sq */
+          }
+          /* assuming we have base quals for all */
+          if (! plp_col->base_quals[i].n) {
+               continue;
+          }
+          for (x=0; x<X; x++) {
+               int j;
+               int nt = bam_nt4_rev_table[i];
+               fprintf(stream, "  %c\t%s =\t", nt, title[x]);
+               /* assuming we have base quals for all */
+               /* the number of base qualities also bounds the reads
+                * from the baq, map and source quality arrays */
+               for (j=0; j<plp_col->base_quals[i].n; j++) {
+                    /* q stays -1 (and is printed as such) for the BAQ
+                     * row if VARCALL_USE_BAQ is not set */
+                    int q = -1;
+                    if (x==0) {
+                         q = plp_col->base_quals[i].data[j];
+                    } else if (x==1 && conf->flag & VARCALL_USE_BAQ) {
+                         q = plp_col->baq_quals[i].data[j];
+                    } else if (x==2) {
+                         q = plp_col->map_quals[i].data[j];
+                    } else if (x==3) {
+                         q = plp_col->source_quals[i].data[j];
+                    }
+                    fprintf(stream, " %d", q);
+               }
+               fprintf(stream, "\n");
+          }
+     }
+
+     /* indels
+      */
+     {
+          /* one pass for insertions ('+'), one for deletions ('-') */
+          char *types = "+-"; char *t;
+          for (t=types; *t!='\0'; t++) {
+               /* note: this i shadows the function-level i */
+               int idq, aq, mq, sq, i;
+               const int_varray_t *id_quals = NULL;
+               const int_varray_t *id_mquals = NULL;
+               ins_event *ins_it, *ins_it_tmp;
+               del_event *del_it, *del_it_tmp;
+
+               /* non-indel qualities first 
+                */
+               if (*t=='+') {
+                    /*fprintf(stream, "  INS events & (non-ins) qualities: %d & %lu\n", 
+                      plp_col->num_ins, plp_col->ins_quals.n);*/
+                    id_quals = & plp_col->ins_quals;
+                    id_mquals = & plp_col->ins_map_quals;
+               } else if (*t=='-') {
+                    /*fprintf(stream, "  DEL events & (non-del) qualities: %d & %lu\n", 
+                      plp_col->num_dels, plp_col->del_quals.n);*/
+                    id_quals = & plp_col->del_quals;
+                    id_mquals = & plp_col->del_map_quals;
+               } else {
+                    LOG_FATAL("%s\n", "Should never get here");
+                    exit(1);
+               }
+               fprintf(stream, "  %c0\tIDQ =\t", *t);
+               for (i = 0; i < id_quals->n; i++) {
+                    idq = id_quals->data[i];
+                    fprintf(stream, " %d", idq);
+               }
+               fprintf(stream, "\n");
+               fprintf(stream, "  %c0\tMQ =\t", *t);
+               for (i = 0; i < id_mquals->n; i++) {
+                    mq = id_mquals->data[i];
+                    fprintf(stream, " %d", mq);
+               }
+               fprintf(stream, "\n");
+
+               /* now the actual indels
+                */
+               if (*t=='+') {
+                    /* WARN copy below for dels */
+                    /* NOTE(review): all four loops are bounded by
+                     * ins_quals.n, i.e. the map, aln and source
+                     * quality arrays are assumed to hold at least as
+                     * many values - confirm. The first row is
+                     * labelled "IQ" here but "IDQ" for deletions. */
+                    HASH_ITER(hh_ins, plp_col->ins_event_counts, ins_it, ins_it_tmp) {
+                         fprintf(stream, "  %c%s\tIQ =\t", *t, ins_it->key);
+                         for (i = 0; i < ins_it->ins_quals.n; i++) {
+                              idq = ins_it->ins_quals.data[i];
+                              fprintf(stream, " %d", idq);
+                         }
+                         fprintf(stream, "\n");
+                         
+                         fprintf(stream, "  %c%s\tMQ =\t", *t,  ins_it->key);
+                         for (i = 0; i < ins_it->ins_quals.n; i++) {
+                              mq = ins_it->ins_map_quals.data[i];
+                              fprintf(stream, " %d", mq);
+                         }
+                         fprintf(stream, "\n");
+                         
+                         fprintf(stream, "  %c%s\tAQ =\t",  *t, ins_it->key);
+                         for (i = 0; i < ins_it->ins_quals.n; i++) {
+                              aq = ins_it->ins_aln_quals.data[i];
+                              fprintf(stream, " %d", aq);
+                         }
+                         fprintf(stream, "\n");
+                         
+                         fprintf(stream, "  %c%s\tSQ =\t", *t, ins_it->key);
+                         for (i = 0; i < ins_it->ins_quals.n; i++) {
+                              sq = ins_it->ins_source_quals.data[i];
+                              fprintf(stream, " %d", sq);
+                         }
+                         fprintf(stream, "\n");
+                    }
+               } else if (*t=='-') {
+                    /* WARN copy above for dels */
+                    /* NOTE(review): all four loops are bounded by
+                     * del_quals.n, i.e. the map, aln and source
+                     * quality arrays are assumed to hold at least as
+                     * many values - confirm */
+                    HASH_ITER(hh_del, plp_col->del_event_counts, del_it, del_it_tmp) {
+                         fprintf(stream, "  %c%s\tIDQ =\t", *t, del_it->key);
+                         for (i = 0; i < del_it->del_quals.n; i++) {
+                              idq = del_it->del_quals.data[i];
+                              fprintf(stream, " %d", idq);
+                         }
+                         fprintf(stream, "\n");
+                         
+                         fprintf(stream, "  %c%s\tMQ =\t", *t,  del_it->key);
+                         for (i = 0; i < del_it->del_quals.n; i++) {
+                              mq = del_it->del_map_quals.data[i];
+                              fprintf(stream, " %d", mq);
+                         }
+                         fprintf(stream, "\n");
+                         
+                         fprintf(stream, "  %c%s\tAQ =\t",  *t, del_it->key);
+                         for (i = 0; i < del_it->del_quals.n; i++) {
+                              aq = del_it->del_aln_quals.data[i];
+                              fprintf(stream, " %d", aq);
+                         }
+                         fprintf(stream, "\n");
+                         
+                         fprintf(stream, "  %c%s\tSQ =\t", *t, del_it->key);
+                         for (i = 0; i < del_it->del_quals.n; i++) {
+                              sq = del_it->del_source_quals.data[i];
+                              fprintf(stream, " %d", sq);
+                         }
+                         fprintf(stream, "\n");
+                    }                  
+               }
+          }
+     }
+     fprintf(stream, "\n");
+}
+
+/* Warns if the index of a fasta file looks outdated.
+ *
+ * The index file name is derived by appending ".fai" to fa. A
+ * warning is logged if is_newer(fa, <fa>.fai) returns 1. Nothing
+ * happens if fa is NULL or empty.
+ *
+ * This is an advisory check only: if the buffer for the index name
+ * cannot be allocated a warning is logged and the check is skipped.
+ */
+void
+warn_old_fai(const char *fa)
+{
+     static const char fai_ext[] = ".fai";
+     char *fai;
+     size_t fai_size;
+
+     if (!fa || fa[0]=='\0') {
+          return;
+     }
+
+     /* sizeof includes the terminating NUL */
+     fai_size = strlen(fa) + sizeof(fai_ext);
+     fai = (char*) calloc(fai_size, 1);
+     if (fai==NULL) {
+          /* previously the unchecked pointer went straight into
+           * sprintf() */
+          LOG_WARN("%s\n", "memory allocation failed; not checking age of fasta index");
+          return;
+     }
+     snprintf(fai, fai_size, "%s%s", fa, fai_ext);
+     if (is_newer(fa, fai)==1) {
+          LOG_WARN("Index for fasta file (%s) is older than fasta file! You should reindex (using faidx)!\n", fai);
+     }
+     free(fai);
+}
+
+
+/* Calls indels on one pileup column.
+ *
+ * Returns right away if the number of non-indels, insertions and
+ * deletions taken together is below conf->min_cov. Otherwise every
+ * recorded insertion event and then every recorded deletion event is
+ * tested: its error probabilities are computed and sorted, the dynamic
+ * Bonferroni factor (if enabled) and the global test counter are
+ * incremented and the event is handed to call_alt_ins() or
+ * call_alt_del(), which report significant events.
+ *
+ * Single-base A/T events that were flagged by the multi-allelic
+ * filter below are skipped before any of this happens, i.e. they are
+ * not counted as tests either.
+ *
+ * If call_alt_ins()/call_alt_del() returns non-zero the function
+ * returns immediately; remaining events (including all deletions, if
+ * an insertion failed) are then not tested.
+ */
+void 
+call_indels(const plp_col_t *p, varcall_conf_t *conf)
+{
+
+     double *bi_err_probs, *bd_err_probs; /* error probs for indel calling */
+     int bi_num_err_probs, bd_num_err_probs;
+     /* per nucleotide: non-zero if 1bp indels of that base are to be skipped */
+     int ign_indels[NUM_NT4] = {0};
+
+     if (p->num_non_indels + p->num_ins + p->num_dels < conf->min_cov) {
+          return;
+     }
+
+#if 0
+      /* Report consensus indel
+       * FIXME: call other indels/substitutions with respect to consensus indel */
+      if (p->cons_base[0] == '+') {
+           report_cons_ins(p, conf);
+           return;
+      }
+      if (p->cons_base[0] == '-') {
+           report_cons_del(p, conf);
+           return;
+      }
+#endif
+
+      /* Multiallelic, low AF, 1bp indels with pattern XY>X and X>XY
+       * (see e.g. ecoli spike-in) where Y is T or A are filtered here. Seem to happen because
+       * of overestimated indel qual in Illumina homopolymer stretches
+       *
+       * FIXME make switch
+       */
+      if (p->num_ins && p->ins_quals.n && p->num_dels && p->del_quals.n) {
+           /* both the insertion and the deletion have to be below this AF */
+           const float max_af = 0.05;
+           ins_event *ins_ev, *ins_ev_tmp;
+           del_event *del_ev, *del_ev_tmp;
+           /* counts of observed 1-base indels */
+           int ins_dict[NUM_NT4] = {0};
+           int del_dict[NUM_NT4] = {0};
+           int i;
+           /* the bases this filter applies to */
+           const char at[] = "AT\0";
+
+           HASH_ITER(hh_ins, p->ins_event_counts, ins_ev, ins_ev_tmp) {
+                /*LOG_FIXME("ins: %s count=%d fw/rv=%d/%d\n", ins_ev->key, ins_ev->count, ins_ev->fw_rv[0], ins_ev->fw_rv[1]);*/
+                if (strlen(ins_ev->key)==1 && strchr(at, ins_ev->key[0])!=NULL) {
+                     ins_dict[bam_nt4_table[(int)ins_ev->key[0]]] = ins_ev->count;
+                }
+           }
+           HASH_ITER(hh_del, p->del_event_counts, del_ev, del_ev_tmp) {
+                /*LOG_FIXME("del: %s count=%d fw/rv=%d/%d\n", del_ev->key, del_ev->count, del_ev->fw_rv[0], del_ev->fw_rv[1]);*/
+                if (strlen(del_ev->key)==1 && strchr(at, del_ev->key[0])!=NULL) {
+                     del_dict[bam_nt4_table[(int)del_ev->key[0]]] = del_ev->count;
+                }
+           }
+           /* flag a base if it was seen as insertion and as deletion
+            * and both are of low AF (tails excluded from the
+            * denominator) */
+           for (i=0; i<NUM_NT4; i++) {
+                if (ins_dict[i] && del_dict[i]) {
+                     float ins_af = ins_dict[i]/((float)(p->coverage_plp - p->num_tails));
+                     float del_af = del_dict[i]/((float)(p->coverage_plp - p->num_tails));
+                     if (ins_af<max_af && del_af<max_af) {
+                          LOG_DEBUG("Ignoring multi-allelic XY>X:X>XY indel of low AF next to Poly-AT at %s:%d\n", p->target, p->pos+1);
+                          ign_indels[i] = 1;
+                     }
+                }
+           }
+      }      
+
+      /*if (p->num_ins && p->ins_quals.n) { FIXME check for ins_quals.n breaks if 100% consvar. why was this needed? see also del */
+      if (p->num_ins) {
+           ins_event *it, *it_tmp;
+           HASH_ITER(hh_ins, p->ins_event_counts, it, it_tmp) {
+                /* skip 1bp events flagged by the filter above */
+                if (strlen(it->key)==1 && ign_indels[bam_nt4_table[(int)it->key[0]]]) {
+                     continue;
+                }
+                /* bi_err_probs is set by plp_to_ins_errprobs() and freed below */
+                plp_to_ins_errprobs(&bi_err_probs, &bi_num_err_probs,
+                                    p, conf, it->key);
+                qsort(bi_err_probs, bi_num_err_probs, sizeof(double), dbl_cmp);
+                /* one more test: update correction factor before calling */
+                if (conf->bonf_dynamic) {
+                     conf->bonf_indel += 1;
+                }
+                num_indel_tests += 1;
+                if (call_alt_ins(p, bi_err_probs, bi_num_err_probs, conf, it)) {
+                     free(bi_err_probs);
+                     return;
+                }
+                free(bi_err_probs);
+           }
+      }
+
+      /*if (p->num_dels && p->del_quals.n) { FIXME check for del_quals.n breaks if 100% consvar. why was this needed? see also ins */
+      if (p->num_dels) {
+           del_event *it, *it_tmp;
+           HASH_ITER(hh_del, p->del_event_counts, it, it_tmp) {
+                /* skip 1bp events flagged by the filter above */
+                if (strlen(it->key)==1 && ign_indels[bam_nt4_table[(int)it->key[0]]]) {
+                     continue;
+                }
+                /* bd_err_probs is set by plp_to_del_errprobs() and freed below */
+                plp_to_del_errprobs(&bd_err_probs, &bd_num_err_probs,
+                                    p, conf, it->key);
+                qsort(bd_err_probs, bd_num_err_probs, sizeof(double), dbl_cmp);
+                /* one more test: update correction factor before calling */
+                if (conf->bonf_dynamic) {
+                     conf->bonf_indel += 1;
+                }
+                num_indel_tests += 1;
+                if (call_alt_del(p, bd_err_probs, bd_num_err_probs, conf, it)) {
+                     free(bd_err_probs);
+                     return;
+                }
+                free(bd_err_probs);
+           }
+      }
+}
+
+
+/* Calls SNVs on one pileup column.
+ *
+ * We always use the reference for calculating a quality.
+ * Previous versions used the consensus and reported the
+ * consensus as CONSVAR without quality.
+ *
+ * Nothing is done if the column has fewer than conf->min_cov bases or
+ * if the reference base is 'N'. Otherwise the error probabilities and
+ * alt counts are derived through plp_to_errprobs() and handed to
+ * snpcaller(). Every alt base other than the reference base whose
+ * p-value multiplied by conf->bonf_subst is below conf->sig is
+ * written to conf->vcf_out through report_var().
+ *
+ * Side effects: num_snv_tests grows by NUM_NONCONS_BASES and, if
+ * conf->bonf_dynamic is set, conf->bonf_subst is updated accordingly
+ * (both only if alt bases are left after filtering).
+ *
+ * A failing snpcaller() is logged to stderr and ends the function
+ * without reporting anything.
+ */
+void
+call_snvs(const plp_col_t *p, varcall_conf_t *conf)
+{
+     double *bc_err_probs; /* error probs (qualities) passed down to snpcaller */
+     int bc_num_err_probs; /* #elements in bc_err_probs */
+     int i;
+     /* 4 bases ignoring N, -1 reference/consensus base makes 3 */
+     long double pvalues[NUM_NONCONS_BASES]; /* pvalues reported back from snpcaller */
+     int alt_counts[NUM_NONCONS_BASES]; /* counts for alt bases handed down to snpcaller */
+     int alt_raw_counts[NUM_NONCONS_BASES]; /* raw, unfiltered alt-counts */
+     int alt_bases[NUM_NONCONS_BASES];/* actual alt bases */
+     int got_alt_bases = 0;
+
+     if (p->num_bases < conf->min_cov) {
+          return;
+     }
+
+      /* Ns would in theory work as ref. However, downstream functions e.g. plp_to_errprobs
+      * don't support it
+      */
+     if (p->ref_base == 'N') {
+          return;
+     }
+
+      /* bc_err_probs is set by plp_to_errprobs() and has to be freed on
+       * every path below */
+      plp_to_errprobs(&bc_err_probs, &bc_num_err_probs,
+                      alt_bases, alt_counts, alt_raw_counts,
+                      p, conf);
+
+#if 0
+      for (i=0; i<NUM_NONCONS_BASES; i++) {
+           LOG_FIXME("NUM_NONCONS_BASES=%d alt_counts=%d alt_raw_counts=%d\n", i, alt_counts[i], alt_raw_counts[i]);
+      }
+#endif
+
+      for (i=0; i<NUM_NONCONS_BASES; i++) {
+           if (alt_counts[i]) {
+                got_alt_bases = 1;
+                break;
+           }
+      }
+      if (! got_alt_bases) {
+           LOG_DEBUG("%s %d: only cons bases left after filtering.\n",
+                     p->target, p->pos+1);
+           /* ...and CONSVAR already reported */
+           free(bc_err_probs);
+           return;
+      }
+
+      /* sorting in ascending order should in theory be numerically
+       * more stable and also make snpcaller faster */
+      qsort(bc_err_probs, bc_num_err_probs, sizeof(double), dbl_cmp);
+
+ #ifdef TRACE
+      {
+           int i=0;
+           for (i=0; i<bc_num_err_probs; i++) {
+                LOG_FATAL("after sorting i=%d err_prob=%g\n", i, bc_err_probs[i]);
+           }
+      }
+ #endif
+      if (conf->bonf_dynamic) {
+           if (1 == conf->bonf_subst) {
+                conf->bonf_subst = NUM_NONCONS_BASES; /* otherwise we start with 1+NUM_NONCONS_BASES */
+           } else {
+                conf->bonf_subst += NUM_NONCONS_BASES; /* will do one test per non-cons nuc */
+           }
+      }
+      num_snv_tests += NUM_NONCONS_BASES;
+
+      LOG_DEBUG("%s %d: passing down %d quals with noncons_counts"
+                " (%d, %d, %d) to snpcaller(num_snv_tests=%lld conf->bonf=%lld, conf->sig=%f)\n", p->target, p->pos+1,
+                bc_num_err_probs, alt_counts[0], alt_counts[1], alt_counts[2], num_snv_tests, conf->bonf_subst, conf->sig);
+
+      if (snpcaller(pvalues, bc_err_probs, bc_num_err_probs,
+                   alt_counts, conf->bonf_subst, conf->sig)) {
+           fprintf(stderr, "FATAL: snpcaller() failed at %s:%s():%d\n",
+                   __FILE__, __FUNCTION__, __LINE__);
+           free(bc_err_probs);
+           return;
+      }
+
+      /* for all alt-bases, i.e. non-cons bases (which might include
+       * the ref-base!) */
+      for (i=0; i<NUM_NONCONS_BASES; i++) {
+           int alt_base = alt_bases[i];
+           int alt_count = alt_counts[i];
+           int alt_raw_count = alt_raw_counts[i];
+           long double pvalue = pvalues[i];
+           int reported_snv_ref = p->ref_base;
+
+           if (alt_base==reported_snv_ref) { 
+                /* self comparison */
+#if DEBUG
+                /* terminating semicolon was missing here */
+                LOG_DEBUG("%s\n", "continue because self comparison");
+#endif
+                continue;
+           }
+
+           if (pvalue * (double)conf->bonf_subst < conf->sig) {
+                const int is_indel = 0;
+                const int is_consvar = 0;
+                float af = alt_raw_count/(float)p->coverage_plp;
+
+                char report_ref[2];
+                char report_alt[2];
+                report_ref[0] = reported_snv_ref;
+                report_alt[0] = alt_base;
+                report_ref[1] = report_alt[1] = '\0';
+
+                /* indices of ref and alt into the per-nucleotide count arrays */
+                int ref_nt4;
+                int alt_nt4;
+                ref_nt4 = bam_nt4_table[(int)report_ref[0]];
+                alt_nt4 = bam_nt4_table[(int)report_alt[0]];
+
+                dp4_counts_t dp4;
+                dp4.ref_fw = p->fw_counts[ref_nt4];
+                dp4.ref_rv = p->rv_counts[ref_nt4];
+                dp4.alt_fw = p->fw_counts[alt_nt4];
+                dp4.alt_rv = p->rv_counts[alt_nt4];
+
+                report_var(& conf->vcf_out, p, report_ref, report_alt,
+                           af, PROB_TO_PHREDQUAL(pvalue),
+                           is_indel, is_consvar, &dp4);
+                /* log the same ref allele that was reported (used to
+                 * print the consensus base instead) */
+                LOG_DEBUG("low freq snp: %s %d %c>%c pv-prob:%Lg;pv-qual:%d"
+                          " counts-raw:%d/%d=%.6f counts-filt:%d/%d=%.6f\n",
+                          p->target, p->pos+1, report_ref[0], alt_base,
+                          pvalue, PROB_TO_PHREDQUAL(pvalue),
+                          /* counts-raw */ alt_raw_count, p->coverage_plp, alt_raw_count/(float)p->coverage_plp,
+                          /* counts-filt */ alt_count, bc_num_err_probs, alt_count/(float)bc_num_err_probs);
+           }
+#if 0
+           else {
+                LOG_DEBUG("non sig: pvalue=%Lg * (double)conf->bonf=%lld < conf->sig=%f\n", pvalue, conf->bonf, conf->sig);
+           }
+#endif
+      }
+      free(bc_err_probs);
+}
+
+
+/* Per-column callback for mpileup(): calls indels and/or SNVs on pileup
+ * column p; confp must point to a varcall_conf_t.
+ *
+ * Assumes conf->min_bq and read-level filtering were already done
+ * upstream. altbase mangling happens here however.
+ */
+void
+call_vars(const plp_col_t *p, void *confp)
+{
+     varcall_conf_t *vc_conf = (varcall_conf_t *)confp;
+     int cons_is_indel;
+     int bases_are_minority;
+
+     /* nothing to call against without a known reference base */
+     if ('N' == p->ref_base) {
+          return;
+     }
+
+     if (0 == vc_conf->no_indels) {
+          call_indels(p, vc_conf);
+     }
+
+#if 0
+     LOG_FIXME("%s:%d: p->del_quals.n=%d p->ins_quals.n=%d p->num_dels=%d p->num_ins=%d p->num_ign_indels=%d p->num_bases=%d p->cov=%d\n", 
+               p->target, p->pos+1,
+               p->del_quals.n, p->ins_quals.n,
+               p->num_dels, p->num_ins, p->num_ign_indels, p->num_bases, p->coverage_plp);
+#endif
+
+     /* SNV calling: skipped when only indels were requested */
+     if (vc_conf->only_indels) {
+          return;
+     }
+
+     /* also skipped on consensus indels. since indel qualities might be
+      * missing, a consensus indel is not necessarily recorded as such.
+      * as a hack, a column is also skipped if fewer than half of its
+      * coverage are bases (which might misfire if many bases were
+      * filtered). FIXME overhaul
+      */
+     cons_is_indel = ('+' == p->cons_base[0] || '-' == p->cons_base[0]);
+     bases_are_minority = (p->num_bases*2 < p->coverage_plp);
+     if (cons_is_indel || bases_are_minority) {
+          return;
+     }
+
+     call_snvs(p, vc_conf);
+}
+/* call_vars() */
+
+
+
+/* Print the option summary of the call subcommand to stderr; bracketed
+ * defaults are taken from the given config structs */
+static void
+usage(const mplp_conf_t *mplp_conf, const varcall_conf_t *varcall_conf)
+{
+     fprintf(stderr, "%s: call variants from BAM file\n\n", MYNAME);
+     fprintf(stderr, "Usage: %s [options] in.bam\n\n", MYNAME);
+     fprintf(stderr, "Options:\n");
+     fprintf(stderr, "- Reference:\n");
+     fprintf(stderr, "       -f | --ref FILE              Indexed reference fasta file (gzip supported) [null]\n");
+
+     fprintf(stderr, "- Output:\n");
+     fprintf(stderr, "       -o | --out FILE              Vcf output file [- = stdout]\n");
+
+     fprintf(stderr, "- Regions:\n");
+     fprintf(stderr, "       -r | --region STR            Limit calls to this region (chrom:start-end) [null]\n");
+     fprintf(stderr, "       -l | --bed FILE              List of positions (chr pos) or regions (BED) [null]\n");
+
+     fprintf(stderr, "- Base-call quality:\n");
+     fprintf(stderr, "       -q | --min-bq INT            Skip any base with baseQ smaller than INT [%d]\n", varcall_conf->min_bq);
+     fprintf(stderr, "       -Q | --min-alt-bq INT        Skip alternate bases with baseQ smaller than INT [%d]\n", varcall_conf->min_alt_bq);
+     fprintf(stderr, "       -R | --def-alt-bq INT        Overwrite baseQs of alternate bases (that passed bq filter) with this value (-1: use median ref-bq; 0: keep) [%d]\n", varcall_conf->def_alt_bq);
+
+     fprintf(stderr, "       -j | --min-jq INT            Skip any base with joinedQ smaller than INT [%d]\n", varcall_conf->min_jq);
+     fprintf(stderr, "       -J | --min-alt-jq INT        Skip alternate bases with joinedQ smaller than INT [%d]\n", varcall_conf->min_alt_jq);
+     fprintf(stderr, "       -K | --def-alt-jq INT        Overwrite joinedQs of alternate bases (that passed jq filter) with this value (-1: use median ref-bq; 0: keep) [%d]\n", varcall_conf->def_alt_jq);
+
+     fprintf(stderr, "- Base-alignment (BAQ) and indel-alignment (IDAQ) qualities:\n");
+     fprintf(stderr, "       -B | --no-baq                Disable use of base-alignment quality (BAQ)\n");
+     fprintf(stderr, "       -A | --no-idaq               Don't use IDAQ values (NOT recommended under ANY circumstances other than debugging)\n");
+     fprintf(stderr, "       -D | --del-baq               Delete pre-existing BAQ values, i.e. compute even if already present in BAM\n");
+     fprintf(stderr, "       -e | --no-ext-baq            Use 'normal' BAQ (samtools default) instead of extended BAQ (both computed on the fly if not already present in %s tag)\n", BAQ_TAG);
+     fprintf(stderr, "- Mapping quality:\n");
+     fprintf(stderr, "       -m | --min-mq INT            Skip reads with mapping quality smaller than INT [%d]\n", mplp_conf->min_mq);
+     fprintf(stderr, "       -M | --max-mq INT            Cap mapping quality at INT [%d]\n", mplp_conf->max_mq);
+     fprintf(stderr, "       -N | --no-mq                 Don't merge mapping quality in LoFreq's model\n");
+
+     fprintf(stderr, "- Indels:\n");
+     fprintf(stderr, "            --call-indels           Enable indel calls (note: preprocess your file to include indel alignment qualities!)\n");
+     fprintf(stderr, "            --only-indels           Only call indels; no SNVs\n");
+
+     fprintf(stderr, "- Source quality:\n");
+     fprintf(stderr, "       -s | --src-qual              Enable computation of source quality\n");
+     fprintf(stderr, "       -S | --ign-vcf FILE          Ignore variants in this vcf file for source quality computation. Multiple files can be given separated by commas\n");
+     fprintf(stderr, "       -T | --def-nm-q INT          If >= 0, then replace non-match base qualities with this default value [%d]\n", mplp_conf->def_nm_q);
+
+     fprintf(stderr, "- P-values:\n");
+     fprintf(stderr, "       -a | --sig FLOAT             P-Value cutoff / significance level [%f]\n", varcall_conf->sig);
+     fprintf(stderr, "       -b | --bonf STR              Bonferroni factor. 'dynamic' (increase per actually performed test) or INT ['dynamic']\n");
+
+     fprintf(stderr, "- Misc.:\n");
+     fprintf(stderr, "       -C | --min-cov INT           Test only positions having at least this coverage [%d]\n", varcall_conf->min_cov);
+     fprintf(stderr, "                                    (note: without --no-default-filter default filters (incl. coverage) kick in after predictions are done)\n");
+     fprintf(stderr, "       -d | --max-depth INT         Cap coverage at this depth [%d]\n", mplp_conf->max_depth);
+     fprintf(stderr, "            --illumina-1.3          Assume the quality is Illumina-1.3-1.7/ASCII+64 encoded\n");
+     fprintf(stderr, "            --use-orphan            Count anomalous read pairs (i.e. where mate is not aligned properly)\n");
+     fprintf(stderr, "            --plp-summary-only      No variant calling. Just output pileup summary per column\n");
+     fprintf(stderr, "            --no-default-filter     Don't run default 'lofreq filter' automatically after calling variants\n");
+     fprintf(stderr, "            --verbose               Be verbose\n");
+     fprintf(stderr, "            --debug                 Enable debugging\n");
+}
+/* usage() */
+
+
+/* Entry point of the 'call' subcommand: parses the command line (getopt
+ * starts at argv[1], see below), opens the vcf output, runs mpileup() with
+ * call_vars() -- or plp_summary() for --plp-summary-only -- as per-column
+ * callback and, unless variants were written directly to their final
+ * destination, runs 'lofreq filter' on the temporary vcf file.
+ *
+ * Returns 0 on success and non-zero on failure.
+ */
+int
+main_call(int argc, char *argv[])
+{
+     /* based on bam_mpileup() */
+     int c, i;
+     static int use_orphan = 0; /* flags are static: their addresses initialise static long_opts */
+     static int only_indels = 0;
+     static int no_indels = 1;
+     static int plp_summary_only = 0;
+     static int no_default_filter = 0;
+     static int illumina_1_3 = 0;
+     char *bam_file = NULL;
+     char *bed_file = NULL;
+     char *vcf_out = NULL; /* == - == stdout */
+     char vcf_tmp_template[] = "/tmp/lofreq2-call-dyn-bonf.XXXXXX";
+     char *vcf_tmp_out = NULL; /* write to this file first, then filter */
+     mplp_conf_t mplp_conf;
+     varcall_conf_t varcall_conf;
+     /*void (*plp_proc_func)(const plp_col_t*, const varcall_conf_t*);*/
+     void (*plp_proc_func)(const plp_col_t*, void*);
+     int rc = 0;
+     char *ign_vcf = NULL;
+
+/* FIXME add sens test:
+construct p such with
+quality_range = [20, 25, 30, 35, 40]
+coverage_range = [10, 50, 100, 500, 1000, 5000, 10000]
+refbase = 'A'
+snpbase = 'C'
+for cov in coverage_range:
+    for q in quality_range:
+        num_noncons = 1
+        while True:
+            void call_snvs(const plp_col_t *p, &varcall_conf);
+            count snvs in output
+            if len(snps):
+                print num_noncons
+                break
+            num_noncons += 1
+            if num_noncons == cov:
+                break
+*/
+
+     for (i=0; i<argc; i++) {
+          LOG_DEBUG("arg %d: %s\n", i, argv[i]);
+     }
+
+     /* default pileup options */
+     init_mplp_conf(& mplp_conf);
+
+     /* default snvcall options */
+     init_varcall_conf(& varcall_conf);
+
+    /* keep in sync with long_opts_str and usage
+     *
+     * getopt is a pain when it comes to syncing of long
+     * and short args and usage. check out gopt, libcfu...
+     */
+    while (1) {
+         static struct option long_opts[] = {
+              /* see usage sync */
+              {"region", required_argument, NULL, 'r'},
+              {"bed", required_argument, NULL, 'l'}, /* changes here must be reflected in pseudo_parallel code as well */
+
+              {"ref", required_argument, NULL, 'f'},
+              {"call-indels", no_argument, &no_indels, 0},
+              {"only-indels", no_argument, &only_indels, 1},
+
+              {"out", required_argument, NULL, 'o'}, /* NOTE changes here must be reflected in pseudo_parallel code as well */
+
+              {"min-bq", required_argument, NULL, 'q'},
+              {"min-alt-bq", required_argument, NULL, 'Q'},
+              {"def-alt-bq", required_argument, NULL, 'R'},
+
+              {"min-jq", required_argument, NULL, 'j'},
+              {"min-alt-jq", required_argument, NULL, 'J'},
+              {"def-alt-jq", required_argument, NULL, 'K'},
+              {"del-baq", no_argument, NULL, 'D'},
+              {"no-ext-baq", no_argument, NULL, 'e'},
+              {"no-baq", no_argument, NULL, 'B'},
+              {"no-indel-aq", no_argument, NULL, 'A'},
+              {"no-idaq", no_argument, NULL, 'A'}, /* alias: spelling printed by usage() */
+              {"min-mq", required_argument, NULL, 'm'},
+              {"max-mq", required_argument, NULL, 'M'},
+              {"no-mq", no_argument, NULL, 'N'},
+              {"src-qual", no_argument, NULL, 's'},
+              {"ign-vcf", required_argument, NULL, 'S'},
+              {"def-nm-q", required_argument, NULL, 'T'},
+              {"sig", required_argument, NULL, 'a'},
+              {"bonf", required_argument, NULL, 'b'}, /* NOTE changes here must be reflected in pseudo_parallel code as well */
+
+              {"min-cov", required_argument, NULL, 'C'},
+              {"maxdepth", required_argument, NULL, 'd'},
+              {"max-depth", required_argument, NULL, 'd'}, /* alias: spelling printed by usage() */
+              {"illumina-1.3", no_argument, &illumina_1_3, 1},
+              {"use-orphan", no_argument, &use_orphan, 1},
+              {"plp-summary-only", no_argument, &plp_summary_only, 1},
+              {"no-default-filter", no_argument, &no_default_filter, 1},
+              {"verbose", no_argument, &verbose, 1},
+              {"debug", no_argument, &debug, 1},
+              {"help", no_argument, NULL, 'h'},
+
+              {0, 0, 0, 0} /* sentinel */
+         };
+
+         /* keep in sync with long_opts and usage */
+         static const char *long_opts_str = "r:l:f:o:q:Q:R:j:J:K:DeBAm:M:NsS:T:a:b:C:d:h";
+         /* getopt_long stores the option index here. */
+         int long_opts_index = 0;
+         c = getopt_long(argc-1, argv+1, /* skipping 'lofreq', just leaving 'command', i.e. call */
+                         long_opts_str, long_opts, & long_opts_index);
+         if (c == -1) {
+              break;
+         }
+
+         switch (c) {
+         /* see usage sync */
+         case 'r':
+              mplp_conf.reg = strdup(optarg);
+              /* FIXME you can enter lots of invalid stuff and libbam
+               * won't complain. add checks here or later */
+              break;
+
+         case 'l':
+              bed_file = strdup(optarg);
+              break;
+
+         case 'f':
+              if (! file_exists(optarg)) {
+                   LOG_FATAL("Reference fasta file '%s' does not exist. Exiting...\n", optarg);
+                   return 1;
+              }
+              mplp_conf.fa = strdup(optarg);
+              mplp_conf.fai = fai_load(optarg);
+              if (mplp_conf.fai == 0)  {
+                   free(mplp_conf.fa);
+                   return 1;
+              } else {
+                   /* if this was created with GATK (version?) then fai structure is different. 
+                      htslib happily parses it anyway but its member values are all wrong (most
+                      telling offset etc). accessing them here for a check is tricky. easiest is
+                      to use API and check whether all length are identical which is another indicator */
+                   faidx_t *fai = mplp_conf.fai;
+                   int i;
+                   int all_same_len = 1;
+                   int prev_len = -1;
+                   for (i=0; i< faidx_nseq(fai); i++) {
+                        int cur_len = faidx_seq_len(fai, faidx_iseq(fai, i));
+                        if (i) {
+                             if (prev_len != cur_len) {
+                                  all_same_len = 0;
+                                  break;
+                             }
+                        }
+                        prev_len = cur_len;
+                   }
+                   /* only seen in human cases */
+                   if (i>20 && i<200 && all_same_len) {
+                        LOG_FATAL("Fasta index looks weird. Please try reindexing. Exiting...\n");
+                        return 1;
+                   }
+              }
+              warn_old_fai(mplp_conf.fa);
+              break;
+
+         case 'o':
+              if (0 != strcmp(optarg, "-")) {
+                   if (file_exists(optarg)) {
+                        LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg);
+                        return 1;
+                   }
+              }
+              vcf_out = strdup(optarg);
+              break;
+
+         case 'q':
+              varcall_conf.min_bq = atoi(optarg);
+              break;
+
+         case 'Q':
+              varcall_conf.min_alt_bq = atoi(optarg);
+              break;
+
+         case 'R':
+              varcall_conf.def_alt_bq = atoi(optarg);
+              break;
+
+         case 'j':
+              varcall_conf.min_jq = atoi(optarg);
+              break;
+
+         case 'J':
+              varcall_conf.min_alt_jq = atoi(optarg);
+              break;
+
+         case 'K':
+              varcall_conf.def_alt_jq = atoi(optarg);
+              if (-1 == varcall_conf.def_alt_jq) {
+                   LOG_FATAL("%s\n", "Sorry, use of median ref JQ not implemented yet");/* FIXME */
+                   exit(1);
+              }
+              break;
+
+         case 'D':
+              mplp_conf.flag |= MPLP_REDO_BAQ;
+              break;
+
+         case 'e':
+              mplp_conf.flag &= ~MPLP_EXT_BAQ;
+              break;
+
+         case 'B':
+              mplp_conf.flag &= ~MPLP_BAQ;
+              varcall_conf.flag &= ~VARCALL_USE_BAQ;
+              break;
+
+         case 'A':
+              varcall_conf.flag &= ~VARCALL_USE_IDAQ;
+              mplp_conf.flag &= ~MPLP_IDAQ;
+              break;
+
+         case 'm':
+              mplp_conf.min_mq = atoi(optarg);
+              break;
+
+         case 'M':
+              mplp_conf.max_mq = atoi(optarg);
+              break;
+
+         case 'N':
+              varcall_conf.flag &= ~VARCALL_USE_MQ;
+              break;
+
+         case 's':
+              mplp_conf.flag |= MPLP_USE_SQ;
+              varcall_conf.flag |= VARCALL_USE_SQ;
+              break;
+
+         case 'S':
+              ign_vcf = strdup(optarg);
+              break;
+
+         case 'T':
+              mplp_conf.def_nm_q = atoi(optarg);
+              break;
+
+         case 'a':
+              varcall_conf.sig = strtof(optarg, (char **)NULL); /* atof */
+              if (0==varcall_conf.sig) {
+                   LOG_FATAL("%s\n", "Couldn't parse sign-threshold");
+                   return 1;
+              }
+              break;
+         case 'b':
+              if (0 == strncmp(optarg, "dynamic", 7)) {
+                   varcall_conf.bonf_dynamic = 1;
+              } else {
+                   varcall_conf.bonf_dynamic = 0;
+                   varcall_conf.bonf_subst = strtoll(optarg, (char **)NULL, 10); /* atol */
+                   if (1>varcall_conf.bonf_subst) {
+                        LOG_FATAL("%s\n", "Couldn't parse Bonferroni factor");
+                        return 1;
+                   }
+              }
+              break;
+
+         case 'C':
+              varcall_conf.min_cov = atoi(optarg);
+              break;
+
+         case 'd':
+              mplp_conf.max_depth = atoi(optarg);
+              break;
+
+         case 'h':
+              usage(& mplp_conf, & varcall_conf);
+              return 0; /* WARN: not printing defaults if some args where parsed */
+
+         case '?':
+              LOG_FATAL("%s\n", "unrecognized arguments found. Exiting...\n");
+              free(bed_file);
+              free(vcf_out);
+              return 1;
+#if 0
+         case 0:
+              fprintf(stderr, "ERROR: long opt (%s) not mapping to short option."
+                      " Exiting...\n", long_opts[long_opts_index].name);
+              return 1;
+#endif
+         default:
+              break;
+         }
+    }
+
+    varcall_conf.no_indels = no_indels;
+    varcall_conf.only_indels = only_indels;
+#ifdef DISABLE_INDELS
+    varcall_conf.no_indels = 1;
+#endif
+    /* if indels are not to be called, switch off idaq computation to
+     * save some time */
+    if (varcall_conf.no_indels) {
+         varcall_conf.flag &= ~VARCALL_USE_IDAQ;
+         mplp_conf.flag &= ~MPLP_IDAQ;
+    }
+
+    if (illumina_1_3) {
+         mplp_conf.flag |= MPLP_ILLUMINA13;
+    }
+
+    if (use_orphan) {
+         mplp_conf.flag &= ~MPLP_NO_ORPHAN;
+    }
+
+    if (no_indels && only_indels) {
+         LOG_FATAL("%s\n", "Invalid user request to predict no-indels *and* only-indels!? Exiting...\n");
+         return -1;
+    }
+
+    if (argc == 2) {
+        fprintf(stderr, "\n");
+        usage(& mplp_conf, & varcall_conf);
+        return 1;
+    }
+
+   /* get bam file argument. optind is relative to argv+1 (see getopt_long
+    * call above), hence the +1/-1 offsets whenever argv is indexed */
+    if (1 != argc - optind - 1) {
+         int i;
+         LOG_FATAL("%s\n", "Need exactly one BAM file as last argument");
+         for (i=optind+1; i<argc; i++) {
+              LOG_FATAL("Unknown arg: %s\n", argv[i]);
+         }
+         return 1;
+    }
+    bam_file = (argv + optind + 1)[0];
+    if (0 == strcmp(bam_file, "-")) {
+         if (mplp_conf.reg) {
+              LOG_FATAL("%s\n", "Need index if region was given and"
+                        " index file can't be provided when using stdin mode.");
+              return 1;
+         }
+    } else {
+         if (! file_exists(bam_file)) {
+              LOG_FATAL("BAM file %s does not exist. Exiting...\n", bam_file);
+              return 1;
+         }
+    }
+
+    /* FIXME: implement function for checking user arg logic */
+    if (mplp_conf.min_mq > mplp_conf.max_mq) {
+         LOG_FATAL("Minimum mapping quality (%d) larger than maximum mapping quality (%d)\n",
+                   mplp_conf.min_mq, mplp_conf.max_mq);
+         return 1;
+    }
+    if (varcall_conf.min_bq > varcall_conf.min_alt_bq) {
+         LOG_FATAL("Minimum base-call quality for all bases (%d) larger than minimum base-call quality for alternate bases (%d)\n",
+                   varcall_conf.min_bq, varcall_conf.min_alt_bq);
+         return 1;
+    }
+    if (mplp_conf.flag & MPLP_BAQ && ! mplp_conf.fa && ! plp_summary_only) {
+         LOG_FATAL("%s\n", "Can't compute BAQ with no reference...\n");
+         return 1;
+    }
+    if ( ! mplp_conf.fa && ! plp_summary_only) {
+         LOG_FATAL("%s\n", "Need a reference for calling variants...\n");
+         return 1;
+    }
+    /* NOTE: not reachable at the moment, the previous check already returned */
+    if (! plp_summary_only && ! mplp_conf.fa) {
+         LOG_WARN("%s\n", "Calling SNVs without reference\n");
+    }
+
+    /* if we don't apply a default filter and bonf is not dynamic then
+     * we can directly write to requested output file. otherwise we
+     * use a tmp file that gets filtered.
+     */
+    if (no_default_filter && ! varcall_conf.bonf_dynamic) {
+         if (NULL == vcf_out || 0 == strcmp(vcf_out, "-")) {
+              if (vcf_file_open(& varcall_conf.vcf_out, "-",
+                                0, 'w')) {
+                   LOG_ERROR("%s\n", "Couldn't open stdout");
+                   return 1;
+              }
+         } else {
+              if (vcf_file_open(& varcall_conf.vcf_out, vcf_out,
+                                HAS_GZIP_EXT(vcf_out), 'w')) {
+                   LOG_ERROR("Couldn't open %s\n", vcf_out);
+                   return 1;
+              }
+         }
+    } else {
+         /* NOTE(review): mktemp() only picks a name (racy, see mktemp(3));
+          * mkstemp() would be safer if vcf_file_open() accepts existing files */
+         vcf_tmp_out = strdup(mktemp(vcf_tmp_template));
+         if (NULL == vcf_tmp_out || '\0' == vcf_tmp_out[0]) { /* mktemp() empties the template on failure */
+              LOG_FATAL("%s\n", "Couldn't create temporary vcf file");
+              return 1;
+         }
+         if (vcf_file_open(& varcall_conf.vcf_out, vcf_tmp_out,
+                           HAS_GZIP_EXT(vcf_tmp_out), 'w')) {
+              LOG_ERROR("Couldn't open %s\n", vcf_tmp_out);
+              free(vcf_tmp_out);
+              return 1;
+         }
+    }
+
+    /* save command-line for later reference */
+    mplp_conf.cmdline[0] = '\0';
+    for (i=0; i<argc; i++) {
+         strncat(mplp_conf.cmdline, argv[i],
+                 sizeof(mplp_conf.cmdline)-strlen(mplp_conf.cmdline)-2);
+         strcat(mplp_conf.cmdline, " ");
+    }
+
+    if (bed_file) {
+         mplp_conf.bed = bed_read(bed_file);
+         if (! mplp_conf.bed) {
+              LOG_ERROR("Couldn't read %s\n", bed_file);
+              free(vcf_tmp_out);
+              return 1;
+         }
+    }
+
+    if (debug) {
+         dump_mplp_conf(& mplp_conf, stderr);
+         dump_varcall_conf(& varcall_conf, stderr);
+    }
+
+    if (ign_vcf) {
+         /* note strtok destroys input i.e. ign_vcf. files are comma separated, so every call needs "," */
+         char *f = strtok(ign_vcf, ",");
+         while (NULL != f) {
+              if (source_qual_load_ign_vcf(f, mplp_conf.bed)) {
+                   LOG_FATAL("Loading of ignore positions from %s failed.", f);
+                   free(vcf_tmp_out);
+                   return 1;
+              }
+              f = strtok(NULL, ",");
+         }
+         free(ign_vcf);
+    }
+
+    if (plp_summary_only) {
+         plp_proc_func = &plp_summary;
+    } else {
+         /* or use PACKAGE_STRING */
+         vcf_write_new_header(& varcall_conf.vcf_out,
+                              mplp_conf.cmdline, mplp_conf.fa);
+         plp_proc_func = &call_vars;
+    }
+
+    rc = mpileup(&mplp_conf, plp_proc_func, (void*)&varcall_conf,
+                 1, (const char **) argv + optind + 1);
+    if (rc) {
+         free(vcf_tmp_out);
+         return rc;
+    }
+
+    if (indel_calls_wo_idaq && varcall_conf.flag & VARCALL_USE_IDAQ) {
+         LOG_WARN("%ld indel calls (before filtering) were made without indel alignment-quality!"
+                  " Did you forget to indel alignment-quality to your bam-file?\n", indel_calls_wo_idaq);
+    }
+
+    vcf_file_close(& varcall_conf.vcf_out);
+
+    /* snv calling completed. now filter according to the following rules:
+     *  1. no_default_filter and ! dyn
+     *     just print
+     *  2 filter with
+     *     - no_default_filter, if set
+     *     - filter snvphred according to bonf, if dynamic
+     */
+    if (plp_summary_only) {
+         LOG_VERBOSE("%s\n", "No filtering needed: didn't run in SNV calling mode");
+
+    } else if (no_default_filter && ! varcall_conf.bonf_dynamic) {
+         /* vcf file needs no filtering and was already printed to
+          * final destination. already taken care of above. */
+         LOG_VERBOSE("%s\n", "No filtering needed or requested: variants already written to final destination");
+
+    } else {
+         char cmd[BUF_SIZE];
+         int len;
+
+         snprintf(cmd, BUF_SIZE, /* file names are single-quoted for the shell run by system() */
+                  "lofreq filter -i '%s' -o '%s'",
+                  vcf_tmp_out, NULL==vcf_out ? "-" : vcf_out);
+         len = strlen(cmd);
+
+         if (no_default_filter) {
+              snprintf(cmd+len, BUF_SIZE-len, " %s", "--no-defaults");
+              len = strlen(cmd); /* recomputed, since snprintf's return value may exceed what was stored */
+         }
+         if (varcall_conf.bonf_dynamic) {
+              int snvqual_thresh = INT_MAX;
+              int indelqual_thresh = INT_MAX;
+
+              if (varcall_conf.bonf_subst) {
+                   snvqual_thresh = PROB_TO_PHREDQUAL(varcall_conf.sig/varcall_conf.bonf_subst);
+                   if (snvqual_thresh < 0) {
+                        snvqual_thresh = 0;
+                   }
+              }
+              if (varcall_conf.bonf_indel) {
+                   indelqual_thresh =  PROB_TO_PHREDQUAL(varcall_conf.sig/varcall_conf.bonf_indel);
+                   if (indelqual_thresh < 0) {
+                        indelqual_thresh = 0;
+                   }
+              }
+
+              snprintf(cmd+len, BUF_SIZE-len, /* bounded append: can't write past the end of cmd */
+                       " --snvqual-thresh %d --indelqual-thresh %d",
+                       snvqual_thresh, indelqual_thresh);
+         } else {
+              LOG_VERBOSE("%s\n", "No SNV/indel-quality filtering needed (already applied during call since bonf was fixed)");
+         }
+
+         LOG_VERBOSE("Executing %s\n", cmd);
+         if (0 != (rc = system(cmd))) {
+              LOG_ERROR("The following command failed: %s\n", cmd);
+              rc = 1;
+         } else {
+              /*if (! debug)*/
+              (void) unlink(vcf_tmp_out);
+         }
+    }
+
+    if (! plp_summary_only && rc==0) {
+         /* output some stats. number of tests performed need for
+          * multiple testing correction. line will be parse by
+          * downstream script e.g. lofreq_somatic, so be careful when
+          * changing the format */
+         int org_verbose = verbose;
+         verbose = 1;
+         /* lofreq2_call_parallel.py and used by lofreq2_somatic.py */
+         LOG_VERBOSE("Number of substitution tests performed: %lld\n", num_snv_tests);
+         LOG_VERBOSE("Number of indel tests performed: %lld\n", num_indel_tests);
+         verbose = org_verbose;
+    }
+
+    source_qual_free_ign_vars();
+
+    free(vcf_tmp_out);
+    free(vcf_out);
+    free(mplp_conf.alnerrprof_file);
+    free(mplp_conf.reg);
+    free(mplp_conf.fa);
+    if (mplp_conf.fai) {
+         fai_destroy(mplp_conf.fai);
+    }
+    free(bed_file);
+    if (mplp_conf.bed) {
+         bed_destroy(mplp_conf.bed);
+    }
+
+    if (0==rc) {
+         LOG_VERBOSE("%s\n", "Successful exit.");
+    }
+
+    return rc;
+}
+/* main_call */
diff --git a/src/lofreq/lofreq_call.h b/src/lofreq/lofreq_call.h
new file mode 100644
index 0000000..1581a5e
--- /dev/null
+++ b/src/lofreq/lofreq_call.h
@@ -0,0 +1,33 @@
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef LOFREQ_CALL_H
+#define LOFREQ_CALL_H
+/* entry point of the 'call' subcommand; returns 0 on success, non-zero otherwise */
+int main_call(int argc, char *argv[]);
+
+#endif
diff --git a/src/lofreq/lofreq_checkref.c b/src/lofreq/lofreq_checkref.c
new file mode 100644
index 0000000..066c8d7
--- /dev/null
+++ b/src/lofreq/lofreq_checkref.c
@@ -0,0 +1,75 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+/* This is an almost one to one copy of the corresponding bits in
+ * samtools' bam_index.c */
+
+#include <ctype.h>
+#include <assert.h>
+
+
+/* lofreq includes */
+#include "log.h"
+#include "utils.h"
+#include "samutils.h"
+
+#define MYNAME "lofreq checkref"
+
+/* Print purpose and synopsis of the checkref subcommand to stderr */
+static void usage()
+{
+     fprintf(stderr,
+             "\n%s: Check whether given BAM file was created with given reference\n\n"
+             "Usage: %s ref.fa in.bam\n\n", MYNAME, MYNAME);
+}
+
+
+/* Entry point of the checkref subcommand: prints "OK" and returns 0 if
+ * checkref() returns zero for the given reference/BAM pair, otherwise
+ * prints "Failed" and returns 1. A wrong argument count prints usage. */
+int main_checkref(int argc, char *argv[])
+{
+     char *ref_path;
+     char *aln_path;
+
+     /* exactly four argv entries are required; only the last two
+      * (reference fasta, then BAM) are read here */
+     if (4 != argc) {
+          usage();
+          return 1;
+     }
+     ref_path = argv[2];
+     aln_path = argv[3];
+     if (0 == checkref(ref_path, aln_path)) {
+          printf("OK\n");
+          return 0;
+     }
+     printf("Failed\n");
+     return 1;
+}
diff --git a/src/lofreq/lofreq_checkref.h b/src/lofreq/lofreq_checkref.h
new file mode 100644
index 0000000..be36b44
--- /dev/null
+++ b/src/lofreq/lofreq_checkref.h
@@ -0,0 +1,33 @@
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef LOFREQ_CHECKREF_H
+#define LOFREQ_CHECKREF_H
+
+int main_checkref(int argc, char *argv[]);
+
+#endif
diff --git a/src/lofreq/lofreq_filter.c b/src/lofreq/lofreq_filter.c
new file mode 100644
index 0000000..5953963
--- /dev/null
+++ b/src/lofreq/lofreq_filter.c
@@ -0,0 +1,1262 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <getopt.h>
+#include <stdlib.h>
+
+/* lofreq includes */
+#include "lofreq_filter.h"
+#include "vcf.h"
+#include "log.h"
+#include "utils.h"
+#include "multtest.h"
+#include "defaults.h"
+
+
+#if 1
+#define MYNAME "lofreq filter"
+#else
+#define MYNAME PACKAGE
+#endif
+
+#define FILTER_ID_STRSIZE 64
+#define FILTER_STRSIZE 128
+
+#define ALT_STRAND_RATIO 0.85
+
+/* Coverage (DP) filter settings. apply_dp_filter() only uses a bound
+ * if it is > 0. */
+typedef struct {
+     int min; /* lower coverage bound */
+     char id_min[FILTER_ID_STRSIZE]; /* FILTER id added if coverage < min; set in cfg_filter_to_vcf_header() */
+     int max; /* upper coverage bound */
+     char id_max[FILTER_ID_STRSIZE]; /* FILTER id added if coverage > max; set in cfg_filter_to_vcf_header() */
+} dp_filter_t;
+
+/* Allele frequency (AF) filter settings. apply_af_filter() only uses
+ * a bound if it is > 0. */
+typedef struct {
+     float min; /* lower allele frequency bound */
+     char id_min[FILTER_ID_STRSIZE]; /* FILTER id added if AF < min; set in cfg_filter_to_vcf_header() */
+     float max; /* upper allele frequency bound */
+     char id_max[FILTER_ID_STRSIZE]; /* FILTER id added if AF > max; set in cfg_filter_to_vcf_header() */
+} af_filter_t;
+
+/* Strand bias (SB) filter settings. Either a fixed threshold
+ * (thresh) or a multiple testing correction (mtc_type) is used;
+ * cfg_filter_to_vcf_header() asserts that not both are set. */
+typedef struct {
+     int thresh;/* fixed phred threshold; only used if non-zero (see apply_sb_threshold()) */
+     int mtc_type;/* MTC_NONE, MTC_BONF, MTC_HOLMBONF or MTC_FDR */
+     double alpha; /* corrected values below alpha get filtered (see apply_sb_filter_mtc()) */
+     long int ntests; /* number of tests for the correction; if 0, apply_sb_filter_mtc() sets it to the number of tested variants */
+     char id[FILTER_ID_STRSIZE]; /* FILTER id; set in cfg_filter_to_vcf_header() */
+     int no_compound; /* if 0, filtering additionally requires that more than ALT_STRAND_RATIO of var bases are on one strand */
+     int incl_indels; /* if 1, also apply to indels */
+} sb_filter_t;
+
+/* SNV quality filter settings. Either a fixed threshold (thresh) or
+ * a multiple testing correction (mtc_type) is used;
+ * cfg_filter_to_vcf_header() asserts that not both are set. */
+typedef struct {
+     int thresh;/* fixed phred threshold; only used if non-zero (see apply_snvqual_threshold()) */
+     int mtc_type;/* MTC_NONE, MTC_BONF, MTC_HOLMBONF or MTC_FDR */
+     double alpha; /* corrected values above alpha get filtered (see apply_snvqual_filter_mtc()) */
+     long int ntests; /* number of tests for the correction; if 0, apply_snvqual_filter_mtc() sets it to the number of tested variants */
+     char id[FILTER_ID_STRSIZE]; /* FILTER id; set in cfg_filter_to_vcf_header() */
+} snvqual_filter_t;
+
+/* Indel quality filter settings. Either a fixed threshold (thresh)
+ * or a multiple testing correction (mtc_type) is used;
+ * cfg_filter_to_vcf_header() asserts that not both are set. */
+typedef struct {
+     int thresh;/* fixed phred threshold; only used if non-zero (see apply_indelqual_threshold()) */
+     int mtc_type;/* MTC_NONE, MTC_BONF, MTC_HOLMBONF or MTC_FDR */
+     double alpha; /* corrected values above alpha get filtered (see apply_indelqual_filter_mtc()) */
+     long int ntests; /* number of tests for the correction; if 0, apply_indelqual_filter_mtc() sets it to the number of tested variants */
+     char id[FILTER_ID_STRSIZE]; /* FILTER id; set in cfg_filter_to_vcf_header() */
+} indelqual_filter_t;
+
+/* Complete configuration of the filter subcommand. */
+typedef struct {
+     vcf_file_t vcf_in;
+     vcf_file_t vcf_out;
+     int print_only_passed;
+     int only_snvs;
+     int only_indels;
+
+     /* per-filter settings, embedded by value (these are not
+      * pointers and therefore cannot be NULL); the fields of each
+      * filter determine whether it is applied */
+     dp_filter_t dp_filter;
+     af_filter_t af_filter;
+     sb_filter_t sb_filter;
+     snvqual_filter_t snvqual_filter;
+     indelqual_filter_t indelqual_filter;
+} filter_conf_t;
+
+
+/* One-shot flags: each one is set to 1 when the corresponding
+ * warning has been logged, so that it is only logged once. Once set,
+ * the af and dp flags also make apply_af_filter() respectively
+ * apply_dp_filter() return immediately. */
+static int af_missing_warning_printed = 0;
+static int dp_missing_warning_printed = 0;
+static int dp4_missing_warning_printed = 0;
+static int sb_missing_warning_printed = 0;
+
+
+/* Write a human readable dump of all settings in cfg to stderr. */
+void
+dump_filter_conf(const filter_conf_t *cfg)
+{
+     const dp_filter_t *dp = &cfg->dp_filter;
+     const af_filter_t *af = &cfg->af_filter;
+     const sb_filter_t *sb = &cfg->sb_filter;
+     const snvqual_filter_t *snvqual = &cfg->snvqual_filter;
+     const indelqual_filter_t *indelqual = &cfg->indelqual_filter;
+
+     /* general flags */
+     fprintf(stderr, "filter_conf:\n");
+     fprintf(stderr, "  print_only_passed=%d\n", cfg->print_only_passed);
+     fprintf(stderr, "  only_snvs=%d\n", cfg->only_snvs);
+     fprintf(stderr, "  only_indels=%d\n", cfg->only_indels);
+
+     /* one line per filter */
+     fprintf(stderr, "  dp_filter min=%d max=%d\n", dp->min, dp->max);
+     fprintf(stderr, "  af_filter min=%f max=%f\n", af->min, af->max);
+     fprintf(stderr, "  sb_filter thresh=%d mtc_type=%d|%s alpha=%f ntests=%ld no_compound=%d incl_indel=%d\n",
+             sb->thresh, sb->mtc_type, mtc_type_str[sb->mtc_type],
+             sb->alpha, sb->ntests, sb->no_compound, sb->incl_indels);
+     fprintf(stderr, "  snvqual_filter thresh=%d mtc_type=%d|%s alpha=%f ntests=%ld\n",
+             snvqual->thresh, snvqual->mtc_type, mtc_type_str[snvqual->mtc_type],
+             snvqual->alpha, snvqual->ntests);
+     fprintf(stderr, "  indelqual_filter thresh=%d mtc_type=%d|%s alpha=%f ntests=%ld\n",
+             indelqual->thresh, indelqual->mtc_type, mtc_type_str[indelqual->mtc_type],
+             indelqual->alpha, indelqual->ntests);
+}
+
+
+/* Print the command line help of the filter subcommand to stderr.
+ *
+ * filter_conf is not used by this function.
+ *
+ * Keep the option descriptions in sync with the option parsing in
+ * main_filter().
+ */
+static void
+usage(const filter_conf_t* filter_conf)
+{
+     fprintf(stderr, "%s: Filter variant parsed from vcf file\n\n", MYNAME);
+     fprintf(stderr, "Usage: %s [options] -i input.vcf -o output.vcf\n", MYNAME);
+
+     fprintf(stderr,"Options:\n");
+     fprintf(stderr, "  Files:\n");
+     fprintf(stderr, "  -i | --in FILE                 VCF input file (gzip supported)\n");
+     fprintf(stderr, "  -o | --out FILE                VCF output file (default: - for stdout; gzip supported).\n");
+
+     fprintf(stderr, "  Coverage (DP):\n");
+     fprintf(stderr, "  -v | --cov-min INT             Minimum coverage allowed (<1=off)\n");
+     fprintf(stderr, "  -V | --cov-max INT             Maximum coverage allowed (<1=off)\n");
+
+     /* the AF bounds are only applied if > 0 (see apply_af_filter()) */
+     fprintf(stderr, "  Allele Frequency (AF; neg. values = off):\n");
+     fprintf(stderr, "  -a | --af-min FLOAT            Minimum allele freq allowed (<=0=off)\n");
+     fprintf(stderr, "  -A | --af-max FLOAT            Maximum allele freq allowed (<=0=off)\n");
+
+     fprintf(stderr, "\n");
+     fprintf(stderr, "  Strand Bias (SB):\n");
+     fprintf(stderr, "  Note, variants are only filtered if their SB pvalue is below the threshold\n");
+     fprintf(stderr, "  AND %d%% of variant bases are on one strand (toggled with --sb-no-compound).\n", (int)(ALT_STRAND_RATIO*100));
+     fprintf(stderr, "  -B | --sb-thresh INT           Maximum phred-value allowed. Conflicts with -b.\n");
+     fprintf(stderr, "  -b | --sb-mtc STRING           Multiple testing correction type. One of 'bonf', 'holm' or 'fdr'. Conflicts with -B\n");
+     fprintf(stderr, "  -c | --sb-alpha FLOAT          Multiple testing correction pvalue threshold\n");
+     fprintf(stderr, "       --sb-no-compound          Don't use compound filter\n");
+     fprintf(stderr, "       --sb-incl-indels          Apply SB filter to indels as well\n");
+
+     fprintf(stderr, "\n");
+     fprintf(stderr, "  SNV Quality:\n");
+     fprintf(stderr, "  -Q | --snvqual-thresh INT      Minimum phred-value allowed. Conflicts with -q\n");
+     fprintf(stderr, "  -q | --snvqual-mtc STRING      Multiple testing correction type. One of 'bonf', 'holm' or 'fdr'. Conflicts with -Q\n");
+     fprintf(stderr, "  -r | --snvqual-alpha FLOAT     Multiple testing correction pvalue threshold\n");
+     fprintf(stderr, "  -s | --snvqual-ntests INT      Number of tests used for multiple testing correction\n");
+
+     fprintf(stderr, "\n");
+     fprintf(stderr, "  Indels:\n");
+     fprintf(stderr, "  -K | --indelqual-thresh INT    Minimum phred-value allowed. Conflicts with -k\n");
+     fprintf(stderr, "  -k | --indelqual-mtc STRING    Multiple testing correction type. One of 'bonf', 'holm' or 'fdr'. Conflicts with -K\n");
+     fprintf(stderr, "  -l | --indelqual-alpha FLOAT   Multiple testing correction pvalue threshold\n");
+     fprintf(stderr, "  -m | --indelqual-ntests INT    Number of tests used for multiple testing correction\n");
+
+     fprintf(stderr, "\n");
+     fprintf(stderr, "  Misc.:\n");
+     fprintf(stderr, "       --only-indels             Keep InDels only\n");
+     fprintf(stderr, "       --only-snvs               Keep SNVs only\n");
+     fprintf(stderr, "       --print-all               Print all, not just passed variants\n");
+     fprintf(stderr, "       --no-defaults             Remove all default filter settings\n");
+     fprintf(stderr, "       --verbose                 Be verbose\n");
+     fprintf(stderr, "       --debug                   Enable debugging\n");
+     fprintf(stderr, "\nNOTE: without --no-defaults LoFreq's predefined filters are on (run with --verbose to see details)\n");
+     fprintf(stderr, "\n");
+}
+/* usage() */
+
+
+
+/* Compound part of the strand bias filter.
+ *
+ * Returns 1 if the larger of the two alt strand counts in DP4 makes
+ * up more than ALT_STRAND_RATIO of all alt counts, otherwise 0. Also
+ * returns 0 if vcf_get_dp4() reports failure, in which case a
+ * warning is logged once.
+ */
+int alt_mostly_on_one_strand(var_t *var)
+{
+     dp4_counts_t dp4;
+     float ratio = 0.0;
+     int dp4_failed;
+
+     dp4_failed = vcf_get_dp4(&dp4, var);
+     if (dp4_failed) {
+          if (! dp4_missing_warning_printed) {
+               LOG_WARN("%s\n", "DP4 info missing. Compound SB filter won't work");
+               dp4_missing_warning_printed = 1;
+          }
+          return 0;
+     }
+
+     /* FIXME: also check whether ref and alt ration is opposite?
+        pro: that's the FPs we usually see
+        con: violating fisher's exact test and additional rather arbitrary filter  */
+
+     ratio = MAX(dp4.alt_fw, dp4.alt_rv)/(float)(dp4.alt_fw + dp4.alt_rv);
+#if 0
+     LOG_DEBUG("ratio for %s %d = %f\n", var->chrom, var->pos, ratio);
+#endif
+
+     return (ratio > ALT_STRAND_RATIO) ? 1 : 0;
+}
+
+
+/* Apply the allele frequency (AF) filter to one variant.
+ *
+ * Does nothing if neither af_filter->min nor af_filter->max is > 0.
+ * Otherwise the value of the AF info key is parsed and
+ * af_filter->id_min (AF < min) and/or af_filter->id_max (AF > max)
+ * are added to the variant's FILTER field. A bound is only used if
+ * it is > 0.
+ *
+ * If the AF key is missing or its value is out of range for a float,
+ * a message is logged and AF filtering is switched off for all
+ * following calls (via af_missing_warning_printed).
+ */
+void apply_af_filter(var_t *var, af_filter_t *af_filter)
+{
+     char *af_char = NULL;
+     float af;
+
+     /* an earlier failure disabled AF filtering */
+     if (af_missing_warning_printed) {
+          return;
+     }
+
+     if ( ! (af_filter->min > 0 || af_filter->max > 0)) {
+          return;
+     }
+
+     if ( ! vcf_var_has_info_key(&af_char, var, "AF")) {
+          LOG_WARN("%s\n", "Requested AF filtering failed since AF tag is missing in variant");
+          af_missing_warning_printed = 1;
+          return;
+     }
+
+     /* strtof only sets errno on failure, so a stale ERANGE from an
+      * earlier call would otherwise be mistaken for a parse error */
+     errno = 0;
+     af = strtof(af_char, (char **)NULL); /* atof */
+     if (errno==ERANGE) {
+          LOG_ERROR("Couldn't parse AF from af_char %s. Disabling AF filtering\n", af_char);
+          af_missing_warning_printed = 1;
+          free(af_char);
+          return;
+     }
+     free(af_char);
+
+     if (af_filter->min > 0.0 && af < af_filter->min) {
+          vcf_var_add_to_filter(var, af_filter->id_min);
+     }
+     if (af_filter->max > 0.0 && af > af_filter->max) {
+          vcf_var_add_to_filter(var, af_filter->id_max);
+     }
+}
+
+
+/* Apply the coverage (DP) filter to one variant.
+ *
+ * Does nothing if neither dp_filter->min nor dp_filter->max is > 0.
+ * Otherwise the value of the DP info key is parsed and
+ * dp_filter->id_min (coverage < min) and/or dp_filter->id_max
+ * (coverage > max) are added to the variant's FILTER field. A bound
+ * is only used if it is > 0.
+ *
+ * If the DP key is missing, a warning is logged and coverage
+ * filtering is switched off for all following calls (via
+ * dp_missing_warning_printed). A conversion error reported through
+ * errno terminates the program.
+ */
+void apply_dp_filter(var_t *var, dp_filter_t *dp_filter)
+{
+     char *dp_char = NULL;
+     int cov;
+
+     /* an earlier failure disabled coverage filtering */
+     if (dp_missing_warning_printed) {
+          return;
+     }
+
+     if ( ! (dp_filter->min > 0 || dp_filter->max > 0)) {
+          return;
+     }
+
+     /* always bail out if the tag is missing, so that dp_char is
+      * guaranteed to be valid below */
+     if ( ! vcf_var_has_info_key(&dp_char, var, "DP")) {
+#ifdef DEBUG
+          vcf_file_t f; f.fh = stderr; f.gz = 0; vcf_write_var(&f, var);
+#endif
+          LOG_WARN("%s\n", "Requested coverage filtering failed since DP tag is missing in variant");
+          dp_missing_warning_printed = 1;
+          return;
+     }
+
+     errno = 0;
+     /*cov = atoi(dp_char);*/
+     cov = strtol(dp_char, (char **) NULL, 10);
+     if (errno) {
+          LOG_FATAL("%s\n", "error during int conversion");
+          free(dp_char);
+          exit(1);
+     }
+     free(dp_char);
+
+     if (dp_filter->min > 0 && cov < dp_filter->min) {
+          vcf_var_add_to_filter(var, dp_filter->id_min);
+     }
+     if (dp_filter->max > 0 && cov > dp_filter->max) {
+          vcf_var_add_to_filter(var, dp_filter->id_max);
+     }
+}
+
+
+/* Fixed threshold SNV quality filter for one variant, which must not
+ * carry the INDEL info key (asserted). If the threshold is non-zero
+ * and the variant's quality is > -1 but below the threshold,
+ * snvqual_filter->id is added to its FILTER field.
+ */
+void apply_snvqual_threshold(var_t *var, snvqual_filter_t *snvqual_filter)
+{
+     assert (! vcf_var_has_info_key(NULL, var, "INDEL"));
+
+     if (snvqual_filter->thresh
+         && var->qual>-1
+         && var->qual<snvqual_filter->thresh) {
+          vcf_var_add_to_filter(var, snvqual_filter->id);
+     }
+}
+
+
+/* Fixed threshold indel quality filter for one variant, which must
+ * carry the INDEL info key (asserted). If the threshold is non-zero
+ * and the variant's quality is > -1 but below the threshold,
+ * indelqual_filter->id is added to its FILTER field.
+ */
+void apply_indelqual_threshold(var_t *var, indelqual_filter_t *indelqual_filter)
+{
+     assert (vcf_var_has_info_key(NULL, var, "INDEL"));
+
+     if (indelqual_filter->thresh
+         && var->qual>-1
+         && var->qual<indelqual_filter->thresh) {
+          vcf_var_add_to_filter(var, indelqual_filter->id);
+     }
+}
+
+
+/* Fixed threshold strand bias filter for one variant.
+ *
+ * Does nothing if sb_filter->thresh is zero. If the SB info key is
+ * missing a warning is logged (once) and the variant is left alone.
+ * Otherwise sb_filter->id is added to the variant's FILTER field if
+ * its SB value exceeds the threshold and either the compound filter
+ * is switched off or alt_mostly_on_one_strand() holds.
+ */
+void apply_sb_threshold(var_t *var, sb_filter_t *sb_filter)
+{
+     char *sb_char = NULL;
+     int sb;
+     int has_sb;
+
+     if (! sb_filter->thresh) {
+          return;
+     }
+
+     has_sb = vcf_var_has_info_key(&sb_char, var, "SB");
+     if ( ! has_sb) {
+          if ( ! sb_missing_warning_printed) {
+               LOG_WARN("%s\n", "Requested SB filtering failed since SB tag is missing in variant");
+               sb_missing_warning_printed = 1;
+          }
+          return;
+     }
+
+     sb = atoi(sb_char);
+     free(sb_char);
+
+     /* the strand check is only evaluated if the threshold is
+      * exceeded and the compound filter is on */
+     if (sb > sb_filter->thresh
+         && (sb_filter->no_compound || alt_mostly_on_one_strand(var))) {
+          vcf_var_add_to_filter(var, sb_filter->id);
+     }
+}
+
+
+/* Apply the multiple testing corrected SNV quality filter.
+ *
+ * filter everything that's not significant: snvqual_filter->id is
+ * added to the FILTER field of every tested variant whose corrected
+ * error probability is larger than snvqual_filter->alpha.
+ *
+ * Only variants with a quality > -1 (non consensus variants) that do
+ * not carry the INDEL info key are tested, i.e. indels are ignored.
+ * If snvqual_filter->ntests is 0 it is set to the number of tested
+ * variants.
+ *
+ * Very similar to apply_sb_filter_mtc, but reverse testing logic and
+ * only looking at non consvars
+ *
+ * Returns 0 on success (including the case that there was nothing to
+ * test) and -1 on error (out of memory, unknown MTC type). The
+ * temporary buffers are released on every path.
+ */
+int apply_snvqual_filter_mtc(snvqual_filter_t *snvqual_filter, var_t **vars, const long int num_vars)
+{
+     /* can only apply this logic to variants that are not consensus
+      * variants, i.e those that actually have a quality. therefore
+      * keep track of non cons var indeces */
+     long int *orig_idx = NULL; /* of size num_noncons_vars */
+     double *noncons_errprobs = NULL;
+     long int num_noncons_vars = 0;
+     long int i;
+     void *shrunk = NULL;
+     int rc = -1;
+
+     /* nothing to do; also avoids malloc(0), which may legally
+      * return NULL and would be mistaken for out of memory */
+     if (num_vars < 1) {
+          return 0;
+     }
+
+     /* collect values from noncons vars only and keep track of their indeces
+      */
+     orig_idx = malloc(num_vars * sizeof(long int));
+     noncons_errprobs = malloc(num_vars * sizeof(double));
+     if ( ! orig_idx || ! noncons_errprobs) {
+          LOG_FATAL("%s\n", "out of memory");
+          goto cleanup;
+     }
+
+     for (i=0; i<num_vars; i++) {
+          if (vars[i]->qual>-1 && ! vcf_var_has_info_key(NULL, vars[i], "INDEL")) {
+               noncons_errprobs[num_noncons_vars] = PHREDQUAL_TO_PROB(vars[i]->qual);
+               orig_idx[num_noncons_vars] = i;
+               num_noncons_vars += 1;
+          }
+     }
+     if (! num_noncons_vars) {
+          rc = 0;
+          goto cleanup;
+     }
+
+     if (snvqual_filter->ntests && (num_noncons_vars > snvqual_filter->ntests)) {
+          LOG_WARN("Number of (non consensus) variants larger than the number of predefined tests for snvqual filter (%ld > %ld). Are you sure that makes sense?\n",
+                   num_noncons_vars, snvqual_filter->ntests);
+     }
+
+     /* shrink to the used size; go through a temporary so that the
+      * original buffer can still be freed if realloc fails */
+     shrunk = realloc(orig_idx, (num_noncons_vars * sizeof(long int)));
+     if (! shrunk) {
+          LOG_FATAL("%s\n", "realloc failed. Exiting...");
+          goto cleanup;
+     }
+     orig_idx = shrunk;
+     shrunk = realloc(noncons_errprobs, (num_noncons_vars * sizeof(double)));
+     if (! shrunk) {
+          LOG_FATAL("%s\n", "realloc failed. Exiting...");
+          goto cleanup;
+     }
+     noncons_errprobs = shrunk;
+
+     /* only now we can set the number of tests (if it wasn't set by
+      * caller) */
+     if (! snvqual_filter->ntests) {
+          snvqual_filter->ntests = num_noncons_vars;
+     }
+
+     LOG_DEBUG("updated ntests=%ld; num_noncons_vars=%ld\n", snvqual_filter->ntests, num_noncons_vars);
+
+     /* multiple testing correction
+      */
+     if (snvqual_filter->mtc_type == MTC_BONF) {
+          bonf_corr(noncons_errprobs, num_noncons_vars,
+                    snvqual_filter->ntests);
+
+     } else if (snvqual_filter->mtc_type == MTC_HOLMBONF) {
+          holm_bonf_corr(noncons_errprobs, num_noncons_vars,
+                         snvqual_filter->alpha, snvqual_filter->ntests);
+
+     } else if (snvqual_filter->mtc_type == MTC_FDR) {
+          long int num_rej = 0;
+          long int *idx_rej; /* indices of rejected i.e. significant values */
+
+          num_rej = fdr(noncons_errprobs, num_noncons_vars,
+                        snvqual_filter->alpha, snvqual_filter->ntests,
+                        &idx_rej);
+          /* first pretend none are significant */
+          for (i=0; i<num_noncons_vars; i++) {
+               noncons_errprobs[i] = DBL_MAX;
+          }
+          LOG_DEBUG("%ld results significant after fdr\n", num_rej);
+          /* then mark the rejected ones so that they pass the alpha
+           * test below */
+          for (i=0; i<num_rej; i++) {
+               long int idx = idx_rej[i];
+               noncons_errprobs[idx] = -1;
+          }
+          free(idx_rej);
+
+     } else {
+          LOG_FATAL("Internal error: unknown MTC type %d\n", snvqual_filter->mtc_type);
+          goto cleanup;
+     }
+
+     for (i=0; i<num_noncons_vars; i++) {
+          if (noncons_errprobs[i] > snvqual_filter->alpha) {
+               vcf_var_add_to_filter(vars[orig_idx[i]], snvqual_filter->id);
+          }
+     }
+     rc = 0;
+
+cleanup:
+     free(orig_idx);
+     free(noncons_errprobs);
+
+     return rc;
+}
+
+
+
+/* Apply the multiple testing corrected indel quality filter.
+ *
+ * filter everything that's not significant: indelqual_filter->id is
+ * added to the FILTER field of every tested variant whose corrected
+ * error probability is larger than indelqual_filter->alpha.
+ *
+ * Only variants with a quality > -1 (non consensus variants) that
+ * carry the INDEL info key are tested. If indelqual_filter->ntests
+ * is 0 it is set to the number of tested variants.
+ *
+ * Very similar to apply_sb_filter_mtc, but reverse testing logic and
+ * only looking at non consvars
+ *
+ * Returns 0 on success (including the case that there was nothing to
+ * test) and -1 on error (out of memory, unknown MTC type). The
+ * temporary buffers are released on every path.
+ */
+int apply_indelqual_filter_mtc(indelqual_filter_t *indelqual_filter, var_t **vars, const long int num_vars)
+{
+     /* can only apply this logic to variants that are not consensus
+      * variants, i.e those that actually have a quality. therefore
+      * keep track of non cons var indeces */
+     long int *orig_idx = NULL; /* of size num_noncons_vars */
+     double *noncons_errprobs = NULL;
+     long int num_noncons_vars = 0;
+     long int i;
+     void *shrunk = NULL;
+     int rc = -1;
+
+     /* FIXME function almost identical to apply_snvqual_filter_mtc just different filter can be easily merged by accepting both types of variants */
+
+     /* nothing to do; also avoids malloc(0), which may legally
+      * return NULL and would be mistaken for out of memory */
+     if (num_vars < 1) {
+          return 0;
+     }
+
+     /* collect values from noncons vars only and keep track of their indeces
+      */
+     orig_idx = malloc(num_vars * sizeof(long int));
+     noncons_errprobs = malloc(num_vars * sizeof(double));
+     if ( ! orig_idx || ! noncons_errprobs) {
+          LOG_FATAL("%s\n", "out of memory");
+          goto cleanup;
+     }
+
+     for (i=0; i<num_vars; i++) {
+          if (vars[i]->qual>-1 && vcf_var_has_info_key(NULL, vars[i], "INDEL")) {
+               noncons_errprobs[num_noncons_vars] = PHREDQUAL_TO_PROB(vars[i]->qual);
+               orig_idx[num_noncons_vars] = i;
+               num_noncons_vars += 1;
+          }
+     }
+     if (! num_noncons_vars) {
+          rc = 0;
+          goto cleanup;
+     }
+
+     if (indelqual_filter->ntests && num_noncons_vars > indelqual_filter->ntests) {
+          LOG_WARN("Number of (non consensus) variants larger than number of predefined tests for indelqual filter (%ld > %ld)! Are you sure that makes sense?\n",
+                   num_noncons_vars, indelqual_filter->ntests);
+     }
+
+     /* shrink to the used size; go through a temporary so that the
+      * original buffer can still be freed if realloc fails */
+     shrunk = realloc(orig_idx, (num_noncons_vars * sizeof(long int)));
+     if ( ! shrunk) {
+          LOG_FATAL("%s\n", "out of memory");
+          goto cleanup;
+     }
+     orig_idx = shrunk;
+     shrunk = realloc(noncons_errprobs, (num_noncons_vars * sizeof(double)));
+     if ( ! shrunk) {
+          LOG_FATAL("%s\n", "out of memory");
+          goto cleanup;
+     }
+     noncons_errprobs = shrunk;
+
+     /* only now we can set the number of tests (if it wasn't set by
+      * caller) */
+     if (! indelqual_filter->ntests) {
+          indelqual_filter->ntests = num_noncons_vars;
+     }
+
+     /* multiple testing correction
+      */
+     if (indelqual_filter->mtc_type == MTC_BONF) {
+          bonf_corr(noncons_errprobs, num_noncons_vars,
+                    indelqual_filter->ntests);
+
+     } else if (indelqual_filter->mtc_type == MTC_HOLMBONF) {
+          holm_bonf_corr(noncons_errprobs, num_noncons_vars,
+                         indelqual_filter->alpha, indelqual_filter->ntests);
+
+     } else if (indelqual_filter->mtc_type == MTC_FDR) {
+          long int num_rej = 0;
+          long int *idx_rej; /* indices of rejected i.e. significant values */
+
+          num_rej = fdr(noncons_errprobs, num_noncons_vars,
+                        indelqual_filter->alpha, indelqual_filter->ntests,
+                        &idx_rej);
+
+          /* first pretend none are significant */
+          for (i=0; i<num_noncons_vars; i++) {
+               noncons_errprobs[i] = DBL_MAX;
+          }
+          LOG_DEBUG("%ld results significant after fdr\n", num_rej);
+          /* then mark the rejected ones so that they pass the alpha
+           * test below */
+          for (i=0; i<num_rej; i++) {
+               long int idx = idx_rej[i];
+               noncons_errprobs[idx] = -1;
+          }
+          free(idx_rej);
+
+     } else {
+          LOG_FATAL("Internal error: unknown MTC type %d\n", indelqual_filter->mtc_type);
+          goto cleanup;
+     }
+
+     for (i=0; i<num_noncons_vars; i++) {
+          if (noncons_errprobs[i] > indelqual_filter->alpha) {
+               vcf_var_add_to_filter(vars[orig_idx[i]], indelqual_filter->id);
+          }
+     }
+     rc = 0;
+
+cleanup:
+     free(orig_idx);
+     free(noncons_errprobs);
+
+     return rc;
+}
+
+
+/* Apply the multiple testing corrected strand bias (SB) filter.
+ *
+ * filter everything that's significant: sb_filter->id is added to
+ * the FILTER field of every tested variant whose corrected SB value
+ * is below sb_filter->alpha and for which either the compound filter
+ * is switched off or alt_mostly_on_one_strand() holds.
+ *
+ * Variants without SB info key are not tested (a warning is logged
+ * once); indels are only tested if sb_filter->incl_indels is set. If
+ * sb_filter->ntests is 0 it is set to the number of tested variants.
+ *
+ * very similar to in apply_snvqual_filter_mtc, but reverse logic and looking at all vars
+ *
+ * Returns 0 on success (including the case that there was nothing to
+ * test) and -1 on error (out of memory, unknown MTC type). The
+ * temporary buffers are released on every path.
+ */
+int apply_sb_filter_mtc(sb_filter_t *sb_filter, var_t **vars, const long int num_vars)
+{
+     double *sb_probs = NULL;
+     long int i;
+     long int num_kept = 0; /* number of variants actually tested */
+     long int *orig_idx = NULL;/* we might ignore some variants (missing values etc). keep track of real indices of kept vars */
+     void *shrunk = NULL;
+     int rc = -1;
+
+     /* nothing to do; also avoids malloc(0), which may legally
+      * return NULL and would be mistaken for out of memory */
+     if (num_vars < 1) {
+          return 0;
+     }
+
+     /* collect values from vars kept in mem
+      */
+     sb_probs = malloc(num_vars * sizeof(double));
+     orig_idx = malloc(num_vars * sizeof(long int));
+     if ( ! sb_probs || ! orig_idx) {
+          LOG_FATAL("%s\n", "out of memory");
+          goto cleanup;
+     }
+
+     for (i=0; i<num_vars; i++) {
+          char *sb_char = NULL;
+
+          /* ignore indels too if sb filter is not to be applied */
+          if (! sb_filter->incl_indels && vcf_var_is_indel(vars[i])) {
+               continue;
+          }
+
+          if ( ! vcf_var_has_info_key(&sb_char, vars[i], "SB")) {
+               if ( ! sb_missing_warning_printed) {
+                    LOG_WARN("%s\n", "At least one variant has no SB tag! SB filtering will be incomplete");
+                    sb_missing_warning_printed = 1;
+               }
+               continue;
+          }
+
+          sb_probs[num_kept] = PHREDQUAL_TO_PROB(atoi(sb_char));
+          orig_idx[num_kept] = i;
+          num_kept += 1;
+          free(sb_char);
+     }
+     if (num_kept <= 0) {
+          rc = 0;
+          goto cleanup;
+     }
+
+     /* realloc to smaller size apparently not guaranteed to free up space so no point really but let's make sure we don't use that memory.
+      * go through a temporary so that the original buffer can still
+      * be freed if realloc fails */
+     shrunk = realloc(sb_probs, num_kept * sizeof(double));
+     if (! shrunk) {
+          LOG_FATAL("%s\n", "realloc failed. Exiting...");
+          goto cleanup;
+     }
+     sb_probs = shrunk;
+     shrunk = realloc(orig_idx, num_kept * sizeof(long int));
+     if (! shrunk) {
+          LOG_FATAL("%s\n", "realloc failed. Exiting...");
+          goto cleanup;
+     }
+     orig_idx = shrunk;
+
+     if (! sb_filter->ntests) {
+          sb_filter->ntests = num_kept;
+     } else if (num_kept > sb_filter->ntests) {
+          /* more variants tested than tests announced by the caller */
+          LOG_WARN("%s\n", "Number of variants larger than number of predefined tests for SB filter! Are you sure that makes sense?");
+     }
+
+     /* multiple testing correction
+      */
+     if (sb_filter->mtc_type == MTC_BONF) {
+          bonf_corr(sb_probs, num_kept,
+                    sb_filter->ntests);
+
+     } else if (sb_filter->mtc_type == MTC_HOLMBONF) {
+          holm_bonf_corr(sb_probs, num_kept,
+                         sb_filter->alpha, sb_filter->ntests);
+
+     } else if (sb_filter->mtc_type == MTC_FDR) {
+          long int num_rej = 0;
+          long int *idx_rej; /* indices of rejected i.e. significant values */
+
+          num_rej = fdr(sb_probs, num_kept,
+                        sb_filter->alpha, sb_filter->ntests,
+                        &idx_rej);
+
+          /* first pretend none are significant */
+          for (i=0; i<num_kept; i++) {
+               sb_probs[i] = DBL_MAX;
+          }
+          LOG_DEBUG("%ld results significant after fdr\n", num_rej);
+          /* then mark the rejected ones so that they are caught by
+           * the alpha test below */
+          for (i=0; i<num_rej; i++) {
+               long int idx = idx_rej[i];
+               sb_probs[idx] = -1;
+          }
+          free(idx_rej);
+
+     } else {
+          LOG_FATAL("Internal error: unknown MTC type %d\n", sb_filter->mtc_type);
+          goto cleanup;
+     }
+
+     for (i=0; i<num_kept; i++) {
+          if (sb_probs[i] < sb_filter->alpha) {
+               if (sb_filter->no_compound || alt_mostly_on_one_strand(vars[orig_idx[i]])) {
+                    vcf_var_add_to_filter(vars[orig_idx[i]], sb_filter->id);
+               }
+          }
+     }
+     rc = 0;
+
+cleanup:
+     free(orig_idx);
+     free(sb_probs);
+
+     return rc;
+}
+
+
+/* Registers a ##FILTER line in the vcf header for every filter that
+ * is switched on in cfg. As a side effect the id strings of those
+ * filters are filled in, so this has to run before the ids are used.
+ */
+void cfg_filter_to_vcf_header(filter_conf_t *cfg, char **header)
+{
+     af_filter_t *af = &cfg->af_filter;
+     dp_filter_t *dp = &cfg->dp_filter;
+     sb_filter_t *sb = &cfg->sb_filter;
+     snvqual_filter_t *snvqual = &cfg->snvqual_filter;
+     indelqual_filter_t *indelqual = &cfg->indelqual_filter;
+     char filter_line[FILTER_STRSIZE];
+     char mtc_name[64];
+
+     /* for getting rid of all those trailing float zeros we might want to look at
+        http://stackoverflow.com/questions/277772/avoid-trailing-zeroes-in-printf */
+
+     /* allele frequency bounds */
+     if (af->min > 0) {
+          snprintf(af->id_min, FILTER_ID_STRSIZE, "min_af_%f", af->min);
+          snprintf(filter_line, FILTER_STRSIZE,
+                   "##FILTER=<ID=%s,Description=\"Minimum allele frequency %f\">\n",
+                   af->id_min, af->min);
+          vcf_header_add(header, filter_line);
+     }
+     if (af->max > 0) {
+          snprintf(af->id_max, FILTER_ID_STRSIZE, "max_af_%f", af->max);
+          snprintf(filter_line, FILTER_STRSIZE,
+                   "##FILTER=<ID=%s,Description=\"Maximum allele frequency %f\">\n",
+                   af->id_max, af->max);
+          vcf_header_add(header, filter_line);
+     }
+
+     /* coverage bounds */
+     if (dp->min > 0) {
+          snprintf(dp->id_min, FILTER_ID_STRSIZE, "min_dp_%d", dp->min);
+          snprintf(filter_line, FILTER_STRSIZE,
+                   "##FILTER=<ID=%s,Description=\"Minimum Coverage %d\">\n",
+                   dp->id_min, dp->min);
+          vcf_header_add(header, filter_line);
+     }
+     if (dp->max > 0) {
+          snprintf(dp->id_max, FILTER_ID_STRSIZE, "max_dp_%d", dp->max);
+          snprintf(filter_line, FILTER_STRSIZE,
+                   "##FILTER=<ID=%s,Description=\"Maximum Coverage %d\">\n",
+                   dp->id_max, dp->max);
+          vcf_header_add(header, filter_line);
+     }
+
+     /* strand bias: fixed threshold or multiple testing correction,
+      * never both */
+     assert (! (cfg->sb_filter.thresh > 0 && cfg->sb_filter.mtc_type != MTC_NONE));
+     if (sb->thresh > 0) {
+          snprintf(sb->id, FILTER_ID_STRSIZE, "max_sb_%d", sb->thresh);
+          snprintf(filter_line, FILTER_STRSIZE,
+                   "##FILTER=<ID=%s,Description=\"Maximum Strand-Bias Phred %d\">\n",
+                   sb->id, sb->thresh);
+          vcf_header_add(header, filter_line);
+
+     } else if (sb->mtc_type != MTC_NONE) {
+          mtc_str(mtc_name, sb->mtc_type);
+          snprintf(sb->id, FILTER_ID_STRSIZE, "sb_%s", mtc_name);
+          snprintf(filter_line, FILTER_STRSIZE,
+                   "##FILTER=<ID=%s,Description=\"Strand-Bias Multiple Testing Correction: %s corr. pvalue > %f\">\n",
+                   sb->id, mtc_name, sb->alpha);
+          vcf_header_add(header, filter_line);
+     }
+
+     /* snv quality: fixed threshold or multiple testing correction,
+      * never both */
+     assert (! (cfg->snvqual_filter.thresh > 0 && cfg->snvqual_filter.mtc_type != MTC_NONE));
+     if (snvqual->thresh > 0) {
+          snprintf(snvqual->id, FILTER_ID_STRSIZE, "min_snvqual_%d", snvqual->thresh);
+          snprintf(filter_line, FILTER_STRSIZE,
+                   "##FILTER=<ID=%s,Description=\"Minimum SNV Quality (Phred) %d\">\n",
+                   snvqual->id, snvqual->thresh);
+          vcf_header_add(header, filter_line);
+
+     } else if (snvqual->mtc_type != MTC_NONE) {
+          mtc_str(mtc_name, snvqual->mtc_type);
+          snprintf(snvqual->id, FILTER_ID_STRSIZE, "snvqual_%s", mtc_name);
+          snprintf(filter_line, FILTER_STRSIZE,
+                   "##FILTER=<ID=%s,Description=\"SNV Quality Multiple Testing Correction: %s corr. pvalue < %f\">\n",
+                   snvqual->id, mtc_name, snvqual->alpha);
+          vcf_header_add(header, filter_line);
+     }
+
+     /* indel quality: fixed threshold or multiple testing
+      * correction, never both */
+     assert (! (cfg->indelqual_filter.thresh > 0 && cfg->indelqual_filter.mtc_type != MTC_NONE));
+     if (indelqual->thresh > 0) {
+          snprintf(indelqual->id, FILTER_ID_STRSIZE, "min_indelqual_%d", indelqual->thresh);
+          snprintf(filter_line, FILTER_STRSIZE,
+                   "##FILTER=<ID=%s,Description=\"Minimum Indel Quality (Phred) %d\">\n",
+                   indelqual->id, indelqual->thresh);
+          vcf_header_add(header, filter_line);
+
+     } else if (indelqual->mtc_type != MTC_NONE) {
+          mtc_str(mtc_name, indelqual->mtc_type);
+          snprintf(indelqual->id, FILTER_ID_STRSIZE, "indelqual_%s", mtc_name);
+          snprintf(filter_line, FILTER_STRSIZE,
+                   "##FILTER=<ID=%s,Description=\"Indel Quality Multiple Testing Correction: %s corr. pvalue < %f\">\n",
+                   indelqual->id, mtc_name, indelqual->alpha);
+          vcf_header_add(header, filter_line);
+     }
+}
+
+
+/* Entry point of the 'filter' subcommand. argv[0] is expected to be the main
+ * program name ('lofreq'): option parsing starts at argv+1. Reads all variants
+ * from --in (default: stdin), applies the configured coverage, AF, strand-bias
+ * and quality filters and writes the variants (by default only the passing
+ * ones, see --print-all) to --out (default: stdout).
+ * Returns 0 on success and non-zero on any error. */
+int
+main_filter(int argc, char *argv[])
+{
+     filter_conf_t cfg;
+     char *vcf_in = NULL, *vcf_out = NULL;
+     static int print_only_passed = 1; /* flags are static: long_opts below stores their addresses */
+     static int sb_filter_no_compound = 0;
+     static int sb_filter_incl_indels = 0;
+     static int only_indels = 0;
+     static int only_snvs = 0;
+     char *vcf_header = NULL;
+     var_t **vars = NULL;
+     long int num_vars = 0; /* isn't long overkill here ? */
+     long int vars_size = 0; /* keeping track of how much memory we've got pre-allocated */
+     long int i;
+     static int no_defaults = 0;
+
+     /* default filter options */
+     memset(&cfg, 0, sizeof(filter_conf_t));
+     cfg.dp_filter.min = cfg.dp_filter.max = -1;
+     cfg.af_filter.min = cfg.af_filter.max = -1;
+     cfg.sb_filter.alpha = DEFAULT_SIG;
+     cfg.snvqual_filter.alpha = DEFAULT_SIG;
+     cfg.indelqual_filter.alpha = DEFAULT_SIG;
+
+    /* keep in sync with long_opts_str and usage
+     *
+     * getopt is a pain in the whole when it comes to syncing of long
+     * and short args and usage. check out gopt, libcfu...
+     */
+    while (1) {
+         int c;
+         static struct option long_opts[] = {
+              /* see usage sync */
+              {"verbose", no_argument, &verbose, 1},
+              {"debug", no_argument, &debug, 1},
+              {"print-all", no_argument, &print_only_passed, 0},
+              {"no-defaults", no_argument, &no_defaults, 1},
+              {"only-indels", no_argument, &only_indels, 1},
+              {"only-snvs", no_argument, &only_snvs, 1},
+
+              {"help", no_argument, NULL, 'h'},
+              {"in", required_argument, NULL, 'i'},
+              {"out", required_argument, NULL, 'o'},
+
+              {"cov-min", required_argument, NULL, 'v'},
+              {"cov-max", required_argument, NULL, 'V'},
+
+              {"af-min", required_argument, NULL, 'a'},
+              {"af-max", required_argument, NULL, 'A'},
+
+              {"sb-thresh", required_argument, NULL, 'B'},
+              {"sb-mtc", required_argument, NULL, 'b'},
+              {"sb-alpha", required_argument, NULL, 'c'},
+              {"sb-no-compound", no_argument, &sb_filter_no_compound, 1},
+              {"sb-incl-indels", no_argument, &sb_filter_incl_indels, 1},
+
+              {"snvqual-thresh", required_argument, NULL, 'Q'},
+              {"snvqual-mtc", required_argument, NULL, 'q'},
+              {"snvqual-alpha", required_argument, NULL, 'r'},
+              {"snvqual-ntests", required_argument, NULL, 's'},
+
+              {"indelqual-thresh", required_argument, NULL, 'K'},
+              {"indelqual-mtc", required_argument, NULL, 'k'},
+              {"indelqual-alpha", required_argument, NULL, 'l'},
+              {"indelqual-ntests", required_argument, NULL, 'm'},
+
+              {0, 0, 0, 0} /* sentinel */
+         };
+
+         /* keep in sync with long_opts and usage */
+         static const char *long_opts_str = "hi:o:v:V:a:A:B:b:c:Q:q:r:s:K:k:l:m:";
+
+         /* getopt_long stores the option index here. */
+         int long_opts_index = 0;
+         c = getopt_long(argc-1, argv+1, /* skipping 'lofreq', just leaving 'command', i.e. call */
+                         long_opts_str, long_opts, & long_opts_index);
+         if (c == -1) {
+              break;
+         }
+
+         switch (c) {
+         /* keep in sync with long_opts etc */
+         case 'h':
+              usage(& cfg);
+              return 0;
+
+         case 'i':
+              vcf_in = strdup(optarg);
+              break;
+         case 'o':
+              if (0 != strcmp(optarg, "-")) {
+                   if (file_exists(optarg)) {
+                        LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg);
+                        return 1;
+                   }
+              }
+              vcf_out = strdup(optarg);
+              break;
+
+         case 'v':
+              if (! isdigit(optarg[0])) {
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   return -1;
+              }
+              cfg.dp_filter.min = atoi(optarg);
+              break;
+         case 'V':
+              if (! isdigit(optarg[0])) {
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   return -1;
+              }
+              cfg.dp_filter.max = atoi(optarg);
+              break;
+
+         case 'a':
+              if (! isdigit(optarg[0])) {
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   return -1;
+              }
+              cfg.af_filter.min = strtof(optarg, NULL);
+              break;
+         case 'A':
+              if (! isdigit(optarg[0])) {
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   return -1;
+              }
+              cfg.af_filter.max = strtof(optarg, NULL);
+              break;
+
+         case 'B':
+              if (! isdigit(optarg[0])) {
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   return -1;
+              }
+              cfg.sb_filter.thresh = atoi(optarg);
+              break;
+         case 'b':
+              cfg.sb_filter.mtc_type = mtc_str_to_type(optarg);
+              if (-1 == cfg.sb_filter.mtc_type) {
+                   LOG_FATAL("Unknown multiple testing correction type '%s' for strandbias filtering\n", optarg);
+                   return -1;
+              }
+              break;
+         case 'c':
+              if (! isdigit(optarg[0])) {
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   return -1;
+              }
+              cfg.sb_filter.alpha = strtof(optarg, NULL);
+              break;
+
+         case 'Q':
+              if (! isdigit(optarg[0])) {
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   return -1;
+              }
+              cfg.snvqual_filter.thresh = atoi(optarg);
+              break;
+         case 'q':
+              cfg.snvqual_filter.mtc_type = mtc_str_to_type(optarg);
+              if (-1 == cfg.snvqual_filter.mtc_type) {
+                   LOG_FATAL("Unknown multiple testing correction type '%s' for snv quality filtering\n", optarg);
+                   return -1;
+              }
+              break;
+         case 'r':
+              if (! isdigit(optarg[0])) {
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   return -1;
+              }
+              cfg.snvqual_filter.alpha = strtof(optarg, NULL);
+              break;
+         case 's':
+              if (! isdigit(optarg[0])) {
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   return -1;
+              }
+              cfg.snvqual_filter.ntests = atol(optarg);
+              break;
+
+         case 'K':
+              if (! isdigit(optarg[0])) {
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   return -1;
+              }
+              cfg.indelqual_filter.thresh = atoi(optarg);
+              break;
+         case 'k':
+              cfg.indelqual_filter.mtc_type = mtc_str_to_type(optarg);
+              if (-1 == cfg.indelqual_filter.mtc_type) {
+                   LOG_FATAL("Unknown multiple testing correction type '%s' for indel quality filtering\n", optarg);
+                   return -1;
+              }
+              break;
+         case 'l':
+              if (! isdigit(optarg[0])) {
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   return -1;
+              }
+              cfg.indelqual_filter.alpha = strtof(optarg, NULL);
+              break;
+         case 'm':
+              if (! isdigit(optarg[0])) {
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   return -1;
+              }
+              cfg.indelqual_filter.ntests = atol(optarg);
+              break;
+
+         case '?':
+              LOG_FATAL("%s\n", "Unrecognized argument found. Exiting...\n");
+              return 1;
+
+         default:
+              break;
+         }
+    }
+    cfg.print_only_passed = print_only_passed;
+    cfg.only_indels = only_indels;
+    cfg.only_snvs = only_snvs;
+    cfg.sb_filter.no_compound = sb_filter_no_compound;
+    cfg.sb_filter.incl_indels = sb_filter_incl_indels;
+
+    if (cfg.only_indels && cfg.only_snvs) {
+         LOG_FATAL("%s\n", "Can't keep only indels and only snvs");
+         return 1;
+    }
+
+    if (! no_defaults) {
+         if (cfg.sb_filter.mtc_type==MTC_NONE && ! cfg.sb_filter.thresh) {
+              LOG_VERBOSE("%s\n", "Setting default SB filtering method to FDR");
+              cfg.sb_filter.mtc_type = MTC_FDR;
+              cfg.sb_filter.alpha = 0.001;
+         }
+         if (cfg.dp_filter.min<0) {
+              cfg.dp_filter.min = 10;
+              LOG_VERBOSE("Setting default minimum coverage to %d\n", cfg.dp_filter.min);
+         }
+    } else {
+         LOG_VERBOSE("%s\n", "Skipping default settings");
+    }
+
+    if (0 != argc - optind - 1) {/* FIXME needed at all? */
+         LOG_FATAL("%s\n", "Unrecognized argument found. Exiting...\n");
+         return 1;
+    }
+
+    /* logic check of command line parameters
+     */
+    if (cfg.dp_filter.max > 0 &&  cfg.dp_filter.max < cfg.dp_filter.min) {
+         LOG_FATAL("%s\n", "Invalid coverage-filter settings");
+         return 1;
+    }
+    if ((cfg.af_filter.max > 0 && cfg.af_filter.max < cfg.af_filter.min) ||
+        (cfg.af_filter.max > 1.0)) {
+         LOG_FATAL("%s\n", "Invalid AF-filter settings");
+         return 1;
+    }
+
+    if (cfg.sb_filter.thresh && cfg.sb_filter.mtc_type != MTC_NONE) {
+         LOG_FATAL("%s\n", "Can't use fixed strand-bias threshold *and* multiple testing correction.");
+         return 1;
+    }
+    if (cfg.snvqual_filter.thresh && cfg.snvqual_filter.mtc_type != MTC_NONE) {
+         LOG_FATAL("%s\n", "Can't use fixed SNV quality threshold *and* multiple testing correction.");
+         return 1;
+    }
+    if (cfg.indelqual_filter.thresh && cfg.indelqual_filter.mtc_type != MTC_NONE) {
+         LOG_FATAL("%s\n", "Can't use fixed indel quality threshold *and* multiple testing correction.");
+         return 1;
+    }
+
+    if (argc == 2) {
+        fprintf(stderr, "\n");
+        usage(& cfg);
+        return 1;
+    }
+
+    if (debug) {
+          dump_filter_conf(& cfg);
+     }
+
+    /* missing file args default to stdin and stdout
+     */
+    if  (! vcf_in) {
+         vcf_in = malloc(2 * sizeof(char));
+         strcpy(vcf_in, "-");
+    }
+    if  (! vcf_out) {
+         vcf_out = malloc(2 * sizeof(char));
+         strcpy(vcf_out, "-");
+    }
+    LOG_DEBUG("vcf_in=%s vcf_out=%s\n", vcf_in, vcf_out);
+
+    /* open vcf files
+     */
+    if (vcf_file_open(& cfg.vcf_in, vcf_in,
+                      HAS_GZIP_EXT(vcf_in), 'r')) {
+         LOG_ERROR("Couldn't open %s\n", vcf_in);
+         return 1;
+    }
+    if (vcf_file_open(& cfg.vcf_out, vcf_out,
+                      HAS_GZIP_EXT(vcf_out), 'w')) {
+         LOG_ERROR("Couldn't open %s\n", vcf_out);
+         return 1;
+    }
+    free(vcf_in);
+    free(vcf_out);
+
+    /* FIXME everything below here should go into a function with args:
+       - cfg
+       - ...what else?
+    */
+
+    /* print header
+     */
+    if (0 !=  vcf_parse_header(&vcf_header, & cfg.vcf_in)) {
+         /* LOG_WARN("%s\n", "vcf_parse_header() failed"); */
+         if (vcf_file_seek(& cfg.vcf_in, 0, SEEK_SET)) {
+              LOG_FATAL("%s\n", "Couldn't rewind file to parse variants"
+                        " after header parsing failed");
+              return -1;
+         }
+    }
+    /* also sets filter names */
+    cfg_filter_to_vcf_header(& cfg, &vcf_header);
+    vcf_write_header(& cfg.vcf_out, vcf_header);
+    free(vcf_header);
+
+    /* read in variants. since many filters perform multiple testing
+     * correction and therefore need to look at all variants we keep
+     * it simple and load them all into memory. 
+     * 
+     * in theory we could apply all 'simple' filters directly within
+     * the loop here and depending on the result spit the variant out
+     * or not. only complex filters need to see all variants first to,
+     * e.g. apply multiple testing.
+     */
+    num_vars = 0;
+    while (1) {
+         var_t *var;
+         int rc;
+         int is_indel = 0;
+
+         vcf_new_var(&var);
+         rc = vcf_parse_var(& cfg.vcf_in, var);
+         if (rc) {
+              /* how to distinguish between error and EOF? */
+              free(var);
+              break;
+         }
+
+         is_indel = vcf_var_is_indel(var);
+
+         if ((cfg.only_snvs && is_indel) || (cfg.only_indels && ! is_indel)) {
+              vcf_free_var(& var); /* fully parsed: release it the same way as the stored variants */
+              continue;
+         }
+
+         /* read all in, no matter if already filtered. we keep adding filters */
+         num_vars +=1;
+         if (num_vars >= vars_size) {
+              const long incr = 128;
+              var_t **grown = realloc(vars, (vars_size+incr) * sizeof(var_t*));
+              if (! grown) {
+                   LOG_FATAL("%s\n", "Out of memory while storing variants");
+                   return -1;
+              }
+              vars = grown;
+              vars_size += incr;
+         }
+         vars[num_vars-1] = var;
+#ifdef TRACE
+         {
+              char *key;
+              vcf_var_key(&key,  vars[num_vars-1]);
+              fprintf(stderr, "storing var %ld+1: %s\n", num_vars, key);
+              free(key);
+         }
+#endif
+
+         /* filters applying to all types of variants
+          */
+         apply_af_filter(var, & cfg.af_filter);
+         apply_dp_filter(var, & cfg.dp_filter);
+
+         /* quality threshold per variant type
+          */
+         if (! is_indel) {
+              if (cfg.snvqual_filter.thresh) {
+                   assert(cfg.snvqual_filter.mtc_type == MTC_NONE);
+                   apply_snvqual_threshold(var, & cfg.snvqual_filter);
+              }
+
+         } else {
+              if (cfg.indelqual_filter.thresh) {
+                   assert(cfg.indelqual_filter.mtc_type == MTC_NONE);
+                   apply_indelqual_threshold(var, & cfg.indelqual_filter);
+              }
+         }
+
+         if (cfg.sb_filter.thresh) {
+              if (! is_indel || cfg.sb_filter.incl_indels) {
+                   assert(cfg.sb_filter.mtc_type == MTC_NONE);
+                   apply_sb_threshold(var, & cfg.sb_filter);
+              }
+         }
+    }
+
+    if (num_vars) { /* shrink to fit; keep the larger block if realloc fails */
+         var_t **shrunk = realloc(vars, (num_vars * sizeof(var_t*)));
+         if (shrunk) vars = shrunk;
+    }
+    vcf_file_close(& cfg.vcf_in);
+    LOG_VERBOSE("Parsed %ld variants\n", num_vars);
+
+    if (cfg.sb_filter.mtc_type != MTC_NONE) {
+         if (apply_sb_filter_mtc(& cfg.sb_filter, vars, num_vars)) {
+              LOG_FATAL("%s\n", "Multiple testing correction on strand-bias pvalues failed");
+              return -1;
+         }
+    }
+
+    if (cfg.snvqual_filter.mtc_type != MTC_NONE) {
+         if (apply_snvqual_filter_mtc(& cfg.snvqual_filter, vars, num_vars)) {
+              LOG_FATAL("%s\n", "Multiple testing correction on SNV qualities failed");
+              return -1;
+         }
+    }
+
+    if (cfg.indelqual_filter.mtc_type != MTC_NONE) {
+         if (apply_indelqual_filter_mtc(& cfg.indelqual_filter, vars, num_vars)) {
+              LOG_FATAL("%s\n", "Multiple testing correction on Indel qualities failed");
+              return -1;
+         }
+    }
+
+    /* output
+     */
+    for (i=0; i<num_vars; i++) {
+         var_t *v = vars[i];
+
+         if (cfg.print_only_passed && ! (VCF_VAR_PASSES(v))) {
+              continue;
+         }
+
+         /* add pass if no filters were set */
+         if (! v->filter || strlen(v->filter)<=1) {
+              free(v->filter); /* free(NULL) is a no-op */
+              v->filter = strdup("PASS");
+         }
+
+         vcf_write_var(& cfg.vcf_out, v);
+    }
+    vcf_file_close(& cfg.vcf_out);
+
+    for (i=0; i<num_vars; i++) {
+         vcf_free_var(& vars[i]);
+    }
+    free(vars);
+
+    LOG_VERBOSE("%s\n", "Successful exit.");
+    return 0;
+}
+/* main_filter */
+
+
+/* gcc lofreq_filter.c -o lofreq_filter -I../lofreq_core -I../uthash/ ../lofreq_core/liblofreq_core.a   -lz -DMAIN_FILTER */
+#ifdef MAIN_FILTER
+
+int main(int argc, char *argv[])
+{    /* main_filter() parses argc-1/argv+1, so shift both by one for stand-alone use */
+     char **shifted_argv = argv - 1;
+     return main_filter(1 + argc, shifted_argv);
+}
+#endif
diff --git a/src/lofreq/lofreq_filter.h b/src/lofreq/lofreq_filter.h
new file mode 100644
index 0000000..7989cb8
--- /dev/null
+++ b/src/lofreq/lofreq_filter.h
@@ -0,0 +1,33 @@
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef LOFREQ_FILTER_H
+#define LOFREQ_FILTER_H
+/* Entry point of the 'filter' subcommand: parses its options from argv+1; returns 0 on success, non-zero otherwise */
+int main_filter(int argc, char *argv[]);
+
+#endif
diff --git a/src/lofreq/lofreq_indelqual.c b/src/lofreq/lofreq_indelqual.c
new file mode 100644
index 0000000..df231d8
--- /dev/null
+++ b/src/lofreq/lofreq_indelqual.c
@@ -0,0 +1,453 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include <getopt.h>
+
+#include "htslib/faidx.h"
+#include "sam.h" 
+#include "log.h"
+#include "utils.h"
+#include "defaults.h"
+#include "lofreq_indelqual.h"
+
+
+char DINDELQ[] = "!MMMLKEC@=<;:988776"; /* 1-based 18: looked up by homopolymer count 1..18; entry 0 ('!') is the fallback value */
+char DINDELQ2[] = "!CCCBA;963210/----,"; /* *10 (NOTE(review): not used anywhere in this file; meaning of "*10" to be confirmed) */
+
+/* state handed to uniform_fetch_func() for every read */
+typedef struct {
+     samfile_t *in; /* input, opened with samopen() */
+     bamFile out; /* output BAM */
+     int iq; /* character written at every position of the BI_TAG string */
+     int dq; /* character written at every position of the BD_TAG string */
+} data_t_uniform;
+
+/* state handed to dindel_fetch_func() for every read */
+typedef struct {
+     samfile_t *in; /* input, opened with samopen() */
+     bamFile out; /* output BAM */
+     faidx_t *fai; /* reference, loaded with fai_load() */
+     int *hpcount; /* find_homopolymers() result for reference sequence tid; NULL until first use */
+     int rlen; /* length of that reference sequence, i.e. number of hpcount entries */
+     uint32_t tid; /* target id hpcount was computed for */
+} data_t_dindel;
+
+
+#define ENCODE_Q(q) ((uint8_t)((q) < 33 ? '!' : ((q) > 126 ? '~' : (q)))) /* clamp q to '!'..'~'; q is evaluated more than once */
+
+
+/* Replaces the BI_TAG and BD_TAG aux fields of read b by strings made of
+ * l_qseq copies of the characters preset in data (a data_t_uniform*: iq for
+ * BI_TAG, dq for BD_TAG) and writes the read to the output BAM.
+ * Returns 0; exits if the scratch buffer cannot be allocated. */
+static int uniform_fetch_func(bam1_t *b, void *data)
+{
+     data_t_uniform *tmp = (data_t_uniform*)data;
+     const int len = b->core.l_qseq;
+     uint8_t *old_tag;
+     /* one scratch buffer serves both tags: bam_aux_append() copies the
+      * string into the record, so it can be refilled afterwards */
+     char *qstr = malloc((len+1) * sizeof(char));
+
+     if (! qstr) {
+          LOG_FATAL("%s\n", "Out of memory while adding indel qualities");
+          exit(1);
+     }
+     qstr[len] = '\0';
+
+     memset(qstr, tmp->iq, len);
+     if ((old_tag = bam_aux_get(b, BI_TAG))) {
+          bam_aux_del(b, old_tag);
+     }
+     bam_aux_append(b, BI_TAG, 'Z', len+1, (uint8_t*) qstr);
+
+     memset(qstr, tmp->dq, len);
+     if ((old_tag = bam_aux_get(b, BD_TAG))) {
+          bam_aux_del(b, old_tag);
+     }
+     bam_aux_append(b, BD_TAG, 'Z', len+1, (uint8_t*) qstr);
+
+     bam_write1(tmp->out, b);
+     free(qstr);
+
+     return 0;
+}
+
+
+/* Fills count[0..qlen-1] for the sequence query[0..qlen-1]: the first
+ * position of each homopolymer run receives the length of that run, every
+ * other position receives 1 (a run of one base is its own start, so it
+ * gets 1 as well). Example: "AACCC" -> 2,1,3,1,1.
+ * Nothing is written if qlen <= 0 or if either pointer is NULL.
+ * Always returns 0. */
+int find_homopolymers(char *query, int *count, int qlen)
+{
+     int run_start = 0;
+
+     if (! query || ! count || qlen <= 0) {
+          return 0; /* nothing to fill in; never touch count[0] of an empty array */
+     }
+
+     while (run_start < qlen) {
+          int run_end = run_start + 1; /* first index past the current run */
+
+          while (run_end < qlen && query[run_end] == query[run_start]) {
+               count[run_end] = 1; /* inside a run, but not its start */
+               run_end++;
+          }
+          count[run_start] = run_end - run_start;
+          run_start = run_end;
+     }
+
+     return 0;
+}
+
+
+/* Sets the BI_TAG and BD_TAG aux fields of read b to Dindel-style indel
+ * qualities and writes it to the output BAM. data is a data_t_dindel* caching
+ * the homopolymer counts of the reference sequence used last.
+ * Returns 0; exits on error. */
+static int dindel_fetch_func(bam1_t *b, void *data)
+{
+     data_t_dindel *tmp = (data_t_dindel*)data;
+     bam1_core_t *c = &b->core;
+     uint8_t *to_delete;
+
+     /* don't change reads failing default mask (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP) or lacking a target id */
+     if ((c->flag & BAM_DEF_MASK) || c->tid < 0) {
+          bam_write1(tmp->out, b);
+          return 0;
+     }
+
+     /* get the reference sequence and compute homopolymer array */
+     if (tmp->tid != c->tid) {
+          int rlen = 0;
+          char *ref = fai_fetch(tmp->fai, tmp->in->header->target_name[c->tid], &rlen);
+          if (! ref) {
+               LOG_FATAL("Couldn't fetch reference sequence %s\n", tmp->in->header->target_name[c->tid]);
+               exit(1);
+          }
+          strtoupper(ref);/* safeguard */
+          rlen = strlen(ref);
+          free(tmp->hpcount); /* free(NULL) is a no-op */
+          tmp->hpcount = (int*)malloc((rlen+1) * sizeof(int)); /* +1: never ask for 0 bytes */
+          if (! tmp->hpcount) {
+               LOG_FATAL("%s\n", "Out of memory while counting homopolymers");
+               exit(1);
+          }
+          find_homopolymers(ref, tmp->hpcount, rlen);
+          free(ref);
+          tmp->rlen = rlen;
+          tmp->tid = c->tid;
+     }
+
+     /* parse the cigar string */
+     uint32_t *cigar = bam1_cigar(b);
+     uint8_t indelq[c->l_qseq+1];
+     int i, y = 0; /* y: coordinate on query */
+     int x = c->pos; /* coordinate on reference */
+     memset(indelq, DINDELQ[0], c->l_qseq); /* default, also for bases the cigar does not reach */
+     indelq[c->l_qseq] = '\0';
+     for (i = 0; i < c->n_cigar; ++i) {
+          int j, oplen = cigar[i]>>4, op = cigar[i]&0xf;
+          const int is_aligned = (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF);
+          const int is_extra = (op == BAM_CINS || op == BAM_CSOFT_CLIP);
+          if ((is_aligned || is_extra) && oplen > c->l_qseq - y) { /* would overrun indelq */
+               LOG_FATAL("cigar of read %s is longer than its sequence\n", bam1_qname(b));
+               exit(1);
+          }
+          if (is_aligned) {
+               for (j = 0; j < oplen; j++, x++, y++) {
+                    if (x <= tmp->rlen-2 && tmp->hpcount[x+1] <= 18) {
+                         indelq[y] = DINDELQ[tmp->hpcount[x+1]];
+                    } /* else: reference end or count > 18, keep DINDELQ[0] */
+               }
+          } else if (is_extra) {
+               y += oplen; /* inserted/soft-clipped bases keep DINDELQ[0] */
+          } else if (op == BAM_CDEL) {
+               x += oplen;
+          } else if (op != BAM_CHARD_CLIP) { /* hard clips need no action */
+               LOG_FATAL("unknown op %d for read %s\n", op, bam1_qname(b));/* FIXME skip? seen this somewhere else properly handled */
+               exit(1);
+          }
+     }
+
+     if ((to_delete = bam_aux_get(b, BI_TAG))) {
+          bam_aux_del(b, to_delete);
+     }
+     bam_aux_append(b, BI_TAG, 'Z', c->l_qseq+1, indelq);
+     if ((to_delete = bam_aux_get(b, BD_TAG))) {
+          bam_aux_del(b, to_delete);
+     }
+     bam_aux_append(b, BD_TAG, 'Z', c->l_qseq+1, indelq);
+     bam_write1(tmp->out, b);
+     return 0;
+}
+
+
+/* Copies bam_in to bam_out (NULL or a leading '-' means stdout), storing
+ * ins_qual in BI_TAG and del_qual in BD_TAG of every read (offset by 33 and
+ * clamped by ENCODE_Q). Returns 0 on success, 1 if a file can't be opened. */
+int add_uniform(const char *bam_in, const char *bam_out,
+                const int ins_qual, const int del_qual)
+{
+     data_t_uniform tmp;
+     bam1_t *b = NULL;
+     int count = 0;
+
+     if ((tmp.in = samopen(bam_in, "rb", 0)) == 0) {
+          LOG_FATAL("Failed to open BAM file %s\n", bam_in);
+          return 1;
+     }
+     tmp.iq = ENCODE_Q(ins_qual+33);
+     tmp.dq = ENCODE_Q(del_qual+33);
+     tmp.out = (!bam_out || bam_out[0] == '-')
+          ? bam_dopen(fileno(stdout), "w") : bam_open(bam_out, "w");
+     if (! tmp.out) { /* don't hand a NULL handle to bam_header_write() */
+          LOG_FATAL("Failed to open output BAM file %s\n", bam_out ? bam_out : "-");
+          samclose(tmp.in);
+          return 1;
+     }
+     bam_header_write(tmp.out, tmp.in->header);
+
+     b = bam_init1();
+     while (samread(tmp.in, b) >= 0) {
+          count++;
+          uniform_fetch_func(b, &tmp);
+     }
+     bam_destroy1(b);
+     samclose(tmp.in);
+     bam_close(tmp.out);
+     LOG_VERBOSE("Processed %d reads\n", count);
+     return 0;
+}
+
+
+/* Copies bam_in to bam_out (NULL or a leading '-' means stdout), adding Dindel indel qualities
+ * based on the fasta file ref to the reads. Returns 0 on success, 1 if a file can't be opened. */
+int add_dindel(const char *bam_in, const char *bam_out, const char *ref)
+{
+     data_t_dindel tmp;
+     int count = 0;
+     bam1_t *b = NULL;
+     if ((tmp.in = samopen(bam_in, "rb", 0)) == 0) {
+          LOG_FATAL("Failed to open BAM file %s\n", bam_in);
+          return 1;
+     }
+     if ((tmp.fai = fai_load(ref)) == 0) {
+          LOG_FATAL("Failed to open reference file %s\n", ref);
+          samclose(tmp.in); /* don't leak the already opened input */
+          return 1;
+     }
+     /*warn_old_fai(ref);*/
+     tmp.out = (!bam_out || bam_out[0] == '-')
+          ? bam_dopen(fileno(stdout), "w") : bam_open(bam_out, "w");
+     if (! tmp.out) { /* don't hand a NULL handle to bam_header_write() */
+          LOG_FATAL("Failed to open output BAM file %s\n", bam_out ? bam_out : "-");
+          fai_destroy(tmp.fai);
+          samclose(tmp.in);
+          return 1;
+     }
+     bam_header_write(tmp.out, tmp.in->header);
+     b = bam_init1();
+     tmp.tid = -1; tmp.hpcount = 0; tmp.rlen = 0; /* no reference sequence cached yet */
+     while (samread(tmp.in, b) >= 0) {
+          count++;
+          dindel_fetch_func(b, &tmp);
+     }
+     bam_destroy1(b);
+     free(tmp.hpcount); /* free(NULL) is a no-op */
+     samclose(tmp.in);
+     bam_close(tmp.out);
+     fai_destroy(tmp.fai);
+     LOG_VERBOSE("Processed %d reads\n", count);
+     return 0;
+}
+
+
+/* Prints the help text of the indelqual subcommand to stderr. */
+static void
+usage()
+{
+     static const char prog[] = "lofreq indelqual";
+     fprintf(stderr, "%s: Insert indel qualities into BAM file (required for indel predictions)\n\n"
+             "Usage: %s [options] in.bam\n", prog, prog);
+     fputs("Options:\n"
+           "  -u | --uniform INT[,INT]  Add this indel quality uniformly to all bases.\n"
+           "                            Use two comma separated values to specify\n"
+           "                            insertion and deletion quality separately.\n"
+           "                            (clashes with --dindel)\n"
+           "       --dindel             Add Dindel's indel qualities (Illumina specific)\n"
+           "                            (clashes with -u; needs --ref)\n"
+           "  -f | --ref                Reference sequence used for mapping\n"
+           "                            (Only required for --dindel)\n"
+           "  -o | --out FILE           Output BAM file [- = stdout = default]\n"
+           "       --verbose            Be verbose\n"
+           "\n", stderr);
+     fputs("The preferred way of inserting indel qualities should be via GATK's BQSR (>=2)"
+           " If that's not possible, use this subcommand.\n"
+           "The command has two modes: 'uniform' and 'dindel':\n"
+           "- 'uniform' will assign a given value uniformly, whereas\n"
+           "- 'dindel' will insert indel qualities based on Dindel (PMID 20980555).\n"
+           "Both will overwrite any existing values.\n"
+           "Do not realign your BAM file afterwards!\n"
+           "\n", stderr);
+}
+
+
+/* Parses "INT" or "INT,INT" from arg into the insertion (*iq) and deletion
+ * (*dq) quality; a single value sets both. Values are converted with atoi(),
+ * which stops at the comma, so arg needs no temporary copy. */
+void idq_from_arg(int *iq, int *dq, const char *arg) 
+{
+     const char *comma;
+     if (! iq || ! dq || ! arg) {
+          return;
+     }
+     comma = strchr(arg, ',');
+     (*iq) = atoi(arg);
+     (*dq) = comma ? atoi(comma+1) : (*iq);
+}
+
+/* Entry point of the 'indelqual' subcommand: adds uniform (-u) or Dindel
+ * (--dindel, needs --ref) indel qualities to the single BAM file given as
+ * last argument. Option parsing starts at argv+1, i.e. argv[0] is expected to
+ * be the main program name. Returns 0 on success, non-zero otherwise. */
+int main_indelqual(int argc, char *argv[])
+{
+     char *bam_in = NULL;
+     char *bam_out = NULL; /* - == stdout */
+     char *ref = NULL;
+     int c;
+     int rc = 0;
+     static int dindel = 0;
+     int uni_iq = -1;
+     int uni_dq = -1;
+     while (1) {
+          static struct option long_opts[] = {
+               /* see usage sync */
+               {"help", no_argument, NULL, 'h'},
+               {"verbose", no_argument, &verbose, 1},
+               {"debug", no_argument, &debug, 1},
+               {"dindel", no_argument, &dindel, 1},
+               {"out", required_argument, NULL, 'o'},
+               {"uniform", required_argument, NULL, 'u'},
+               {"ref", required_argument, NULL, 'f'},
+               {0, 0, 0, 0} /* sentinel */
+          };
+          /* keep in sync with long_opts and usage */
+          static const char *long_opts_str = "hu:f:o:";
+          /* getopt_long stores the option index here. */
+          int long_opts_index = 0;
+          c = getopt_long(argc-1, argv+1, /* skipping 'lofreq', just leaving 'command', i.e. call */
+                          long_opts_str, long_opts, & long_opts_index);
+          if (c == -1) {
+               break;
+          }
+          switch (c) {
+               /* keep in sync with long_opts etc */
+          case 'h':
+               usage();
+               return 0;
+          case 'u':
+               idq_from_arg(& uni_iq, & uni_dq, optarg);
+               break;
+          case 'f':
+               if (! file_exists(optarg)) {
+                    LOG_FATAL("Reference fasta file '%s' does not exist. Exiting...\n", optarg);
+                    return 1;
+               }
+               ref = strdup(optarg);
+               break;
+          case 'o':
+               if (0 != strcmp(optarg, "-")) {
+                    if (file_exists(optarg)) {
+                         LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg);
+                         return 1;
+                    }
+               }
+               bam_out = strdup(optarg);
+               break;
+          case '?':
+               LOG_FATAL("%s\n", "unrecognized arguments found. Exiting...\n");
+               return 1;
+          default:
+               break;
+          }
+     }
+     if (1 != argc - optind - 1) {
+          fprintf(stderr, "FATAL: Need exactly one BAM file as last argument\n");
+          usage();
+          return 1;
+     }
+     bam_in = (argv + optind + 1)[0];
+     if ((0 != strcmp(bam_in, "-")) && ! file_exists(bam_in)) {
+          LOG_FATAL("BAM file %s does not exist. Exiting...\n", bam_in);
+          return -1;
+     }
+
+     if (! bam_out) {
+          bam_out = strdup("-"); /* may be NULL if out of memory; NULL means stdout to add_uniform/add_dindel */
+     }
+
+     LOG_DEBUG("uni_iq=%d\n", uni_iq);
+     LOG_DEBUG("uni_dq=%d\n", uni_dq);
+     LOG_DEBUG("bam_in=%s\n", bam_in);
+     LOG_DEBUG("bam_out=%s\n", bam_out ? bam_out : "-");
+     LOG_DEBUG("ref=%s\n", ref ? ref : "(null)"); /* never pass NULL for %s */
+
+     if ((uni_iq == -1) != (uni_dq == -1)) { /* exactly one of the two is set */
+          LOG_FATAL("internal logic error: uni_iq=%d uni_dq=%d\n", uni_iq, uni_dq);
+          exit(1);
+     }
+     if (uni_iq != -1 && uni_dq != -1) {
+          if (dindel) {
+               LOG_FATAL("%s\n", "Can't insert both, uniform and dindel qualities");
+               rc = -1;
+          } else {
+               rc = add_uniform(bam_in, bam_out, uni_iq, uni_dq);
+          }
+     } else if (dindel) {
+          if (! ref) {
+               LOG_FATAL("%s\n", "Need reference for Dindel model");
+               rc = -1;
+          } else {
+               rc = add_dindel(bam_in, bam_out, ref);
+          }
+     } else {
+          LOG_FATAL("%s\n", "Please specify either dindel or uniform mode");
+          rc = -1;
+     }
+     /* single exit: each branch above only sets rc, so this cleanup always runs */
+     free(ref);
+     free(bam_out);
+     return rc;
+}
diff --git a/src/lofreq/lofreq_indelqual.h b/src/lofreq/lofreq_indelqual.h
new file mode 100644
index 0000000..2216d91
--- /dev/null
+++ b/src/lofreq/lofreq_indelqual.h
@@ -0,0 +1,33 @@
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef LOFREQ_INDELQUAL
+#define LOFREQ_INDELQUAL
+
+int main_indelqual(int argc, char *argv[]);
+
+#endif
diff --git a/src/lofreq/lofreq_index.c b/src/lofreq/lofreq_index.c
new file mode 100644
index 0000000..f14f074
--- /dev/null
+++ b/src/lofreq/lofreq_index.c
@@ -0,0 +1,52 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+
+/* This is an almost one to one copy of the corresponding bits in samtools */
+
+#include <ctype.h>
+#include <assert.h>
+
+/* samtools includes */
+#include "bam.h"
+#include "sam.h"
+
+/* bam_index actually part of API but bam_idxstats not */
+int bam_index(int argc, char *argv[]);
+int bam_idxstats(int argc, char *argv[]);
+
+/* lofreq includes */
+#include "log.h"
+
+
+#if 1
+#define MYNAME "lofreq"
+#else
+#define MYNAME PACKAGE
+#endif
+
+
+/* faidx command: calls samfaipath() on the fasta file given as argv[2] (presumably creating its index if missing - TODO confirm); returns 1 on failure. */
+int main_faidx(int argc, char *argv[])
+{
+     char *fi;
+     if (argc < 3) { /* without a file argument argv[2] is NULL or out of range */
+          LOG_FATAL("%s\n", "Need fasta file as argument");
+          return 1;
+     }
+     fi = samfaipath(argv[2]);
+     if (! fi) { return 1; }
+     free(fi); /* only success matters here, the returned string is not used */
+     return 0;
+}
+
+/* index command: hands the BAM file given as argv[2] to bam_index_build() */
+int main_index(int argc, char *argv[])
+{
+     if (argc < 3) { LOG_FATAL("%s\n", "Need BAM file as argument"); return 1; }
+     return bam_index_build(argv[2]); /* status is passed through unchanged */
+}
+
+/* idxstats command: thin wrapper that drops the leading program name from the argument vector */
+int main_idxstats(int argc, char *argv[])
+{
+     return bam_idxstats(argc - 1, &argv[1]);
+}
diff --git a/src/lofreq/lofreq_index.h b/src/lofreq/lofreq_index.h
new file mode 100644
index 0000000..c63e0da
--- /dev/null
+++ b/src/lofreq/lofreq_index.h
@@ -0,0 +1,8 @@
+#ifndef LOFREQ_INDEX_H
+#define LOFREQ_INDEX_H
+
+int main_faidx(int argc, char *argv[]);
+int main_index(int argc, char *argv[]);
+int main_idxstats(int argc, char *argv[]);
+
+#endif
diff --git a/src/lofreq/lofreq_main.c b/src/lofreq/lofreq_main.c
new file mode 100644
index 0000000..00135f8
--- /dev/null
+++ b/src/lofreq/lofreq_main.c
@@ -0,0 +1,345 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <libgen.h>
+
+/* lofreq includes */
+#include "log.h"
+#include "utils.h"
+#ifdef USE_ALNERRPROF
+#include "lofreq_bamstats.h"
+#endif
+#include "lofreq_alnqual.h"
+#include "lofreq_checkref.h"
+#include "lofreq_filter.h"
+#include "lofreq_index.h"
+#include "lofreq_indelqual.h"
+#include "lofreq_call.h"
+#include "lofreq_uniq.h"
+#include "lofreq_vcfset.h"
+#include "lofreq_viterbi.h"
+
+#ifndef __DATE__ /* predefined by standard C; fallback for compilers lacking it */
+#define __DATE__ "NA"
+#endif
+
+/* Put dir_to_add in front of the PATH environment variable (or set PATH to
+ * it if PATH is unset). Best effort: PATH stays untouched if memory runs out. */
+static void prepend_dir_to_path(const char *dir_to_add)
+{
+     const char *PATH_NAME = "PATH";
+     char *old_path = getenv(PATH_NAME);
+     char *new_path;
+     size_t new_len;
+     if (NULL == old_path) {
+          setenv(PATH_NAME, dir_to_add, 1);
+          return;
+     }
+     new_len = strlen(dir_to_add) + 1 + strlen(old_path) + 1; /* dir ':' old '\0' */
+     new_path = malloc(new_len * sizeof(char));
+     if (NULL == new_path) {
+          return; /* never format into a NULL buffer */
+     }
+     snprintf(new_path, new_len, "%s:%s", dir_to_add, old_path);
+     setenv(PATH_NAME, new_path, 1); /* setenv() copies the value, so it can be freed */
+     free(new_path);
+}
+
+
+
+/* Prepend dirname(argv0) and the in-tree Python script directories to
+ * PATH. This way we make sure that the package works even without
+ * properly installing it and that the binary can repeatedly call itself
+ * if necessary. Silently does nothing if argv0 cannot be resolved. */
+void
+add_local_dir_to_path(char *argv0) {
+     char argv0_resolved[PATH_MAX];
+     char *dirname_argv0 = NULL;
+     int i;
+     char *extra_scripts[] = {/* used to determine directories to add */
+          "../scripts/lofreq2_somatic.py", "../tools/scripts/lofreq2_vcfplot.py", 
+          NULL};
+
+
+     /* add lofreq dir
+      */
+     if (NULL == realpath(argv0, argv0_resolved)) {
+          return;
+     }
+     if (NULL == (dirname_argv0 = strdup(dirname(argv0_resolved)))) {
+          return;
+     }
+     prepend_dir_to_path(dirname_argv0);
+     LOG_DEBUG("Adding %s to PATH\n", dirname_argv0);
+     /* each candidate is checked on its own: a missing one must not keep
+      * the remaining script directories from being added */
+     i=0;
+     while (extra_scripts[i]) {
+          char *abs_path = NULL;
+          char *rel_path = extra_scripts[i];
+          char *add_dir = NULL;
+          i++;
+
+          /* add local script dir if present
+           */
+          if (NULL == (abs_path = strdup(dirname_argv0))) {
+               free(dirname_argv0);
+               return;
+          }
+
+          if (NULL == join_paths(&abs_path, rel_path)) {
+#if 0
+               LOG_WARN("join_paths %s and %s failed\n", abs_path, rel_path);
+#endif
+               free(abs_path);
+               continue;
+          }
+          if (! file_exists(abs_path)) {
+#if 0
+               LOG_WARN("%s doesnt' exist\n", abs_path);
+#endif
+               free(abs_path);
+               continue;
+          }
+
+          add_dir = strdup(dirname(abs_path));
+          if (add_dir) { /* strdup may fail; never pass NULL on to strlen()/%s */
+               LOG_DEBUG("Adding %s to PATH\n", add_dir);
+               prepend_dir_to_path(add_dir);
+          }
+          free(abs_path);
+          free(add_dir);
+     }
+
+     free(dirname_argv0);
+}
+
+
+
+static void usage(const char *myname)
+{
+     /* Print banner and the list of sub-commands to stderr; myname is shown as program name in the usage line */
+     /*fprintf(stderr, "Version %s\n", PACKAGE_VERSION);*/
+     fprintf(stderr, "\n");
+     fprintf(stderr, ""/* see configure for unescaped version and source */
+"       |             ____|                 \n"
+"       |       _ \\   |     __|  _ \\   _` | \n"
+"       |      (   |  __|  |     __/  (   | \n"
+"      _____| \\___/  _|   _|   \\___| \\__, | \n"
+"                                        _| \n");
+     fprintf(stderr, "\n");
+     fprintf(stderr, "Fast and sensitive inference of SNVs and indels\n");     
+     fprintf(stderr, "\n");     
+     fprintf(stderr, "Usage: %s <command> [options]\n\n", myname);
+     fprintf(stderr, "  Main Commands:\n");
+     fprintf(stderr, "    call          : Call variants\n");
+     fprintf(stderr, "    call-parallel : Call variants in parallel\n");
+     fprintf(stderr, "    somatic       : Call somatic variants\n");
+     fprintf(stderr, "\n");
+     fprintf(stderr, "  Preprocessing Commands\n");
+     fprintf(stderr, "    viterbi       : Viterbi realignment\n");
+     fprintf(stderr, "    indelqual     : Insert indel qualities\n");
+     fprintf(stderr, "    alnqual       : Insert base and indel alignment qualities\n");
+     fprintf(stderr, "\n");
+     fprintf(stderr, "  Other Commands:\n");
+     fprintf(stderr, "    checkref      : Check that reference fasta and BAM file match\n");
+     fprintf(stderr, "    filter        : Filter variants in VCF file\n");
+     fprintf(stderr, "    uniq          : Test whether variants predicted in only one sample really are unique\n");
+     fprintf(stderr, "    plpsummary    : Print pileup summary per position\n");
+#ifdef USE_ALNERRPROF
+     fprintf(stderr, "    bamstats      : Collect BAM statistics\n");
+#endif
+     fprintf(stderr, "    vcfset        : VCF set operations\n");
+
+     fprintf(stderr, "    version       : Print version info\n");
+     fprintf(stderr, "\n");
+     fprintf(stderr, "  Samtools Clones:\n");
+     fprintf(stderr, "    faidx         : Create index for fasta file\n");
+     fprintf(stderr, "    index         : Create index for BAM file\n");
+     fprintf(stderr, "    idxstats      : Print stats for indexed BAM file\n");
+     fprintf(stderr, "\n");
+     fprintf(stderr, "  Extra Tools (if installed):\n");
+     fprintf(stderr, "    vcfplot       : Plot VCF statistics\n");
+     fprintf(stderr, "    cluster       : Cluster variants in VCF file (supports legacy SNP format)\n");
+     fprintf(stderr, "\n");
+     /* trailing empty line closes the help text */
+     fprintf(stderr, "\n");
+}
+
+
+
+
+/* Dispatch argv[1] to the matching sub-command and return its exit status
+ * (1 for usage errors, unimplemented/unknown commands and failed script launches). */
+int main(int argc, char *argv[])
+{
+     add_local_dir_to_path(argv[0]);
+
+     if (argc < 2) {
+          usage(BASENAME(argv[0]));
+          return 1;
+     }
+     if (strcmp(argv[1], "call") == 0)  {
+          return main_call(argc, argv);
+
+     } else if (strcmp(argv[1], "uniq") == 0)  {
+          return main_uniq(argc, argv);
+
+     } else if (strcmp(argv[1], "vcfset") == 0)  {
+          return main_vcfset(argc, argv);
+
+     } else if (strcmp(argv[1], "viterbi") == 0){
+          return main_viterbi(argc,argv);
+
+     } else if (strcmp(argv[1], "faidx") == 0)  {
+          return main_faidx(argc, argv) ;
+
+     } else if (strcmp(argv[1], "index") == 0)  {
+          return main_index(argc, argv);
+
+     } else if (strcmp(argv[1], "indelqual") == 0){
+          return main_indelqual(argc, argv);
+
+     } else if (strcmp(argv[1], "alnqual") == 0)  {
+          return main_alnqual(argc-1, argv+1);
+
+     } else if (strcmp(argv[1], "idxstats") == 0)  {
+          return main_idxstats(argc, argv);
+
+     } else if (strcmp(argv[1], "checkref") == 0) {
+          return main_checkref(argc, argv);
+
+     } else if (strcmp(argv[1], "info") == 0) {
+          LOG_FIXME("%s\n", "NOT IMPLEMENTED YET: has BI, has BD, readlen, has extra BAQ. is_paired. all based on first, say, 10k read\n");
+          return 1;
+
+     } else if (strcmp(argv[1], "wizard") == 0) {
+          LOG_FIXME("%s\n", "NOT IMPLEMENTED YET\n");
+          return 1;
+
+     } else if (strcmp(argv[1], "filter") == 0) {
+          return main_filter(argc, argv);
+
+     } else if (strcmp(argv[1], "somatic") == 0 ||
+                strcmp(argv[1], "vcfplot") == 0 ||
+                strcmp(argv[1], "call-parallel") == 0 ||
+                strcmp(argv[1], "cluster") == 0) {
+          /* these commands are external scripts which execvp() looks up via PATH */
+          char **argv_execvp;
+          int i;
+          char *somatic_script = "lofreq2_somatic.py";
+          char *parallel_script = "lofreq2_call_pparallel.py";
+          char *vcfplot_script = "lofreq2_vcfplot.py";
+          char *cluster_script = "lofreq2_cluster.py";
+          char *script_to_call;
+
+          if (strcmp(argv[1], "somatic") == 0) {
+               script_to_call = somatic_script;
+          } else if (strcmp(argv[1], "call-parallel") == 0) {
+               script_to_call = parallel_script;
+          } else if (strcmp(argv[1], "vcfplot") == 0) {
+               script_to_call = vcfplot_script;
+          } else if (strcmp(argv[1], "cluster") == 0) {
+               script_to_call = cluster_script;
+          } else {
+               LOG_FATAL("Internal error: unknown option: %s\n", argv[1]);
+               return 1;
+          }
+
+          argv_execvp = calloc(argc, sizeof(char*));
+          if (! argv_execvp) {
+               LOG_FATAL("%s\n", "out of memory");
+               return 1;
+          }
+          /* drop argv[1] (the command name); everything else is passed on */
+          argv_execvp[0] = argv[0];
+          for (i=2; i<argc; i++) {
+               argv_execvp[i-1] = argv[i];
+          }
+          argv_execvp[i-1] = NULL; /* sentinel */
+          /* execvp() only ever returns on failure */
+          execvp(script_to_call, argv_execvp);
+          perror("Calling external LoFreq script via execvp failed");
+          free(argv_execvp);
+          return 1;
+#ifdef USE_ALNERRPROF
+     } else if (strcmp(argv[1], "bamstats") == 0) {
+          return main_bamstats(argc, argv);
+#endif
+     } else if (strcmp(argv[1], "plpsummary") == 0) {
+          /* 'plpsummary' is an alias: the arguments are rewritten to
+           * "<argv0> call --plp-summary-only <remaining args>" */
+          char **argv_tmp;
+          int i, rc;
+          char plp_summary_arg[] = "--plp-summary-only";
+          /*char bam_stats_arg[] = "--bam-stats";*/
+          char *call_arg = plp_summary_arg;
+
+          /* one slot for the inserted option plus one that stays NULL, so
+           * that the new vector is NULL-terminated like a real argv */
+          argv_tmp = calloc(argc+2, sizeof(char*));
+          if (! argv_tmp) {
+               LOG_FATAL("%s\n", "out of memory");
+               return 1;
+          }
+
+          LOG_VERBOSE("'%s' is just an alias for %s call %s"
+                      "  (ignoring all the snv-call specific options)\n",
+                      argv[1], BASENAME(argv[0]), call_arg);
+          argv_tmp[0] = argv[0];
+          argv_tmp[1] = "call";
+          argv_tmp[2] = call_arg;
+          for (i=2; i<argc; i++) {
+               argv_tmp[i+1] = argv[i];
+          }
+#ifdef TRACE
+          for (i=0; i<argc+1; i++) {
+               LOG_FIXME("argv[%d] = %s\n", i, argv_tmp[i]);
+          }
+          exit(1);
+#endif
+          rc = main_call(argc+1, argv_tmp);
+          free(argv_tmp);
+          return rc;
+
+     } else if (strcmp(argv[1], "version") == 0) {
+          fprintf(stdout, "version: %s\ncommit: %s\nbuild-date: %s\n",
+                  PACKAGE_VERSION, GIT_VERSION, __DATE__);
+          return 0;
+
+     } else {
+          LOG_FATAL("Unrecognized command '%s'\n", argv[1]);
+          return 1;
+     }
+     return 0;
+}
diff --git a/src/lofreq/lofreq_uniq.c b/src/lofreq/lofreq_uniq.c
new file mode 100644
index 0000000..8b3d4ee
--- /dev/null
+++ b/src/lofreq/lofreq_uniq.c
@@ -0,0 +1,778 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+/*
+ * Notes about a potentially more refined analysis: Currently we take the
+ * frequency for one of the SNVs as a given. Ideally we would
+ * integrate over the various values possible rather than just taking the
+ * maximum-likelihood value.
+ *
+ * Frequency estimate from first sample snv call: p^hat = k_1/n_1.
+ * Current test: P_bin(n_2, p^hat) (X<=k_2).
+ *
+ * Better: Sum over k=0 to n_1 ( P_bin(n_2, k/n_1) (X<=k_2) ) * P(k).
+ *
+ * P(k) is prior from Binomial proportion confidence distribution
+ *
+ * TODO: find Binomial proportion confidence distribution to get prior
+ *
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <limits.h>
+#include <float.h>
+#include <getopt.h>
+#include <stdlib.h>
+
+/* lofreq includes */
+#include "vcf.h"
+#include "utils.h"
+#include "log.h"
+#include "binom.h"
+#include "plp.h"
+#include "defaults.h"
+#include "snpcaller.h"
+#include "multtest.h"
+
+#if 1
+#define MYNAME "lofreq uniq"
+#else
+#define MYNAME PACKAGE
+#endif
+
+#define DEFAULT_UNI_FREQ -1.0
+
+#define BUF_SIZE (1<<16) /* parenthesized so the value survives use inside larger expressions */
+
+#define FILTER_ID_STRSIZE 64
+#define FILTER_STRSIZE 128
+
+const char *uniq_flag = "UNIQ";
+
+const char *uniq_phred_tag = "UQ";
+
+
+typedef struct {
+     int thresh;/* minimum UQ phred value; 0 switches the threshold filter off (see apply_uniq_threshold) */
+     int mtc_type;/* multiple testing correction type (MTC_* value): holm; holmbonf; fdr; none */
+     double alpha;/* cut-off which corrected probabilities are compared against */
+     long int ntests;/* number of tests for the correction; 0 means: use the number of variants */
+     char id[FILTER_ID_STRSIZE];/* id handed to vcf_var_add_to_filter() for failing variants */
+} uniq_filter_t;
+
+
+typedef struct {
+     float uni_freq;/* test frequency assumed for every variant; the variant's own AF is used if <= 0 */
+     vcf_file_t vcf_out;
+     vcf_file_t vcf_in;
+     int use_det_lim;/* non-zero: uniq_snv runs the detection limit test instead of the binomial test */
+     int output_all; /* catch! doesn't actually work if there's no coverage in BAM because mpileup will skip target function */
+     uniq_filter_t uniq_filter;
+     /* changing per pos: the var to test */
+     var_t *var;
+} uniq_conf_t;
+
+
+
+
+
+/* Return the value stored under the UQ info key of var, or 0 if the key is absent */
+int uniq_phred_from_var(var_t *var)
+{
+     char *uq_char = NULL;
+     int uq;
+     if ( ! vcf_var_has_info_key(&uq_char, var, uniq_phred_tag)) {
+          return 0; /* missing because no coverage or other reasons. not unique anyway */
+     }
+     uq = (int) strtol(uq_char, (char **)NULL, 10);/* atoi replacement */
+     free(uq_char);
+     return uq;
+}
+
+
+/* Add the uniq filter id to var if its UQ value is below the threshold.
+ * A threshold of 0 switches this filter off. */
+void apply_uniq_threshold(var_t *var, uniq_filter_t *uniq_filter)
+{
+     /* short-circuit: the UQ lookup is skipped while the filter is off */
+     if (uniq_filter->thresh
+         && uniq_phred_from_var(var) < uniq_filter->thresh) {
+          vcf_var_add_to_filter(var, uniq_filter->id);
+     }
+}
+
+
+/* Apply multiple testing correction to the UQ values of all vars and
+ * add the uniq filter id to every variant that is not significant,
+ * i.e. whose corrected probability is above uniq_filter->alpha.
+ *
+ * Returns 0 on success and -1 on error (unknown correction type).
+ * FIXME should be part of lofreq filter.
+ */
+int 
+apply_uniq_filter_mtc(uniq_filter_t *uniq_filter, var_t **vars, const int num_vars)
+{
+     double *uniq_probs = NULL;
+     int i;
+
+     if (uniq_filter->ntests && num_vars > uniq_filter->ntests) {
+         LOG_WARN("%s\n", "Number of predefined tests for uniq filter smaller than number of variants! Are you sure that makes sense?");
+     }
+
+     if (! uniq_filter->ntests) {
+          uniq_filter->ntests = num_vars;
+     }
+
+     /* collect uniq error probs
+      */
+     uniq_probs = malloc(num_vars * sizeof(double));
+     if ( ! uniq_probs) {
+          LOG_FATAL("%s\n", "out of memory");
+          exit(1);
+     }
+     for (i=0; i<num_vars; i++) {
+          uniq_probs[i] = PHREDQUAL_TO_PROB(uniq_phred_from_var(vars[i]));
+     }
+
+     /* multiple testing correction
+      */
+     if (uniq_filter->mtc_type == MTC_BONF) {
+          bonf_corr(uniq_probs, num_vars, 
+                    uniq_filter->ntests);
+
+     } else if (uniq_filter->mtc_type == MTC_HOLMBONF) {
+          holm_bonf_corr(uniq_probs, num_vars, 
+                         uniq_filter->alpha, uniq_filter->ntests);
+
+     } else if (uniq_filter->mtc_type == MTC_FDR) {
+          int num_rej = 0;
+          long int *idx_rej; /* indices of rejected i.e. significant values */
+          /* rejected entries are set to -1 so that they never exceed alpha below */
+          num_rej = fdr(uniq_probs, num_vars, 
+                        uniq_filter->alpha, uniq_filter->ntests, 
+                        &idx_rej);
+          for (i=0; i<num_rej; i++) {
+               int idx = idx_rej[i];
+               uniq_probs[idx] = -1;
+          }
+          free(idx_rej);
+
+     } else {
+          LOG_FATAL("Internal error: unknown MTC type %d\n", uniq_filter->mtc_type);
+          free(uniq_probs);
+          return -1;
+     }
+
+     for (i=0; i<num_vars; i++) {
+          if (uniq_probs[i] > uniq_filter->alpha) {
+               vcf_var_add_to_filter(vars[i], uniq_filter->id);
+          }
+     }
+
+     free(uniq_probs);
+
+     return 0;
+}
+
+
+
+/* used as pileup callback function which is not ideal since this can
+ * only work on one position (has to be ensured by caller).
+ *
+ * No cov means I won't be called through mpileup and no output will
+ * be generated. Non-sig pv means I'm not sure and no output will be
+ * generated. Only if pv is sig we will print the var
+ *
+ * needs to return void to be used as function pointer to mpileup
+ */
+void
+uniq_snv(const plp_col_t *p, void *confp)
+{
+     uniq_conf_t *conf = (uniq_conf_t *)confp;
+     char *af_char = NULL;
+     float af;
+     int is_uniq = 0;
+     int is_indel;
+     int coverage;
+
+     is_indel =  vcf_var_is_indel(conf->var);
+
+#ifdef DISABLE_INDELS
+     if (is_indel) {
+          LOG_WARN("uniq logic can't be applied to indels."
+                   " Skipping indel var at %s %d\n",
+                   conf->var->chrom, conf->var->pos+1);
+          return;
+     }
+#endif
+
+     if (0 != strcmp(p->target, conf->var->chrom) || p->pos != conf->var->pos) {
+          LOG_ERROR("wrong pileup for var. pileup for %s %d. var for %s %d\n",
+                    p->target, p->pos+1, conf->var->chrom, conf->var->pos+1);
+          return;
+     }
+
+     coverage = p->coverage_plp;
+     if (is_indel) {
+          coverage -= p->num_tails;
+     }
+     if (1 > coverage) {
+          return;
+     }
+
+     if (conf->uni_freq <= 0.0) {
+          if (! vcf_var_has_info_key(&af_char, conf->var, "AF")) {
+               LOG_FATAL("%s\n", "Couldn't parse AF (key not found) from variant");
+               /* hard to catch error later */
+               exit(1);
+          }
+          af = strtof(af_char, (char **)NULL); /* atof */
+          free(af_char);
+          if (af < 0.0 || af > 1.0) {
+               float new_af;
+               new_af = af<0.0 ? 0.01 : 1.0;
+               /* hard to catch error later */
+               LOG_FATAL("Invalid (value out of bound) AF %f in variant. Resetting to %f\n", af, new_af);
+               af = new_af;
+          }
+
+     } else {
+          assert(conf->uni_freq <= 1.0);
+          af = conf->uni_freq;
+     }
+
+
+     if (conf->use_det_lim) {
+          /* given the current base counts and their error probs,
+           * would we've been able to detect at given frequency.
+           */
+          long double pvalues[NUM_NONCONS_BASES];
+          double *err_probs; /* error probs (qualities) passed down to snpcaller */
+          int num_err_probs;
+
+          int alt_bases[NUM_NONCONS_BASES];/* actual alt bases */
+          int alt_counts[NUM_NONCONS_BASES]; /* counts for alt bases handed down to snpcaller */
+          int alt_raw_counts[NUM_NONCONS_BASES]; /* raw, unfiltered alt-counts */
+          varcall_conf_t varcall_conf;
+
+          int bonf = 1;
+          float alpha = 0.01;
+
+          init_varcall_conf(&varcall_conf);
+          if (debug) {
+               dump_varcall_conf(&varcall_conf, stderr);
+          }
+
+          plp_to_errprobs(&err_probs, &num_err_probs,
+                          alt_bases, alt_counts, alt_raw_counts,
+                          p, &varcall_conf);
+          LOG_DEBUG("at %s:%d with cov %d and num_err_probs %d\n", 
+              p->target, p->pos, coverage, num_err_probs);
+
+          /* Now pretend we see AF(SNV-to-test)*coverage variant
+           * bases. Truncate to int, i.e err on the side of caution
+           * during rounding (assume fewer alt bases) */
+          alt_counts[0] = af * num_err_probs; /* don't use coverage as that is before filtering */
+          alt_counts[1] = alt_counts[2] = 0;
+
+          if (snpcaller(pvalues, err_probs, num_err_probs,
+                        alt_counts, bonf, alpha)) {
+               fprintf(stderr, "FATAL: snpcaller() failed at %s:%s():%d\n",
+                       __FILE__, __FUNCTION__, __LINE__);
+               free(err_probs);
+               return;
+          }
+
+          /* only need to test first pv */
+          if (pvalues[0] * (float)bonf < alpha) {
+              /* significant value means given the counts and
+               * qualities we would have been able to detect this
+               * uncalled SNV had it been present at the given
+               * frequency. But since we didn't this is a uniq
+               * variant.
+               * 
+               * No point in adding this as phred qual because it
+               * means the opposite of UQ
+               */
+               is_uniq = 1; /* otherwise the log line below always reports 0 */
+               vcf_var_add_to_info(conf->var, uniq_flag);
+          }
+
+          LOG_VERBOSE("%s %d num_quals=%d assumed-var-counts=%d would-have-been-detectable=%d\n",
+               conf->var->chrom, conf->var->pos+1, num_err_probs, alt_counts[0], is_uniq);
+          free(err_probs);
+          
+     } else {
+          int alt_count;
+          double pvalue;
+          char info_str[128];
+
+          if (is_indel) {
+               int ref_len = strlen(conf->var->ref);
+               int alt_len = strlen(conf->var->alt);
+               if (ref_len > alt_len) { /* deletion */
+                    char *del_key = strdup(conf->var->ref+1); /* key is REF without its first character */
+                    if (! del_key) { LOG_FATAL("%s\n", "out of memory"); exit(1); }
+                    del_event *it_del = find_del_sequence(&p->del_event_counts, del_key);
+                    if (it_del) {
+                         alt_count = it_del->count;
+                    } else {
+                         alt_count = 0;
+                    }
+                    /* LOG_DEBUG("%s>%s k:%s c:%d\n", conf->var->ref, conf->var->alt, del_key, alt_count); */
+                    free(del_key);
+               } else { /* insertion */
+                    char *ins_key = strdup(conf->var->alt+1); /* key is ALT without its first character */
+                    if (! ins_key) { LOG_FATAL("%s\n", "out of memory"); exit(1); }
+                    ins_event *it_ins = find_ins_sequence(&p->ins_event_counts, ins_key);
+                    if (it_ins) {
+                         alt_count = it_ins->count;
+                    } else {
+                         alt_count = 0;
+                    }
+                    /* LOG_DEBUG("%s>%s k:%s c:%d\n", conf->var->ref, conf->var->alt, ins_key, alt_count);*/
+                    free(ins_key);
+               }
+
+          } else {
+               alt_count = base_count(p, conf->var->alt[0]);
+          }
+
+
+#ifdef DEBUG
+          LOG_DEBUG("Now testing af=%f cov=%d alt_count=%d at %s %d for var:",
+                    af, coverage, alt_count, p->target, p->pos+1);
+#endif
+          
+          /* this is a one sided test */
+          if (0 != binom(&pvalue, NULL, coverage, alt_count, af)) {
+               LOG_ERROR("%s\n", "binom() failed");
+               return;
+          }
+
+          snprintf(info_str, 128, "%s=%d", uniq_phred_tag, PROB_TO_PHREDQUAL_SAFE(pvalue));
+          vcf_var_add_to_info(conf->var, info_str);
+
+          LOG_DEBUG("%s %d %s>%s AF=%f | %s (p-value=%g) | BAM alt_count=%d cov=%d (freq=%f)\n",
+                      conf->var->chrom, conf->var->pos+1, conf->var->ref, conf->var->alt, af,
+                      is_uniq ? "unique" : "not necessarily unique", pvalue, /* is_uniq is never set in this branch */
+                      alt_count, coverage, alt_count/(float)coverage);
+     }
+}
+
+
+static void
+usage(const uniq_conf_t* uniq_conf)
+{
+     fprintf(stderr,
+                  "\n%s: Checks whether variants predicted in one sample (listed in vcf input)" \
+                  " are unique to this sample or if they were not called in other sample due" \
+                  " to coverage issues. This is done by using a Binomial test with alternate"\
+                  " and reference counts from the BAM and the variant frequency (i.e. it's testing"\
+                  " differences in frequencies). Alternatively, the logic can be changed to" \
+                  " check whether the variant frequency would have been above LoFreq's" \
+                  " detection limit given the BAM coverage and base-qualities."\
+                  "\n\n" \
+                  "Assigns UNIQ tag to variants considered unique."\
+                  " Will ignore filtered input variants and will by default only report uniq variants.\n\n", MYNAME);
+
+     fprintf(stderr,"Usage: %s [options] indexed-in.bam\n\n", MYNAME);
+     fprintf(stderr,"Options:\n");
+     fprintf(stderr, "  -v | --vcf-in FILE      Input vcf file listing variants [- = stdin; gzip supported]\n");
+     fprintf(stderr, "  -o | --vcf-out FILE     Output vcf file [- = stdout; gzip supported]\n");
+     fprintf(stderr, "  -f | --uni-freq FLOAT   Assume variants have uniform test frequency of this value (unused if <=0) [%f]\n", uniq_conf->uni_freq);
+     fprintf(stderr, "  -t | --uniq-thresh INT  Minimum uniq phred-value required. Conflicts with -m. 0 for off (default=%d)\n", uniq_conf->uniq_filter.thresh);
+     fprintf(stderr, "  -m | --uniq-mtc STRING  Uniq multiple testing correction type. One of 'bonf', 'holm' or 'fdr'. (default=%s)\n", mtc_type_str[uniq_conf->uniq_filter.mtc_type]);
+     fprintf(stderr, "  -a | --uniq-alpha FLOAT Uniq Multiple testing correction p-value threshold (default=%f)\n", uniq_conf->uniq_filter.alpha); 
+     fprintf(stderr, "  -n | --uniq-ntests INT  Uniq multiple testing correction number of tests (default=#vars)\n");
+     fprintf(stderr, "       --output-all       Report all variants instead of only the ones, marked unique.\n");
+     fprintf(stderr, "                          Note, that variants already filtered in input will not be printed.\n");
+     fprintf(stderr, "       --use-det-lim      Report variants if they are above implied detection limit\n");
+     fprintf(stderr, "                          Default is to use binomial test to check for frequency differences\n");
+     fprintf(stderr, "       --use-orphan       Don't ignore anomalous read pairs / orphan reads\n");
+     fprintf(stderr, "       --verbose          Be verbose\n");
+     fprintf(stderr, "       --debug            Enable debugging\n");
+}
+/* usage(): prints the help text to stderr; defaults shown are read from uniq_conf */
+
+
+/* main_uniq(): entry point of the uniq subcommand.
+ *
+ * Parses the options, reads all variants from the input vcf (-v) and
+ * runs a single-position pileup on the BAM file for each of them with
+ * uniq_snv() as callback. Variants are then written to the output vcf
+ * (-o, stdout by default); unless --use-det-lim is given, the fixed
+ * threshold (-t) or the multiple testing correction (-m) is applied.
+ *
+ * argv[0] is the top-level program name, argv[1] the subcommand and
+ * the last argument the BAM file. Returns 0 on success, else non-zero.
+ */
+int
+main_uniq(int argc, char *argv[])
+{
+     int c, i, plp_rc;
+     char *bam_file = NULL;
+     char *vcf_in = NULL; /* - is accepted without existence check (presumably stdin) */
+     char *vcf_out = NULL; /* - == stdout */
+     mplp_conf_t mplp_conf;
+     uniq_conf_t uniq_conf;
+     void (*plp_proc_func)(const plp_col_t*, void*);
+     int rc = 0;
+     var_t **vars = NULL;
+     int num_vars = 0;
+     char *vcf_header = NULL;
+     static int use_det_lim = 0; /* flags are static: long_opts stores their address */
+     static int use_orphan = 0;
+     static int output_all = 0;
+
+     /* default uniq options */
+     memset(&uniq_conf, 0, sizeof(uniq_conf_t));
+     uniq_conf.uni_freq = DEFAULT_UNI_FREQ;
+     uniq_conf.use_det_lim = 0;
+
+     uniq_conf.uniq_filter.mtc_type = MTC_FDR;
+     uniq_conf.uniq_filter.alpha = 0.001;
+
+     /* default pileup options */
+     memset(&mplp_conf, 0, sizeof(mplp_conf_t));
+     mplp_conf.max_mq = DEFAULT_MAX_MQ;
+     mplp_conf.min_mq = 1;
+     mplp_conf.min_plp_bq = DEFAULT_MIN_PLP_BQ;
+     mplp_conf.max_depth = DEFAULT_MAX_PLP_DEPTH;
+     mplp_conf.flag = MPLP_NO_ORPHAN; /* cleared below by --use-orphan */
+
+    /* keep in sync with long_opts_str and usage
+     *
+     * getopt makes it painful to keep long args, short args and
+     * usage in sync. check out gopt, libcfu...
+     */
+    while (1) {
+         static struct option long_opts[] = {
+              /* see usage sync */
+              {"help", no_argument, NULL, 'h'},
+              {"verbose", no_argument, &verbose, 1},
+              {"debug", no_argument, &debug, 1},
+              {"use-det-lim", no_argument, &use_det_lim, 1},
+              {"use-orphan", no_argument, &use_orphan, 1},
+              {"output-all", no_argument, &output_all, 1},
+
+              {"vcf-in", required_argument, NULL, 'v'},
+              {"vcf-out", required_argument, NULL, 'o'},
+
+              {"uni-freq", required_argument, NULL, 'f'},
+
+              {"uniq-thresh", required_argument, NULL, 't'},
+              {"uniq-mtc", required_argument, NULL, 'm'},
+              {"uniq-alpha", required_argument, NULL, 'a'},
+              {"uniq-ntests", required_argument, NULL, 'n'},
+
+              {0, 0, 0, 0} /* sentinel */
+         };
+
+         /* keep in sync with long_opts and usage */
+         static const char *long_opts_str = "hv:o:f:t:m:a:n:";
+
+         /* getopt_long stores the option index here. */
+         int long_opts_index = 0;
+         c = getopt_long(argc-1, argv+1, /* skipping 'lofreq', just leaving 'command', i.e. call */
+                         long_opts_str, long_opts, & long_opts_index);
+         if (c == -1) {
+              break;
+         }
+
+         switch (c) {
+         /* keep in sync with long_opts etc */
+         case 'h':
+              usage(& uniq_conf);
+              rc = 0; goto free_args;
+
+         case 'v':
+              if (0 != strcmp(optarg, "-")) {
+                   if (! file_exists(optarg)) {
+                        LOG_FATAL("Input file '%s' does not exist. Exiting...\n", optarg);
+                        rc = 1; goto free_args;
+                   }
+              }
+              vcf_in = strdup(optarg);
+              break;
+
+         case 'o':
+              if (0 != strcmp(optarg, "-")) {
+                   if (file_exists(optarg)) {
+                        LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg);
+                        rc = 1; goto free_args;
+                   }
+              }
+              vcf_out = strdup(optarg);
+              break;
+
+         case 'f':
+              uniq_conf.uni_freq = strtof(optarg, (char **)NULL); /* atof */
+              if (uniq_conf.uni_freq<=0) {
+                   LOG_WARN("%s\n", "Ignoring uni-freq option");
+              }
+              if (uniq_conf.uni_freq>1.0) {
+                   LOG_FATAL("%s\n", "Value for uni-freq has to be <1.0");
+                   rc = 1; goto free_args;
+              }
+              break;
+
+         case 't':
+              if (! isdigit((unsigned char)optarg[0])) { /* cast: isdigit() is undefined for negative char values */
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   rc = -1; goto free_args;
+              }
+              uniq_conf.uniq_filter.thresh = atoi(optarg);
+              uniq_conf.uniq_filter.mtc_type = MTC_NONE; /* a fixed threshold replaces the correction */
+              break;
+         case 'm':
+              uniq_conf.uniq_filter.mtc_type = mtc_str_to_type(optarg);
+              if (-1 == uniq_conf.uniq_filter.mtc_type) {
+                   LOG_FATAL("Unknown multiple testing correction type '%s' for snv quality filtering\n", optarg);
+                   rc = -1; goto free_args;
+              }
+              break;
+         case 'a':
+              if (! isdigit((unsigned char)optarg[0])) {
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   rc = -1; goto free_args;
+              }
+              uniq_conf.uniq_filter.alpha = strtof(optarg, NULL);
+              break;
+         case 'n':
+              if (! isdigit((unsigned char)optarg[0])) {
+                   LOG_FATAL("Non-numeric argument provided: %s\n", optarg);
+                   rc = -1; goto free_args;
+              }
+              uniq_conf.uniq_filter.ntests = atol(optarg);
+              break;
+
+         case '?':
+              LOG_FATAL("%s\n", "unrecognized arguments found. Exiting...\n");
+              rc = 1; goto free_args;
+         default:
+              break;
+         }
+    }
+    if (use_orphan) {
+         mplp_conf.flag &= ~MPLP_NO_ORPHAN;
+    }
+    if (debug) {
+         dump_mplp_conf(& mplp_conf, stderr);
+    }
+    uniq_conf.output_all = output_all;
+    uniq_conf.use_det_lim = use_det_lim;
+
+#if DEBUG
+    LOG_DEBUG("uniq_conf.uniq_filter.thresh = %d\n", uniq_conf.uniq_filter.thresh);
+    LOG_DEBUG("uniq_conf.uniq_filter.mtc_type = %d\n", uniq_conf.uniq_filter.mtc_type);
+    LOG_DEBUG("uniq_conf.uniq_filter.alpha = %f\n", uniq_conf.uniq_filter.alpha);
+    LOG_DEBUG("uniq_conf.uniq_filter.ntests = %d\n", uniq_conf.uniq_filter.ntests);
+#endif
+
+    if (uniq_conf.uniq_filter.thresh && uniq_conf.uniq_filter.mtc_type != MTC_NONE) {
+         LOG_FATAL("%s\n", "Can't use fixed Unique quality threshold *and* multiple testing correction.");
+         rc = 1; goto free_args;
+    }
+
+    if (argc == 2) {
+        fprintf(stderr, "\n");
+        usage(& uniq_conf);
+        rc = 1; goto free_args;
+    }
+
+    if (1 != argc - optind - 1) {
+        fprintf(stderr, "Need exactly one BAM file as last argument\n");
+        rc = 1; goto free_args;
+    }
+    bam_file = (argv + optind + 1)[0]; /* +1 because getopt was run on argv+1 */
+    if (! file_exists(bam_file)) {
+         LOG_FATAL("BAM file %s does not exist. Exiting...\n", bam_file);
+         rc = -1; goto free_args;
+    }
+
+    if (! vcf_in) {
+         LOG_FATAL("%s\n", "No input vcf specified. Exiting...");
+         rc = -1; goto free_args;
+    }
+    if (! vcf_out) {
+         vcf_out = strdup("-");
+    }
+
+    if (vcf_file_open(& uniq_conf.vcf_in, vcf_in,
+                      HAS_GZIP_EXT(vcf_in), 'r')) {
+         LOG_ERROR("Couldn't open %s\n", vcf_in);
+         rc = 1; goto free_args;
+    }
+
+    if (vcf_file_open(& uniq_conf.vcf_out, vcf_out,
+                      HAS_GZIP_EXT(vcf_out), 'w')) {
+         LOG_ERROR("Couldn't open %s\n", vcf_out);
+         vcf_file_close(& uniq_conf.vcf_in);
+         rc = 1; goto free_args;
+    }
+
+    if (0 != vcf_parse_header(&vcf_header, & uniq_conf.vcf_in)) {
+         LOG_WARN("%s\n", "vcf_parse_header() failed. trying to rewind to start...");
+         if (vcf_file_seek(& uniq_conf.vcf_in, 0, SEEK_SET)) {
+              LOG_FATAL("%s\n", "Couldn't rewind file to parse variants"
+                        " after header parsing failed");
+              rc = 1; goto clean_and_exit;
+         }
+    } else {
+         vcf_header_add(&vcf_header, "##INFO=<ID=UNIQ,Number=0,Type=Flag,Description=\"Unique, i.e. not detectable in paired sample\">\n");
+         vcf_header_add(&vcf_header, "##INFO=<ID=UQ,Number=1,Type=Integer,Description=\"Phred-scaled uniq score at this position\">\n");
+
+         if (! uniq_conf.use_det_lim) {
+              char full_filter_str[FILTER_STRSIZE];
+              if (uniq_conf.uniq_filter.thresh > 0) {
+                   snprintf(uniq_conf.uniq_filter.id, FILTER_ID_STRSIZE, "min_uq_%d", uniq_conf.uniq_filter.thresh);
+                   snprintf(full_filter_str, FILTER_STRSIZE,
+                            "##FILTER=<ID=%s,Description=\"Minimum Uniq Phred %d\">\n",
+                            uniq_conf.uniq_filter.id, uniq_conf.uniq_filter.thresh);
+                   vcf_header_add(&vcf_header, full_filter_str);
+              } else if (uniq_conf.uniq_filter.mtc_type != MTC_NONE) {
+                   char buf[64];
+                   mtc_str(buf, uniq_conf.uniq_filter.mtc_type);
+                   snprintf(uniq_conf.uniq_filter.id, FILTER_ID_STRSIZE, "uq_%s", buf);
+                   snprintf(full_filter_str, FILTER_STRSIZE,
+                            "##FILTER=<ID=%s,Description=\"Uniq Multiple Testing Correction: %s corr. pvalue < %f\">\n",
+                            uniq_conf.uniq_filter.id, buf, uniq_conf.uniq_filter.alpha);
+                   vcf_header_add(& vcf_header, full_filter_str);
+              }
+         }
+
+         vcf_write_header(& uniq_conf.vcf_out, vcf_header);
+         free(vcf_header);
+    }
+
+    num_vars = vcf_parse_vars(&vars, & uniq_conf.vcf_in, 1);
+    if (0 == num_vars) {
+         LOG_WARN("%s\n", "Didn't find any variants in input");
+         goto clean_and_exit;
+    }
+    if (! uniq_conf.uniq_filter.ntests) {
+         uniq_conf.uniq_filter.ntests = num_vars; /* default: one test per variant */
+    }
+
+    plp_proc_func = &uniq_snv;
+
+    for (i=0; i<num_vars; i++) {
+         char reg_buf[BUF_SIZE];
+         if (i%100==0) {
+              LOG_VERBOSE("Processing variant %d of %d\n", i+1, num_vars);
+         }
+         uniq_conf.var = vars[i];
+
+         snprintf(reg_buf, BUF_SIZE, "%s:%ld-%ld",
+                  vars[i]->chrom, vars[i]->pos+1, vars[i]->pos+1);
+         mplp_conf.reg = strdup(reg_buf);
+
+         LOG_DEBUG("pileup for var no %d at %s %ld\n",
+                   i+1, uniq_conf.var->chrom, uniq_conf.var->pos+1);
+#ifdef DISABLE_INDELS
+         if (vcf_var_has_info_key(NULL, uniq_conf.var, "INDEL")) {
+              LOG_WARN("Skipping indel var at %s %ld\n",
+                       uniq_conf.var->chrom, uniq_conf.var->pos+1);
+              free(mplp_conf.reg);
+              mplp_conf.reg = NULL;
+              continue;
+         }
+#endif
+         /* no need to check for filter because done by parse_vars */
+
+         plp_rc = mpileup(&mplp_conf, plp_proc_func, (void*)&uniq_conf,
+                          1, (const char **) argv + optind + 1);
+         if (plp_rc && 0 == rc) {
+              rc = plp_rc; /* remember the first failure; later successes must not mask it */
+         }
+         if (uniq_conf.uniq_filter.thresh) {
+              apply_uniq_threshold(uniq_conf.var, & uniq_conf.uniq_filter);
+         }
+
+         free(mplp_conf.reg);
+         mplp_conf.reg = NULL;
+    }
+    uniq_conf.var = NULL;/* just be sure to not use it accidentally again */
+
+    /* print whatever we've got. there's no UQ to test or we
+     * are supposed to print all 
+     */
+    if (uniq_conf.use_det_lim) {
+         for (i=0; i<num_vars; i++) {
+              var_t *var = vars[i];
+              vcf_write_var(& uniq_conf.vcf_out, var);
+         }
+         /* all done */
+         goto clean_and_exit;
+    }
+
+    if (uniq_conf.uniq_filter.mtc_type != MTC_NONE) {
+         if (apply_uniq_filter_mtc(& uniq_conf.uniq_filter, vars, num_vars)) {
+              LOG_FATAL("%s\n", "Multiple testing correction on uniq pvalues failed");
+              rc = -1; goto clean_and_exit;
+         }
+    }
+
+    for (i=0; i<num_vars; i++) {
+         var_t *var = vars[i];
+         if (VCF_VAR_PASSES(var) || uniq_conf.output_all) {
+              vcf_write_var(& uniq_conf.vcf_out, var);
+         }
+    }
+
+clean_and_exit: /* only reached once both vcf files are open */
+    vcf_file_close(& uniq_conf.vcf_in);
+    vcf_file_close(& uniq_conf.vcf_out);
+
+    for (i=0; i<num_vars; i++) {
+         vcf_free_var(& vars[i]);
+    }
+    free(vars);
+
+    if (0==rc) {
+         LOG_VERBOSE("%s\n", "Successful exit.");
+    }
+    /* LOG_FIXME("%s\n", "allow user setting of -S and -J. Currently just using default") */
+
+free_args: /* exit path for failures before the vcf files were opened */
+    free(vcf_in);
+    free(vcf_out);
+
+    return rc;
+}
+/* main_uniq */
+
diff --git a/src/lofreq/lofreq_uniq.h b/src/lofreq/lofreq_uniq.h
new file mode 100644
index 0000000..f378f8b
--- /dev/null
+++ b/src/lofreq/lofreq_uniq.h
@@ -0,0 +1,33 @@
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef LOFREQ_UNIQ_H
+#define LOFREQ_UNIQ_H
+
+int main_uniq(int argc, char *argv[]); /* uniq subcommand; parses options from argv+1, returns 0 on success and non-zero on error */
+
+#endif /* LOFREQ_UNIQ_H */
diff --git a/src/lofreq/lofreq_vcfset.c b/src/lofreq/lofreq_vcfset.c
new file mode 100644
index 0000000..e053f16
--- /dev/null
+++ b/src/lofreq/lofreq_vcfset.c
@@ -0,0 +1,539 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <getopt.h>
+#include <stdlib.h>
+
+#include "htslib/kstring.h"
+#include "htslib/tbx.h"
+
+/* lofreq includes */
+#include "lofreq_vcfset.h"
+#include "vcf.h"
+#include "log.h"
+#include "utils.h"
+
+
+
+#if 1 /* hard-coded name that usage() prints for this subcommand */
+#define MYNAME "lofreq vcfset"
+#else /* disabled alternative: use the PACKAGE macro instead */
+#define MYNAME PACKAGE
+#endif
+
+
+typedef enum { /* set operation selected with -a | --action */
+     SETOP_UNKNOWN, /* zero value: what the memset-zeroed config holds until -a is parsed */
+     SETOP_INTERSECT, /* "intersect": variants of vcf1 that are also in vcf2 */
+     SETOP_COMPLEMENT, /* "complement": variants of vcf1 that are not in vcf2 */
+     SETOP_CONCAT /* "concat": variants of vcf1 followed by those of all further files */
+} vcfset_op_t;
+
+typedef struct { /* run-time configuration of the vcfset subcommand */
+     vcf_file_t vcf_in1; /* first input; parsed sequentially (re-opened per file for concat) */
+     vcf_file_t vcf_in2; /* not used in this file: vcf2 is read through htslib/tabix instead */
+     vcf_file_t vcf_out; /* output; only opened if --count-only is not set */
+     vcfset_op_t vcf_setop; /* requested set operation */
+     int only_passed; /* if 1, ignore any filtered variant */
+     int only_pos; /* 0: allele aware. if 1, ignore ref and alt base during comparisons.  */
+     int only_snvs; /* if 1, ignore indels in both input files */
+     int only_indels; /* if 1, ignore anything but indels in both input files */
+} vcfset_conf_t;
+
+
+
+/* Print the help text of the vcfset subcommand to stderr.
+ * vcfset_conf is accepted but not read. */
+static void
+usage(const vcfset_conf_t* vcfset_conf)
+{
+     fprintf(stderr, "%s: Perform set operations on two vcf files\n\n"
+             "Usage: %s [options] -a op -1 1.vcf -2 2.vcf \n", MYNAME, MYNAME);
+     fputs("Options:\n", stderr);
+     fputs("  -1 | --vcf1 FILE      1st VCF input file (bgzip supported)\n", stderr);
+     fputs("  -2 | --vcf2 FILE      2nd VCF input file (mandatory - except for concat - and needs to be tabix indexed)\n", stderr);
+     fputs("  -o | --vcfout         VCF output file (default: - for stdout; gzip supported).\n", stderr);
+     fputs("  -a | --action         Set operation to perform: intersect, complement or concat.\n"
+           "                        - intersect = vcf1 AND vcf2.\n"
+           "                        - complement = vcf1 \\ vcf2.\n"
+           "                        - concat = vcf1 + vcf2 ... vcfn (output as in file order, i.e. output not necessarily sorted!)\n", stderr);
+     fputs("  -I | --add-info STR   Add info field, e.g. 'SOMATIC'\n", stderr);
+     fputs("       --count-only     Don't print bases, just numbers\n", stderr);
+     fputs("       --only-pos       Disable allele-awareness by using position only (ignoring bases) as key for storing and comparison\n", stderr);
+     fputs("       --only-passed    Ignore variants marked as filtered\n", stderr);
+     fputs("       --only-snvs      Ignore anything but SNVs in both input files\n", stderr);
+     fputs("       --only-indels    Ignore anything but indels in both input files\n", stderr);
+     fputs("       --verbose        Be verbose\n", stderr);
+     fputs("       --debug          Enable debugging\n", stderr);
+     fputs("\nNote, vcf1 is always fully parsed, whereas indexing is used for vcf2.\n", stderr);
+     fputs("Therefore, use the bigger file as vcf2 to speed things up.\n", stderr);
+     fputs("Header/meta-data for the output file is taken from vcf1\n", stderr);
+}
+/* usage() */
+
+
+
+
+/* main_vcfset(): entry point of the vcfset subcommand.
+ *
+ * Reads variants from vcf1 (-1) one by one and, depending on the
+ * action (-a), writes those also found in vcf2 (intersect), those
+ * not found in vcf2 (complement) or simply all of them, followed by
+ * the variants of all further files on the command line (concat).
+ * vcf2 (-2) is queried through its tabix index.
+ *
+ * Returns 0 on success and non-zero on any error.
+ */
+int 
+main_vcfset(int argc, char *argv[])
+{
+     vcfset_conf_t vcfset_conf;
+     char *vcf_header = NULL;
+     int rc = 0;
+     char *vcf_in1, *vcf_in2, *vcf_out;
+     long int num_vars_vcf1;
+     long int num_vars_vcf1_ign, num_vars_out;
+     static int only_passed = 0;
+     static int only_pos = 0;
+     static int only_snvs = 0;
+     static int only_indels = 0;
+     static int count_only = 0;
+     tbx_t *vcf2_tbx = NULL; /* index for second vcf file */
+     htsFile *vcf2_hts = NULL;
+     char *add_info_field = NULL;
+     int vcf_concat_findex = 0;
+     int in1_is_open = 0, out_is_open = 0; /* tell cleanup which vcf files need closing */
+     vcf_in1 = vcf_in2 = vcf_out = NULL;
+     num_vars_vcf1 = 0;
+     num_vars_vcf1_ign = num_vars_out = 0;
+
+     /* default vcfset options */
+     memset(&vcfset_conf, 0, sizeof(vcfset_conf_t));
+
+    /* keep in sync with long_opts_str and usage 
+     *
+     * getopt makes it painful to keep long args, short args and
+     * usage in sync. check out gopt, libcfu...
+     */
+    while (1) {
+         int c;
+         static struct option long_opts[] = {
+              /* see usage sync */
+              {"help", no_argument, NULL, 'h'},
+              {"verbose", no_argument, &verbose, 1},
+              {"debug", no_argument, &debug, 1},
+              {"only-passed", no_argument, &only_passed, 1},
+              {"only-pos", no_argument, &only_pos, 1},
+              {"only-indels", no_argument, &only_indels, 1},
+              {"only-snvs", no_argument, &only_snvs, 1},
+              {"count-only", no_argument, &count_only, 1},
+
+              {"vcf1", required_argument, NULL, '1'},
+              {"vcf2", required_argument, NULL, '2'},
+              {"vcfout", required_argument, NULL, 'o'},
+              {"action", required_argument, NULL, 'a'},
+              {"add-info", required_argument, NULL, 'I'},
+
+              {0, 0, 0, 0} /* sentinel */
+         };
+
+         /* keep in sync with long_opts and usage */
+         static const char *long_opts_str = "h1:2:o:a:I:";
+
+         /* getopt_long stores the option index here. */
+         int long_opts_index = 0;
+         c = getopt_long(argc-1, argv+1, /* skipping 'lofreq', just leaving 'command', i.e. call */
+                         long_opts_str, long_opts, & long_opts_index);
+         if (c == -1) {
+              break;
+         }
+
+         switch (c) {
+         /* keep in sync with long_opts etc */
+         case 'h': 
+              usage(& vcfset_conf); 
+              goto cleanup; /* rc is still 0 */
+
+         case '1': 
+              vcf_in1 = strdup(optarg);
+              break;
+
+         case '2': 
+              vcf_in2 = strdup(optarg);
+              break;
+
+         case 'o':
+              if (0 != strcmp(optarg, "-")) {
+                   if (file_exists(optarg)) {
+                        LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg);
+                        rc = 1; goto cleanup;
+                   }
+              }
+              vcf_out = strdup(optarg);
+              break;
+
+         case 'a': 
+              if (0 == strcmp(optarg, "intersect")) {
+                   vcfset_conf.vcf_setop = SETOP_INTERSECT;
+
+              } else if (0 == strcmp(optarg, "complement")) {
+                   vcfset_conf.vcf_setop = SETOP_COMPLEMENT;
+
+              } else if (0 == strcmp(optarg, "concat")) {
+                   vcfset_conf.vcf_setop = SETOP_CONCAT;
+
+              } else {
+                   LOG_FATAL("Unknown action '%s'. Exiting...\n", optarg);
+                   rc = 1; goto cleanup;
+              }
+              break;
+
+         case 'I': 
+              add_info_field = strdup(optarg);
+              break;
+
+         case '?': 
+              LOG_FATAL("%s\n", "unrecognized arguments found. Exiting...\n"); 
+              rc = 1; goto cleanup;
+
+         default:
+              break;
+         }
+    }
+
+    vcfset_conf.only_passed = only_passed;
+    vcfset_conf.only_pos = only_pos;
+    vcfset_conf.only_snvs = only_snvs;
+    vcfset_conf.only_indels = only_indels;
+
+    if (vcfset_conf.only_indels && vcfset_conf.only_snvs) {
+         LOG_FATAL("%s\n", "Can't take only indels *and* only snvs into account");
+         rc = 1; goto cleanup;
+    }
+
+    if (0 != argc - optind - 1) {
+         if (vcfset_conf.vcf_setop == SETOP_CONCAT) {
+              vcf_concat_findex = optind; /* relative to argv+1; incremented before use as argv index */
+         } else {
+              LOG_FATAL("%s\n", "Unrecognized arguments found\n");
+              rc = 1; goto cleanup;
+         }
+    } else {
+         if (vcfset_conf.vcf_setop == SETOP_CONCAT) {
+              LOG_FATAL("%s\n", "No extra files for concat given\n");
+              rc = 1; goto cleanup;
+         }
+    }
+
+    if (argc == 2) {
+        fprintf(stderr, "\n");
+        usage(& vcfset_conf);
+        rc = 1; goto cleanup;
+    }
+
+    if (vcfset_conf.vcf_setop == SETOP_UNKNOWN) {
+         LOG_FATAL("%s\n", "No set operation specified");
+         usage(& vcfset_conf);
+         rc = 1; goto cleanup;
+    }
+
+    if  (vcf_in1 == NULL || (vcf_in2 == NULL && vcfset_conf.vcf_setop != SETOP_CONCAT)) {
+         LOG_FATAL("%s\n\n", "At least one vcf input file not specified");
+         usage(& vcfset_conf);
+         rc = 1; goto cleanup;
+    }
+    if (vcf_in2 != NULL && vcfset_conf.vcf_setop == SETOP_CONCAT) {
+         LOG_FATAL("%s\n\n", "For concat just use the -1 option followed by all other vcf files instead of using -2");
+         usage(& vcfset_conf);
+         rc = 1; goto cleanup;
+    }
+
+    if (vcf_file_open(& vcfset_conf.vcf_in1, vcf_in1, 
+                      HAS_GZIP_EXT(vcf_in1), 'r')) {
+         LOG_ERROR("Couldn't open %s\n", vcf_in1);
+         rc = 1; goto cleanup;
+    }
+    in1_is_open = 1;
+
+    if (vcf_in2) {
+         vcf2_hts = hts_open(vcf_in2, "r");
+         if (!vcf2_hts) {
+              LOG_FATAL("Couldn't load %s\n", vcf_in2);
+              rc = 1; goto cleanup;
+         }
+         vcf2_tbx = tbx_index_load(vcf_in2);
+         if (!vcf2_tbx) {
+              LOG_FATAL("Couldn't load tabix index for %s\n", vcf_in2);
+              rc = 1; goto cleanup;
+         }
+    }
+
+    /* vcf_out default if not set: stdout==- */
+    if (! vcf_out) {
+         vcf_out = strdup("-");
+    }
+
+    if (! count_only) {
+         if (vcf_file_open(& vcfset_conf.vcf_out, vcf_out, 
+                           HAS_GZIP_EXT(vcf_out), 'w')) {
+              LOG_ERROR("Couldn't open %s\n", vcf_out);
+              rc = 1; goto cleanup;
+         }
+         out_is_open = 1;
+    }
+
+    /* use meta-data/header of vcf_in1 for output
+     */
+    LOG_DEBUG("Getting header from %s\n", vcf_in1);
+    if (0 !=  vcf_parse_header(&vcf_header, & vcfset_conf.vcf_in1)) {
+         LOG_WARN("%s\n", "vcf_parse_header() failed");
+         if (vcf_file_seek(& vcfset_conf.vcf_in1, 0, SEEK_SET)) {
+              LOG_FATAL("%s\n", "Couldn't rewind file to parse variants"
+                        " after header parsing failed");
+              rc = -1; goto cleanup;
+         }
+    } else {
+         if (! count_only) {
+              /* vcf_write_header would write *default* header */
+              vcf_write_header(& vcfset_conf.vcf_out, vcf_header);
+         }
+         free(vcf_header);
+    }
+
+    /* parse first vcf file
+     */
+    LOG_DEBUG("Starting to parse variants from %s\n", vcf_in1);
+    while (1) {
+         var_t *var1 = NULL;
+         int parse_rc; /* kept apart from rc, which is the function's exit status */
+         int is_indel;
+         kstring_t var2_kstr = {0, 0, 0};
+         hts_itr_t *var2_itr = NULL;
+         char regbuf[1024];
+         int var2_match = 0;
+
+         vcf_new_var(&var1);
+         parse_rc = vcf_parse_var(& vcfset_conf.vcf_in1, var1);
+         if (parse_rc) {
+              free(var1);
+              if (vcfset_conf.vcf_setop != SETOP_CONCAT) {
+                   break;
+              } else {
+                   vcf_concat_findex++;
+                   if (vcf_concat_findex==argc) {
+                        break;
+                   }
+                   /* set vcf1 up anew and simply continue as if nothing happened 
+                    */
+                   vcf_file_close(& vcfset_conf.vcf_in1);
+                   in1_is_open = 0;
+                   free(vcf_in1);
+
+                   vcf_in1 = strdup(argv[vcf_concat_findex]);
+                   LOG_DEBUG("updated vcf_in1 = %s\n", vcf_in1);
+                   if (vcf_file_open(& vcfset_conf.vcf_in1, vcf_in1, 
+                                     HAS_GZIP_EXT(vcf_in1), 'r')) {
+                        LOG_ERROR("Couldn't open %s\n", vcf_in1);
+                        rc = 1; goto cleanup;
+                   }
+                   in1_is_open = 1;
+                   if (0 != vcf_skip_header(& vcfset_conf.vcf_in1)) {
+                        LOG_WARN("skip header failed for %s\n", vcf_in1);
+                   }
+                   continue;
+              }
+         }
+
+         is_indel = vcf_var_is_indel(var1);
+         if (vcfset_conf.only_snvs && is_indel) {
+              vcf_free_var(& var1); /* parsed variant: release its fields as well */
+              continue;
+         } else if (vcfset_conf.only_indels && ! is_indel) {
+              vcf_free_var(& var1);
+              continue;
+         }
+
+         if (! vcfset_conf.only_pos && NULL != strchr(var1->alt, ',')) {
+              LOG_FATAL("%s\n", "No support for multi-allelic SNVs in vcf1");
+              vcf_free_var(& var1);
+              rc = -1; goto cleanup;
+         }
+         if (vcfset_conf.only_passed && ! VCF_VAR_PASSES(var1)) {
+#ifdef TRACE
+              LOG_DEBUG("Skipping non-passing var1 %s:%ld\n", var1->chrom, var1->pos);
+#endif
+              num_vars_vcf1_ign += 1;
+              vcf_free_var(& var1);
+              continue;
+         }
+         if (add_info_field) {
+              vcf_var_add_to_info(var1, add_info_field);
+         }
+         num_vars_vcf1 += 1;
+#ifdef TRACE
+         LOG_DEBUG("Got passing var1 %s:%ld\n", var1->chrom, var1->pos);
+#endif
+
+         if (vcfset_conf.vcf_setop == SETOP_CONCAT) {
+              num_vars_out += 1;
+              if (! count_only) {
+                   vcf_write_var(& vcfset_conf.vcf_out, var1);
+              }
+              vcf_free_var(& var1);
+              /* skip comparison against vcf2 */
+              continue;
+         }
+
+         /* use index access to vcf2 */
+         snprintf(regbuf, 1024, "%s:%ld-%ld", var1->chrom, var1->pos+1, var1->pos+1);
+         var2_itr = tbx_itr_querys(vcf2_tbx, regbuf);
+         if (var2_itr) { /* no iterator means no match; var2_match is already 0 */
+              while (tbx_itr_next(vcf2_hts, vcf2_tbx, var2_itr, &var2_kstr) >= 0) {
+                   var_t *var2 = NULL;
+                   int var2_is_indel = 0;
+
+                   vcf_new_var(&var2);
+                   parse_rc = vcf_parse_var_from_line(var2_kstr.s, var2);
+                   /* LOG_FIXME("%d:%s>%s looking at var2 %d:%s>%s (reg %s)\n", 
+                             var1->pos+1, var1->ref, var1->alt,
+                             var2->pos+1, var2->ref, var2->alt, regbuf); */
+                   if (parse_rc) {
+                        LOG_FATAL("%s\n", "Error while parsing variant returned from tabix");
+                        free(var2); /* same handling as for a failed var1 parse */
+                        rc = -1;
+                        break;
+                   }
+
+                   var2_is_indel = vcf_var_is_indel(var2);
+
+                   /* iterator returns anything overlapping with that 
+                    * position, i.e. this also includes up/downstream
+                    * indels, so make sure actual position matches */
+                   if (var1->pos != var2->pos) {
+                        var2_match = 0;
+
+                   } else if (vcfset_conf.only_passed && ! VCF_VAR_PASSES(var2)) {
+                        var2_match = 0;
+
+                   } else if (vcfset_conf.only_snvs && var2_is_indel) {
+                        var2_match = 0;
+
+                   } else if (vcfset_conf.only_indels && ! var2_is_indel) {
+                        var2_match = 0;
+
+                   } else if (vcfset_conf.only_pos) {
+#ifdef TRACE
+                        LOG_DEBUG("Pos match for var2 %s:%ld\n", var2->chrom, var2->pos);
+#endif
+                        var2_match = 1;
+
+                   } else {
+                        if (0==strcmp(var1->ref, var2->ref) && 0==strcmp(var1->alt, var2->alt)) {
+#ifdef TRACE
+                             LOG_DEBUG("Full match for var2 %s:%ld\n", var2->chrom, var2->pos);
+#endif
+                             var2_match = 1;/* FIXME: check type as well i.e. snv vs indel */
+                        }
+                   }
+                   vcf_free_var(&var2);
+                   if (var2_match) {
+                        break;/* no need to continue */
+                   }
+              }
+         }
+         free(var2_kstr.s); /* line buffer filled by tbx_itr_next(); NULL if never used */
+         tbx_itr_destroy(var2_itr);
+         if (rc) {
+              vcf_free_var(& var1);
+              goto cleanup;
+         }
+
+         if (vcfset_conf.vcf_setop == SETOP_COMPLEMENT) {
+              /* relative complement : elements in A but not B */
+              if (!var2_match) {
+                   num_vars_out += 1;
+                   if (! count_only) {
+                        vcf_write_var(& vcfset_conf.vcf_out, var1);
+                   }
+              }
+         } else if (vcfset_conf.vcf_setop == SETOP_INTERSECT) {
+              if (var2_match) {
+                   num_vars_out += 1;
+                   if (! count_only) {
+                        vcf_write_var(& vcfset_conf.vcf_out, var1);
+                   }
+              }
+
+         } else {
+              LOG_FATAL("Internal error: unsupported vcf_setop %d\n", vcfset_conf.vcf_setop);
+              vcf_free_var(& var1);
+              rc = 1; goto cleanup;
+         }
+
+         vcf_free_var(& var1);
+    }/* while (1) */
+
+    LOG_VERBOSE("Parsed %ld variants from 1st vcf file (ignoring %ld non-passed of those)\n", 
+                num_vars_vcf1 + num_vars_vcf1_ign, num_vars_vcf1_ign);
+    LOG_VERBOSE("Wrote %ld variants to output\n", 
+                num_vars_out);
+    if (count_only) {
+         printf("%ld\n", num_vars_out);
+    }
+    LOG_VERBOSE("%s\n", "Successful exit.");
+
+cleanup:
+    /* single exit path: release only what has actually been set up */
+    if (in1_is_open) {
+         vcf_file_close(& vcfset_conf.vcf_in1);
+    }
+    if (vcf2_hts) {
+         hts_close(vcf2_hts);
+    }
+    if (vcf2_tbx) {
+         tbx_destroy(vcf2_tbx);
+    }
+    if (out_is_open) {
+         vcf_file_close(& vcfset_conf.vcf_out);
+    }
+    free(vcf_in1);
+    free(vcf_in2);
+    free(vcf_out);
+    free(add_info_field);
+
+    return rc;
+}
+/* main_vcfset */
+
diff --git a/src/lofreq/lofreq_vcfset.h b/src/lofreq/lofreq_vcfset.h
new file mode 100644
index 0000000..4838fe6
--- /dev/null
+++ b/src/lofreq/lofreq_vcfset.h
@@ -0,0 +1,33 @@
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef LOFREQ_VCFSET_H
+#define LOFREQ_VCFSET_H
+
+int main_vcfset(int argc, char *argv[]);
+
+#endif
diff --git a/src/lofreq/lofreq_viterbi.c b/src/lofreq/lofreq_viterbi.c
new file mode 100644
index 0000000..fd33d2f
--- /dev/null
+++ b/src/lofreq/lofreq_viterbi.c
@@ -0,0 +1,477 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+#include <ctype.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stdlib.h>
+
+#include "htslib/faidx.h"
+#include "sam.h"
+#include "viterbi.h"
+#include "log.h"
+#include "lofreq_viterbi.h"
+#include "utils.h"
+
+#define SANGERQUAL_TO_PHRED(c) ((int)(c)-33)
+
+/* FIXME: implement auto clipping of Q2 tails */
+
+#define RWIN 10
+
+typedef struct { /* state handed to fetch_func() for every read */
+     samfile_t *in;   /* input alignment file, opened with samopen() in main_viterbi */
+     bamFile out;     /* output BAM handle every read is written to */
+     faidx_t *fai;    /* reference fasta index, loaded with fai_load() */
+     uint32_t tid;    /* target id of the sequence currently held in ref */
+     char *ref;       /* reference sequence returned by fai_fetch(); 0 before the first fetch */
+     int reflen;      /* length reported by fai_fetch() for ref */
+} tmpstruct_t;
+
+static void replace_cigar(bam1_t *b, int n, uint32_t *cigar)
+{ /* overwrite the cigar stored in b->data with the n ops found in cigar */
+    if (n != b->core.n_cigar) { /* op count changes: the data behind the cigar has to move */
+        int o = b->core.l_qname + b->core.n_cigar * 4; /* offset of the first byte after the old cigar */
+        if (b->data_len + (n - b->core.n_cigar) * 4 > b->m_data) { /* new size exceeds m_data */
+            b->m_data = b->data_len + (n - b->core.n_cigar) * 4;
+            kroundup32(b->m_data);
+            b->data = (uint8_t*)realloc(b->data, b->m_data); /* NOTE(review): result is not checked for NULL */
+        }
+        memmove(b->data + b->core.l_qname + n * 4, b->data + o, b->data_len - o); /* shift the tail to its new offset */
+        memcpy(b->data + b->core.l_qname, cigar, n * 4); /* 4 bytes per op */
+        b->data_len += (n - b->core.n_cigar) * 4;
+        b->core.n_cigar = n;
+    } else memcpy(b->data + b->core.l_qname, cigar, n * 4); /* same op count: overwrite in place */
+}
+
+
+/* Counts the base qualities in bqual (NUL-terminated, offset by 33) that
+ * are not Q2 and stores that count in *num, so that the caller can size
+ * the array needed for the median. Returns 1 if all qualities are Q2, else 0.
+ */
+int check_Q2(char *bqual, int *num){
+    const char *q;
+    int n_other = 0; /* qualities different from Q2 seen so far */
+
+    for (q = bqual; *q != '\0'; q++) {
+        if (SANGERQUAL_TO_PHRED(*q) == 2) {
+            continue;
+        }
+        n_other += 1;
+    }
+
+    *num = n_other;
+    return n_other == 0;
+}
+
+/* Copy the phred value of every non-Q2 quality in bqual to remaining, in read order. */
+void remain(char *bqual, int *remaining){
+     int *dst = remaining; /* next free slot */
+     const char *src;
+     for (src = bqual; *src != '\0'; src++) {
+          const int phred = SANGERQUAL_TO_PHRED(*src);
+          if (phred == 2) {
+               continue;
+          }
+          *dst++ = phred;
+     }
+}
+
+/* Realign one read with viterbi() and write it to tmp->out.
+ *
+ * b is the read and data the tmpstruct_t holding the open files and the
+ * cached reference. If del_flag is set, the NM, MC, MD and AS tags are
+ * removed first. q2def is handed to viterbi(); a negative value is replaced
+ * by the median of the read's non-Q2 base qualities. If reclip is set, an
+ * insertion at either end of the cigar is turned into a soft clip.
+ *
+ * Every read is written exactly once, changed or not. Returns 1 if the read
+ * was passed through because it could not be handled (hard clip, unknown
+ * cigar op, reference sequence not found) and 0 otherwise.
+ */
+static int fetch_func(bam1_t *b, void *data, int del_flag, int q2def, int reclip)
+{
+     /* see https://github.com/lh3/bwa/blob/426e54740ca2b9b08e013f28560d01a570a0ab15/ksw.c
+      * for optimizations and speedups */
+     tmpstruct_t *tmp = (tmpstruct_t*)data;
+     bam1_core_t *c = &b->core;
+     uint8_t *seq = bam1_seq(b);
+     uint32_t *cigar = bam1_cigar(b);
+     int reflen;
+
+     if (del_flag) {
+          /* once you bam_aux_del b will change and all pointers to it, so
+           * each tag is looked up right before it is deleted */
+          static const char *const tags[] = {"NM", "MC", "MD", "AS"};
+          size_t t;
+
+          for (t = 0; t < sizeof(tags)/sizeof(tags[0]); t++) {
+               uint8_t *old_tag = bam_aux_get(b, tags[t]);
+               if (old_tag) {
+                    bam_aux_del(b, old_tag);
+               }
+          }
+     }
+
+     if (c->flag & BAM_FUNMAP) {
+          bam_write1(tmp->out, b);
+          return 0;
+     }
+
+     /* fetch reference sequence if incorrect tid */
+     if (tmp->tid != c->tid) {
+          free(tmp->ref); /* free(NULL) is a no-op */
+          tmp->ref = fai_fetch(tmp->fai, tmp->in->header->target_name[c->tid], &reflen);
+          if (tmp->ref == NULL) {
+               fprintf(stderr, "failed to find reference sequence %s\n",
+                                tmp->in->header->target_name[c->tid]);
+               /* nothing to align against: pass the read through and forget
+                * the cached tid so that the next read triggers a new fetch */
+               tmp->tid = -1;
+               bam_write1(tmp->out, b);
+               return 1;
+          }
+          strtoupper(tmp->ref);/* safeguard */
+          tmp->tid = c->tid;
+          tmp->reflen = reflen;
+     }
+     int i;
+
+     // remove soft clipped bases
+     char query[c->l_qseq+1];
+     char bqual[c->l_qseq+1];
+
+     int x = c->pos; // coordinate on reference
+     int y = 0; // coordinate on query
+     int z = 0; // coordinate on query w/o softclip
+
+     int indels = 0;
+
+     // parse cigar string
+     for (i = 0; i < c->n_cigar; ++i) {
+          int j, oplen = cigar[i] >> 4, op = cigar[i]&0xf;
+          if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
+               for (j = 0; j < oplen; j++) {
+                    query[z] = bam_nt16_rev_table[bam1_seqi(seq, y)];
+                    bqual[z] = (char)bam1_qual(b)[y]+33;
+                    x++;
+                    y++;
+                    z++;
+               }
+          } else if (op == BAM_CHARD_CLIP) {
+               /* in theory we should do nothing here but hard clipping info gets lost here FIXME */
+               bam_write1(tmp->out, b);
+               return 1;
+          } else if (op == BAM_CDEL) {
+               x += oplen;
+               indels += 1;
+          } else if (op == BAM_CINS) {
+               for (j = 0; j < oplen; j++) {
+                    query[z] = bam_nt16_rev_table[bam1_seqi(seq, y)];
+                    bqual[z] = (char)bam1_qual(b)[y]+33;
+                    y++;
+                    z++;
+               }
+               indels += 1;
+          } else if (op == BAM_CSOFT_CLIP) {
+               y += oplen; /* skipped on the query, nothing is copied */
+          } else {
+               LOG_WARN("Unknown cigar op %d. Not touching read %s\n", op, bam1_qname(b));
+               bam_write1(tmp->out, b);
+               return 1;
+          }
+     }
+     query[z] = bqual[z] = '\0';
+
+     if (indels == 0) {
+          bam_write1(tmp->out, b);
+          return 0;
+     }
+     int len_remaining = 0;
+     if (check_Q2(bqual, &len_remaining)) {
+          if (reclip) {
+               // check if first op or last op is I and replace with S
+               int curr_oplen_check = cigar[0] >> 4;
+               int curr_op_check = cigar[0]&0xf;
+               if (curr_op_check == BAM_CINS) {
+                    curr_op_check = BAM_CSOFT_CLIP;
+                    cigar[0] = curr_oplen_check <<4 | curr_op_check;
+               }
+               curr_oplen_check = cigar[c->n_cigar-1] >> 4;
+               curr_op_check = cigar[c->n_cigar-1]&0xf;
+
+               if (curr_op_check == BAM_CINS) {
+                    curr_op_check = BAM_CSOFT_CLIP;
+                    cigar[c->n_cigar-1] = curr_oplen_check <<4 | curr_op_check;
+               }
+
+               replace_cigar(b,c->n_cigar,cigar);
+          }
+          bam_write1(tmp->out, b);
+          return 0;
+     }
+     int remaining[len_remaining+1];
+     remain(bqual, remaining);
+     remaining[len_remaining] = '\0';
+     if (q2def < 0) {
+          q2def = int_median(remaining, len_remaining);
+     }
+
+     /* get reference with RWIN padding */
+     int lower = c->pos - RWIN;
+     lower = lower < 0? 0: lower;
+     int upper = x + RWIN;
+     upper = upper > tmp->reflen? tmp->reflen: upper;
+     /* size the buffer by the window that is copied below: x - c->pos grows
+      * with the length of every deletion, which l_qseq+indels does not cover */
+     char ref[(upper > lower? upper - lower: 0) + 1];
+     for (z = 0, i = lower; i < upper; z++, i++) {
+          ref[z] = tmp->ref[i];
+     }
+     ref[z] = '\0';
+
+     /* run viterbi */
+     char *aln = malloc(sizeof(char)*(2*(c->l_qseq)));
+     int shift = viterbi(ref, query, bqual, aln, q2def);
+     const int aln_len = (int)strlen(aln); /* aln is not modified below, so measure it once */
+
+     /* convert to cigar */
+     uint32_t *realn_cigar = 0;
+     int realn_n_cigar = 0;
+
+     /* check if soft-clipped in the front */
+     int curr_oplen = cigar[0] >> 4;
+     int curr_op = cigar[0]&0xf;
+     if (curr_op == BAM_CSOFT_CLIP) {
+          realn_cigar = realloc(realn_cigar, (realn_n_cigar+1)*sizeof(uint32_t));
+          realn_cigar[realn_n_cigar] = curr_oplen<<4 | curr_op;
+          realn_n_cigar += 1;
+     }
+
+     /* get cigar of the realigned query */
+     curr_op = aln[0] == 'M' ? 0 : (aln[0] == 'I'? 1 : 2);
+     curr_oplen = 1;
+     for (i = 1; i < aln_len; i++) {
+          int this_op = aln[i] == 'M' ? 0 : (aln[i] == 'I' ? 1 : 2);
+          if (this_op != curr_op) {
+               realn_cigar = realloc(realn_cigar, (realn_n_cigar+1)*sizeof(uint32_t));
+               realn_cigar[realn_n_cigar] = curr_oplen<<4 | curr_op;
+               realn_n_cigar += 1;
+               curr_op = this_op;
+               curr_oplen = 1;
+          } else {
+               curr_oplen += 1;
+          }
+     }
+     realn_cigar = realloc(realn_cigar, (realn_n_cigar+1)*sizeof(uint32_t));
+     realn_cigar[realn_n_cigar] = curr_oplen<<4 | curr_op;
+     realn_n_cigar += 1;
+
+     /* check if soft-clipped in the back */
+     curr_oplen = cigar[c->n_cigar-1] >> 4;
+     curr_op = cigar[c->n_cigar-1]&0xf;
+     if (curr_op == BAM_CSOFT_CLIP) {
+          realn_cigar = realloc(realn_cigar, (realn_n_cigar+1)*sizeof(uint32_t));
+          realn_cigar[realn_n_cigar] = curr_oplen<<4 | curr_op;
+          realn_n_cigar += 1;
+     }
+
+#if 0
+     int j;
+     for (j = 0; j < realn_n_cigar; j++) {
+          curr_oplen = realn_cigar[j] >> 4;
+          curr_op = realn_cigar[j]&0xf;
+          fprintf(stderr, "op:%d oplen:%d\n", curr_op, curr_oplen);
+     }
+#endif
+
+     /* check if read was shifted */
+     if (shift-(c->pos-lower) != 0) {
+          LOG_VERBOSE("Read %s with shift of %d at original pos %s:%d\n",
+                      bam1_qname(b), shift-(c->pos-lower),
+                      tmp->in->header->target_name[c->tid], c->pos);
+          c->pos = c->pos + (shift - (c->pos - lower));
+     }
+
+     if (reclip) {
+          // check if first op or last op is I and replace with S
+          int curr_oplen_reclip = realn_cigar[0] >> 4;
+          int curr_op_reclip = realn_cigar[0]&0xf;
+
+          if (curr_op_reclip == BAM_CINS) {
+               curr_op_reclip = BAM_CSOFT_CLIP;
+               realn_cigar[0] = curr_oplen_reclip <<4 | curr_op_reclip;
+          }
+          curr_oplen_reclip = realn_cigar[realn_n_cigar-1] >> 4;
+          curr_op_reclip = realn_cigar[realn_n_cigar-1]&0xf;
+
+          if (curr_op_reclip == BAM_CINS) {
+               curr_op_reclip = BAM_CSOFT_CLIP;
+               realn_cigar[realn_n_cigar-1] = curr_oplen_reclip <<4 | curr_op_reclip;
+          }
+     }
+     replace_cigar(b, realn_n_cigar, realn_cigar);
+     bam_write1(tmp->out, b);
+     free(aln);
+     free(realn_cigar);
+     return 0;
+}
+
+/* Print the command line help of "lofreq viterbi" to stderr. */
+static void usage()
+{
+     fprintf(stderr, "Usage: lofreq viterbi [options] in.bam\n");
+     fprintf(stderr, "Options:\n");
+     fprintf(stderr, "     -f | --ref FILE     Indexed reference fasta file [null]\n");
+     fprintf(stderr, "     -k | --keepflags    Don't delete flags MC, MD, NM and AS, which are all prone to change during realignment.\n");
+     fprintf(stderr, "     -q | --defqual INT  Assume INT as quality for all bases with BQ2. Default (=-1) is to use median quality of bases in read.\n");
+/* experimental. keep enabled but don't tell user about it */
+#if 0
+     fprintf(stderr, "     -r | --reclip       Reclip insertions and/or deletions on the beginning and end of read to soft clip\n");
+     fprintf(stderr, "                         FILE HAS TO BE PREVIOUSLY UNCLIPPED!!!\n");
+#endif
+     fprintf(stderr, "     -o | --out FILE     Output BAM file [- = stdout = default]\n");
+     fprintf(stderr, "          --verbose      Be verbose\n\n");
+     fprintf(stderr, "NOTE: Output BAM file will (likely) be unsorted (use samtools sort, e.g. lofreq viterbi ... | samtools sort -')\n");
+}
+
+
+/* Entry point of "lofreq viterbi": parses the options, then passes every read
+ * of the input BAM through fetch_func(), which writes it to the output BAM.
+ * Returns 0 on success and 1 on usage or file errors. */
+int main_viterbi(int argc, char *argv[])
+{
+     tmpstruct_t tmp = {0};
+     static int del_flag = 1;
+     static int q2default = -1;
+     static int reclip = 0;
+     char *bam_out = NULL;
+     bam1_t *b = NULL;
+
+     if (argc == 2) {
+          usage();
+          return 1;
+     }
+
+     while (1) {
+          int c;
+          static struct option long_options[] = {
+               {"ref", required_argument, NULL, 'f'},
+               {"verbose", no_argument, &verbose, 1},
+               {"keepflags", no_argument, NULL, 'k'},
+               {"reclip", no_argument, NULL, 'r'},
+               {"out", required_argument, NULL, 'o'},
+               {"defqual", required_argument, NULL, 'q'},
+               {0,0,0,0}
+          };
+
+          static const char *long_opts_str = "rkf:q:o:";
+          int long_option_index = 0;
+
+          c = getopt_long(argc-1, argv+1, long_opts_str, long_options, &long_option_index);
+          if (c == -1) {
+               break;
+          }
+          switch (c){
+          case 'f':
+               if (! file_exists(optarg)) {
+                    LOG_FATAL("Reference fasta file %s does not exist. Exiting...\n", optarg);
+                    return 1;
+               }
+               tmp.fai = fai_load(optarg);
+               break;
+          case 'k':
+               del_flag = 0;
+               break;
+          case 'q':
+               q2default = atoi(optarg);
+               break;
+          case 'r':
+               reclip = 1;
+               break;
+          case 'o':
+               if (0 != strcmp(optarg, "-")) {
+                    if (file_exists(optarg)) {
+                         LOG_FATAL("Cowardly refusing to overwrite file '%s'. Exiting...\n", optarg);
+                         return 1;
+                    }
+               }
+               bam_out = strdup(optarg);
+               break;
+          case '?':
+               LOG_FATAL("%s\n", "Unrecognized arguments found. Exiting\n");
+               usage();
+               return 1; /* the message announces the exit, so do not carry on */
+          default:
+               break;
+          }
+     }
+
+     if (! tmp.fai) {
+          LOG_FATAL("%s\n", "Couldn't load reference fasta file\n");
+          usage();
+          return 1;
+     }
+
+     /* get bam file */
+     if (1 != argc-optind-1){
+          LOG_FATAL("%s\n", "Need exactly one BAM file as last argument\n");
+          return 1;
+     }
+     if ((tmp.in = samopen((argv+optind+1)[0], "rb",0)) == 0){
+          LOG_FATAL("Failed to open BAM file %s. Exiting...\n", (argv+optind+1)[0]);
+          return 1;
+     }
+
+     if (!bam_out || 0 == strcmp(bam_out, "-")) { /* same test as in the option parsing */
+          tmp.out = bam_dopen(fileno(stdout), "w");
+     } else {
+          tmp.out = bam_open(bam_out, "w");
+     }
+     if (! tmp.out) {
+          LOG_FATAL("Failed to open output BAM file %s. Exiting...\n", bam_out? bam_out: "-");
+          return 1;
+     }
+     bam_header_write(tmp.out, tmp.in->header);
+
+     b = bam_init1();
+     tmp.tid = -1;
+     tmp.ref = 0;
+     while (samread(tmp.in, b) >= 0){
+          fetch_func(b, &tmp, del_flag, q2default, reclip);
+     }
+     bam_destroy1(b);
+     samclose(tmp.in);
+     bam_close(tmp.out);
+     free(tmp.ref); /* free(NULL) is a no-op */
+     fai_destroy(tmp.fai);
+     free(bam_out);
+     LOG_VERBOSE("%s\n", "NOTE: Output BAM file will be unsorted (use samtools sort, e.g. samtools sort -')");
+     return 0;
+}
diff --git a/src/lofreq/lofreq_viterbi.h b/src/lofreq/lofreq_viterbi.h
new file mode 100644
index 0000000..8de8f10
--- /dev/null
+++ b/src/lofreq/lofreq_viterbi.h
@@ -0,0 +1,35 @@
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+#ifndef LOFREQ_VITERBI_FILE
+#define LOFREQ_VITERBI_FILE
+
+/* function prototypes here */
+int main_viterbi(int argc, char *argv[]);
+
+#endif
diff --git a/src/lofreq/log.c b/src/lofreq/log.c
new file mode 100644
index 0000000..bba1841
--- /dev/null
+++ b/src/lofreq/log.c
@@ -0,0 +1,48 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+#include "log.h"
+
+int debug = 0;
+int verbose = 0;
+
+/* printf-style output to stream; returns whatever vfprintf() returns.
+ * Taken from the Linux kernel source and slightly modified. */
+int
+vout(FILE *stream, const char *fmt, ...)
+{                
+     va_list args;
+     int rc;
+     
+     va_start(args, fmt);
+     rc = vfprintf(stream, fmt, args); /* all formatting is left to vfprintf */
+     va_end(args);
+     return rc;
+}
+
diff --git a/src/lofreq/log.h b/src/lofreq/log.h
new file mode 100644
index 0000000..2405392
--- /dev/null
+++ b/src/lofreq/log.h
@@ -0,0 +1,55 @@
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef LOG_H
+#define LOG_H
+
+#include <stdarg.h>
+#include <stdio.h>
+
+extern int debug;
+extern int verbose;
+
+int
+vout(FILE *stream, const char *fmt, ...);
+
+/* print only if debug is true. NOTE(review): expands to a bare {...} block, so "if (x) LOG_DEBUG(...); else ..." does not compile */
+#define LOG_DEBUG(fmt, args...)    {if (debug) {(void)vout(stderr, "DEBUG(%s|%s): " fmt, __FILE__, __FUNCTION__, ## args);}}
+/* print only if verbose is true; same {...} caveat as for LOG_DEBUG */
+#define LOG_VERBOSE(fmt, args...)  {if (verbose) {(void)vout(stderr, fmt, ## args);}}
+/* always warn to stderr */
+#define LOG_WARN(fmt, args...)     (void)vout(stderr, "WARNING(%s|%s): " fmt, __FILE__, __FUNCTION__, ## args)
+/* always print errors to stderr */
+#define LOG_ERROR(fmt, args...)    (void)vout(stderr, "ERROR(%s|%s:%d): " fmt, __FILE__, __FUNCTION__, __LINE__, ## args)
+/* always print fatal errors to stderr; the macro only prints, it does not end the program */
+#define LOG_FATAL(fmt, args...)    (void)vout(stderr, "FATAL(%s|%s:%d): " fmt, __FILE__, __FUNCTION__, __LINE__, ## args)
+/* always print fixme's */
+#define LOG_FIXME(fmt, args...)    (void)vout(stderr, "FIXME(%s|%s:%d): " fmt, __FILE__, __FUNCTION__, __LINE__, ## args)
+/* always print notes */
+#define LOG_NOTE(fmt, args...)     (void)vout(stderr, "NOTE(%s|%s:%d): " fmt, __FILE__, __FUNCTION__, __LINE__, ## args)
+
+#endif
diff --git a/src/lofreq/multtest.c b/src/lofreq/multtest.c
new file mode 100644
index 0000000..cdcd7ac
--- /dev/null
+++ b/src/lofreq/multtest.c
@@ -0,0 +1,603 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include <ctype.h>
+
+/* lofreq includes */
+#include "utils.h"
+#include "multtest.h"
+#ifdef MULTTEST_TEST
+#include "time.h"
+#endif
+
+
+typedef struct {
+     double p;    /* the p-value */
+     long int i;  /* position of p in the array it was copied from */
+} ixp_t; /* indexed p-value */
+
+int
+ixp_dbl_cmp(const void *a, const void *b)
+{
+     /* qsort() comparator for ixp_t elements: only the p-values are
+      * compared (through dbl_cmp), the stored indices are ignored */
+     return dbl_cmp(&((const ixp_t *)a)->p, &((const ixp_t *)b)->p);
+}
+
+
+/* Bonferroni correction, done in place: each of the size values in data is
+ * multiplied by the number of tests.
+ *
+ * num_tests (AKA bonf fac) is that number; size is used instead if
+ * num_tests < 1. The products are not capped.
+ */
+void
+bonf_corr(double data[], long int size, long int num_tests)
+{
+     long int bonf_fac = num_tests;
+     long int left = size; /* values still to be corrected */
+     double *p = data;
+
+     if (bonf_fac < 1) {
+          /* no usable number of tests given */
+          bonf_fac = size;
+     }
+
+     for (; left > 0; left--) {
+          *p++ *= bonf_fac;
+     }
+}
+
+
+/* Holm-Bonferroni style correction; corrected values are written back to data.
+ *
+ * data is of size size. alpha = type-1 error cutoff for each test.
+ * num_tests is the number of tests; if it is < 1, size is used instead.
+ *
+ * NOTE: a value is only overwritten if p/factor is below alpha; all other
+ * values are left as they are. Nothing is done for size < 1.
+ */
+void
+holm_bonf_corr(double data[], long int size, double alpha, long int num_tests)
+{
+     /* number of tests the correction factor starts from */
+     const long int n_tests = (num_tests<1) ? size : num_tests;
+     long int lp = n_tests; /* current correction factor */
+     long int i;
+     double tp;
+     double pp;
+     ixp_t *iarr;
+
+     if (size < 1) {
+          /* nothing to correct; also keeps iarr[0] below from being read
+           * out of an empty allocation */
+          return;
+     }
+     iarr = malloc(size * sizeof(ixp_t));
+
+     /* first index the pvalues and store them in the ixp data */
+     for (i = 0; i < size; i++) {
+          iarr[i].i = i;
+          iarr[i].p = data[i];
+     }
+     qsort(iarr, size, sizeof(ixp_t), ixp_dbl_cmp);
+
+     pp = iarr[0].p;
+     for (i = 0; i < size; i++) {
+          /* if the pvalue is different update lp according to how
+           * many pvalues have been seen and update the previous
+           * pvalue seen */
+          if (dbl_cmp(&iarr[i].p, &pp) != 0) {
+               /* NOTE(review): this drops to <= 0 once i >= num_tests,
+                * i.e. if fewer tests than values are given - confirm that
+                * callers never do that */
+               lp = n_tests - i;
+               pp = iarr[i].p;
+          }
+          /* if below alpha, correct the pvalue in the original
+           * data */
+          tp = iarr[i].p * 1. / lp;
+          if (dbl_cmp(&tp, &alpha) < 0) {
+               data[iarr[i].i] = iarr[i].p * lp;
+          }
+     }
+
+     free(iarr);
+}
+
+
+/* Benjamini-Hochberg style FDR control: determines which of the size
+ * p-values in data are rejected at level alpha.
+ *
+ * num_tests is used as the total number of tests; size is used instead if
+ * num_tests<1.
+ *
+ * irejected is a pointer to a 1D-array of indices of rejected (i.e.
+ * significant) values. It is allocated here, i.e. user has to free. It is
+ * NULL if nothing was rejected. The number of rejected values is returned.
+ *
+ * content of data will not be overwritten
+ */
+long int
+fdr(double data[], long int size, double alpha, long int num_tests, long int **irejected)
+{
+     /* total number of tests the ranks are scaled by */
+     const long int n = (num_tests<1) ? size : num_tests;
+     ixp_t *iarr;
+     long int i;
+     long int nrejected = 0;
+
+     iarr = malloc(size * sizeof(ixp_t));
+     /* first index the pvalues and store them in ixp data to sort indices*/
+     for (i = 0; i < size; i++) {
+          iarr[i].i = i;
+          iarr[i].p = data[i];
+     }
+     qsort(iarr, size, sizeof(ixp_t), ixp_dbl_cmp);
+
+     /* starting from the largest rank, evaluate p(m) < alpha * (m/M) where m is the
+      * rank and M is the total number of pvalues. If true, reject pvals 1..m.
+      * The division is done in double: a float cannot hold every M > 2^24 */
+     for (i = size; i > 0; i--) { /* ranks are 1-based */
+          if (iarr[i-1].p < (alpha*i/(double)n)) {
+               nrejected = i; /* therefore, nrejected includes first rejected (0-based) */
+               break;
+          }
+     }
+
+     /* return data of indices to rejected pvalues */
+     *irejected = NULL;
+     if (nrejected) {
+          (*irejected) = (long int*) malloc(nrejected * sizeof(long int));
+          for (i = 0; i < nrejected; i++) {
+               (*irejected)[i] = iarr[i].i;
+          }
+     }
+     free(iarr);
+     return nrejected;
+}
+
+int
+mtc_str_to_type(char *t) {
+     int type = -1; /* what is returned for names that are not known */
+     if (!strcmp(t, "fdr")) {
+          type = MTC_FDR;
+     } else if (!strcmp(t, "bonf") || !strcmp(t, "bonferroni")) {
+          type = MTC_BONF;
+     } else if (!strcmp(t, "holm") || !strcmp(t, "holmbonf") || !strcmp(t, "holm-bonf") || !strcmp(t, "holmbonferroni")) {
+          type = MTC_HOLMBONF;
+     }
+     return type;
+}
+
+
+/* Write the lower-cased name of mtc_type (its mtc_type_str entry without the
+ * first 4 characters) to buf, which the caller must have made large enough. */
+void
+mtc_str(char *buf, int mtc_type) {
+     const char *src = mtc_type_str[mtc_type] + 4;
+     char *dst = buf;
+     /* tolower() is only defined for unsigned char values and EOF */
+     while ((*dst++ = (char)tolower((unsigned char)*src++)) != '\0')
+          ;
+}
+
+
+/* stand alone test
+
+# R results as reference:
+> p = c(2.354054e-07,2.101590e-05,2.576842e-05,9.814783e-05,1.052610e-04,1.241481e-04,1.325988e-04,1.568503e-04,2.254557e-04,3.795380e-04,6.114943e-04,1.613954e-03,3.302430e-03,3.538342e-03,5.236997e-03,6.831909e-03,7.059226e-03,8.805129e-03,9.401040e-03,1.129798e-02,2.115017e-02,4.922736e-02,6.053298e-02,6.262239e-02,7.395153e-02,8.281103e-02,8.633331e-02,1.190654e-01,1.890796e-01,2.058494e-01,2.209214e-01,2.856000e-01,3.048895e-01,4.660682e-01,4.830809e-01,4.921755e-01,5.319453e-01,5.7 [...]
+> sum(p < 0.05)
+[1] 22
+> sum(p.adjust(p, "BH") < 0.05)
+[1] 20
+> sum(p.adjust(p, "BH", 1000) < 0.05)
+[1] 10
+> sum(p.adjust(p, "BH", 100) < 0.001)
+[1] 3
+sum(p.adjust(p, "BH", 10000) < 1)
+[1] 11
+
+# Results from standalone binary:
+ps="2.354054e-07 2.101590e-05 2.576842e-05 9.814783e-05 1.052610e-04  1.241481e-04 1.325988e-04 1.568503e-04 2.254557e-04 3.795380e-04 6.114943e-04 1.613954e-03 3.302430e-03 3.538342e-03 5.236997e-03 6.831909e-03 7.059226e-03 8.805129e-03 9.401040e-03 1.129798e-02 2.115017e-02 4.922736e-02 6.053298e-02 6.262239e-02 7.395153e-02 8.281103e-02 8.633331e-02 1.190654e-01 1.890796e-01 2.058494e-01 2.209214e-01 2.856000e-01 3.048895e-01 4.660682e-01 4.830809e-01 4.921755e-01 5.319453e-01 5.7515 [...]
+$./multtest 50 0.05 $ps
+ 20 rejected with alpha 0.050000 and 50 tests
+$ ./multtest 1000 0.05 $ps
+ 10 rejected with alpha 0.050000 and 1000 tests
+$ ./multtest 100 0.001 $ps
+ 3 rejected with alpha 0.001000 and 100 tests
+$ ./multtest 10000 1 $ps
+ 11 rejected with alpha 1.000000 and 10000 tests
+
+
+ gcc -o multtest multtest.c utils.c log.c -ansi -Wall -DMULTTEST_STANDALONE -I../uthash/
+*/
+#ifdef MULTTEST_STANDALONE
+int main(int argc, char *argv[])
+{
+     int i;
+     int ntests;
+     float alpha;
+     double *data;
+     int data_size;
+     int nrejected; /* NOTE(review): fdr() returns long int, stored in an int here */
+     long int *irejected;/* indices of rejected i.e. significant values */
+     /* stand-alone driver: reads numtests, alpha and the p-values from argv,
+      * runs fdr() on them and prints the rejected p-values */
+     if (argc<4) {
+          fprintf(stderr, "Usage: %s numtests alpha p1 ... pn\n", argv[0]);
+          exit(1);
+     }
+     /*fprintf(stderr, "argc=%d\n", argc);*/
+     ntests = atoi(argv[1]);
+     alpha = atof(argv[2]);
+     fprintf(stderr, "ntests=%d alpha=%f\n", ntests, alpha);
+     data_size = argc-3; /* everything after numtests and alpha is a p-value */
+     if (ntests<data_size) {
+          fprintf(stderr, "FATAL: ntests=%d < data_size=%d\n", ntests, data_size);
+          exit(1);
+     }
+     data = malloc((data_size) * sizeof(double));
+     for (i=0; i<data_size; i++) {
+          data[i] = atof(argv[i+3]);
+          fprintf(stderr, "DEBUG data[%d]=%f\n", i, data[i]);
+     }
+
+     nrejected = fdr(data, data_size, alpha, ntests, &irejected);
+     
+     printf ("%d rejected with alpha %f and %d tests: ", nrejected, alpha, ntests);
+     if (nrejected) {
+          for (i = 0; i < nrejected; i++) {
+               printf("%f, ", data[irejected[i]]); /* irejected holds indices into data */
+          }
+     } else {
+          printf("None");
+     }
+     printf ("\n\n");
+     
+     free(data);
+     free(irejected); /* fdr() sets this to NULL if nothing was rejected */
+     exit(0);
+}
+#endif
+
+
+/* gcc -o multtest multtest.c utils.c log.c -ansi -Wall -DMULTTEST_TEST */
+#ifdef MULTTEST_TEST
+
+
+/* http://stackoverflow.com/questions/6127503/shuffle-array-in-c
+ * answer by John Leehey
+ *
+ * NELEMS: number of elements of a true array (not of a pointer). */
+#define NELEMS(x)  (sizeof(x) / sizeof((x)[0]))
+
+/* Arrange the n elements of array, each size bytes wide, in random
+ * order using rand(). Seeding is left to the caller.
+ * Only effective if n is much smaller than RAND_MAX;
+ * if this may not be the case, use a better random
+ * number generator.
+ *
+ * Elements are swapped byte by byte. This avoids a variable length
+ * array sized by the caller (not valid in C90, which the build line
+ * requests via -ansi) and avoids calling memcpy() with identical
+ * source and destination whenever an element is drawn to be swapped
+ * with itself. rand() is still called exactly once per position. */
+static void shuffle(void *array, size_t n, size_t size) {
+    unsigned char *arr = array;
+    size_t i;
+
+    if (n < 2) {
+        return; /* nothing to permute */
+    }
+    for (i = 0; i < n - 1; ++i) {
+        size_t rnd = (size_t) rand();
+        size_t j = i + rnd / (RAND_MAX / (n - i) + 1);
+        unsigned char *a = arr + i * size;
+        unsigned char *b = arr + j * size;
+        size_t k;
+
+        if (a == b) {
+            continue; /* element keeps its place */
+        }
+        for (k = 0; k < size; k++) {
+            unsigned char tmp = a[k];
+            a[k] = b[k];
+            b[k] = tmp;
+        }
+    }
+}
+
+/* Test driver, built with -DMULTTEST_TEST.
+ *
+ * Runs fdr() on two published example data sets. Each data set is
+ * tested three times: in its original order, restricted to its first
+ * `cap` values, and after shuffle(). In every case the number of
+ * rejected values is compared against exp_sig and PASS/FAIL is
+ * printed.
+ *
+ * NOTE(review): the unconditional exit(1) after the second data set
+ * makes everything below it (the holm_bonf_corr(), bonf_corr() and
+ * fdr() printouts) unreachable and gives a non-zero exit status even
+ * if all checks passed -- confirm whether this is intended.
+ */
+int main(int argc, char *argv[])
+{
+     int i;
+     int ntests;
+     srand(time(NULL));
+
+
+     /* data set 1 */
+     {
+          double data[] = {0.000001, 0.008, 0.039, 0.041, 0.042, 0.06, 0.074, 0.205, 0.212, 0.216, 0.222, 0.251, 0.269, 0.275, 0.34, 0.341, 0.384, 0.569, 0.594, 0.696, 0.762, 0.94, 0.942, 0.975, 0.986};
+          int data_len = 25;
+          float alpha = 0.25;
+          int num_tests = data_len;
+          long int* irejected;/* indices of rejected i.e. significant values */
+          int nrejected;
+          int exp_sig = 5;/* expected number of rejected values */
+
+          /* FIXME use also for bonferroni */
+          printf("*** FDR test with data from http://www.biostathandbook.com/multiplecomparisons.html\n\n");
+
+          nrejected = fdr(data, data_len, alpha, num_tests, &irejected);
+          printf ("original:\t");
+          if (nrejected) {
+               for (i = 0; i < nrejected; i++) {
+                    printf("%f, ", data[irejected[i]]);
+               }
+          } else {
+               printf("None");
+          }
+          printf ("\n");
+                     
+          if (nrejected==exp_sig) {/* FIXME should also test values */
+               printf("PASS\n\n");
+          } else {
+               printf("FAIL\n\n");
+               exit(1);
+          }
+          free(irejected);
+
+
+
+          /* only the first cap values are handed to fdr() while
+           * num_tests stays at data_len; the expected count is the
+           * same as for the full data */
+          int cap = 10;
+          printf ("capped to %d:\t", cap);
+          nrejected = fdr(data, cap, alpha, num_tests, &irejected);
+          if (nrejected) {
+               for (i = 0; i < nrejected; i++) {
+                    printf("%f, ", data[irejected[i]]);
+               }
+          } else {
+               printf("None");
+          }
+          printf ("\n");
+                     
+          if (nrejected==exp_sig) {/* FIXME should also test values */
+               printf("PASS\n\n");
+          } else {
+               printf("FAIL\n\n");
+               exit(1);
+          }
+          free(irejected);
+
+
+          /* same values in random order: the count must not change */
+          printf ("shuffled:\t");
+          shuffle(data, NELEMS(data), sizeof(data[0]));
+          nrejected = fdr(data, data_len, alpha, num_tests, &irejected);
+          if (nrejected) {
+               for (i = 0; i < nrejected; i++) {
+                    printf("%f, ", data[irejected[i]]);
+               }
+          } else {
+               printf("None");
+          }
+          printf ("\n");
+                     
+          if (nrejected==exp_sig) {/* FIXME should also test values */
+               printf("PASS\n\n");
+          } else {
+               printf("FAIL\n\n");
+               exit(1);
+          }
+          free(irejected);
+     }
+
+     /* data set 2 */
+     {
+          double data[] = {0.010,  0.013, 0.014, 0.190, 0.350, 0.500, 0.630, 0.670, 0.750, 0.810};
+          int data_len = 10;
+          float alpha = 0.05;
+          int num_tests = data_len;
+          long int* irejected;/* indices of rejected i.e. significant values */
+          int nrejected;
+          int exp_sig = 3;/* expected number of rejected values */
+
+          printf("*** FDR test with data from http://www.unc.edu/courses/2007spring/biol/145/001/docs/lectures/Nov12.html\n\n");
+
+          nrejected = fdr(data, data_len, alpha, num_tests, &irejected);
+          printf ("original:\t");
+          if (nrejected) {
+               for (i = 0; i < nrejected; i++) {
+                    printf("%f, ", data[irejected[i]]);
+               }
+          } else {
+               printf("None");
+          }
+          printf ("\n");
+                     
+          if (nrejected==exp_sig) {/* FIXME should also test values */
+               printf("PASS\n\n");
+          } else {
+               printf("FAIL\n\n");
+               exit(1);
+          }
+          free(irejected);   
+
+
+          /* cap equals data_len here, so this repeats the run above */
+          int cap = 10;
+          printf ("capped to %d:\t", cap);
+          nrejected = fdr(data, cap, alpha, num_tests, &irejected);
+          if (nrejected) {
+               for (i = 0; i < nrejected; i++) {
+                    printf("%f, ", data[irejected[i]]);
+               }
+          } else {
+               printf("None");
+          }
+          printf ("\n");
+                     
+          /* NOTE(review): unlike the checks above, a FAIL here and in
+           * the shuffled case below does not exit */
+          if (nrejected==exp_sig) {/* FIXME should also test values */
+               printf("PASS\n\n");
+          } else {
+               printf("FAIL\n\n");
+          }
+          free(irejected);   
+
+          printf ("shuffled:\t");
+          shuffle(data, NELEMS(data), sizeof(data[0]));
+          nrejected = fdr(data, data_len, alpha, num_tests, &irejected);
+          if (nrejected) {
+               for (i = 0; i < nrejected; i++) {
+                    printf("%f, ", data[irejected[i]]);
+               }
+          } else {
+               printf("None");
+          }
+          printf ("\n");
+                     
+          if (nrejected==exp_sig) {/* FIXME should also test values */
+               printf("PASS\n");
+          } else {
+               printf("FAIL\n");
+          }
+          free(irejected);   
+
+          printf ("\n");
+     }
+
+
+     /* NOTE(review): always terminates here with status 1; the code
+      * below never runs */
+     exit(1);
+
+     /* output values according to python implementation
+      *
+
+      # HolmBonferroni
+      #
+      >>> pvals = [0.010000, 0.010000, 0.030000, 0.050000, 0.005000]
+      >>> multiple_testing.HolmBonferroni(pvals).corrected_pvals
+      # [0.04, 0.04, 0.06, 0.05, 0.025]
+      >>> multiple_testing.HolmBonferroni(pvals, n=999).corrected_pvals
+      # [9.98, 9.98, 29.88, 49.75, 4.995]
+
+
+      # Bonferroni
+      #
+      >>> pvals = [0.010000, 0.010000, 0.030000, 0.050000, 0.005000]
+      >>> multiple_testing.Bonferroni(pvals).corrected_pvals
+      # [0.05, 0.05, 0.15, 0.25, 0.025]
+      >>> multiple_testing.Bonferroni(pvals, n=999).corrected_pvals
+      # [9.99, 9.99, 29.97, 49.95, 4.995]
+
+
+      # FDR
+      #
+      >>> pvals = [0.600000, 0.070000, 0.490000, 0.200000, 0.480000, 0.740000, 0.680000, 0.010000, 0.970000, 0.380000, 0.032000, 0.070000]
+      >>> sorted([pvals[i] for i in fdr.fdr(pvals, a=0.20)])
+      # [0.01, 0.032]
+
+      */
+     /* each of the loops below runs twice: with ntests=-1 and with
+      * ntests=999 */
+     for (ntests=-1; ntests<1000; ntests+=1000) {
+#if 1
+          double data[] = {0.01, 0.01, 0.03, 0.05, 0.005};
+          int arrlen = 5;
+#else
+          double data[] = {0.308799, 0.297089, 0.150936, 0.518048, 0.929977, 0.533344, 0.215166, 0.046900, 0.559717, 0.963137, 0.271862, 0.081730, 0.127197, 0.472981, 0.872270, 0.830436, 0.008807, 0.512721, 0.798971, 0.102211, 0.223481, 0.877118, 0.895051, 0.258633, 0.262466, 0.971096, 0.933613, 0.684905, 0.370404, 0.620535, 0.329913, 0.589691, 0.327275, 0.493765, 0.298086, 0.596776, 0.441615, 0.843791, 0.790259, 0.605214, 0.681068, 0.253347, 0.079298, 0.010991, 0.120785, 0.885083, 0.587 [...]
+          int arrlen = 100;
+#endif
+          float alpha = 0.05;
+          
+          printf("Testing holm-bonferroni with ntests=%d alpha=%f\n", ntests, alpha);
+
+          printf ("in: ");
+          for (i = 0; i < arrlen; i++) {
+               printf("%f, ", data[i]);
+          }
+          printf ("\n");
+
+          holm_bonf_corr(data, arrlen, alpha, ntests);
+          
+          printf ("out: ");
+          for (i = 0; i < arrlen; i++) {
+               printf("%f, ", data[i]);
+          }
+          printf ("\n\n");
+     }
+
+     for (ntests=-1; ntests<1000; ntests+=1000) {
+          double data[] = {0.01, 0.01, 0.03, 0.05, 0.005};
+          int arrlen = 5;
+          /* float alpha = 0.05; */
+
+          printf("Testing bonferroni with ntests=%d...\n", ntests);
+
+          printf ("in: ");
+          for (i = 0; i < arrlen; i++) {
+               printf("%f, ", data[i]);
+          }
+          printf ("\n");
+          
+          bonf_corr(data, arrlen, ntests);
+          
+          printf ("out: ");
+          for (i = 0; i < arrlen; i++) {
+               printf("%f, ", data[i]);
+          }
+          printf ("\n\n");
+     }
+
+
+     for (ntests=-1; ntests<1000; ntests+=1000) {
+          /* Test data from : http://udel.edu/~mcdonald/statmultcomp.html
+           */          
+#if 1
+          double data[] = {0.6, 0.07, 0.49, 0.2, 0.48, 0.74,
+                            0.68, 0.01, 0.97, 0.38, 0.032, 0.07};
+          int arrlen = 12;
+          float alpha = 0.20;
+#else
+          double data[] = {0.308799, 0.297089, 0.150936, 0.518048, 0.929977, 0.533344, 0.215166, 0.046900, 0.559717, 0.963137, 0.271862, 0.081730, 0.127197, 0.472981, 0.872270, 0.830436, 0.008807, 0.512721, 0.798971, 0.102211, 0.223481, 0.877118, 0.895051, 0.258633, 0.262466, 0.971096, 0.933613, 0.684905, 0.370404, 0.620535, 0.329913, 0.589691, 0.327275, 0.493765, 0.298086, 0.596776, 0.441615, 0.843791, 0.790259, 0.605214, 0.681068, 0.253347, 0.079298, 0.010991, 0.120785, 0.885083, 0.587 [...]
+          int arrlen = 100;
+          float alpha = 10;
+#endif
+
+          int nrejected;
+          long int* irejected;/* indices of rejected i.e. significant values */
+
+          printf("Testing fdr with ntests=%d alpha=%f...\n", ntests, alpha);
+
+          printf ("in: ");
+          for (i = 0; i < arrlen; i++) {
+               printf("%f, ", data[i]);
+          }
+          printf ("\n");
+
+          nrejected = fdr(data, arrlen, alpha, ntests, &irejected);
+          
+          printf ("out rejected: ");
+          if (nrejected) {
+               for (i = 0; i < nrejected; i++) {
+                    printf("%f, ", data[irejected[i]]);
+               }
+          } else {
+               printf("None");
+          }
+          printf ("\n\n");
+          
+          free(irejected);
+     }
+
+     return EXIT_SUCCESS;
+}
+#endif
diff --git a/src/lofreq/multtest.h b/src/lofreq/multtest.h
new file mode 100644
index 0000000..6afd5d6
--- /dev/null
+++ b/src/lofreq/multtest.h
@@ -0,0 +1,67 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef MULTTEST_H
+#define MULTTEST_H
+
+
+/* Identifiers for the multiple testing correction methods.
+ * The enumerators are used as indices into mtc_type_str[] below, so
+ * both lists have to be kept in the same order. */
+typedef enum
+{
+     MTC_NONE,
+     MTC_BONF,
+     MTC_HOLMBONF,
+     MTC_FDR
+} mtc_type_t;
+
+
+/* turns its argument into a string literal, e.g. STR(MTC_BONF) gives
+ * "MTC_BONF" */
+#define STR(name) # name
+
+/* names of the mtc_type_t enumerators, indexed by enumerator value.
+ * Being static in a header, every file including it gets its own
+ * copy. */
+static char *mtc_type_str[] = {
+    STR(MTC_NONE),
+    STR(MTC_BONF),
+    STR(MTC_HOLMBONF),
+    STR(MTC_FDR),
+};
+
+
+/* Bonferroni correction for the size p-values in data given num_tests
+ * tests. NOTE(review): presumably corrects data in place (the test
+ * driver prints data before and after the call) -- confirm against
+ * the implementation. */
+void
+bonf_corr(double data[], long int size, long int num_tests);
+
+/* Holm-Bonferroni correction for the size p-values in data given
+ * significance level alpha and num_tests tests. NOTE(review):
+ * presumably corrects data in place, see bonf_corr() -- confirm. */
+void
+holm_bonf_corr(double data[], long int size, double alpha, long int num_tests);
+
+/* FDR on the size p-values in data given significance level alpha and
+ * num_tests tests. Returns the number of rejected (i.e. significant)
+ * values and stores their indices into data in *irejected, which the
+ * callers in multtest.c release with free(). */
+long int
+fdr(double data[], long int size, double alpha, long int num_tests, long int **irejected);
+
+/* NOTE(review): presumably maps a method name to its mtc_type_t value;
+ * behaviour for unknown names is not visible here -- confirm. */
+int
+mtc_str_to_type(char *t);
+
+/* NOTE(review): presumably writes the name of mtc_type into buf; the
+ * required size of buf is not visible here -- confirm. */
+void
+mtc_str(char *buf, int mtc_type);
+
+#endif
diff --git a/src/lofreq/plp.c b/src/lofreq/plp.c
new file mode 100644
index 0000000..5330399
--- /dev/null
+++ b/src/lofreq/plp.c
@@ -0,0 +1,1455 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+/* This file is partially based on samtools' bam_plcmd.c and very
+ * likely needs an update whenever samtools/libbam is updated
+ *
+ */
+
+#include <ctype.h>
+#include <assert.h>
+#include <errno.h>
+#include <fenv.h>
+
+#include "htslib/kstring.h"
+#include "sam.h"
+
+#include "log.h"
+#include "plp.h"
+#include "vcf.h"
+#include "samutils.h"
+#include "snpcaller.h"
+#include "bam_md_ext.h"
+
+/* defined in bam_md.c as:
+const char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 };
+*/
+extern const char bam_nt16_nt4_table[];
+/* nt4 code (0..4) to nucleotide letter; the inverse of bam_nt4_table
+ * below */
+const char *bam_nt4_rev_table = "ACGTN";
+
+
+/* NOTE(review): judging by the name, set once a warning about missing
+ * BAQ values was printed; its use is not visible in this part of the
+ * file -- confirm */
+int missing_baq_warning_printed = 0;
+
+/* from bedidx.c */
+void *bed_read(const char *fn);
+void bed_destroy(void *_h);
+int bed_overlap(const void *_h, const char *chr, int beg, int end);
+
+/* From the SAM spec: "tags starting with `X', `Y' and `Z' or tags
+ * containing lowercase letters in either position are reserved for
+ * local use".
+*/
+#define SRC_QUAL_TAG "sq"
+
+/* Results on icga dream syn1.2 suggest that the extra somatic calls
+ * made with this setting are likely false positives, whereas the
+ * calls that go missing are likely true positives; therefore
+ * disabled. */
+#undef MERGEQ_FOR_CONS_CALL
+
+/* Lookup from a byte value (ASCII character code) to its nt4 code:
+ * 'A' (65) -> 0, 'C' (67) -> 1, 'G' (71) -> 2, 'T' (84) -> 3.
+ * Every other value, lowercase letters included, maps to 4, which
+ * is the index of 'N' in bam_nt4_rev_table. */
+const unsigned char bam_nt4_table[256] = {
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+     4,0,4,1,4,4,4,2,4,4,4,4,4,4,4,4,
+     4,4,4,4,3,4,4,4,4,4,4,4,4,4,4,4,
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+     4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+};
+
+/* Per-input state that mplp_func() receives through its void *data
+ * argument. */
+typedef struct {
+     bamFile fp;        /* BAM file that reads are taken from */
+     bam_iter_t iter;   /* if set, reads come from bam_iter_read() on this iterator, otherwise from bam_read1() */
+     bam_header_t *h;
+     int ref_id;        /* NOTE(review): presumably id of the sequence held in ref -- confirm */
+     char *ref;         /* NOTE(review): presumably the reference sequence -- confirm */
+     const mplp_conf_t *conf;
+} mplp_aux_t;
+
+/* NOTE(review): appears to mirror the pileup bookkeeping of samtools'
+ * bam_plcmd.c (n inputs with per-input pileup arrays and their sizes);
+ * no use is visible in this part of the file -- confirm. */
+typedef struct {
+    int n;
+    int *n_plp, *m_plp;
+    bam_pileup1_t **plp;
+} mplp_pileup_t;
+
+
+/* only present when built with USE_ALNERRPROF */
+#ifdef USE_ALNERRPROF
+static alnerrprof_t *alnerrprof = NULL;
+#endif
+
+
+/* Reset a caller-allocated mplp_conf_t: all members are zeroed first,
+ * then the quality thresholds, the pileup depth limit and the flag
+ * bits are set to their compile-time defaults. */
+void init_mplp_conf(mplp_conf_t *c)
+{
+     memset(c, 0, sizeof *c);
+
+     /* mapping quality window and quality used for non-matches */
+     c->min_mq = DEFAULT_MIN_MQ;
+     c->max_mq = DEFAULT_MAX_MQ;
+     c->def_nm_q = DEFAULT_DEF_NM_QUAL;
+
+     /* pileup level filters; note: min_plp_bq is different from
+      * DEFAULT_MIN_BQ */
+     c->min_plp_bq = DEFAULT_MIN_PLP_BQ;
+     c->min_plp_idq = DEFAULT_MIN_PLP_IDQ;
+     c->max_depth = DEFAULT_MAX_PLP_DEPTH;
+
+     c->flag = MPLP_NO_ORPHAN
+          | MPLP_BAQ
+          | MPLP_EXT_BAQ
+          | MPLP_IDAQ;
+}
+
+
+
+/* Convenience function: total (forward plus reverse) count of base in
+ * pileup column p.
+ *
+ * base is translated through bam_nt4_table, so anything other than an
+ * uppercase A, C, G or T ends up in the catch-all slot 4. The lookup
+ * index is taken as unsigned char: plain char may be signed, and a
+ * negative value would otherwise index in front of the 256 entry
+ * table. */
+int
+base_count(const plp_col_t *p, char base)
+{
+     int b = bam_nt4_table[(unsigned char)base];
+     return p->fw_counts[b] + p->rv_counts[b];
+}
+
+
+
+/* Put pileup column p into its empty start state: no target, position
+ * -INT_MAX, consensus "N", all counters zero, all quality arrays
+ * initialised and no indel event tables. */
+void
+plp_col_init(plp_col_t *p) {
+    /* growth increment handed to the per-nucleotide quality arrays */
+    const int qual_chunk = 1000;
+    int nt;
+
+    /* column identity */
+    p->target = NULL;
+    p->pos = -INT_MAX;
+    p->ref_base = '\0';
+    p->cons_base[0] = 'N';
+    p->cons_base[1] = '\0';
+
+    /* coverage bookkeeping */
+    p->coverage_plp = 0;
+    p->num_bases = 0;
+    p->num_ign_indels = 0;
+    p->num_non_indels = 0;
+
+    /* per-nucleotide qualities and strand counts */
+    for (nt = 0; nt < NUM_NT4; nt++) {
+         p->fw_counts[nt] = 0;
+         p->rv_counts[nt] = 0;
+
+         int_varray_init(&p->base_quals[nt], qual_chunk);
+         int_varray_init(&p->baq_quals[nt], qual_chunk);
+         int_varray_init(&p->map_quals[nt], qual_chunk);
+         int_varray_init(&p->source_quals[nt], qual_chunk);
+#ifdef USE_ALNERRPROF
+         int_varray_init(&p->alnerr_qual[nt], qual_chunk);
+#endif
+    }
+
+    p->num_tails = 0;
+    p->num_heads = 0;
+
+    /* insertions */
+    p->sum_ins = 0;
+    p->num_ins = 0;
+    int_varray_init(&p->ins_quals, 0);
+    int_varray_init(&p->ins_map_quals, 0);
+    int_varray_init(&p->ins_source_quals, 0);
+    p->ins_event_counts = NULL;
+
+    /* deletions */
+    p->sum_dels = 0;
+    p->num_dels = 0;
+    int_varray_init(&p->del_quals, 0);
+    int_varray_init(&p->del_map_quals, 0);
+    int_varray_init(&p->del_source_quals, 0);
+    p->del_event_counts = NULL;
+
+    /* fw ([0]) and rv ([1]) counters for reads without the indel */
+    p->non_ins_fw_rv[0] = 0;
+    p->non_ins_fw_rv[1] = 0;
+    p->non_del_fw_rv[0] = 0;
+    p->non_del_fw_rv[1] = 0;
+
+    p->has_indel_aqs = 0;
+    p->hrun = 0;
+}
+
+
+/* Release everything owned by pileup column p: the target name, all
+ * quality arrays and both indel event tables. p itself is not freed. */
+void
+plp_col_free(plp_col_t *p) {
+    int nt = 0;
+
+    free(p->target);
+
+    /* per-nucleotide quality arrays */
+    while (nt < NUM_NT4) {
+         int_varray_free(&p->base_quals[nt]);
+         int_varray_free(&p->baq_quals[nt]);
+         int_varray_free(&p->map_quals[nt]);
+         int_varray_free(&p->source_quals[nt]);
+#ifdef USE_ALNERRPROF
+         int_varray_free(&p->alnerr_qual[nt]);
+#endif
+         nt++;
+    }
+
+    /* insertion qualities */
+    int_varray_free(&p->ins_quals);
+    int_varray_free(&p->ins_map_quals);
+    int_varray_free(&p->ins_source_quals);
+
+    /* deletion qualities */
+    int_varray_free(&p->del_quals);
+    int_varray_free(&p->del_map_quals);
+    int_varray_free(&p->del_source_quals);
+
+    /* indel event tables */
+    destruct_ins_event_counts(&p->ins_event_counts);
+    destruct_del_event_counts(&p->del_event_counts);
+}
+
+
+/* Write a one-line debug summary of pileup column p to stream: target
+ * name, position plus one, reference base, consensus base, the
+ * forward/reverse count of every nucleotide, head and tail counts,
+ * insertion and deletion counts and hrun.
+ *
+ * The count label says "fw/rv", which is the order the two counters
+ * are printed in. A NULL target (the state plp_col_init() leaves
+ * behind) is printed as "(null)" rather than being handed to %s.
+ */
+void plp_col_debug_print(const plp_col_t *p, FILE *stream)
+{
+     int i;
+
+     fprintf(stream, "%s\t%d\t%c\t%s\tcounts:fw/rv",
+             p->target ? p->target : "(null)",
+             p->pos+1, p->ref_base, p->cons_base);
+     for (i=0; i<NUM_NT4; i++) {
+          fprintf(stream, " %c:%lu/%lu",
+                  bam_nt4_rev_table[i],
+                  p->fw_counts[i],
+                  p->rv_counts[i]);
+     }
+
+     fprintf(stream, " heads:%d tails:%d", p->num_heads, p->num_tails);
+     fprintf(stream, " ins:%d del:%d", p->num_ins, p->num_dels);
+     fprintf(stream, " hrun=%d", p->hrun);
+     fprintf(stream, "\n");
+
+     /* disabled: dump of all base qualities per nucleotide */
+#if 0
+     for (i=0; i<NUM_NT4; i++) {
+          int j;
+          fprintf(stream, "%c BQs (%lu): " , bam_nt4_rev_table[i], p->base_quals[i].n);
+          for (j=0; j<p->base_quals[i].n; j++) {
+               fprintf(stream, " %d", p->base_quals[i].data[j]);
+          }
+          fprintf(stream, "\n");
+     }
+#endif
+}
+
+/* Print pileup column p to stream in a format close to the output of
+ * the last pre-C version, which is easy to parse from Python. Note
+ * that defaults have changed since and that filtering used to be done
+ * differently.
+ *
+ * One line per column: target, position plus one, reference base and
+ * coverage, then for every recorded base its letter followed by its
+ * base quality as a character offset by 33, and finally head, tail and
+ * indel counts together with the mean indel lengths.
+ */
+void
+plp_col_mpileup_print(const plp_col_t *p, FILE *stream)
+{
+     int nt;
+
+     fprintf(stream, "%s\t%d\t%c\t%d\t",
+             p->target, p->pos+1, p->ref_base,p->coverage_plp);
+
+     for (nt = 0; nt < NUM_NT4; nt++) {
+          int k;
+          for (k = 0; k < p->base_quals[nt].n; k++) {
+               /* base letter, then its quality as printable char */
+               fputc(bam_nt4_rev_table[nt], stream);
+               fputc(p->base_quals[nt].data[k]+33, stream);
+          }
+     }
+
+     /* mean lengths are reported as 0 if there are no events */
+     fprintf(stream, "\t#heads=%d #tails=%d #ins=%d ins_len=%.1f #del=%d del_len=%.1f\n",
+          p->num_heads,
+          p->num_tails,
+          p->num_ins,
+          p->num_ins ? p->sum_ins/(float)p->num_ins : 0,
+          p->num_dels,
+          p->num_dels ? p->sum_dels/(float)p->num_dels : 0);
+}
+/* plp_col_mpileup_print() */
+
+
+
+/* Print all settings in c to stream, one per line: the quality
+ * thresholds, the raw flag value followed by each individual flag bit
+ * as 0 or 1, the depth limit, and region, fasta, bed and command line.
+ *
+ * reg and cmdline are NULL after init_mplp_conf() until somebody sets
+ * them; they are printed as "(null)" in that case instead of being
+ * passed to %s. Pointers printed with %p are converted to void
+ * pointers as that conversion specifier requires.
+ */
+void
+dump_mplp_conf(const mplp_conf_t *c, FILE *stream)
+{
+     fprintf(stream, "mplp options\n");
+     fprintf(stream, "  max_mq       = %d\n", c->max_mq);
+     fprintf(stream, "  min_mq       = %d\n", c->min_mq);
+     fprintf(stream, "  flag         = %d\n", c->flag);
+
+     fprintf(stream, "  flag & MPLP_NO_ORPHAN  = %d\n", c->flag & MPLP_NO_ORPHAN ? 1:0);
+     fprintf(stream, "  flag & MPLP_BAQ        = %d\n", c->flag & MPLP_BAQ ? 1:0);
+     fprintf(stream, "  flag & MPLP_REDO_BAQ   = %d\n", c->flag & MPLP_REDO_BAQ ? 1:0);
+     fprintf(stream, "  flag & MPLP_EXT_BAQ    = %d\n", c->flag & MPLP_EXT_BAQ ? 1:0);
+     fprintf(stream, "  flag & MPLP_IDAQ       = %d\n", c->flag & MPLP_IDAQ ? 1:0);
+     fprintf(stream, "  flag & MPLP_REDO_IDAQ = %d\n", c->flag & MPLP_REDO_IDAQ ? 1:0);
+     fprintf(stream, "  flag & MPLP_USE_SQ     = %d\n", c->flag & MPLP_USE_SQ ? 1:0);
+     fprintf(stream, "  flag & MPLP_ILLUMINA13 = %d\n", c->flag & MPLP_ILLUMINA13 ? 1:0);
+
+     fprintf(stream, "  max_depth    = %d\n", c->max_depth);
+     fprintf(stream, "  min_plp_bq   = %d\n", c->min_plp_bq);
+     fprintf(stream, "  min_plp_idq  = %d\n", c->min_plp_idq);
+     fprintf(stream, "  def_nm_q     = %d\n", c->def_nm_q);
+     fprintf(stream, "  reg          = %s\n", c->reg ? c->reg : "(null)");
+     fprintf(stream, "  fa           = %p\n", (const void *)c->fa);
+     /*fprintf(stream, "  fai          = %p\n", c->fai);*/
+     fprintf(stream, "  bed          = %p\n", (const void *)c->bed);
+     fprintf(stream, "  cmdline      = %s\n", c->cmdline ? c->cmdline : "(null)");
+}
+/* dump_mplp_conf() */
+
+
+
+
+
+/* positions to be ignored during source quality computation, keyed by
+ * chrom and pos */
+static var_hash_t *source_qual_ign_vars_hash = NULL; /* must be declared NULL ! */
+
+
+/* Tell whether the position of var is on the ignore list.
+ *
+ * The lookup key is built from chrom and pos only. NOTE: source
+ * quality passes down fake vars without ref and alt, so nothing but
+ * vcf_var_key_pos_only will work here!
+ *
+ * Returns 1 if the position is listed and 0 otherwise.
+ */
+int
+var_in_ign_list(var_t *var) {
+     var_hash_t *hit = NULL;
+     char *lookup_key = NULL;
+
+     vcf_var_key_pos_only(&lookup_key, var);
+     HASH_FIND_STR(source_qual_ign_vars_hash, lookup_key, hit);
+     free(lookup_key);
+
+     return (hit != NULL) ? 1 : 0;
+}
+
+
+/* Free the table of variant positions ignored during source quality
+ * computation.
+ *
+ * The table pointer is reset afterwards: it is handed to
+ * var_hash_free_table() by value and would otherwise keep pointing at
+ * freed memory, so that a later lookup or a second call would touch
+ * it again. */
+void
+source_qual_free_ign_vars()
+{
+     var_hash_free_table(source_qual_ign_vars_hash);
+     source_qual_ign_vars_hash = NULL;
+}
+
+
+/* Read the variants in vcf_path and remember their positions (chrom
+ * and pos only) as to be ignored during source quality computation.
+ * If bed is non-NULL, only variants overlapping one of its regions are
+ * kept.
+ *
+ * Returns 1 if the file cannot be opened or its header cannot be
+ * skipped, and 0 otherwise. The file is closed on every path on which
+ * it was opened.
+ *
+ * FIXME ignore variants outside given region  (mplp_conf.reg)
+ * (on top of bed as well)
+ * and use tabix API if indexed */
+int
+source_qual_load_ign_vcf(const char *vcf_path, void *bed)
+{
+     vcf_file_t vcf_file;
+     const int read_only_passed = 0;
+     unsigned int num_total_vars = 0;
+     unsigned int num_kept_vars = 0;
+
+     if (vcf_file_open(& vcf_file, vcf_path,
+                      HAS_GZIP_EXT(vcf_path), 'r')) {
+         LOG_ERROR("Couldn't open %s\n", vcf_path);
+         return 1;
+     }
+
+     if (0 !=  vcf_skip_header(& vcf_file)) {
+         LOG_WARN("%s\n", "vcf_skip_header() failed");
+         /* don't leak the handle opened above */
+         vcf_file_close(& vcf_file);
+         return 1;
+     }
+
+    /* as in lofreq_vcfset.c
+     * WARN: partial code duplication
+     */
+    while (1) {
+         var_t *var;
+         char *key;
+         int rc;
+
+         vcf_new_var(&var);
+         rc = vcf_parse_var(& vcf_file, var);
+         if (rc) {
+              /* no further variant could be parsed */
+              vcf_free_var(&var);
+              break;
+         }
+         num_total_vars += 1;
+
+         if (read_only_passed && ! VCF_VAR_PASSES(var)) {
+              vcf_free_var(&var);
+              continue;
+         }
+
+         if (bed && ! bed_overlap(bed, var->chrom, var->pos, var->pos+1)) {
+              vcf_free_var(&var);
+              continue;
+         }
+
+         /* using key_pos_only i.e. chrom and pos only */
+         vcf_var_key_pos_only(&key, var);
+         /* since we only need the key and no other info we do
+          * not need to save the var (and save NULL instead) */
+         vcf_free_var(&var);
+         var_hash_add(& source_qual_ign_vars_hash, key, NULL);
+         /* FIXME key used within vcf_free_var; dont free! */
+    }
+
+    num_kept_vars = HASH_COUNT(source_qual_ign_vars_hash);
+    if (num_kept_vars) {
+         LOG_VERBOSE("Ignoring %u variants for SQ computation after reading %s\n",
+                     num_kept_vars, vcf_path);
+    } else {
+         LOG_WARN("None of the %u variants in %s were kept\n",
+                  num_total_vars, vcf_path);
+    }
+    vcf_file_close(& vcf_file);
+
+    return 0;
+}
+
+
+
+/* Estimate as to how likely it is that this read, given the mapping,
+ * comes from this reference genome. P(r not from g|mapping) = 1 - P(r
+ * from g).
+ *
+ * Use base-qualities and poisson-binomial dist, similar to core SNV
+ * calling, but return prob instead of pvalue (and subtract one
+ * mismatch which is the SNV we are checking for the benefit of doubt;
+ * effectively also means that prob MM=1 eq prob MM=0). If
+ * nonmatch_qual is < 0, then keep all qualities as they are, i.e.
+ * don't replace mismatches with lower values. Rationale: higher SNV
+ * quals, means higher chance SNVs are real, therefore higher prob.
+ * read does not come from genome. Otherwise use this phredscore as
+ * default.
+ *
+ * NOTE(review): as written, a nonmatch_qual >= 0 replaces the quality
+ * of every operation, matches included -- confirm that this is
+ * intended.
+ *
+ * Assuming independence of errors is okay, because if they are not
+ * independent, then the prediction made is conservative.
+ *
+ * If target is non-NULL will ignore SNPs via var_in_ign_list
+ *
+ * Returns -1 on error or if NA. otherwise source quality
+ *
+ */
+int
+source_qual(const bam1_t *b, const char *ref,
+            const int nonmatch_qual, char *target, int min_bq)
+{
+     int op_counts[NUM_OP_CATS];
+     int **op_quals = NULL;
+
+     double *probvec = NULL;
+     int num_non_matches = -1; /* non-matching operations */
+     int orig_num_non_matches = -1; /* only reported in the TRACE output */
+     double *err_probs = NULL; /* error probs (qualities) passed down to snpcaller. one for each op, no matter if matching or not */
+     int num_err_probs; /* #elements in err_probs */
+
+     long double unused_pval;
+     int src_qual = -1;
+     double src_prob = -1; /* prob of this read coming from genome */
+     int err_prob_idx;
+     int i, j;
+
+     int qlen = b->core.l_qseq;
+
+     /* alloc op_quals: one array of qlen qualities per operation
+      * category
+      */
+     if (NULL == (op_quals = malloc(NUM_OP_CATS * sizeof(int *)))) {
+          fprintf(stderr, "FATAL: couldn't allocate memory at %s:%s():%d\n",
+                  __FILE__, __FUNCTION__, __LINE__);
+          exit(1);
+     }
+     for (i=0; i<NUM_OP_CATS; i++) {
+          if (NULL == (op_quals[i] = malloc(qlen * sizeof(int)))) {/* over allocating */
+               fprintf(stderr, "FATAL: couldn't allocate memory at %s:%s():%d\n",
+                       __FILE__, __FUNCTION__, __LINE__);
+               free(op_quals);
+               exit(1);
+          }
+     }
+
+     /* count match operations and get qualities for them
+      */
+     /* LOG_FIXME("%s\n", "Don't know ref name in count_cigar_ops which would be needed as hash key");*/
+     num_err_probs = count_cigar_ops(op_counts, op_quals, b, ref, min_bq, target);
+     if (1 > num_err_probs) {
+#ifdef TRACE
+          LOG_DEBUG("count_cigar_ops returned %d counts on read %s\n", num_err_probs, bam1_qname(b));
+#endif
+          src_qual = -1;
+          goto free_and_exit;
+     }
+
+     /* alloc and fill err_probs with quals returned per op-cat from
+      * count_cigar_ops
+      */
+     if (NULL == (err_probs = malloc(num_err_probs * sizeof(double)))) {
+          fprintf(stderr, "FATAL: couldn't allocate memory at %s:%s():%d\n",
+                  __FILE__, __FUNCTION__, __LINE__);
+          exit(1);
+     }
+     num_non_matches = 0;
+     err_prob_idx = 0;
+     for (i=0; i<NUM_OP_CATS; i++) {
+#ifdef SOURCEQUAL_IGNORES_INDELS
+          /* pretend it never happened: remove counts and ignore qualities */
+          if (i==OP_INS || i==OP_DEL) {
+               num_err_probs -= op_counts[i];
+               continue;
+          }
+#endif
+          if (i!=OP_MATCH) {
+               num_non_matches += op_counts[i];
+          }
+          for (j=0; j<op_counts[i]; j++) {
+               int qual;
+               if (nonmatch_qual >= 0) {
+                    qual = nonmatch_qual;
+               } else {
+                    qual = op_quals[i][j];
+               }
+               err_probs[err_prob_idx] = PHREDQUAL_TO_PROB(qual);
+               /*LOG_FIXME("err_probs[%d] = %f (nonmatch_qual=%d op_quals[i=%d][j=%d]=%d)\n", err_prob_idx, err_probs[err_prob_idx], nonmatch_qual, i, j, op_quals[i][j]);*/
+               err_prob_idx += 1;
+          }
+     }
+     assert(err_prob_idx == num_err_probs);
+
+     /*  need num_non_matches-1: one non-match is given the benefit
+      *  of doubt (see function comment) */
+     orig_num_non_matches = num_non_matches;
+     if (num_non_matches>0) {
+          num_non_matches -= 1;
+     }
+     /* nothing left to explain: skip the computation below */
+     if (0 == num_non_matches) {
+#if 0
+          src_qual = PROB_TO_PHREDQUAL_SAFE(0.0);
+#else
+          src_qual = PROB_TO_PHREDQUAL(LDBL_MIN);
+#endif
+          goto free_and_exit;
+     }
+
+#ifdef SOURCEQUAL_USES_PAIRS
+     if ((b->core.flag&BAM_FPAIRED) && (b->core.flag&BAM_FPROPER_PAIR)) {
+          double median_err = dbl_median(err_probs, num_err_probs);
+          LOG_FIXME("%s\n", "SOURCEQUAL_USES_PAIRS: assuming perfect match for paire read");
+          /* can't easily access info about read in pair. assume
+             perfect match, using length and median prob of
+             current one */
+          
+          num_err_probs *= 2;
+          if (NULL == (err_probs = realloc(err_probs, num_err_probs * sizeof(double)))) {
+               fprintf(stderr, "FATAL: couldn't reallocate memory at %s:%s():%d\n",
+                       __FILE__, __FUNCTION__, __LINE__);
+               exit(1);
+          }
+          /* NOTE(review): starts at err_prob_idx-1 and therefore also
+           * overwrites the last value of the read itself -- confirm */
+          for (i=err_prob_idx-1; i<num_err_probs; i++) {
+               err_probs[i] = median_err;
+          }
+          LOG_FIXME("median_err = %f\n", median_err);
+     }
+#endif
+
+     /* src_prob: what's the prob of seeing n_mismatches-1 by chance,
+      * given quals? or: how likely is this read from the genome.
+      * 1-src_value = prob read is not from genome
+      */
+
+     /* sorting in theory should be numerically more stable and also
+      * make poissbin faster */
+     qsort(err_probs, num_err_probs, sizeof(double), dbl_cmp);
+     /* NOTE(review): probvec is used without a NULL check -- confirm
+      * that poissbin() cannot fail here */
+     probvec = poissbin(&unused_pval, err_probs,
+                        num_err_probs, num_non_matches, 1.0, 0.05);
+     /* need prob not pv */
+     errno = 0;
+     feclearexcept(FE_ALL_EXCEPT);
+     src_prob = exp(probvec[num_non_matches-1]);
+     if (errno || fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW)) {
+          if (src_prob < DBL_EPSILON) {
+               src_prob = DBL_MIN;/* to zero but prevent actual 0 value */
+          } else {
+               /* NOTE(review): the original remark below speaks of
+                * setting 1, the code sets DBL_MAX, which makes
+                * 1.0 - src_prob negative further down -- confirm */
+               src_prob = DBL_MAX; /* otherwise set to 1 which might pass filters */
+          }
+     }
+
+     free(probvec);
+     src_qual = PROB_TO_PHREDQUAL(1.0 - src_prob);
+
+
+     /* common exit: release all buffers (err_probs may still be NULL
+      * here) */
+free_and_exit:
+     for (i=0; i<NUM_OP_CATS; i++) {
+          free(op_quals[i]);
+     }
+     free(op_quals);
+
+     free(err_probs);
+
+     /* if we wanted to use softening from precomputed stats then add
+      * all non-matches up instead of using the matches */
+#if 0
+#define TRACE
+#endif
+#ifdef TRACE
+     LOG_DEBUG("returning src_qual=%d (orig prob = %g) for cigar=%s num_err_probs=%d num_non_matches=%d(%d) @%d\n",
+               src_qual, src_prob, cigar_str_from_bam(b), num_err_probs, num_non_matches, orig_num_non_matches, b->core.pos);
+#endif
+#undef TRACE
+
+     return src_qual;
+}
+/* source_qual() */
+
+
+
+/* Pileup read callback (part of samtools but not of the official
+ * samtools/htslib API). Reads into b until a read passes all read-level
+ * filters; applies BAQ/IDAQ and attaches the source quality if requested.
+ * Returns the status of the last read call (<0: nothing was read into b) */
+static int
+mplp_func(void *data, bam1_t *b)
+{
+     mplp_aux_t *ma = (mplp_aux_t*)data;
+     int ret, skip = 0;
+
+     do {
+          int has_ref;
+          ret = ma->iter? bam_iter_read(ma->fp, ma->iter, b) : bam_read1(ma->fp, b);
+          if (ret < 0)
+               break;
+#ifdef TRACE
+          LOG_DEBUG("Got read %s with flag %d\n", bam1_qname(b), b->core.flag);
+#endif
+          if (b->core.tid < 0 || (b->core.flag&BAM_FUNMAP)) { /* exclude unmapped reads */
+               skip = 1;
+               continue; /* jumps to the while (skip) test, i.e. fetches the next read */
+          }
+
+          if (b->core.flag & BAM_DEF_MASK) {/* == BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP */
+#ifdef TRACE
+               LOG_DEBUG("%s BAM_DEF_MASK match\n", bam1_qname(b));
+#endif
+               skip = 1;
+               continue;
+          }
+          if (ma->conf->bed) { /* test overlap */
+               skip = !bed_overlap(ma->conf->bed, ma->h->target_name[b->core.tid], b->core.pos, bam_calend(&b->core, bam1_cigar(b)));
+               if (skip)
+                    continue;
+          }
+
+          if (ma->conf->flag & MPLP_ILLUMINA13) {
+               int i;
+               uint8_t *qual = bam_get_qual(b);
+               for (i = 0; i < b->core.l_qseq; ++i)
+                    qual[i] = qual[i] > 31? qual[i] - 31 : 0;
+          }
+          has_ref = (ma->ref && ma->ref_id == b->core.tid)? 1 : 0;
+
+          /* lofreq fix to original samtools routines which ensures that
+           * the reads mapping to first position have a reference attached
+           * as well and therefore baq, sq etc can be applied. NOTE(review):
+           * a previously set ma->ref is not freed here; confirm its owner */
+          if (! has_ref && ma->conf->fai) {
+               int ref_len = -1;
+               ma->ref = faidx_fetch_seq(ma->conf->fai, ma->h->target_name[b->core.tid], 0, 0x7fffffff, &ref_len);
+               if (!ma->ref) {
+                    LOG_FATAL("Couldn't fetch sequence '%s'.\n", ma->h->target_name[b->core.tid]);
+                    exit(1);/* FIXME just returning would just skip calls for this seq */
+               } else {
+                    strtoupper(ma->ref);/* safeguard */
+                    ma->ref_id = b->core.tid;
+                    has_ref = 1;
+               }
+          }
+
+          skip = 0;
+#if 0
+          {
+               fprintf(stderr, "before realn\n");
+               samfile_t *fp = samopen("-", "w",  ma->h);
+               samwrite(fp, b);
+               fflush(stdout);
+          }
+#endif
+          if (ma->conf->flag & MPLP_BAQ || ma->conf->flag & MPLP_IDAQ) {
+               int baq_flag = ma->conf->flag & MPLP_BAQ ? 1 : 0;
+               int baq_ext =  ma->conf->flag & MPLP_EXT_BAQ ? 1 : 0;
+               int idaq_flag = ma->conf->flag & MPLP_IDAQ ? 1 : 0;
+
+               if (! has_ref) {
+                    LOG_FATAL("%s\n", "Can't compute BAQ or IDAQ without reference sequence");
+                    exit(1);
+               }
+               if (baq_flag && ma->conf->flag & MPLP_REDO_BAQ) {
+                    baq_flag = 2;
+               }
+
+               if (bam_prob_realn_core_ext(b, ma->ref, baq_flag, baq_ext, idaq_flag)) {
+                    LOG_ERROR("bam_prob_realn_core() failed for %s\n", bam1_qname(b));
+               }
+
+#if 0
+               {
+                    uint8_t *baq_aux = NULL;
+                    baq_aux = bam_aux_get(b, BAQ_TAG);
+                    if (! baq_aux) {
+                         LOG_ERROR("bam_prob_realn_core() didn't report an error but %s is missing. Can happen on refskips etc\n", BAQ_TAG);
+                    }
+
+               }
+#endif
+          }
+
+#if 0
+          {
+               fprintf(stdout, "after realn\n");
+               samfile_t *fp = samopen("-", "w",  ma->h);
+               samwrite(fp, b);
+               fflush(stdout);
+          }
+#endif
+
+          if (b->core.qual > ma->conf->max_mq) {
+               b->core.qual = ma->conf->max_mq;
+          } else if (b->core.qual < ma->conf->min_mq) {
+               skip = 1;
+          }
+          /* orphan filter: deliberately not chained with else to the MQ test above, otherwise reads whose MQ got capped would bypass it. never tried RELAXED_ORPHAN. most examples I saw where orphans matter evaluate to true under both conditions anyway */
+#ifdef RELAXED_ORPHAN
+          /* only orphan if mate is wrongly mapped (but not unmapped) */
+          if ((ma->conf->flag&MPLP_NO_ORPHAN) && (b->core.flag&BAM_FPAIRED) && !(b->core.flag&BAM_FMUNMAP)) {
+#else
+          /* orphan as in samtools: read is aligned but not as proper pair as defined by aligner */
+          if ((ma->conf->flag&MPLP_NO_ORPHAN) && (b->core.flag&BAM_FPAIRED) && !(b->core.flag&BAM_FPROPER_PAIR)) {
+#endif
+               skip = 1;
+          }
+     } while (skip);
+
+     /* compute source qual if requested and have ref and attach as aux
+      * to bam (only if a read was actually fetched, i.e. ret>=0; otherwise
+      * b holds no new read). only disadvantage of doing this here is that
+      * we don't have BAQ info yet (only interesting if it's supposed to be
+      * used instead of BQ) only have the ref but not the cons base. */
+     if (ret >= 0 && ma->ref && ma->ref_id == b->core.tid && ma->conf->flag & MPLP_USE_SQ) {
+          int sq = source_qual(b, ma->ref, ma->conf->def_nm_q,
+                               ma->h->target_name[b->core.tid], DEFAULT_MIN_BQ/* FIXME could use->conf->min_bq which is set to a conservative 3 */);
+          /* -1 indicates error or NA, but can't be stored as uint. hack is to use 0 instead */
+          if (sq<0) {
+               sq=0;
+          }
+          bam_aux_append(b, SRC_QUAL_TAG, 'i', sizeof(sq), (uint8_t*) &sq);
+#if 0
+          int sq2 = bam_aux2i(bam_aux_get(b, SRC_QUAL_TAG));
+          LOG_WARN("sq=%d sq2=%d\n", sq, sq2);
+#endif
+     }
+
+     return ret;
+}
+
+/* Length of the homopolymer run starting right of pos (i.e. at pos+1),
+ * compared case-insensitively. If the bases at pos and to its left
+ * equal the run's base they are counted as well (covers indels that
+ * are not left aligned). Mainly for filtering low af FP indel at the
+ * beginning of poly-AT regions. A del GT>G which is in the sequence
+ * context of GTTT will receive an hrun value of 3. same for ins G>GT.
+ * Returns 1 if there is no base at pos+1 (also if ref is NULL). */
+int
+get_hrun(const int pos, const char *ref, const int ref_len)
+{
+     int c; /* upper-cased base defining the run */
+     int hrun=1;
+     int i;
+
+     /* to the right */
+     i=pos+1;
+     if (! ref || i<0 || i>=ref_len) {
+          return hrun;
+     }
+     /* toupper() is only defined for unsigned char values (or EOF), hence the casts */
+     c=toupper((unsigned char)ref[i]);
+     for (i=i+1; i<ref_len; i++) {
+          /*LOG_DEBUG("to right: %c vs %c at %d\n", c, toupper(ref[i]), i+1);*/
+          if (toupper((unsigned char)ref[i])==c) {
+               hrun+=1;
+          } else {
+               break;
+          }
+     }
+
+     /* extend to the left in case not left aligned */
+     for (i=pos; i>=0; i--) {
+          /*LOG_DEBUG("to left: %c vs %c at %d\n", c, toupper(ref[i]), i+1);*/
+          if (toupper((unsigned char)ref[i])==c) {
+               hrun+=1;
+          } else {
+               break;
+          }
+     }
+
+     return hrun;
+}
+
+/* Press the pileup info of one column (the n_plp reads in plp covering
+ * position pos, which is used as 0-based index into ref, of target_name)
+ * into plp_col: per-base quals and strand counts, indel events, consensus.
+ * If ref is NULL or pos >= ref_len the reference base is set to 'N'.
+ * plp_col members allocated here. Caller must free with plp_col_free();
+ * FIXME this used to be a convenience function and turned into a big and
+ * slow monster. keeping copies of everything is inefficient and in some
+ * cases wastes memory (e.g. SQ, BAQ or MQ if not needed). Needs optimization. */
+void compile_plp_col(plp_col_t *plp_col,
+                 const bam_pileup1_t *plp, const int n_plp,
+                 const mplp_conf_t *conf, const char *ref, const int pos,
+                 const int ref_len, const char *target_name)
+{
+     int i;
+     char ref_base;
+
+     /* "base counts" minus error-probs before base-level filtering
+      * for each base. temporary data-structure for cheaply determining
+      * consensus which is saved in plp_col */
+     double base_counts[NUM_NT4] = { 0 };
+     /* sum of insertion quals (iq) over reads not showing an insertion, resp. deletion quals (dq) over reads not showing a deletion */
+     int ins_nonevent_qual = 0, del_nonevent_qual = 0;
+
+     /* computation of depth (after read-level *and* base-level filtering)
+      * samtools-0.1.18/bam2depth.c:
+      *   if (p->is_del || p->is_refskip) ++m;
+      *   else if (bam1_qual(p->b)[p->qpos] < bq) ++m
+      * n_plp[i] - m
+      */
+     ref_base = (ref && pos < ref_len)? ref[pos] : 'N';
+
+     plp_col_init(plp_col);
+     plp_col->target = strdup(target_name);
+     plp_col->pos = pos;
+     plp_col->ref_base = ref_base;
+     plp_col->coverage_plp = n_plp;  /* this is coverage as in the original mpileup,
+                                    i.e. after read-level filtering */
+     plp_col->num_bases = 0;
+     plp_col->num_ign_indels = 0;
+     plp_col->num_non_indels = 0;
+     LOG_DEBUG("Processing %s:%d\n", plp_col->target, plp_col->pos+1);
+     
+     if (ref) { /* hrun is not assigned here if there is no reference */
+          plp_col->hrun = get_hrun(pos, ref, ref_len);
+     }
+
+     for (i = 0; i < n_plp; ++i) {
+          /* inserted parts of pileup_seq() here.
+           * logic there goes like this:
+           *
+           * if is_head: put ^
+           *
+           * if ! is_del: put c
+           * else:        put * (or <> if refskip)
+           *
+           * if indel>0:   print + p[qpos +1...indel]
+           * elif indel<0: print - p[qpos +1...indel]
+           *
+           * if is_tail: put $
+           */
+          const bam_pileup1_t *p = plp + i;
+          int nt4;
+          int mq=-1, bq, baq; /* phred scores */
+          int iq = 0, dq = 0; /* insertion/deletion quals; stay 0 if the BI_TAG/BD_TAG aux fields are missing */
+          int iaq = -1, daq = -1; /* indel alignment quals; stay -1 if the AI_TAG/AD_TAG aux fields are missing */
+          int base_skip = 0; /* boolean */
+          int sq = -1; /* source qual; stays -1 unless MPLP_USE_SQ is set */
+#ifdef USE_ALNERRPROF
+          int aq = 0;
+#endif
+          uint8_t *bi = bam_aux_get(p->b, BI_TAG); /* source of iq below */
+          uint8_t *bd = bam_aux_get(p->b, BD_TAG); /* source of dq below */
+          uint8_t *ai = bam_aux_get(p->b, AI_TAG); /* source of iaq below */
+          uint8_t *ad = bam_aux_get(p->b, AD_TAG); /* source of daq below */
+          uint8_t *baq_aux = NULL; /* full baq value (not offset as "BQ"!) */
+
+#ifdef USE_OLD_AI_AD
+          /* temporary fix preventing problems due to the fact that we changed AI AD to ai ad
+           * to be deleted soon
+           */
+          if (! ai) {
+               ai = bam_aux_get(p->b, "AI");
+          }
+          if (! ad) {
+               ad = bam_aux_get(p->b, "AD");
+          }
+#endif
+          if (conf->flag & MPLP_USE_SQ) {
+               sq = bam_aux2i(bam_aux_get(p->b, SRC_QUAL_TAG)); /* lofreq internally computed on the fly */
+          }
+
+          if (conf->flag & MPLP_BAQ) {
+               baq_aux = bam_aux_get(p->b, BAQ_TAG);
+               /* should have been recomputed already */
+               if (! baq_aux) {
+                    if (! missing_baq_warning_printed) { /* warn only once */
+                         LOG_WARN("BAQ tag %s missing but BAQ was supposed to be used (at %s:%d; can happend if refskips are encountered)\n", BAQ_TAG, plp_col->target, plp_col->pos+1);
+                         /*LOG_FATAL("%s\n", "Please pre-process your BAM file with lofreq alnqual first");*/
+                         missing_baq_warning_printed = 1;
+                    }
+               } else {
+                    baq_aux++; /* first char is type (same for bd and bi done below) */
+               }
+          }
+
+#if 0
+          LOG_FIXME("At %s:%d %c: p->is_del=%d p->is_refskip=%d p->indel=%d p->is_head=%d p->is_tail=%d\n",
+                    plp_col->target,
+                    plp_col->pos+1,
+                    bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)],
+                    p->is_del, p->is_refskip, p->indel, p->is_head, p->is_tail);
+#endif
+          /* no need for check if mq is within user defined
+           * limits. check was done in mplp_func */
+          mq = p->b->core.qual;
+
+          /* p->is_del means there was a deletion (already printed before this column). */
+          if (! p->is_del) {
+               double count_incr;
+
+               if (p->is_head) {
+                    plp_col->num_heads += 1;
+               }
+               if (p->is_tail) {
+                    plp_col->num_tails += 1;
+               }
+
+#if 0
+               /* nt for printing */
+               nt = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)];
+               nt = bam1_strand(p->b)? tolower(nt) : toupper(nt);
+#endif
+
+               /* nt4 for indexing */
+               nt4 = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)];
+
+               bq = bam1_qual(p->b)[p->qpos];
+
+               /* minimal base-call quality filtering. doing this here
+                * will make all downstream analysis blind to filtering
+                * and might skew AFs etc
+                */
+               if (bq < conf->min_plp_bq) {
+                    base_skip = 1;
+                    /* NOTE: all values used after check_indel need to be initialized before this goto */
+                    goto check_indel; /* goto was easiest */
+               }
+
+               /* the following samtools' original code will correct
+                * base-pairs down if they exceed the valid
+                * sanger/phred limits. don't think it's wise to do this
+                * automatically as this would indicate a problem with
+                * the input and it's also unclear what the BAQ then means
+                */
+               if (bq > SANGER_PHRED_MAX) {
+                    /* bq = SANGER_PHRED_MAX; /@ Sanger/Phred max */
+                    LOG_FATAL("Base qualitiy above allowed maximum detected (%d)\n", bq);
+                    exit(1);
+               }
+               PLP_COL_ADD_QUAL(& plp_col->base_quals[nt4], bq);
+
+               if (baq_aux) {
+                    baq = baq_aux[p->qpos]-33; /* stored with an offset of 33 */
+                    PLP_COL_ADD_QUAL(& plp_col->baq_quals[nt4], baq);
+               } else if (conf->flag & MPLP_BAQ)  {
+                    /* baq was enabled but failed. set to -1 */
+                    PLP_COL_ADD_QUAL(& plp_col->baq_quals[nt4], -1);
+               }
+
+               /* samtools check to detect Sanger max value: problem
+                * is that an MQ Phred of 255 means NA according to the
+                * samtools spec (needed below). This however is not
+                * detectable if the following original samtools line
+                * gets executed, which is why we remove it:
+                * if (mq > 126) mq = 126;
+                */
+               PLP_COL_ADD_QUAL(& plp_col->map_quals[nt4], mq);
+
+               if (conf->flag & MPLP_USE_SQ) {
+                    PLP_COL_ADD_QUAL(& plp_col->source_quals[nt4], sq);
+               }
+#ifdef USE_ALNERRPROF
+               if (alnerrprof) {
+                    int tid = p->b->core.tid;
+                    assert(tid < alnerrprof->num_targets);
+                    if (alnerrprof->prop_len[tid] > p->qpos) {
+                         aq = PROB_TO_PHREDQUAL_SAFE(alnerrprof->props[tid][p->qpos]);
+                         PLP_COL_ADD_QUAL(& plp_col->alnerr_qual[nt4], aq);
+                    } else {
+                         LOG_ERROR("alnerror for tid=%d too small for qpos=%d. Setting to 0\n", tid, p->qpos+1);
+                         PLP_COL_ADD_QUAL(& plp_col->alnerr_qual[nt4], PROB_TO_PHREDQUAL(LDBL_MIN));
+                    }
+               }
+               /* don't add anything. keep empty */
+#endif
+
+               /* count_incr: 1 - error probability of this base; added to base_counts for the consensus call */
+#ifdef MERGEQ_FOR_CONS_CALL
+#ifdef USE_ALNERRPROF
+               count_incr = 1.0 - merge_srcq_baseq_mapq_and_alnq(sq, bq, mq, aq);
+#else
+               count_incr = 1.0 - merge_srcq_baseq_and_mapq(sq, bq, mq);
+#endif
+#else
+               count_incr = 1.0 - PHREDQUAL_TO_PROB(bq);
+#endif
+
+               /* FIXME this can't be the proper way to handle cases where count_incr = 0.0 because one of the values is 0? */
+               if (count_incr == 0.0) {
+                    count_incr = DBL_MIN;
+               }
+
+               base_counts[nt4] += count_incr;
+               if (bam1_strand(p->b)) { /* non-zero: counted as reverse */
+                    plp_col->rv_counts[nt4] += 1;
+               } else {
+                    plp_col->fw_counts[nt4] += 1;
+               }
+
+          } /* ! p->is_del */
+          /* else {deletion (already printed before this column), i.e. we got physical coverage (if no terminal indels are allowed)} */
+
+check_indel:
+          /* reached for every read, including deletions and bases skipped by the min_plp_bq filter above */
+          /* for post read- and base-level coverage. FIXME review */
+          if (! (p->is_del || p->is_refskip || 1 == base_skip)) {/* FIXME also use p->indel? */
+               plp_col->num_bases += 1;
+          }
+
+          if (bi) {
+               char *t = (char*)(bi+1); /* 1 is type */
+#if 0
+               int j;
+               printf("At %d qpos %d: %s=%s", plp_col->pos+1, p->qpos+1, BI_TAG, t);
+               for (j = 0; j < p->indel; ++j) {
+                    printf(" %c:%d-%d-%d", t[p->qpos+j], t[p->qpos+j-1]-33, t[p->qpos+j]-33, t[p->qpos+j+1]-33);
+               }
+               printf("\n");
+#endif
+               /* adding 1 value representing whole insert */
+               iq = t[p->qpos] - 33;
+          } /* else default to 0 */
+
+          if (bd) {
+               char *t = (char*)(bd+1);  /* 1 is type */
+#if 0
+               int j;
+               printf("At %d qpos %d: %s=%sx", plp_col->pos+1, p->qpos+1, BD_TAG, t);
+               for (j = 0; j < p->indel; ++j) {
+                    printf(" %c:%d-%d-%d", t[p->qpos+j], t[p->qpos+j-1]-33, t[p->qpos+j]-33, t[p->qpos+j+1]-33);
+               }
+               printf("\n");
+#endif
+               /* adding 1 value representing whole del */
+               dq = t[p->qpos] - 33;
+          } /* else default to 0 */
+
+          /* indel quality filter: if either qual is too low, nothing is recorded for this read; only ignored indels are counted */
+          if (iq < conf->min_plp_idq || dq < conf->min_plp_idq) {
+               /* LOG_DEBUG("iq=%d < conf->min_plp_idq=%d || dq=%d < conf->min_plp_idq=%d\n", iq, conf->min_plp_idq, dq, conf->min_plp_idq); */
+               if (p->indel != 0 || p->is_del != 0) {
+                  plp_col->num_ign_indels += 1;
+               }
+          } else {
+
+               if (p->indel != 0) {
+                    /* insertion (+)
+                     */
+                    if (p->indel > 0) {
+                         char *ins_seq;
+                         int j;
+
+                         if (ai) {
+                              char *a = (char*)(ai+1);
+                              iaq = a[p->qpos] - 33;
+                              plp_col->has_indel_aqs = 1;
+                         }
+
+                         plp_col->num_ins += 1;
+                         plp_col->sum_ins += p->indel;
+
+                         if ((ins_seq = malloc((p->indel+1) * sizeof(char)))==NULL) {
+                              LOG_FATAL("%s\n", "Memory allocation failed");
+                              exit(1);
+                         }
+
+                         /* get inserted sequence */
+                         for (j = 1; j <= p->indel; ++j) {
+                              int c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos+j)];
+                              ins_seq[j-1] = toupper(c);
+                         }
+                         ins_seq[j-1] = '\0';
+
+
+                         /*LOG_DEBUG("Insertion of %s at %d with iq %d iaq %d\n", ins_seq, pos, iq, iaq);*/
+                         add_ins_sequence(&plp_col->ins_event_counts,
+                              ins_seq, iq, iaq, mq, sq,
+                              bam1_strand(p->b)? 1: 0);
+                         /* a read carrying an insertion is recorded as a non-deletion observation */
+                         PLP_COL_ADD_QUAL(& plp_col->del_quals, dq);
+                         PLP_COL_ADD_QUAL(& plp_col->del_map_quals, mq);
+                         PLP_COL_ADD_QUAL(& plp_col->del_source_quals, sq);
+                         del_nonevent_qual += dq;
+                         if (bam1_strand(p->b)) {
+                              plp_col->non_del_fw_rv[1] += 1;
+                         } else {
+                              plp_col->non_del_fw_rv[0] += 1;
+                         }
+                         free(ins_seq);
+
+                    /* deletion (-)
+                     */
+                    } else if (p->indel < 0) {
+                         /* get deleted sequence */
+                         char *del_seq;
+                         int j;
+
+                         if (ad) {
+                              char *a = (char*)(ad+1);
+                              daq = a[p->qpos] - 33;
+                              plp_col->has_indel_aqs = 1;
+                         }
+
+                         plp_col->num_dels += 1;
+                         plp_col->sum_dels -= p->indel;
+
+                         if ((del_seq = malloc(((-p->indel)+1) * sizeof(char)))==NULL) {
+                              LOG_FATAL("%s\n", "Memory allocation failed");
+                              exit(1);
+                         }
+                         /* deleted bases are taken from the reference right of pos ('N' without reference or beyond its end) */
+                         for (j = 1; j <= -p->indel; ++j) {
+                              int c =  (ref && (int)pos+j < ref_len)? ref[pos+j] : 'N';
+                              del_seq[j-1] = toupper(c);
+                         }
+                         del_seq[j-1] = '\0';
+                         /*LOG_DEBUG("Deletion of %s at %d with dq %d daq %d\n", del_seq, pos, dq, daq);*/
+#ifdef PACBIO_SUPPRESS_1BASE_DEL
+                         /*LOG_FIXME("Deletion of %s at %d with dq %d daq %d\n", del_seq, pos, dq, daq);*/
+                         if (strlen(del_seq)==1) {
+                              if (del_seq[0]=='G' || del_seq[0]=='C') {
+                                   dq -= 10;
+                              }
+                              if (del_seq[0]=='A' || del_seq[0]=='T') {
+                                   dq -= 5;
+                              }
+                              if (dq<0) {
+                                   dq=0;
+                              }
+                         }
+#endif
+                         add_del_sequence(&plp_col->del_event_counts,
+                              del_seq, dq, daq, mq, sq,
+                              bam1_strand(p->b)? 1: 0);
+                         PLP_COL_ADD_QUAL(& plp_col->ins_quals, iq); /* a read carrying a deletion is recorded as a non-insertion observation */
+                         PLP_COL_ADD_QUAL(& plp_col->ins_map_quals, mq);
+                         PLP_COL_ADD_QUAL(& plp_col->ins_source_quals, sq);
+                         ins_nonevent_qual += iq;
+                         if (bam1_strand(p->b)) {
+                              plp_col->non_ins_fw_rv[1] += 1;
+                         } else {
+                              plp_col->non_ins_fw_rv[0] += 1;
+                         }
+                         free(del_seq);
+                    }
+
+               } else { /* if (p->indel != 0) ... */
+                    plp_col->num_non_indels += 1;
+                    /* neither deletion, nor insertion. need the qualities anyway */
+                    PLP_COL_ADD_QUAL(& plp_col->ins_quals, iq);
+                    PLP_COL_ADD_QUAL(& plp_col->ins_map_quals, mq);
+                    ins_nonevent_qual += iq;
+                    if (bam1_strand(p->b)) {
+                         plp_col->non_ins_fw_rv[1] += 1;
+                    } else {
+                         plp_col->non_ins_fw_rv[0] += 1;
+                    }
+
+                    /*LOG_DEBUG("Neither deletion nor insertion. Adding iq=%d dq=%d\n", iq, dq);*/
+                    PLP_COL_ADD_QUAL(& plp_col->del_quals, dq);
+                    PLP_COL_ADD_QUAL(& plp_col->del_map_quals, mq);
+                    del_nonevent_qual += dq;
+                    if (bam1_strand(p->b)) {
+                         plp_col->non_del_fw_rv[1] += 1;
+                    } else {
+                         plp_col->non_del_fw_rv[0] += 1;
+                    }
+               }
+          }
+
+     }  /* end: for (i = 0; i < n_plp; ++i) { */
+
+
+     /* ****************** FINDING CONSENSUS **************** */
+     /* consensus is saved as a char array starting with '+' or '-'
+      * if the consensus is an insertion or deletion. there is an
+      * insertion event if the sum of qualities for that insertion event
+      * is greater than the sum of qualities for all non-insertion events.
+      * there is a deletion event if the sum of qualities for that
+      * deletion event is greater than the sum of qualities for all non-deletion
+      * events. otherwise, the consensus base is not an indel and is given
+      * by the nucleotide with the greatest sum of qualities.
+      *
+      *
+      * YHT 2/10/14: """the idea is to find the event which has the highest probability of
+      * occurring the number of times it was observed. For example, if I see
+      * 2 +A events at a position, the probability that those 2 events are real
+      * and occurred together is (1 - the probability that the +A event occurred
+      * due to error for the first supporting read)*(1 - the probability that the
+      * +A event occurred due to error for the second supporting read). I keep
+      * track of this for each insertion event, for e.g. +AA, +T etc. at that
+      * position. I guess in theory this should incorporate errors from other sources.
+      *
+      * I also find the probability of seeing n number of non-insertions at that
+      * position, which is the product of (1 - the probability of seeing an insertion
+      * error at that position for each of those reads).
+      *
+      * I then compare the probabilities of each of these events. It turns out that
+      * comparing the products of (1 - error probability) is the same as comparing
+      * the log sum of the qualities, because qualities are the negative log of the
+      * error probabilities."""
+      *
+      * FIXME: check consensus indel against minimum consensus quality
+      * FIXME: merge indel qualities when determining consensus indel event
+      * FIXME(AW): why are we using max qualities here and not errprob corrected counts?
+      */
+
+     ins_event *ins_it, *ins_it_tmp;
+     char *ins_maxevent_key = NULL; /* key of the insertion event with the largest cons_quals */
+     int ins_maxevent_qual = 0;
+     HASH_ITER(hh_ins, plp_col->ins_event_counts, ins_it, ins_it_tmp) {
+          if (ins_it->cons_quals > ins_maxevent_qual) {
+               ins_maxevent_key = ins_it->key;
+               ins_maxevent_qual = ins_it->cons_quals;
+          }
+     }
+     del_event *del_it, *del_it_tmp;
+     char *del_maxevent_key = NULL; /* key of the deletion event with the largest cons_quals */
+     int del_maxevent_qual = 0;
+     HASH_ITER(hh_del, plp_col->del_event_counts, del_it, del_it_tmp) {
+          if (del_it->cons_quals > del_maxevent_qual) {
+               del_maxevent_key = del_it->key;
+               del_maxevent_qual = del_it->cons_quals;
+          }
+     }
+
+     /* LOG_DEBUG("ins_maxevent_qual:%d ins_nonevent_qual:%d "
+               "del_maxevent_qual:%d del_nonevent_qual:%d\n",
+               ins_maxevent_qual, ins_nonevent_qual,
+               del_maxevent_qual, del_nonevent_qual); */
+     /* note: the insertion test comes first, i.e. an insertion wins if both indel conditions hold */
+     if (!(ins_maxevent_qual > ins_nonevent_qual) &&
+         !(del_maxevent_qual > del_nonevent_qual)) {
+          /* determine consensus from 'counts'. will never produce N on tie  */
+          plp_col->cons_base[0] = bam_nt4_rev_table[
+               argmax_d(base_counts, NUM_NT4)];
+          plp_col->cons_base[1] = '\0';
+     } else if (ins_maxevent_qual > ins_nonevent_qual) {  // consensus insertion
+          /* LOG_DEBUG("cons ins: ins_maxevent_qual=%d > ins_nonevent_qual=%d\n", ins_maxevent_qual, ins_nonevent_qual); */
+          plp_col->cons_base[0] = '+';
+          strcpy(plp_col->cons_base+1, ins_maxevent_key); /* NOTE(review): no length check here; confirm cons_base can hold the longest event key */
+     } else if (del_maxevent_qual > del_nonevent_qual) { // consensus deletion
+          /* LOG_DEBUG("cons del: del_maxevent_qual=%d > del_nonevent_qual=%d\n", del_maxevent_qual, del_nonevent_qual); */
+          plp_col->cons_base[0] = '-';
+          strcpy(plp_col->cons_base+1, del_maxevent_key); /* NOTE(review): see the insertion case above regarding the length */
+     } else {
+          LOG_FATAL("internal error...");
+          exit(1);
+     }
+
+     if (debug) {
+          plp_col_debug_print(plp_col, stderr);
+     }
+#if 0
+     plp_col_mpileup_print(plp_col, conf, stdout);
+#endif
+     /* sanity checks. NOTE(review): baq_quals/source_quals are only filled above if MPLP_BAQ/MPLP_USE_SQ is set; confirm the asserts hold otherwise */
+     for (i = 0; i < NUM_NT4; ++i) {
+          assert(plp_col->fw_counts[i] + plp_col->rv_counts[i] == plp_col->base_quals[i].n);
+          assert(plp_col->base_quals[i].n == plp_col->baq_quals[i].n);
+          assert(plp_col->base_quals[i].n == plp_col->map_quals[i].n);
+          assert(plp_col->map_quals[i].n == plp_col->source_quals[i].n);
+     }
+}
+/* compile_plp_col() */
+
+
+
+/* not part of official samtools/htslib API but part of samtools */
+int
+mpileup(const mplp_conf_t *mplp_conf,
+        void (*plp_proc_func)(const plp_col_t*, void*),
+        void *plp_proc_conf,
+        const int n, const char **fn)
+{
+    mplp_aux_t **data;
+    int i, tid, pos, *n_plp, tid0 = -1, beg0 = 0, end0 = 1u<<29, ref_len = -1, ref_tid = -1, max_depth;
+    const bam_pileup1_t **plp;
+    bam_mplp_t iter;
+    bam_header_t *h = 0;
+    char *ref;
+    kstring_t buf;
+    long long int plp_counter = 0; /* note: some cols are simply skipped */
+
+    /* paranoid exit. n only allowed to be one in our case (not much
+     * of an *m*pileup, I know...) */
+    if (1 != n) {
+         fprintf(stderr, "FATAL(%s:%s): need exactly one BAM files as input (got %d)\n",
+                 __FILE__, __FUNCTION__, n);
+         for (i=0; i<n; i++) {
+              fprintf(stderr, "%s\n", fn[i]);
+         }
+         return 1;
+    }
+
+    memset(&buf, 0, sizeof(kstring_t));
+    data = calloc(n, sizeof(mplp_aux_t*));
+    plp = calloc(n, sizeof(bam_pileup1_t*));
+    n_plp = calloc(n, sizeof(int));
+
+
+    /* read the header and initialize data
+     *
+     * note: most of this is overkill since it deals with multiple bam
+     * files, whereas we allow only one. however, if we keep it close
+     * to the original source then a diff against future versions of
+     * samtools is easier
+     *
+     */
+    for (i = 0; i < n; ++i) {
+        bam_header_t *h_tmp;
+        if (0 != strcmp(fn[i], "-")) {
+          if (! file_exists(fn[i])) {
+            fprintf(stderr, "File '%s' does not exist. Exiting...\n", fn[i]);
+            exit(1);
+          }
+        }
+        data[i] = calloc(1, sizeof(mplp_aux_t));
+        data[i]->fp = strcmp(fn[i], "-") == 0? bam_dopen(fileno(stdin), "r") : bam_open(fn[i], "r");
+        data[i]->conf = mplp_conf;
+        h_tmp = bam_header_read(data[i]->fp);
+        if ( !h_tmp ) {
+             fprintf(stderr,"[%s] fail to read the header of %s\n", __func__, fn[i]);
+             exit(1);
+        }
+        data[i]->h = i? h : h_tmp; /* for i==0, "h" has not been set yet */
+
+        if (mplp_conf->reg) {
+            int beg, end;
+            bam_index_t *idx;
+            idx = bam_index_load(fn[i]);
+            if (idx == 0) {
+                fprintf(stderr, "[%s] fail to load index for %d-th input.\n", __func__, i+1);
+                exit(1);
+            }
+            if (bam_parse_region(h_tmp, mplp_conf->reg, &tid, &beg, &end) < 0) {
+                fprintf(stderr, "[%s] malformatted region or wrong seqname for %d-th input.\n", __func__, i+1);
+                exit(1);
+            }
+            if (i == 0) tid0 = tid, beg0 = beg, end0 = end;
+            data[i]->iter = bam_iter_query(idx, tid, beg, end);
+            bam_index_destroy(idx);
+        }
+        if (i == 0) {
+             h = h_tmp;
+        } else {
+            bam_header_destroy(h_tmp);
+        }
+    }
+    LOG_DEBUG("%s\n", "BAM header initialized");
+    if (debug) {
+         for (i=0; i < h->n_targets; i++) {
+              LOG_DEBUG("BAM header target #%d: name=%s len=%d\n", i, h->target_name[i], h->target_len[i]);
+         }
+    }
+    if (tid0 >= 0 && mplp_conf->fai) { /* region is set */
+         ref = faidx_fetch_seq(mplp_conf->fai, h->target_name[tid0], 0, 0x7fffffff, &ref_len);
+         if (NULL == ref || h->target_len[tid0] != ref_len) {
+              LOG_FATAL("Reference fasta file doesn't seem to contain the right sequence(s) for this BAM file. (mismatch for seq %s listed in BAM header)\n", h->target_name[tid0]);
+              return -1;
+         }
+         strtoupper(ref);/* safeguard */
+         ref_tid = tid0;
+         for (i = 0; i < n; ++i) data[i]->ref = ref, data[i]->ref_id = tid0;
+    } else {
+         ref_tid = -1;
+         ref = 0;
+    }
+    iter = bam_mplp_init(n, mplp_func, (void**)data);
+    max_depth = mplp_conf->max_depth;
+    bam_mplp_set_maxcnt(iter, max_depth);
+
+#ifdef USE_ALNERRPROF
+    if (mplp_conf->alnerrprof_file) {
+         alnerrprof = calloc(1, sizeof(alnerrprof_t));
+         if (parse_alnerrprof_statsfile(alnerrprof, mplp_conf->alnerrprof_file, h)) {
+              LOG_FATAL("parse_errprof_statsfile() on %s failed\n", mplp_conf->alnerrprof_file);
+              exit(1);
+         }
+         normalize_alnerrprof(alnerrprof);
+    }
+#endif
+
+    LOG_DEBUG("%s\n", "Starting pileup loop");
+    while (bam_mplp_auto(iter, &tid, &pos, n_plp, plp) > 0) {
+        plp_col_t plp_col;
+        int i=0; /* NOTE: mpileup originally iterated over n */
+
+        if (mplp_conf->reg && (pos < beg0 || pos >= end0))
+             continue; /* out of the region requested */
+        if (mplp_conf->bed && tid >= 0 && !bed_overlap(mplp_conf->bed, h->target_name[tid], pos, pos+1))
+             continue;
+        if (tid != ref_tid) {
+            free(ref); ref = 0;
+            if (mplp_conf->fai) {
+                 ref = faidx_fetch_seq(mplp_conf->fai, h->target_name[tid], 0, 0x7fffffff, &ref_len);
+                 if (NULL == ref || h->target_len[tid] != ref_len) {
+                      LOG_DEBUG("ref %s at %p h->target_len[tid]=%d ref_len=%d\n", h->target_name[tid], ref, h->target_name[tid], ref_len)
+                      LOG_FATAL("Reference fasta file doesn't seem to contain the right sequence(s) for this BAM file. (mismatch for seq %s listed in BAM header).\n", h->target_name[tid]);
+                      return -1;
+                 }
+                 strtoupper(ref);/* safeguard */
+                 LOG_DEBUG("%s\n", "sequence fetched");
+            }
+            for (i = 0; i < n; ++i)  {
+                 data[i]->ref = ref, data[i]->ref_id = tid;
+            }
+            ref_tid = tid;
+        }
+        i=0; /* i is 1 for first pos which is a bug due to the removal
+              * of one of the loops, so reset here */
+
+        plp_counter += 1;
+        if (1 == plp_counter%100000) {
+             LOG_VERBOSE("Alive and happily crunching away on pos"
+                         " %d of %s...\n", pos+1, h->target_name[tid]);
+        }
+
+        compile_plp_col(&plp_col, plp[i], n_plp[i], mplp_conf,
+                        ref, pos, ref_len, h->target_name[tid]);
+
+        (*plp_proc_func)(& plp_col, plp_proc_conf);
+
+        plp_col_free(& plp_col);
+
+    } /* while bam_mplp_auto */
+
+#ifdef USE_ALNERRPROF
+    if (alnerrprof) {
+         free_alnerrprof(alnerrprof);
+         free(alnerrprof);
+    }
+#endif
+    free(buf.s);
+    bam_mplp_destroy(iter);
+    bam_header_destroy(h);
+    for (i = 0; i < n; ++i) {
+        bam_close(data[i]->fp);
+        if (data[i]->iter) bam_iter_destroy(data[i]->iter);
+        free(data[i]);
+    }
+    free(data); free(plp); free(ref); free(n_plp);
+    return 0;
+}
+/* mpileup() */
diff --git a/src/lofreq/plp.h b/src/lofreq/plp.h
new file mode 100644
index 0000000..5fa35e0
--- /dev/null
+++ b/src/lofreq/plp.h
@@ -0,0 +1,173 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef PLP_H
+#define PLP_H
+
+#include "htslib/faidx.h"
+#include "utils.h"
+#include "vcf.h"
+#include "utils.h"
+
+/* mpileup configuration flags
+ *
+ * Every value below is a distinct single bit, so several flags can be
+ * OR-ed together and tested individually with a bitwise AND.
+ * NOTE(review): presumably these are stored in mplp_conf_t.flag --
+ * confirm against plp.c.
+ */
+#define MPLP_NO_ORPHAN   0x10
+#define MPLP_BAQ         0x20
+#define MPLP_REDO_BAQ    0x40
+#define MPLP_EXT_BAQ     0x80
+#define MPLP_IDAQ        0x100
+#define MPLP_REDO_IDAQ   0x200
+#define MPLP_USE_SQ      0x400
+#define MPLP_ILLUMINA13  0x800
+
+
+extern const char *bam_nt4_rev_table; /* similar to bam_nt16_rev_table */
+/* Must be kept equal to strlen(bam_nt4_rev_table). Used as the size
+ * of the per-base arrays in plp_col_t below. */
+#define NUM_NT4 5 /* strlen(bam_nt4_rev_table); */
+
+/* 256-entry lookup table, i.e. one entry per possible unsigned char
+ * value. NOTE(review): presumably maps a nucleotide character to its
+ * index in bam_nt4_rev_table -- confirm against the definition. */
+extern const unsigned char bam_nt4_table[256];
+
+
+/* mpileup configuration structure
+ *
+ * Passed (read-only) to mpileup(). Field notes below only state what
+ * mpileup() visibly does with a field; anything else is marked as an
+ * assumption.
+ */
+typedef struct {
+     int max_mq, min_mq; /* NOTE(review): presumably mapping quality bounds -- confirm */
+     int flag; /* tag: shared */
+     int max_depth; /* handed to bam_mplp_set_maxcnt() by mpileup() */
+     int min_plp_bq; /* use with caution: this makes lofreq blind to any bases below this value */
+     int min_plp_idq;
+     int def_nm_q;
+     char *reg; /* region string; if set, mpileup() loads the BAM index and parses it with bam_parse_region() */
+     char *fa; /* NOTE(review): presumably the path of the reference fasta behind fai -- confirm */
+     faidx_t *fai; /* if set, mpileup() fetches each target's reference sequence through it */
+     void *bed; /* if set, mpileup() skips positions for which bed_overlap() reports no overlap */
+     char *alnerrprof_file; /* logically belongs to varcall_conf, but we need it here since only here the bam header is known */
+     char cmdline[1024];
+} mplp_conf_t;
+
+
+/* One pileup column, i.e. everything collected for a single reference
+ * position. mpileup() fills one of these per position through
+ * compile_plp_col(), hands it to the plp_proc_func callback and then
+ * releases it with plp_col_free().
+ */
+typedef struct {
+     char *target; /* chromosome or sequence name */
+     int pos; /* position */
+     char ref_base; /* uppercase reference base (given by fasta) */
+     char cons_base[MAX_INDELSIZE]; /* uppercase consensus base according to base-counts, after read-level filtering. */
+     int coverage_plp; /* original samtools value. upper count limit for all kept values */
+     int num_bases; /* number of bases after base filtering */
+     /* num_ins and num_dels gives 'num_indels' */
+     int num_ign_indels; /* a hack: indels often get filtered because of low quality or missing qualities in bam file. we need to know nevertheless they are present. this is the count of all "ignored" indels */
+
+     /* list of qualities: keeping them all here in one place so that
+      * filtering can become separate step. alternative is to filter
+      * during pileup. the latter doesn't work if you want to filter
+      * based on a consensus which you don't know in advance */
+     int_varray_t base_quals[NUM_NT4]; 
+     int_varray_t baq_quals[NUM_NT4]; 
+     int_varray_t map_quals[NUM_NT4]; 
+     int_varray_t source_quals[NUM_NT4]; 
+#ifdef USE_ALNERRPROF
+     int_varray_t alnerr_qual[NUM_NT4]; /* FIXME this should be precomputed and then build into model */
+#endif
+     long int fw_counts[NUM_NT4]; 
+     long int rv_counts[NUM_NT4]; 
+     /* fw_counts[b] + rv_counts[b] = x_quals.n = coverage */
+
+     int num_heads; /* number of read starts at this pos */
+     int num_tails; /* number of read ends at this pos */
+
+     /* Indel qualities are stored separately according to the type of
+      * indel event observed. Insertions and deletions are considered 
+      * independently. If there was no indel event observed,
+      * the indel quality, indel mapping quality and indel source quality
+      * are stored in *_quals, *_map_quals, *_source_quals. Since no
+      * indel was observed, there is no indel alignment quality. If
+      * an indel event is observed, the qualities are stored in 
+      * the hash table to which *_event_counts points and which is
+      * keyed by the sequence of the indel event. See utils.h for the
+      * data structure for storing indel qualities if an indel event
+      * is observed. */
+
+     int num_non_indels;/* non-indel events for which we have indel qualities */
+
+     int num_ins, sum_ins;
+     int_varray_t ins_quals; 
+     int_varray_t ins_map_quals;
+     int_varray_t ins_source_quals;
+     ins_event *ins_event_counts;
+
+     int num_dels, sum_dels;
+     int_varray_t del_quals; 
+     int_varray_t del_map_quals;
+     int_varray_t del_source_quals;
+     del_event *del_event_counts;
+     
+     /* fw or rv counts for all non-indel events 
+      * fw = 0, rv = 1*/
+     long int non_ins_fw_rv[2]; 
+     long int non_del_fw_rv[2];
+
+     int has_indel_aqs; /* flag, which is only used to make sure that
+                           BAM contained alignment quality for indel calls 
+                           (all reads with indels should have those).
+                           indels are still predicted if missing, but overcalled. */  
+     int hrun; /* homopolymer run at (to the right of) current
+                * position. if indels are not left aligned and current
+                * position is already a homopolymer this will be taken
+                * into account. mainly for filtering low af FP indel
+                * at the beginning of poly-AT regions. A del GT>G
+                * which is in the sequence context of GTTT will
+                * receive an hrun value of 3. same for ins G>GT.
+                */
+     /* changes here should be reflected in plp_col_init, plp_col_free etc. */
+} plp_col_t;
+
+
+/* Thin wrapper around int_varray_add_value(): p is the int_varray_t
+ * pointer handed through, q the value to add. */
+#define PLP_COL_ADD_QUAL(p, q)   int_varray_add_value((p), (q))
+
+/* initialize members of preallocated mplp_conf */
+void init_mplp_conf(mplp_conf_t *c);
+
+/* NOTE(review): presumably returns how often 'base' was seen in
+ * column p -- confirm against the definition in plp.c */
+int
+base_count(const plp_col_t *p, char base);
+
+/* Writes the configuration c to stream. */
+void
+dump_mplp_conf(const mplp_conf_t *c, FILE *stream);
+
+/* Runs the pileup over the BAM file(s) given in fn (n entries; only
+ * n == 1 is accepted) and calls plp_proc_func once per processed
+ * column, passing plp_proc_conf through as second argument.
+ *
+ * Returns 0 on success, 1 if n != 1 and -1 if the reference fasta
+ * does not match the BAM header. Missing input files, unreadable
+ * headers, missing indices and malformed regions terminate the
+ * process via exit(1).
+ */
+int
+mpileup(const mplp_conf_t *mplp_conf, 
+        void (*plp_proc_func)(const plp_col_t*, void*),
+        void *plp_proc_conf, 
+        const int n, const char **fn);
+
+/* NOTE(review): presumably loads the variants of vcf_path (optionally
+ * restricted by bed) into the ignore list queried by
+ * var_in_ign_list() -- confirm against the definition */
+int
+source_qual_load_ign_vcf(const char *vcf_path, void *bed);
+
+/* Takes no arguments; declared with (void) so that the compiler
+ * rejects calls which pass any. */
+void
+source_qual_free_ign_vars(void);
+
+/* Non-zero return means var is to be ignored. Callers in samutils.c
+ * pass a var_t in which only chrom and pos are meaningful. */
+int 
+var_in_ign_list(var_t *var);
+
+#endif
diff --git a/src/lofreq/samutils.c b/src/lofreq/samutils.c
new file mode 100644
index 0000000..cd838ea
--- /dev/null
+++ b/src/lofreq/samutils.c
@@ -0,0 +1,669 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <assert.h>
+
+/* samtools includes */
+#include "sam.h"
+#include "htslib/kstring.h"
+
+/* lofreq includes */
+#include "log.h"
+#include "vcf.h"
+#include "plp.h"
+#include "samutils.h"
+
+/* libbam:bamaux.c */
+extern void bam_init_header_hash(bam_header_t *header);
+extern void bam_destroy_header_hash(bam_header_t *header);
+
+/* Placeholder quality used for insertions and deletions by
+ * calc_read_alnerrprof() and count_cigar_ops(), which do not look at
+ * the actual indel qualities of a read. */
+#define INDEL_QUAL_DEFAULT 45
+
+/* Size of the line and target-name buffers used by
+ * parse_alnerrprof_statsfile() */
+#define BUF_SIZE 1024
+
+/* Upper bound asserted by parse_alnerrprof_statsfile() for the read
+ * positions listed in an error profile */
+#define MAX_READ_LEN 8192
+
+#ifdef USE_ALNERRPROF
+
+/* Releases the per-target arrays held by alnerrprof (as allocated by
+ * parse_alnerrprof_statsfile). The struct itself is not freed; that
+ * stays with the caller.
+ *
+ * A NULL argument is ignored. All freed pointers are reset, so that
+ * calling this twice on the same struct is harmless.
+ */
+void
+free_alnerrprof(alnerrprof_t *alnerrprof)
+{
+     int i;
+
+     if (NULL == alnerrprof) {
+          return;
+     }
+
+     if (alnerrprof->props && alnerrprof->prop_len) {
+          for (i=0; i<alnerrprof->num_targets; i++) {
+               /* targets with a length of 0 had no data and were
+                * already released while parsing */
+               if (alnerrprof->prop_len[i]){
+                    free(alnerrprof->props[i]);
+                    alnerrprof->props[i] = NULL;
+               }
+          }
+     }
+     free(alnerrprof->props);
+     alnerrprof->props = NULL;
+     free(alnerrprof->prop_len);
+     alnerrprof->prop_len = NULL;
+     alnerrprof->num_targets = -1;
+}
+
+
+void
+normalize_alnerrprof(alnerrprof_t *alnerrprof)
+{
+     int i;
+
+#if 0
+     {/* fixme report */
+          for (i=0; i<alnerrprof->num_targets; i++) {
+               int j;
+               fprintf(stderr, "FIXME in tid=%d len=%d: ", i, alnerrprof->prop_len[i]);
+               for (j=0; j<alnerrprof->prop_len[i]; j++) {
+                    fprintf(stderr, " %d:%g ", j, alnerrprof->props[i][j]);
+               }
+               fprintf(stderr, "\n");
+          }
+     }
+#endif
+
+     for (i=0; i<alnerrprof->num_targets; i++) {
+          int j;
+          double median = dbl_median(alnerrprof->props[i], alnerrprof->prop_len[i]);
+#if 0
+          fprintf(stderr, "FIXME tid=%d median=%g\n", i, median);
+#endif
+          for (j=0; j<alnerrprof->prop_len[i]; j++) {
+               double val = alnerrprof->props[i][j] - median;
+               if (val >= 0.0) {
+                    alnerrprof->props[i][j]  = val;
+               } else {
+                    alnerrprof->props[i][j]  = 0.0;
+               }
+
+          }
+     }
+     
+#if 0
+     {/* fixme report */
+          for (i=0; i<alnerrprof->num_targets; i++) {
+               int j;
+               fprintf(stderr, "FIXME out tid=%d len=%d: ", i, alnerrprof->prop_len[i]);
+               for (j=0; j<alnerrprof->prop_len[i]; j++) {
+                    fprintf(stderr, " %d:%g ", j, alnerrprof->props[i][j]);
+               }
+               fprintf(stderr, "\n");
+          }
+     }
+#endif
+}
+
+
+/* will return non-0 on error. parsed error prof will be written to
+ * alnerrprof. values are allocated here and should be freed with
+ * free_alnerrprof */
+int
+parse_alnerrprof_statsfile(alnerrprof_t *alnerrprof, const char *path, bam_header_t *bam_header)
+{
+     char line[BUF_SIZE];
+     int i;
+     int *max_obs_pos;
+     const int default_read_len = 250;
+     int free_bam_header_hash = 0;
+     int rc;
+     FILE *in = fopen(path, "r");
+
+
+     /* needed for finding tid from tname */
+     if (bam_header->hash == 0) {
+          bam_init_header_hash(bam_header);             
+          free_bam_header_hash = 1;
+     }
+
+     max_obs_pos = calloc(bam_header->n_targets, sizeof(int));
+     
+     alnerrprof->num_targets = bam_header->n_targets;
+     alnerrprof->prop_len = calloc(alnerrprof->num_targets, sizeof(int));
+     alnerrprof->props = calloc(alnerrprof->num_targets, sizeof(double *));     
+     for (i=0; i<alnerrprof->num_targets; i++) {
+          alnerrprof->prop_len[i] = default_read_len;/* default alloc here and realloc later */
+          alnerrprof->props[i] = calloc(alnerrprof->prop_len[i], sizeof(double));
+     }
+     i=-1; /* make sure value is not reused by accident; triggers clang warning though */
+
+     while (NULL != fgets(line, BUF_SIZE, in)) {
+          int pos = -1;
+          char tname[BUF_SIZE];
+          double prop = -1;
+          unsigned long int count = -1;
+          int tid = -1;
+          if (line[0]=='#') {
+               continue;
+          }
+
+          if (4 != sscanf(line, "%s\t%d\t%lg\t%lu\n", tname, &pos, &prop, &count)) {
+              LOG_ERROR("Couldn't parse line %s\n", line);
+              rc = 1;
+              goto free_and_exit;
+         }
+
+         assert(prop>=0.0 && prop<=1.0);
+
+         pos = pos - 1;
+         assert(pos<MAX_READ_LEN);
+
+         tid = bam_get_tid(bam_header, tname);
+         if (-1 == tid) {
+              LOG_ERROR("Target name '%s' found in error profile doesn't match any of the sequences in BAM header. Skipping and trying to continue...\n", tname);
+              continue;
+         }
+         assert(tid<alnerrprof->num_targets);
+
+         /* for later downsizing */
+         if (pos+1 > max_obs_pos[tid]) {
+              max_obs_pos[tid] = pos+1;
+         }
+
+         /* upsize if necessary */
+         while (pos >= alnerrprof->prop_len[tid]) {
+              LOG_DEBUG("upsizing pos+1=%d alnerrprof->prop_len[tid=%d]=%d\n\n", pos+1, tid, alnerrprof->prop_len[tid]);
+              alnerrprof->prop_len[tid] *= 2;
+              alnerrprof->props[tid] = realloc(alnerrprof->props[tid], alnerrprof->prop_len[tid] * sizeof(double));
+         }
+         alnerrprof->props[tid][pos] = prop;
+     }
+
+     /* downsize */
+     for (i=0; i<alnerrprof->num_targets; i++) {
+          if (max_obs_pos[i]) {
+               LOG_DEBUG("downsizing alnerrprof->prop_len[tid=%d] to max %d\n", i, max_obs_pos[i]);
+               alnerrprof->props[i] = realloc(alnerrprof->props[i], max_obs_pos[i] * sizeof(double));
+          } else {
+               free(alnerrprof->props[i]);/* no data for this tid: free */
+          }
+          alnerrprof->prop_len[i] = max_obs_pos[i];
+     }
+
+#if 0
+     {/* fixme report */
+          for (i=0; i<alnerrprof->num_targets; i++) {
+               int j;
+               fprintf(stderr, "tid=%d len=%d: ", i, alnerrprof->prop_len[i]);
+               for (j=0; j<alnerrprof->prop_len[i]; j++) {
+                    fprintf(stderr, " %d:%g ", j, alnerrprof->props[i][j]);
+               }
+               fprintf(stderr, "\n");
+               fprintf(stderr, "median for tid %d: %g for size %d\n",
+                       i,
+                       dbl_median(alnerrprof->props[i], alnerrprof->prop_len[i]),
+                       alnerrprof->prop_len[i]);
+          }
+     }
+#endif
+
+     rc = 0;
+
+free_and_exit:
+     
+     free(max_obs_pos);
+
+     free_bam_header_hash = 0; /* FIXME segfaults often for unknown reason */
+     if (free_bam_header_hash) {
+          bam_destroy_header_hash(bam_header);
+     }
+     fclose(in);
+
+     return rc;
+}
+
+
+
+void
+write_alnerrprof_stats(char *target_name, unsigned long int *alnerrprof_usedpos, 
+                    double *alnerrprof, int max_obs_read_len, FILE *out)
+{
+     /* poor man's version (fw reads only and not taking quality into account):
+      *  samtools view -h -F 0x10 $bam | samtools calmd -S -e - $reffa | cut -f 10 | awk '{for (i=0; i<length($0); i++) {if (substr($0, i, 1)!="=") {c[i]+=1}}} END {for (i in c) {print i, c[i], c[i]/NR}}' | sort -k 1 -n
+      */
+     int i;
+     fprintf(out, "# Error, i.e. 'no-match' profile along read after subtracting base-call/indel quality\n");
+     fprintf(out, "# Numbers are in scientific notation\n");
+     fprintf(out, "# chrom\tread-pos\terror-freq\tcount\n");
+
+     for (i=0; i<max_obs_read_len; i++) {
+         double prop = 0.0;
+         if (alnerrprof_usedpos[i]) {
+              prop = alnerrprof[i]/(double)(alnerrprof_usedpos[i]);
+         }
+         fprintf(out, "%s\t%d\t%g\t%lu\n", target_name, i+1, prop, alnerrprof_usedpos[i]);/*, alnerrprof[i], alnerrprof_usedpos[i]);*/
+     }
+}
+
+
+
+/* Counts probability of non-match count along the read after
+ * subtracting error prob at that position (using the original
+ * orientation, i.e. positions of reverse strand reads are mirrored).
+ * used_pos is an array of counts indicating how often a position was
+ * used (i.e. not trimmed, clipped etc). alnerrprof and used_pos must
+ * be of at least length b->core.l_qseq. Note: will add to alnerrprof
+ * and used_pos, i.e. arrays should be initialized to 0 if you don't
+ * want aggregate values.
+ *
+ * ref is the reference sequence of the read's target, indexed by
+ * genomic position. Aligned bases facing an 'N' in the reference are
+ * not counted at all. Insertions and deletions are counted with
+ * INDEL_QUAL_DEFAULT and, just like mis-/matches, at the read
+ * position in original orientation.
+ *
+ * WARNING code duplication with count_cigar_ops but merging the two
+ * functions is messy.
+ */
+void
+calc_read_alnerrprof(double *alnerrprof, unsigned long int *used_pos, 
+                   const bam1_t *b, const char *ref)
+{
+     /* modelled after bam.c:bam_calend(), bam_format1_core() and
+      * pysam's aligned_pairs (./pysam/csamtools.pyx)
+      */
+     uint32_t *cigar = bam1_cigar(b);
+     uint32_t k, i;
+     const bam1_core_t *c = &b->core;
+     int qlen = b->core.l_qseq; /* read length */
+     uint32_t pos = c->pos; /* pos on genome */
+     uint32_t qpos = 0; /* pos on read/query */
+     uint32_t qpos_org = bam1_strand(b) ? qlen-qpos-1 : qpos;/* original qpos before mapping as possible reverse */
+
+
+     /* loop over cigar to get aligned bases
+      *
+      * read: bam_format1_core(NULL, b, BAM_OFDEC);
+      */
+     for (k=0; k < c->n_cigar; ++k) { /* n_cigar: number of cigar operations */
+          int op = cigar[k] & BAM_CIGAR_MASK; /* the cigar operation */
+          uint32_t l = cigar[k] >> BAM_CIGAR_SHIFT;
+
+          /* following conditionals could be collapsed to much shorter
+           * code, but we keep them as they were in pysam's
+           * aligned_pairs to make later handling of indels easier
+           */
+          if (op == BAM_CMATCH || op == BAM_CDIFF) {
+               for (i=pos; i<pos+l; i++) {
+                    char ref_nt;
+                    char read_nt;
+                    int bq;
+
+                    assert(qpos < qlen);
+                    /* case agnostic */
+                    ref_nt = ref[i];
+                    read_nt = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), qpos)];
+                    bq = bam1_qual(b)[qpos];
+
+                    if (ref_nt != 'N') {
+                         if (ref_nt != read_nt || op == BAM_CDIFF) {
+                              alnerrprof[qpos_org] += (1.0 - PHREDQUAL_TO_PROB(bq));
+                         } /* otherwise leave at 0.0 but count anyway */
+                         used_pos[qpos_org] += 1;
+                    }
+                    qpos += 1;
+                    qpos_org = bam1_strand(b) ? qlen-qpos-1 : qpos;
+               }
+               pos += l;
+
+          } else if (op == BAM_CINS) {
+               for (i=pos; i<pos+l; i++) {
+                    assert(qpos < qlen);
+
+                    /* same orientation handling as for mis-/matches,
+                     * otherwise insertions in reverse strand reads
+                     * end up at the mirrored position */
+                    alnerrprof[qpos_org] += (1.0 - PHREDQUAL_TO_PROB(INDEL_QUAL_DEFAULT));
+                    used_pos[qpos_org] += 1;
+
+                    qpos += 1;
+                    qpos_org = bam1_strand(b) ? qlen-qpos-1 : qpos;
+               }
+
+          } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) {
+               for (i=pos; i<pos+l; i++) {
+                    /* qpos points to the read base following the
+                     * deletion. there is none if the cigar (wrongly)
+                     * ends with a deletion, so don't index then */
+                    if (op == BAM_CDEL && qpos < (uint32_t)qlen) {
+                         alnerrprof[qpos_org] += (1.0 - PHREDQUAL_TO_PROB(INDEL_QUAL_DEFAULT));
+                         used_pos[qpos_org] += 1;
+                    }
+               }
+               pos += l;
+               /* deletion: don't increase qpos */
+
+          } else if (op == BAM_CSOFT_CLIP) {
+               qpos += l;
+               qpos_org = bam1_strand(b) ? qlen-qpos-1 : qpos;
+
+          } else if (op != BAM_CHARD_CLIP) {
+               /* string is allocated for us, so we have to free it */
+               char *cigar_s = cigar_str_from_bam(b);
+               LOG_WARN("Unknown op %d in cigar %s\n", op, cigar_s ? cigar_s : "");
+               free(cigar_s);
+
+          }
+     } /* for k */
+     assert(pos == bam_calend(&b->core, bam1_cigar(b))); /* FIXME correct assert? what if hard clipped? */
+     if (qpos != qlen) {
+          char *cigar_s = cigar_str_from_bam(b);
+          LOG_FIXME("got qpos=%d and qlen=%d for cigar %s l_qseq %d\n", qpos, qlen, cigar_s ? cigar_s : "", b->core.l_qseq);
+          free(cigar_s);
+     }
+     assert(qpos == qlen); /* FIXME correct assert? What if hard clipped? */
+}
+#endif
+
+
+
+/* from char *bam_format1_core(const bam_header_t *header, const
+ * bam1_t *b, int of) 
+ *
+ * Returns the cigar of b as newly allocated string, e.g. "10M1I5M".
+ * The caller has to free() it. A read without cigar operations gives
+ * an empty string (so that the result can safely be printed with %s)
+ * and operation codes without a character in "MIDNSHP=X" are printed
+ * as '?'. NULL is only returned if memory allocation failed.
+ */
+char *
+cigar_str_from_bam(const bam1_t *b)
+{
+     /* indexed by cigar operation code */
+     static const char op_chars[] = "MIDNSHP=X";
+     const bam1_core_t *c = &b->core;
+     const uint32_t *cigar = bam1_cigar(b);
+     kstring_t str;
+     uint32_t i;
+
+     str.l = str.m = 0; str.s = 0;
+     for (i = 0; i < c->n_cigar; ++i) {
+          /* the mask allows for more codes than we have characters
+           * for, so check before indexing */
+          const uint32_t op = cigar[i] & BAM_CIGAR_MASK;
+          const int op_char = (op < sizeof(op_chars)-1) ? op_chars[op] : '?';
+
+          kputw(cigar[i]>>BAM_CIGAR_SHIFT, &str);
+          kputc(op_char, &str);
+     }
+     if (NULL == str.s) {
+          /* nothing was written: return "" instead of NULL */
+          str.s = calloc(1, sizeof(char));
+     }
+     return str.s;
+}
+/* cigar_str_from_bam() */
+
+
+
+/* Count matches (OP_MATCH), mismatches (OP_MISMATCH), insertions
+ * (OP_INS) and deletions (OP_DEL) for an aligned read. Written to
+ * (preallocated, size NUM_OP_CATS) counts at indices given above.
+ * Will ignore all mis-/match bases if their bq is below min_bq.
+ *
+ * Returns the total number of operations counted (excl. clipped bases
+ * or those with bq<min_bq) or -1 on error (ref or counts missing).
+ * An indel is counted as one operation independent of its length,
+ * using INDEL_QUAL_DEFAULT, which is suboptimal. 0 is a valid return
+ * value, e.g. if all bases are below the quality threshold.
+ *
+ * If quals is not NULL it will be used as a two dim array (has to be
+ * preallocated) with OPs as first dim (len NUM_OP_CATS) and the
+ * qualities of the bases as second dim. NOTE/FIXME: this uses bq for
+ * mis/matches and INDEL_QUAL_DEFAULT for now in case of indels. The
+ * number of elements corresponds to the count entry and can be at max
+ * readlen.
+ * 
+ * If target is non-NULL will ignore preloaded variant positions via
+ * var_in_ign_list. Ignored mismatches and indels are not counted, but
+ * the read and reference positions are advanced past them as for any
+ * other operation.
+ *
+ * NOTE(review): cigar operation '=' is neither handled here nor in
+ * calc_read_alnerrprof; it ends up in the "Untested op" branch
+ * without advancing either position. Confirm whether such input can
+ * occur.
+ *
+ * WARNING code duplication with calc_read_alnerrprof but merging the
+ * two functions was too complicated (and the latter is unused anyway)
+ */
+int
+count_cigar_ops(int *counts, int **quals, const bam1_t *b,
+                const char *ref, int min_bq, char *target)
+{
+#if 0
+#define TRACE 1
+#endif
+     int num_ops = 0;
+     /* modelled after bam.c:bam_calend(), bam_format1_core() and
+      * pysam's aligned_pairs (./pysam/csamtools.pyx)
+      */
+     uint32_t *cigar = bam1_cigar(b);
+     const bam1_core_t *c = &b->core;
+     uint32_t tpos = c->pos; /* pos on genome */
+     uint32_t qpos = 0; /* pos on read/query */
+     uint32_t k, i;
+     int qlen = b->core.l_qseq; /* read length */
+
+     if (! ref || ! counts) {
+          return -1;
+     }
+
+     memset(counts, 0, NUM_OP_CATS*sizeof(int));
+
+     /* loop over cigar to get aligned bases
+      *
+      * read: bam_format1_core(NULL, b, BAM_OFDEC);
+      */
+     for (k=0; k < c->n_cigar; ++k) { /* n_cigar: number of cigar operations */
+          int op = cigar[k] & BAM_CIGAR_MASK; /* the cigar operation */
+          uint32_t l = cigar[k] >> BAM_CIGAR_SHIFT;
+
+          /* following conditionals could be collapsed to much shorter
+           * code, but we keep them roughly as they were in pysam's
+           * aligned_pairs to make later comparison and handling of
+           * indels easier
+           */
+          if (op == BAM_CMATCH || op == BAM_CDIFF) {
+               for (i=tpos; i<tpos+l; i++) {
+                    int actual_op;
+                    char ref_nt;
+                    char read_nt;
+                    int bq;
+
+                    assert(qpos < qlen);
+                    ref_nt = ref[i];
+                    read_nt = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), qpos)];
+                    bq = bam1_qual(b)[qpos];
+
+                    if (ref_nt != read_nt || op == BAM_CDIFF) {
+                         actual_op = OP_MISMATCH;
+                    } else {
+                         actual_op = OP_MATCH;
+                    }
+
+                    /* ignoring base if below min_bq, independent of type */
+                    if (bq<min_bq) {
+#ifdef TRACE
+                         fprintf(stderr, "TRACE(%s): [M]MATCH ignoring base because of bq=%d at %d (qpos %d)\n", bam1_qname(b), bq, i, qpos);
+#endif
+                         qpos += 1;
+                         continue;
+                    }
+
+                    /* for mismatches only */
+                    if (target && actual_op == OP_MISMATCH) {
+                         var_t fake_var;
+                         memset(&fake_var, 0, sizeof(var_t));
+                         fake_var.chrom = target;
+                         fake_var.pos = i;
+                         /* FIXME evil, evil hack. only works as long as var_in_ign_list only uses chrom and pos */
+                         if (var_in_ign_list(&fake_var)) {
+
+#ifdef TRACE
+                              fprintf(stderr, "TRACE(%s): MM: ignoring because in ign list at %d (qpos %d)\n", bam1_qname(b), i, qpos);
+#endif
+                              qpos += 1;
+                              continue;
+                         } 
+                    }
+
+#ifdef TRACE
+                    fprintf(stderr, "TRACE(%s): adding [M]MATCH qpos,tpos,ref,read,bq = %d,%d,%c,%c,%d\n", bam1_qname(b), qpos, tpos, ref_nt, read_nt, bq);
+#endif                    
+                    counts[actual_op] += 1;
+                    if (quals) {
+                         quals[actual_op][counts[actual_op]-1] = bq;
+                    }
+
+                    qpos += 1;
+               }
+               tpos += l;
+
+          } else if (op == BAM_CINS || op == BAM_CDEL) {
+
+               if (target) {
+                    /* vcf: 
+                     * indel at tpos 1 means, that qpos 2 is an insertion  (e.g. A to AT)
+                     * del at tpos 1 means, that qpos 2 is missing (e.g. AT to A)
+                     */
+                    var_t fake_var;
+                    /* zeroed as for mismatches above, so that no
+                     * uninitialized members are handed over */
+                    memset(&fake_var, 0, sizeof(var_t));
+                    fake_var.chrom = target;
+                    fake_var.pos = tpos;
+                    if (op==BAM_CINS) {
+                         fake_var.pos -= 1;
+                    }
+                    /* FIXME see above: only works as long as var_in_ign_list only uses chrom and pos */
+                    if (var_in_ign_list(&fake_var)) {
+                         if (op == BAM_CINS) {
+                              qpos += l;
+                         }
+#ifdef TRACE
+                         fprintf(stderr, "TRACE(%s): %c: ignoring because in ign list at tpos %d (qpos %d)\n", bam1_qname(b), op == BAM_CINS? 'I':'D', tpos, qpos);
+#endif
+                         if (op == BAM_CDEL) {
+                              /* an ignored deletion still consumes
+                               * reference. without this all following
+                               * bases would be compared against the
+                               * wrong reference positions */
+                              tpos += l;
+                         }
+                         continue;
+                    }
+               }
+
+#ifdef TRACE
+               fprintf(stderr, "TRACE(%s): adding %c qpos,tpos = %d,%d\n", bam1_qname(b), op==BAM_CINS?'I':'D', qpos, tpos);
+#endif                    
+
+               if (op == BAM_CINS) {
+                    counts[OP_INS] += 1; /* counts indel as 1 operation only */
+                    if (quals) {
+                         quals[OP_INS][counts[OP_INS]-1] = INDEL_QUAL_DEFAULT; /* FIXME use iq */
+                    }
+                    qpos += l;/* forward query pos by length of operation */
+
+               } else { /* BAM_CDEL: the only other op getting here */
+                    counts[OP_DEL] += 1; /* counts indel as 1 operation only */
+                    if (quals) {
+                         quals[OP_DEL][counts[OP_DEL]-1] = INDEL_QUAL_DEFAULT; /* FIXME use dq */
+                    }
+                    tpos += l; /* forward genome pos by length of operation */
+               }
+
+          } else if (op == BAM_CREF_SKIP) {
+               tpos += l;
+
+          } else if (op == BAM_CSOFT_CLIP) {
+               qpos += l;
+
+          } else if (op != BAM_CHARD_CLIP) {
+               /* string is allocated for us, so we have to free it */
+               char *cigar_s = cigar_str_from_bam(b);
+               LOG_WARN("Untested op %d in cigar %s\n", op, cigar_s ? cigar_s : "");
+               free(cigar_s);
+               /* don't think we need to do anything here */
+          }
+     } /* for k */
+
+     /* bam_calend gives the end of the alignment on the reference, so
+      * it's the genome and not the query position that has to match */
+     assert(tpos == bam_calend(&b->core, bam1_cigar(b))); /* FIXME correct assert? what if hard clipped? */
+     if (qpos != qlen) {
+          char *cigar_s = cigar_str_from_bam(b);
+          LOG_WARN("got qpos=%d and qlen=%d for cigar %s l_qseq %d in read %s\n", qpos, qlen, cigar_s ? cigar_s : "", b->core.l_qseq, bam1_qname(b));
+          free(cigar_s);
+     }
+     assert(qpos == qlen);
+
+     num_ops = 0;
+     for (i=0; i<NUM_OP_CATS; i++) {
+          num_ops += counts[i];
+#ifdef TRACE
+          if (quals) {
+               int j;
+               for (j=0; j<counts[i]; j++) {
+                    fprintf(stderr, "TRACE(%s) op %s #%d: %d\n", bam1_qname(b), op_cat_str[i], j, quals[i][j]);
+               }
+          }
+#endif
+     }
+     return num_ops;
+}
+/* count_cigar_ops() */
+#undef TRACE
+
+
+/* Check that a reference fasta file matches the sequences declared in
+ * the header of a BAM file (bam_file "-" reads the BAM from stdin).
+ *
+ * Every target listed in the BAM header must be fetchable from the
+ * fasta file and have the same length there. Prints an error message
+ * and returns non-zero on any failure or mismatch (1 for file, header
+ * or index problems; -1 for a missing sequence or a length mismatch).
+ * Returns 0 if all targets match.
+ */
+int checkref(char *fasta_file, char *bam_file)
+{
+     int i;
+     int rc = 1; /* pessimistic default; set to 0 only after all checks passed */
+     bam_header_t *header = NULL;
+     faidx_t *fai = NULL;
+     char *ref;
+     int ref_len = -1;
+     bamFile bam_fp;
+
+     if (! file_exists(fasta_file)) {
+          LOG_FATAL("Fasta file %s does not exist. Exiting...\n", fasta_file);
+          return 1;
+     }
+
+     if (0 != strcmp(bam_file, "-")  && ! file_exists(bam_file)) {
+          LOG_FATAL("BAM file %s does not exist. Exiting...\n", bam_file);
+          return 1;
+     }
+
+     bam_fp = strcmp(bam_file, "-") == 0 ? bam_dopen(fileno(stdin), "r") : bam_open(bam_file, "r");
+     if (!bam_fp) {
+          LOG_FATAL("Failed to open BAM file %s\n", bam_file);
+          return 1;
+     }
+
+     /* from here on all exits go through cleanup so nothing is leaked */
+     header = bam_header_read(bam_fp);
+     if (!header) {
+          LOG_FATAL("Failed to read BAM header from %s\n", bam_file);
+          goto cleanup;
+     }
+
+     fai = fai_load(fasta_file);
+     if (!fai) {
+          LOG_FATAL("Failed to load fasta index for %s\n", fasta_file);
+          goto cleanup;
+     }
+
+     for (i=0; i < header->n_targets; i++) {
+          LOG_DEBUG("BAM header target %d of %d: name=%s len=%d\n",
+                    i+1, header->n_targets, header->target_name[i], header->target_len[i]);
+
+          /* fetch the whole sequence; only its length is of interest */
+          ref = faidx_fetch_seq(fai, header->target_name[i],
+                                0, 0x7fffffff, &ref_len);
+          if (NULL == ref) {
+               LOG_FATAL("Failed to fetch sequence %s from fasta file\n", header->target_name[i]);
+               rc = -1;
+               goto cleanup;
+          }
+          if (header->target_len[i] != ref_len) {
+               /* argument order follows the message: fasta length first, then bam */
+               LOG_FATAL("Sequence length mismatch for sequence %s (%dbp in fasta; %dbp in bam)\n",
+                         header->target_name[i], ref_len, header->target_len[i]);
+               free(ref);
+               rc = -1;
+               goto cleanup;
+          }
+          free(ref);
+     }
+     rc = 0;
+
+cleanup:
+     if (fai) {
+          fai_destroy(fai);
+     }
+     if (header) {
+          bam_header_destroy(header);
+     }
+     bam_close(bam_fp);
+
+     return rc;
+}
diff --git a/src/lofreq/samutils.h b/src/lofreq/samutils.h
new file mode 100644
index 0000000..0a21848
--- /dev/null
+++ b/src/lofreq/samutils.h
@@ -0,0 +1,95 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef SAMUTILS_H
+#define SAMUTILS_H
+
+#include "htslib/sam.h"
+
+
+
+
+/* Categories of alignment operations that are counted per read. The
+ * values double as array indices (count_cigar_ops() indexes its
+ * counts/quals arguments with them), so NUM_OP_CATS must stay last:
+ * it is the number of categories, not a category itself.
+ */
+typedef enum {
+        OP_MATCH,
+        OP_MISMATCH,
+        OP_INS,
+        OP_DEL,
+        NUM_OP_CATS,
+} op_cat_t;
+
+/* turn the token given as argument into a string literal */
+#define STR(name) # name
+
+/* Printable names of the categories above, indexed by op_cat_t.
+ * NOTE(review): this is a static definition inside a header, so every
+ * translation unit including it gets its own copy.
+ */
+static char *op_cat_str[] = {
+    STR(OP_MATCH),
+    STR(OP_MISMATCH),
+    STR(OP_INS),
+    STR(OP_DEL),
+    STR(NUM_OP_CATS)
+};
+
+
+char *
+cigar_str_from_bam(const bam1_t *b);
+
+/* Walk the cigar of read b and count operations per op_cat_t category.
+ *
+ * counts: incremented per category (index is op_cat_t); an insertion
+ *         or deletion counts as one operation regardless of length.
+ * quals:  optional (may be NULL); if given, quals[cat][k] receives the
+ *         quality recorded for the k-th counted operation of cat.
+ * target: optional (may be NULL); if given, it is used as chromosome
+ *         name to skip positions found in the ignore list.
+ * ref, min_bq: presumably the reference sequence and a base-quality
+ *         cutoff -- TODO confirm against the definition.
+ *
+ * Returns the sum of counts over all categories.
+ */
+int
+count_cigar_ops(int *counts, int **quals,
+                const bam1_t *b, const char *ref, int min_bq,
+                char *target);
+
+
+#ifdef USE_ALNERRPROF
+
+typedef struct {
+     int num_targets; /* bam_header->n_targets */
+     int *prop_len; /* one prop length per target: index is tid */
+     double **props; /* one prop array per target: index is tid */
+} alnerrprof_t;
+
+
+void
+normalize_alnerrprof(alnerrprof_t *alnerrprof);
+
+int
+parse_alnerrprof_statsfile(alnerrprof_t *alnerrprof, const char *path, bam_header_t *bam_header);
+
+void
+calc_read_alnerrprof(double *alnerrprof, unsigned long int *used_pos, 
+                        const bam1_t *b, const char *ref);
+
+void
+write_alnerrprof_stats(char *target_name, unsigned long int *alnerrprof_usedpos, 
+                    double *alnerrprof, int max_obs_read_len, FILE *out);
+
+void
+free_alnerrprof(alnerrprof_t *alnerrprof);
+
+#endif
+
+int checkref(char *fasta_file, char *bam_file);
+
+#endif
diff --git a/src/lofreq/snpcaller.c b/src/lofreq/snpcaller.c
new file mode 100644
index 0000000..42cc60f
--- /dev/null
+++ b/src/lofreq/snpcaller.c
@@ -0,0 +1,1278 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+
+#define TIMING 0
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+#include <ctype.h>
+#include <float.h>
+#include <errno.h>
+#include <fenv.h>
+
+#include "fet.h"
+#include "utils.h"
+#include "log.h"
+
+#include "snpcaller.h"
+#if TIMING
+#include <time.h>
+#endif
+
+
+/* Converting MQ=0 into a probability would 'kill' a read. Previously 0.66
+   was used here, since the median number of best hits in BWA for one
+   examined human WGS sample was 3 (sadly BWA-MEM doesn't produce X0 tags
+   anymore). For simplicity's sake, give MQ0 the benefit of the doubt and
+   assume that only one other best location existed, i.e. use 0.5.
+*/
+#define MQ0_ERRPROB 0.5
+
+#define LOGZERO -1e100
+/* shouldn't we use something from float.h ? */
+
+
+#if 0
+#define DEBUG
+#endif
+
+#if 0
+#define TRACE
+#endif
+
+#if 0
+#define NAIVE
+#endif
+
+/* scale MQ from the range 0-60 to the range 0-254 and shrink
+ * Y = 254.0/60.0 * MQ * (MQ**X)/(60**X)
+ *
+ * if 20 should stay 20
+ * 20 = 254/60.0 * 20 * (20**X)/(60**X)
+ * 60/254.0 = (20**X)/(60**X)
+ * (20/60.0)**X = 60/254.0
+ * since a**x = y equals log_a(y) = x
+ * x = log_a(60/254.0); a=20/60.0;
+ * x = 1.3134658329243962
+ */
+#if 0
+#define SCALE_MQ 1
+#define SCALE_MQ_FAC  1.3134658329243962
+#endif
+
+#if 0
+/* filled in missing values with the min of the two neighbouring values */
+static int MQ_TRANS_BWA_062_SAMPE_HG19_2X100_SIMUL[61] = {
+1,
+1,
+3,
+4,
+5,
+5,
+8,
+9,
+4,
+8,
+14,
+17,
+22,
+25,
+25,
+29,
+32,
+33,
+34,
+34, /* NA */
+34,
+34, /* NA */
+34, /* NA */
+34,
+34, /* NA */
+34, /* NA */
+34, /* NA */
+34, /* NA */
+34, /* NA */
+41,
+41, /* NA */
+41, /* NA */
+41, /* NA */
+41, /* NA */
+41, /* NA */
+41, /* NA */
+41, /* NA */
+41, /* NA */
+50,
+46, /* NA */
+46, /* NA */
+46, /* NA */
+46, /* NA */
+46, /* NA */
+46, /* NA */
+46, /* NA */
+46, /* NA */
+46, /* NA */
+46, /* NA */
+46, /* NA */
+46, /* NA */
+46,
+46, /* NA */
+46, /* NA */
+54,
+37,
+37, /* NA */
+45,
+45, /* NA */
+45, /* NA */
+67};
+
+
+static int MQ_TRANS_BWA_079_MEM_HG19_CHR22_2X75_SIMUL[72] = {
+     19,
+     48,
+     66,
+     47,
+     58,
+     59,
+     58,
+     55,
+     60,
+     54,
+     57,
+     58,
+     58,
+     58,
+     65,
+     56,
+     60,
+     62,
+     62,
+     57,
+     57,
+     54,
+     50,
+     51,
+     52,
+     49,
+     74,
+     49,
+     49,
+     77,
+     77,
+     77,
+     77,
+     68,
+     68,
+     74,
+     74,
+     74,
+     74,
+     74,
+     68,
+     68,
+     68,
+     68,
+     71,
+     71,
+     71,
+     71,
+     77,
+     77,
+     77,
+     77,
+     77,
+     77,
+     77,
+     77,
+     77,
+     74,
+     74,
+     74,
+     77,
+     77,
+     77,
+     77,
+     77,
+     77,
+     77,
+     77,
+     77,
+     77,
+     77};
+
+#if 0
+#define MQ_TRANS_TABLE MQ_TRANS_BWA_062_SAMPE_HG19_2X100_SIMUL
+#else
+#define MQ_TRANS_TABLE MQ_TRANS_BWA_079_MEM_HG19_CHR22_2X75_SIMUL
+#endif
+static int mq_trans_range_violation_printed = 0;
+
+#endif
+
+
+double log_sum(double log_a, double log_b);
+double log_diff(double log_a, double log_b);
+double probvec_tailsum(const double *probvec, int tail_startindex,
+                       int probvec_len);
+double *naive_calc_prob_dist(const double *err_probs, int N, int K);
+double *pruned_calc_prob_dist(const double *err_probs, int N, int K,
+                      long long int bonf_factor, double sig_level);
+
+
+
+#ifdef MQ_TRANS_TABLE
+/* Translate a mapping quality through MQ_TRANS_TABLE.
+ *
+ * Values that are valid indices of the table are replaced by the
+ * table entry. Anything else is returned unchanged; a warning is
+ * printed once for the first such value that is not 255.
+ *
+ * NOTE(review): the BWA 0.7.9 table is declared with 72 elements but
+ * lists only 71 initializers, so its last entry is implicitly 0 --
+ * confirm that this is intended.
+ */
+int mq_trans(int mq) {
+     /* derive the bound from the selected table instead of hard-coding
+      * it: the previous check (mq<=72) allowed reading one element past
+      * the end of the 72-element table */
+     const int table_len = (int)(sizeof(MQ_TRANS_TABLE) / sizeof(MQ_TRANS_TABLE[0]));
+
+     if (mq >= 0 && mq < table_len) {
+          return MQ_TRANS_TABLE[mq];
+     }
+
+     if (mq != 255 && ! mq_trans_range_violation_printed) {
+          LOG_WARN("MQ value %d is outside of valid range defined in translation table\n", mq);
+          mq_trans_range_violation_printed = 1;
+     }
+     return mq;
+}
+#endif
+
+
+
+/* Merge up to four phred-scaled qualities into one joined error
+ * probability:
+ *
+ *   PJ = PM + (1-PM)*PS + (1-PM)*(1-PS)*PA + (1-PM)*(1-PS)*(1-PA)*PB
+ *
+ * with
+ *   PJ = joined error prob
+ *   PM = mapping error prob
+ *   PS = source/genome error prob
+ *   PA = base alignment error prob (BAQ)
+ *   PB = base error prob
+ *
+ * In words: the observation is wrong if it is a mapping error, or (if
+ * not) a source error, or (if neither) a base-alignment error, or (if
+ * none of these) a base-call error.
+ *
+ * In theory PS should go first but the rest is hard to compute then.
+ * Using PM first keeps things tractable and intrinsically takes care
+ * of PS.
+ *
+ * A quality of -1 means "not available" and contributes no error.
+ * MQ=0 is mapped to MQ0_ERRPROB instead of probability 1.
+ *
+ * NOTE: the standard says that MQ=255 means NA. This function treats
+ * 255 as a valid phred score, so callers wanting NA semantics must
+ * convert 255 to -1 beforehand.
+ */
+double
+merge_srcq_mapq_baq_and_bq(const int sq, const int mq, const int baq, const int bq)
+{
+     const double src_p = (-1 == sq) ? 0.0 : (PHREDQUAL_TO_PROB(sq));
+     const double aln_p = (-1 == baq) ? 0.0 : (PHREDQUAL_TO_PROB(baq));
+     const double base_p = (-1 == bq) ? 0.0 : (PHREDQUAL_TO_PROB(bq));
+     double map_p;
+     double joined_p;
+
+     switch (mq) {
+     case -1:
+          map_p = 0.0;
+          break;
+     case 0:
+          map_p = MQ0_ERRPROB;
+          break;
+     default:
+          map_p = PHREDQUAL_TO_PROB(mq);
+          break;
+     }
+
+     /* FIXME do calculations in log space and return Q instead of p */
+     joined_p = map_p + (1.0-map_p)*src_p + (1-map_p)*(1-src_p)*aln_p + (1-map_p)*(1-src_p)*(1-aln_p)*base_p;
+
+#if 0
+     LOG_DEBUG("sq=%d|%f mq=%d|%f baq=%d|%f bq=%d|%f. returning %f\n",
+              sq, src_p, mq, map_p, baq, aln_p, bq, base_p, joined_p);
+#endif
+     return joined_p;
+}
+
+
+
+/* Convert the bases of pileup column p into an array of error
+ * probabilities for SNV calling.
+ *
+ * err_probs:      out; set to a newly malloc'ed array with room for
+ *                 p->coverage_plp values (NULL if allocation failed).
+ * num_err_probs:  out; number of values stored in *err_probs (0 on
+ *                 allocation failure).
+ * alt_bases:      out; the non-reference, non-N bases in NUM_NT4 order.
+ * alt_counts:     out; per alt base, number of bases that passed all
+ *                 filters.
+ * alt_raw_counts: out; per alt base, number of bases that passed the
+ *                 general min_bq filter (before alt-specific filters).
+ * p:              pileup column to convert.
+ * conf:           thresholds, default qualities and VARCALL_USE_* flags.
+ *
+ * Per base: base quality, and (if enabled by conf->flag and available)
+ * BAQ, MQ and source quality are merged into one error probability.
+ * Bases below the bq / joined-quality thresholds are dropped.
+ */
+void
+plp_to_errprobs(double **err_probs, int *num_err_probs,
+                int *alt_bases, int *alt_counts, int *alt_raw_counts,
+                const plp_col_t *p, varcall_conf_t *conf)
+{
+     int i, j;
+     int alt_idx;
+     int avg_ref_bq = -1;
+
+     /* define the count up front so it is valid on every return path */
+     (*num_err_probs) = 0;
+
+     if (NULL == ((*err_probs) = malloc(p->coverage_plp * sizeof(double)))) {
+          /* coverage = base-count after read level filtering */
+          fprintf(stderr, "FATAL: couldn't allocate memory at %s:%s():%d\n",
+                  __FILE__, __FUNCTION__, __LINE__);
+          return;
+     }
+
+     /* determine median ref bq in advance if needed (the variable is
+      * called avg_ref_bq for historical reasons)
+      */
+     if (-1 == conf->def_alt_bq) {
+          for (i=0; i<NUM_NT4; i++) {
+               int nt = bam_nt4_rev_table[i];
+               int *ref_quals;
+
+               if (nt != p->ref_base || 0 == p->base_quals[i].n) {
+                    continue;
+               }
+
+               /* work on a copy; presumably int_median() reorders its
+                * input -- TODO confirm */
+               ref_quals = malloc(sizeof(int) * p->base_quals[i].n);
+               if (NULL == ref_quals) {
+                    fprintf(stderr, "FATAL: couldn't allocate memory at %s:%s():%d\n",
+                            __FILE__, __FUNCTION__, __LINE__);
+                    free(*err_probs);
+                    (*err_probs) = NULL;
+                    return;
+               }
+               memcpy(ref_quals, p->base_quals[i].data, sizeof(int) * p->base_quals[i].n);
+               avg_ref_bq = int_median(ref_quals, p->base_quals[i].n);
+               free(ref_quals);
+               break; /* there can only be one */
+          }
+          LOG_DEBUG("avg_ref_bq=%d\n", avg_ref_bq);
+     }
+
+     alt_idx = -1;
+     for (i=0; i<NUM_NT4; i++) {
+          int is_alt_base;
+          int nt = bam_nt4_rev_table[i];
+          if (nt == 'N') {
+               continue;
+          }
+
+          is_alt_base = 0;
+          if (nt != p->ref_base) {
+               is_alt_base = 1;
+               alt_idx += 1;
+               alt_bases[alt_idx] = nt;
+               alt_counts[alt_idx] = 0;
+               alt_raw_counts[alt_idx] = 0;
+          }
+
+          for (j=0; j<p->base_quals[i].n; j++) {
+               int bq = -1;
+               int mq = -1;
+               int sq = -1;
+               int baq = -1;
+#ifdef USE_ALNERRPROF
+               int aq = -1;
+               LOG_FATAL("%s\n", "ALNERRPROF not supported anymore\n"); exit(1);
+#endif
+               double merged_err_prob; /* final quality used for snv calling */
+               int merged_qual;
+
+               bq = p->base_quals[i].data[j];
+
+               /* bq filtering for all */
+               if (bq < conf->min_bq) {
+                    continue;
+               }
+
+               /* alt bq threshold and overwrite if needed */
+               if (is_alt_base) {
+                    alt_raw_counts[alt_idx] += 1;
+                    /* ignore altogether if below alt bq threshold */
+                    if (bq < conf->min_alt_bq) {
+                         continue;
+                    } else if (-1 == conf->def_alt_bq)  {
+                         bq = avg_ref_bq;
+                    } else if (0 != conf->def_alt_bq)  {
+                         bq = conf->def_alt_bq;
+                    }
+                    /* 0: keep original */
+               }
+
+               if ((conf->flag & VARCALL_USE_BAQ) && p->baq_quals[i].n) {
+                    baq = p->baq_quals[i].data[j];
+               }
+
+               if ((conf->flag & VARCALL_USE_MQ) && p->map_quals[i].n) {
+                    mq = p->map_quals[i].data[j];
+                    /* according to spec 255 is unknown */
+                    if (mq == 255) {
+                         mq = -1;
+                    }
+#ifdef SCALE_MQ
+                    mq = 254/60.0*mq * pow(mq, SCALE_MQ_FAC)/pow(60, SCALE_MQ_FAC);
+#elif defined(MQ_TRANS_TABLE)
+                    mq = mq_trans(mq);
+#endif
+               }
+
+               if ((conf->flag & VARCALL_USE_SQ) && p->source_quals[i].n) {
+                    sq = p->source_quals[i].data[j];
+               }
+
+               merged_err_prob = merge_srcq_mapq_baq_and_bq(sq, mq, baq, bq);
+               merged_qual =  PROB_TO_PHREDQUAL_SAFE(merged_err_prob);
+
+               /* min merged q filtering for all */
+               if (merged_qual < conf->min_jq) {
+                    continue;
+               }
+
+               if (is_alt_base) {
+#if 0
+                    LOG_DEBUG("alt_base %d: bq=%d merged q=%d p=%f\n",
+                              alt_idx, bq, PROB_TO_PHREDQUAL_SAFE(merged_err_prob), merged_err_prob);
+#endif
+                    /* apply alt merged qual threshold and overwrite if needed
+                     */
+                    if (merged_qual < conf->min_alt_jq) {
+                         continue;
+                    } else if (-1 == conf->def_alt_jq)  {
+                         LOG_FATAL("%s\n", "median off ref joined q not implemented yet (FIXME)");
+                         exit(1);
+                    } else if (0 != conf->def_alt_jq)  {
+                         merged_err_prob = PHREDQUAL_TO_PROB(conf->def_alt_jq);
+                    }
+                    /* 0: keep original */
+                    alt_counts[alt_idx] += 1;
+               }
+               (*err_probs)[(*num_err_probs)++] = merged_err_prob;
+
+#if 0
+               LOG_FIXME("%s:%d %c bq=%d mq=%d finalq=%d is_alt_base=%d\n", p->target, p->pos+1, nt, bq, mq, PROB_TO_PHREDQUAL_SAFE(merged_err_prob), is_alt_base);
+#endif
+          }
+     }
+}
+
+/* Convert the insertion-related qualities of pileup column p into an
+ * array of error probabilities.
+ *
+ * err_probs:     out; set to a newly malloc'ed array with room for
+ *                p->coverage_plp values (NULL if allocation failed).
+ * num_err_probs: out; number of values stored (0 on allocation
+ *                failure).
+ * p:             pileup column.
+ * conf:          VARCALL_USE_* flags selecting which qualities to merge.
+ * key:           key of the insertion event in question; the indel
+ *                alignment quality is only used for events whose key
+ *                matches.
+ *
+ * First the qualities stored directly on the column (p->ins_quals) are
+ * converted, then those of every event in p->ins_event_counts.
+ *
+ * FIXME merge with plp_to_del_errprobs
+ */
+void
+plp_to_ins_errprobs(double **err_probs, int *num_err_probs,
+                    const plp_col_t *p, varcall_conf_t *conf,
+                    char key[MAX_INDELSIZE]){
+     int i, j;
+     double final_err_prob;
+     int iq, aq, mq, sq;
+     ins_event *it, *it_tmp;
+
+     /* define the count up front so it is valid on every return path */
+     (*num_err_probs) = 0;
+
+     if (NULL == ((*err_probs) = malloc(p->coverage_plp * sizeof(double)))) {
+          /* coverage = base-count after read level filtering */
+          fprintf(stderr, "FATAL: couldn't allocate memory at %s:%s():%d\n",
+                  __FILE__, __FUNCTION__, __LINE__);
+          /* nothing to release here: err_probs itself belongs to the
+           * caller and must not be passed to free() */
+          return;
+     }
+
+     for (i = 0; i < p->ins_quals.n; i++) {
+          mq = -1;
+          iq = p->ins_quals.data[i];
+          if (conf->flag & VARCALL_USE_MQ) {
+               mq = p->ins_map_quals.data[i];
+               /* according to spec 255 is unknown (same handling as
+                * for the events below) */
+               if (mq == 255) {
+                    mq = -1;
+               }
+          }
+          final_err_prob = merge_srcq_mapq_baq_and_bq(-1, mq, -1, iq);
+          (*err_probs)[(*num_err_probs)++] = final_err_prob;
+     }
+
+     HASH_ITER(hh_ins, p->ins_event_counts, it, it_tmp) {
+          for (j = 0; j < it->ins_quals.n; j++) {
+               iq = aq = mq = sq = -1;
+               iq = it->ins_quals.data[j];
+
+               /* don't use idaq if not wanted or if not indel in question (FIXME does the latter make sense?) */
+               if ((conf->flag & VARCALL_USE_IDAQ) && (0 == strcmp(it->key, key))) {
+                    aq = it->ins_aln_quals.data[j];
+               }
+
+               if ((conf->flag & VARCALL_USE_MQ) && it->ins_map_quals.n) {
+                    mq = it->ins_map_quals.data[j];
+                    /* according to spec 255 is unknown */
+                    if (mq == 255) {
+                         mq = -1;
+                    }
+               }
+
+               if ((conf->flag & VARCALL_USE_SQ) && it->ins_source_quals.n)  {
+                    sq = it->ins_source_quals.data[j];
+               }
+
+               final_err_prob = merge_srcq_mapq_baq_and_bq(sq, mq, aq, iq);
+#ifdef TRACE
+               LOG_DEBUG("+%s IQ:%d IAQ:%d MQ:%d SQ:%d EP:%lg\n",
+                         it->key, iq, aq, mq, sq, final_err_prob);
+#endif
+               (*err_probs)[(*num_err_probs)++] = final_err_prob;
+          }
+     }
+}
+
+/* Convert the deletion-related qualities of pileup column p into an
+ * array of error probabilities.
+ *
+ * err_probs:     out; set to a newly malloc'ed array with room for
+ *                p->coverage_plp values (NULL if allocation failed).
+ * num_err_probs: out; number of values stored (0 on allocation
+ *                failure).
+ * p:             pileup column.
+ * conf:          VARCALL_USE_* flags selecting which qualities to merge.
+ * key:           key of the deletion event in question; the indel
+ *                alignment quality is only used for events whose key
+ *                matches.
+ *
+ * First the qualities stored directly on the column (p->del_quals) are
+ * converted, then those of every event in p->del_event_counts.
+ *
+ * FIXME merge with plp_to_ins_errprobs
+ */
+void
+plp_to_del_errprobs(double **err_probs, int *num_err_probs,
+                    const plp_col_t *p, varcall_conf_t *conf,
+                    char key[MAX_INDELSIZE]){
+     int i, j;
+     double final_err_prob;
+     int dq, aq, mq, sq;
+     del_event *it, *it_tmp;
+
+     /* define the count up front so it is valid on every return path */
+     (*num_err_probs) = 0;
+
+     if (NULL == ((*err_probs) = malloc(p->coverage_plp * sizeof(double)))) {
+          /* coverage = base-count after read level filtering */
+          fprintf(stderr, "FATAL: couldn't allocate memory at %s:%s():%d\n",
+                  __FILE__, __FUNCTION__, __LINE__);
+          /* nothing to release here: err_probs itself belongs to the
+           * caller and must not be passed to free() */
+          return;
+     }
+
+     for (i = 0; i < p->del_quals.n; i++) {
+          mq = -1;
+          dq = p->del_quals.data[i];
+          if (conf->flag & VARCALL_USE_MQ) {
+               mq = p->del_map_quals.data[i];
+               /* according to spec 255 is unknown (same handling as
+                * for the events below) */
+               if (mq == 255) {
+                    mq = -1;
+               }
+          }
+          final_err_prob = merge_srcq_mapq_baq_and_bq(-1, mq, -1, dq);
+          (*err_probs)[(*num_err_probs)++] = final_err_prob;
+     }
+
+     HASH_ITER(hh_del, p->del_event_counts, it, it_tmp) {
+          for (j = 0; j < it->del_quals.n; j++) {
+               dq = aq = mq = sq = -1;
+               dq = it->del_quals.data[j];
+
+               /* don't use idaq if not wanted or if not indel in question (FIXME does the latter make sense?) */
+               if ((conf->flag & VARCALL_USE_IDAQ) && (0 == strcmp(it->key, key))) {
+                    aq = it->del_aln_quals.data[j];
+               }
+
+               if ((conf->flag & VARCALL_USE_MQ) && it->del_map_quals.n) {
+                    mq = it->del_map_quals.data[j];
+                    /* according to spec 255 is unknown */
+                    if (mq == 255) {
+                         mq = -1;
+                    }
+               }
+
+               if ((conf->flag & VARCALL_USE_SQ) && it->del_source_quals.n) {
+                    sq = it->del_source_quals.data[j];
+               }
+
+               final_err_prob = merge_srcq_mapq_baq_and_bq(sq, mq, aq, dq);
+#ifdef TRACE
+               LOG_DEBUG("+%s DQ:%d DAQ:%d MQ:%d SQ:%d EP:%lg\n",
+                         it->key, dq, aq, mq, sq, final_err_prob);
+#endif
+               (*err_probs)[(*num_err_probs)++] = final_err_prob;
+          }
+     }
+}
+
+/* Reset a caller-allocated varcall_conf_t to the built-in defaults:
+ * all members are zeroed first, then thresholds, Bonferroni factors,
+ * significance level and quality-usage flags are set.
+ */
+void
+init_varcall_conf(varcall_conf_t *c)
+{
+     memset(c, 0, sizeof(*c));
+
+     /* base quality thresholds and default */
+     c->min_bq = DEFAULT_MIN_BQ;
+     c->min_alt_bq = DEFAULT_MIN_ALT_BQ;
+     c->def_alt_bq = DEFAULT_DEF_ALT_BQ;
+
+     /* joined quality thresholds and default */
+     c->min_jq = DEFAULT_MIN_JQ;
+     c->min_alt_jq = DEFAULT_MIN_ALT_JQ;
+     c->def_alt_jq = DEFAULT_DEF_ALT_JQ;
+
+     /* coverage and multiple-testing settings */
+     c->min_cov = DEFAULT_MIN_COV;
+     c->sig = DEFAULT_SIG;
+     c->bonf_subst = 1;
+     c->bonf_indel = 1;
+     c->bonf_dynamic = 1;
+     /* c->out = ; */
+
+     /* qualities merged by default: mapping, base-alignment and
+      * indel-alignment quality (source quality stays off) */
+     c->flag |= VARCALL_USE_MQ | VARCALL_USE_BAQ | VARCALL_USE_IDAQ;
+
+     c->no_indels = 0;
+     c->only_indels = 0;
+}
+
+
+/* Print the settings stored in c to stream, one "name = value" line
+ * each. If MQ scaling or MQ translation was compiled in, a warning is
+ * additionally emitted through LOG_WARN.
+ */
+void
+dump_varcall_conf(const varcall_conf_t *c, FILE *stream)
+{
+     /* each flag is reported as 0 or 1 */
+     const int use_baq = (c->flag & VARCALL_USE_BAQ) != 0;
+     const int use_mq = (c->flag & VARCALL_USE_MQ) != 0;
+     const int use_sq = (c->flag & VARCALL_USE_SQ) != 0;
+     const int use_idaq = (c->flag & VARCALL_USE_IDAQ) != 0;
+
+     fprintf(stream, "snvcall options\n");
+
+     fprintf(stream, "  min_bq         = %d\n", c->min_bq);
+     fprintf(stream, "  min_alt_bq     = %d\n", c->min_alt_bq);
+     fprintf(stream, "  def_alt_bq     = %d\n", c->def_alt_bq);
+
+     fprintf(stream, "  min_jq         = %d\n", c->min_jq);
+     fprintf(stream, "  min_alt_jq     = %d\n", c->min_alt_jq);
+     fprintf(stream, "  def_alt_jq     = %d\n", c->def_alt_jq);
+
+     fprintf(stream, "  min_cov        = %d\n", c->min_cov);
+     fprintf(stream, "  bonf_subst       = %lld  (might get recalculated)\n", c->bonf_subst);
+     fprintf(stream, "  bonf_indel     = %lld  (might get recalculated)\n", c->bonf_indel);
+     fprintf(stream, "  bonf_dynamic   = %d\n", c->bonf_dynamic);
+     fprintf(stream, "  sig            = %f\n", c->sig);
+/*     fprintf(stream, "  out            = %p\n", (void*)c->out);*/
+
+     fprintf(stream, "  flag & VARCALL_USE_BAQ     = %d\n", use_baq);
+     fprintf(stream, "  flag & VARCALL_USE_MQ      = %d\n", use_mq);
+     fprintf(stream, "  flag & VARCALL_USE_SQ      = %d\n", use_sq);
+     fprintf(stream, "  flag & VARCALL_USE_IDAQ    = %d\n", use_idaq);
+#ifdef SCALE_MQ
+     LOG_WARN("%s\n", "MQ scaling switched on!");
+#elif defined MQ_TRANS_TABLE
+     LOG_WARN("%s\n", "MQ translation switched on!");
+#endif
+
+     fprintf(stream, "  only_indels    = %d\n", c->only_indels);
+     fprintf(stream, "  no_indels      = %d\n", c->no_indels);
+}
+
+
+
+/**
+ * @brief Adds two probabilities given in log space, i.e. returns
+ * log(exp(log_a) + exp(log_b))
+ *
+ * The larger operand is factored out so that exp() is only applied to
+ * a non-positive difference; log1p keeps precision for small terms.
+ *
+ * Taken from util.h of FAST source code:
+ * http://www.cs.cornell.edu/~keich/FAST/fast.tar.gz
+ * and using log1p
+ */
+double
+log_sum(double log_a, double log_b)
+{
+    const int a_is_larger = (log_a > log_b);
+    const double hi = a_is_larger ? log_a : log_b;
+    const double lo = a_is_larger ? log_b : log_a;
+
+    return hi + log1p(exp(lo-hi));
+}
+/* log_sum() */
+
+
+/**
+ * @brief Subtracts two probabilities given in log space.
+ *
+ * Returns log(exp(log_a) - exp(log_b)) if log_a >= log_b. If log_a is
+ * smaller, the operands are swapped, i.e. the result is the log of the
+ * absolute difference. Equal operands yield log(0).
+ *
+ * Adapted from log_sum above and scala/breeze/numerics logDiff
+ * See also http://stackoverflow.com/questions/778047/we-know-log-add-but-how-to-do-log-subtract
+ *
+ */
+double
+log_diff(double log_a, double log_b)
+{
+    const int a_not_smaller = (log_a >= log_b);
+    const double hi = a_not_smaller ? log_a : log_b;
+    const double lo = a_not_smaller ? log_b : log_a;
+
+    return hi + log1p(- exp(lo-hi));
+}
+/* log_diff() */
+
+
+
+/**
+ * @brief Sums the log-space values of probvec from tail_startindex
+ * (inclusive) up to probvec_len (exclusive) and returns the log of
+ * that sum.
+ *
+ * The element at tail_startindex is always read, so the index has to
+ * be valid.
+ */
+double
+probvec_tailsum(const double *probvec, int tail_startindex, int probvec_len)
+{
+    int idx = tail_startindex;
+    double acc = probvec[idx];
+
+    while (++idx < probvec_len) {
+        acc = log_sum(acc, probvec[idx]);
+    }
+
+    return acc;
+}
+/* probvec_tailsum() */
+
+
+/**
+ * Disabled reference implementation: computes, in log space, a
+ * probability vector from the N error probabilities in err_probs
+ * without any pruning.
+ *
+ * The function prints a CRITICAL message and calls exit(1) right at
+ * its start, so everything below that call is unreachable. Use
+ * pruned_calc_prob_dist instead.
+ *
+ * NOTE(review): issues visible in the unreachable part, to be checked
+ * before ever re-enabling it:
+ * - only probvec_prev[0] is initialised, but the inner loop reads
+ *   probvec_prev[k] for all k<K (see also the clang FIXME below);
+ * - because of the swap at the end of each iteration, the most recent
+ *   results are in probvec_prev when the loop ends, yet that buffer
+ *   is freed and the other one is returned.
+ */
+double *
+naive_calc_prob_dist(const double *err_probs, int N, int K)
+{
+     double *probvec = NULL;
+     double *probvec_prev = NULL;
+     double *probvec_swp = NULL;
+
+     int n;
+     /* hard stop: this implementation must not be used */
+     fprintf(stderr, "CRITICAL(%s:%s:%d): Possibly buggy code. Use pruned_calc_prob_dist instead of me\n",
+             __FILE__, __FUNCTION__, __LINE__);
+     exit(1);
+
+    /* two buffers of N+1 values: current and previous iteration */
+    if (NULL == (probvec = malloc((N+1) * sizeof(double)))) {
+        fprintf(stderr, "FATAL: couldn't allocate memory at %s:%s():%d\n",
+                __FILE__, __FUNCTION__, __LINE__);
+        return NULL;
+    }
+    if (NULL == (probvec_prev = malloc((N+1) * sizeof(double)))) {
+        fprintf(stderr, "FATAL: couldn't allocate memory at %s:%s():%d\n",
+                __FILE__, __FUNCTION__, __LINE__);
+        free(probvec);
+        return NULL;
+    }
+
+    /* init */
+    probvec_prev[0] = 0.0; /* 0.0 = log(1.0) */
+
+    /* one iteration per error probability */
+    for (n=1; n<N+1; n++) {
+        int k;
+        double log_pn, log_1_pn;
+        double pn = err_probs[n-1];
+
+
+        /* if pn=0 log(pn) will fail. likewise if pn=1 (Q0) then
+         * log1p(-pn) = log(1-1) = log(0) will fail. therefore test */
+        if (fabs(pn) < DBL_EPSILON) {
+             log_pn = log(DBL_EPSILON);
+        } else {
+             log_pn = log(pn);
+        }
+        if (fabs(pn-1.0) < DBL_EPSILON) {
+             log_1_pn = log1p(-pn+DBL_EPSILON);
+        } else {
+             log_1_pn = log1p(-pn);
+        }
+
+#if 0
+        fprintf(stderr, "DEBUG(%s:%s:%d): pn=%g log_pn=%g log_1_pn=%g err_probs[n=%d-1]=%g\n",
+                __FILE__, __FUNCTION__, __LINE__, pn, log_pn, log_1_pn, n, err_probs[n-1]);
+#endif
+
+        /* index 0: previous value times (1-pn), in log space */
+        k = 0;
+        probvec[k] = probvec_prev[k] + log_1_pn;
+
+        /* indices 1..K-1: combine previous k (times 1-pn) with
+         * previous k-1 (times pn) */
+        for (k=1; k<K; k++) {
+             /* FIXME clang says: The left operand of '+' is a garbage value */
+            probvec[k] = log_sum(probvec_prev[k] + log_1_pn,
+                                 probvec_prev[k-1] + log_pn);
+        }
+        /* index n: previous n-1 times pn */
+        k = n;
+        probvec[k] = probvec_prev[k-1] + log_pn;
+
+
+        /* swap */
+        probvec_swp = probvec;
+        probvec = probvec_prev;
+        probvec_prev = probvec_swp;
+    }
+
+
+    free(probvec_prev);
+    return probvec;
+}
+/* naive_prob_dist */
+
+
+
+/**
+ * Computes, in log space, a vector of K+1 values describing how many
+ * of N independent trials fail, where trial n fails with probability
+ * err_probs[n-1].
+ *
+ * For every trial the entries 1..MIN(n,K-1) are updated with
+ *   probvec[k] = log_sum(prev[k] + log(1-pn), prev[k-1] + log(pn))
+ * and entry 0 with prev[0] + log(1-pn). Entry K is first set when
+ * n==K and from then on only grows via
+ *   probvec[K] = log_sum(prev[K], prev[K-1] + log(pn)).
+ * NOTE(review): this makes entry K look like the log-probability of
+ * K or more failures - confirm.
+ *
+ * Once n > K, exp(probvec[K]) * bonf_factor is compared against
+ * sig_level after each trial; as soon as it is larger the function
+ * returns early with the vector as computed so far.
+ *
+ * Returns a malloc'ed array of K+1 doubles, or NULL if allocation
+ * failed (a FATAL message is printed to stderr in that case).
+ *
+ * NOTE(review): K-1 is used as an index, so K < 1 looks unsupported;
+ * entry K is never written if N < K - confirm callers guarantee
+ * 1 <= K <= N.
+ *
+ * Should really get rid of bonf_factor and sig_level here and
+ * upstream as well
+ *
+ */
+double *
+pruned_calc_prob_dist(const double *err_probs, int N, int K,
+                      long long int bonf_factor, double sig_level)
+{
+    double *probvec = NULL;
+    double *probvec_prev = NULL;
+    double *probvec_swp = NULL;
+    int n;
+
+    /* two buffers of K+1 entries: values for the current and for the
+     * previous trial. they are swapped after every trial */
+    if (NULL == (probvec = malloc((K+1) * sizeof(double)))) {
+        fprintf(stderr, "FATAL: couldn't allocate memory at %s:%s():%d\n",
+                __FILE__, __FUNCTION__, __LINE__);
+        return NULL;
+    }
+    if (NULL == (probvec_prev = malloc((K+1) * sizeof(double)))) {
+        fprintf(stderr, "FATAL: couldn't allocate memory at %s:%s():%d\n",
+                __FILE__, __FUNCTION__, __LINE__);
+        free(probvec);
+        return NULL;
+    }
+
+    /* sanity check (only active without NDEBUG): all inputs have to
+     * be probabilities, allowing for DBL_EPSILON of slack */
+    for (n=0; n<N; n++) {
+         /*LOG_FIXME("err_probs[n=%d]=%g\n", n, err_probs[n]);*/
+         assert(err_probs[n] + DBL_EPSILON >= 0.0 && err_probs[n] - DBL_EPSILON <= 1.0);
+    }
+
+#ifdef DEBUG
+    /* fill with a recognisable positive value; the asserts below
+     * require values <= 0.0, so a read of a never-written entry
+     * trips them */
+    for (n=0; n<K+1; n++) {
+        probvec_prev[n] = probvec[n] = 666.666;
+    }
+#endif
+
+    /* init */
+    probvec_prev[0] = 0.0; /* log(1.0) */
+
+    for (n=1; n<=N; n++) {
+        int k;
+        double pn = err_probs[n-1];
+        double log_pn, log_1_pn;
+
+        /* if pn=0 log(pn) will fail. likewise if pn=1 (Q0) then
+         * log1p(-pn) = log(1-1) = log(0) will fail. therefore test */
+        if (fabs(pn) < DBL_EPSILON) {
+             log_pn = log(DBL_EPSILON);
+        } else {
+             log_pn = log(pn);
+        }
+        if (fabs(pn-1.0) < DBL_EPSILON) {
+             log_1_pn = log1p(-pn+DBL_EPSILON);
+        } else {
+             log_1_pn = log1p(-pn);/* 0.0 = log(1.0) */
+        }
+
+#ifdef TRACE
+		fprintf(stderr, "DEBUG(%s:%s:%d): n=%d err_probs[n-1]=%g pn=%g log_pn=%g log_1_pn=%g\n",
+                __FILE__, __FUNCTION__, __LINE__, n, err_probs[n-1], pn, log_pn, log_1_pn);
+#endif
+
+        /* entry n of the previous vector has not been written yet but
+         * is read by the loop below, so mark it as log(0) */
+        if(n < K) {
+            probvec_prev[n] = LOGZERO;
+        }
+
+        /* k failures after n trials: either k failures before and
+         * trial n is fine, or k-1 failures before and trial n fails */
+        for (k=MIN(n,K-1); k>=1; k--) {
+            assert(probvec_prev[k]<=0.0 && probvec_prev[k-1]<=0.0);
+            probvec[k] = log_sum(probvec_prev[k] + log_1_pn,
+                                 probvec_prev[k-1] + log_pn);
+        }
+        /* zero failures: all trials so far were fine */
+        k = 0;
+        assert(probvec_prev[k]<=0.0);
+        probvec[k] = probvec_prev[k] + log_1_pn;
+
+#ifdef TRACE
+        for (k=0; k<=MIN(n, K-1); k++) {
+            fprintf(stderr, "DEBUG(%s:%s:%d): probvec[k=%d] = %g\n",
+                    __FILE__, __FUNCTION__, __LINE__, k, probvec[k]);
+        }
+        for (k=0; k<=MIN(n,K-1); k++) {
+            fprintf(stderr, "DEBUG(%s:%s:%d): probvec_prev[k=%d] = %g\n",
+                    __FILE__, __FUNCTION__, __LINE__, k, probvec_prev[k]);
+        }
+#endif
+
+        if (n==K) {
+             /* first time entry K can be reached */
+             probvec[K] = probvec_prev[K-1] + log_pn;
+             /* FIXME prune here as well? */
+
+        } else if (n > K) {
+             long double pvalue;
+             int errsv = 0;
+             /*LOG_FIXME("probvec_prev[K=%d]=%g probvec_prev[K=%d -1]=%g\n", K, probvec_prev[K], K, probvec_prev[K-1]);*/
+             assert(probvec_prev[K]-DBL_EPSILON<=0.0 && probvec_prev[K-1]-DBL_EPSILON<=0.0);
+
+             /* note: prev[K] is carried over without the log_1_pn
+              * term, i.e. entry K only ever accumulates */
+             probvec[K] = log_sum(probvec_prev[K], probvec_prev[K-1]+log_pn);
+
+             /* reset error state so that a failure of expl() below
+              * can be detected */
+             errno = 0;
+             feclearexcept(FE_ALL_EXCEPT);
+
+             pvalue = expl(probvec[K]);
+
+             errsv = errno;
+             if (errsv || fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW)) {
+                  if (pvalue < DBL_EPSILON) {
+                       pvalue = LDBL_MIN;/* to zero but prevent actual 0 value */
+                  } else {
+                       pvalue = LDBL_MAX; /* might otherwise be set to 1 which might pass filters */
+                  }
+             }
+             /* store as phred scores instead:
+
+              Q = -10*log_10(e^X), where X=probvec[K]
+              remember, log_b(x) = log_k(x)/log_k(b), i.e. log_10(Y) = log_e(Y)/log_e(10)
+              therefore, Q = -10 * log_e(e^X)/log_e(10) = -10 * X/log_e(10)
+              e.g.
+              >>> from math import log, log10, e
+              >>> X = -100
+              >>> -10 * log10(e**X)
+              434.29448190325184
+              >>> -10 * X/log(10)
+              434.2944819032518
+             */
+             /* pruning: entry K only grows from here on, so once the
+              * corrected value exceeds sig_level there is no point in
+              * looking at the remaining trials */
+             if (pvalue * (double)bonf_factor > sig_level) {
+#ifdef DEBUG
+                  fprintf(stderr, "DEBUG(%s:%s:%d): early exit at n=%d K=%d with pvalue %Lg\n",
+                          __FILE__, __FUNCTION__, __LINE__, n, K, pvalue);
+#endif
+                  free(probvec_prev);
+                  return probvec;
+             }
+        }
+
+        assert(! isinf(probvec[0])); /* used to happen when first q=0 */
+
+        /* swap */
+        probvec_swp = probvec;
+        probvec = probvec_prev;
+        probvec_prev = probvec_swp;
+    }
+
+    /* return prev because we just swapped (if not pruned) */
+    free(probvec);
+    return probvec_prev;
+}
+/* pruned_calc_prob_dist */
+
+
+#ifdef PSEUDO_BINOMIAL
+/* binomial test using poissbin. only good for high n and small prob.
+ *
+ * Feeds num_trials copies of succ_prob into poissbin() with neutral
+ * Bonferroni factor and significance level (both 1) and stores the
+ * result in pvalue.
+ *
+ * returns 0 on success and -1 on error (memory allocation failed or
+ * poissbin() did not return a probability vector) */
+int
+pseudo_binomial(long double *pvalue,
+                int num_success, int num_trials, double succ_prob)
+{
+     const long long int bonf = 1;
+     const double sig = 1.0;
+     double *probvec = NULL;
+     double *probs;
+     int i;
+
+     fprintf(stderr, "WARNING(%s): this function only approximates the binomial for high n and small p\n", __FUNCTION__);
+     if (NULL == (probs = malloc((num_trials) * sizeof(double)))) {
+          fprintf(stderr, "FATAL: couldn't allocate memory at %s:%s():%d\n",
+                  __FILE__, __FUNCTION__, __LINE__);
+          return -1;
+     }
+
+     /* every trial has the same success probability */
+     for (i=0; i<num_trials; i++) {
+          probs[i] = succ_prob;
+     }
+
+     probvec = poissbin(pvalue, probs,
+                        num_trials, num_success,
+                        bonf, sig);
+     free(probs);
+
+     /* only the pvalue is of interest here. a missing vector means
+      * that no pvalue was computed, which has to be reported */
+     if (NULL == probvec) {
+          return -1;
+     }
+     free(probvec);
+
+     return 0;
+}
+#endif
+
+
+
+/* main logic. Computes the probability vector for num_err_probs
+ * trials with the given error probabilities and stores
+ * exp(probvec[num_failures]) in pvalue.
+ *
+ * The returned probvec (needs to be freed by caller) allows to check
+ * pvalues for other numbers < (original num_failures), like so:
+ * exp(probvec_tailsum(probvec, smaller_num, orig_num+1)) but only if
+ * first pvalue was below limits implied by bonf and sig.
+ *
+ * pvalue defaults to LDBL_MAX (1 might still be significant). If
+ * expl() fails, pvalue is set to LDBL_MIN (result close to zero) or
+ * LDBL_MAX (otherwise) instead of 0 or 1.
+ *
+ * Returns NULL (with pvalue left at LDBL_MAX) if the probability
+ * vector could not be allocated.
+ *
+ *  note: pvalues > sig/bonf are not computed properly
+ */
+double *
+poissbin(long double *pvalue, const double *err_probs,
+         const int num_err_probs, const int num_failures,
+         const long long int bonf, const double sig)
+{
+    double *probvec = NULL;
+    int errsv;
+#if TIMING
+    clock_t start;
+    int msec;
+#endif
+    *pvalue = LDBL_MAX;
+
+#if TIMING
+    start = clock();
+#endif
+#ifdef NAIVE
+    probvec = naive_prob_dist(err_probs, num_err_probs,
+                                    num_failures);
+#else
+    probvec = pruned_calc_prob_dist(err_probs, num_err_probs,
+                                    num_failures, bonf, sig);
+#endif
+#if TIMING
+    msec = (clock() - start) * 1000 / CLOCKS_PER_SEC;
+    fprintf(stderr, "calc_prob_dist() took %d s %d ms\n", msec/1000, msec%1000);
+#endif
+
+    /* the distribution functions return NULL if they run out of
+     * memory. don't dereference in that case; pvalue keeps its
+     * insignificant default */
+    if (NULL == probvec) {
+        return NULL;
+    }
+
+    /* reset error state so that a failure of expl() can be detected */
+    errno = 0;
+    feclearexcept(FE_ALL_EXCEPT);
+
+    *pvalue = expl(probvec[num_failures]); /* no need for tailsum here */
+
+    errsv = errno;
+    if (errsv || fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW)) {
+         if (*pvalue < DBL_EPSILON) {
+              *pvalue = LDBL_MIN;/* to zero but prevent actual 0 value */
+         } else {
+              *pvalue = LDBL_MAX; /* otherwise set to 1 which might pass filters */
+         }
+    }
+
+    return probvec;
+}
+
+
+
+/**
+ * @brief Computes pvalues for the NUM_NONCONS_BASES non-consensus
+ * base counts of one column.
+ *
+ * pvalues computed for each of the NUM_NONCONS_BASES noncons_counts
+ * will be written to snp_pvalues in the same order. If pvalue was not
+ * computed (always insignificant) its value will be set to LDBL_MAX
+ *
+ * The probability vector is computed only once, for the largest
+ * count. If that pvalue times bonf_factor already exceeds sig_level
+ * nothing else is computed. Otherwise the pvalue of each non-zero
+ * count is derived from the same vector via probvec_tailsum().
+ *
+ * @param snp_pvalues output array with NUM_NONCONS_BASES elements
+ * @param err_probs error probabilities passed on to poissbin()
+ * @param num_err_probs number of elements in err_probs
+ * @param noncons_counts NUM_NONCONS_BASES counts
+ * @param bonf_factor factor the pvalue is multiplied with before
+ * comparison against sig_level
+ * @param sig_level significance threshold
+ *
+ * @return always 0
+ *
+ */
+int
+snpcaller(long double *snp_pvalues,
+          const double *err_probs, const int num_err_probs,
+          const int *noncons_counts,
+          const long long int bonf_factor, const double sig_level)
+{
+    double *probvec = NULL;
+    int i;
+    int max_noncons_count = 0;
+    long double pvalue;
+
+#if 0
+    for (i=0; i<num_err_probs; i++) {
+         fprintf(stderr,  "%f ", err_probs[i]);
+    }
+    fprintf(stderr,  "\n");
+#endif
+
+#ifdef DEBUG
+    fprintf(stderr, "DEBUG(%s:%s():%d): num_err_probs=%d noncons_counts=%d,%d,%d bonf_factor=%lld sig_level=%f\n",
+            __FILE__, __FUNCTION__, __LINE__,
+            num_err_probs, noncons_counts[0], noncons_counts[1], noncons_counts[2],
+            bonf_factor, sig_level);
+#endif
+
+    /* initialise empty results so that we can return anytime */
+    for (i=0; i<NUM_NONCONS_BASES; i++) {
+        snp_pvalues[i] = LDBL_MAX;
+    }
+
+    /* determine max non-consensus count */
+    for (i=0; i<NUM_NONCONS_BASES; i++) {
+        if (noncons_counts[i] > max_noncons_count) {
+            max_noncons_count = noncons_counts[i];
+        }
+    }
+
+    /* no need to do anything if no snp bases */
+    if (0==max_noncons_count) {
+        goto free_and_exit;
+    }
+
+    /* one probability vector, computed for the largest count, serves
+     * all counts below */
+    probvec = poissbin(&pvalue, err_probs, num_err_probs,
+                       max_noncons_count, bonf_factor, sig_level);
+
+#if 0
+    for (i=1; i<max_noncons_count+1; i++) {
+        fprintf(stderr, "DEBUG(%s:%s():%d): prob for count %d=%Lg\n",
+                __FILE__, __FUNCTION__, __LINE__,
+                i, expl(probvec[i]));
+    }
+#endif
+
+    /* pvalue here belongs to the largest count. if that one is
+     * rejected all results stay at LDBL_MAX */
+    if (pvalue * (double)bonf_factor > sig_level) {
+#ifdef DEBUG
+        fprintf(stderr, "DEBUG(%s:%s():%d): Most frequent SNV candidate already gets not signifcant pvalue of %Lg * %lld > %f\n",
+                __FILE__, __FUNCTION__, __LINE__,
+                pvalue, bonf_factor, sig_level);
+#endif
+        goto free_and_exit;
+    }
+
+
+    /* report p-value for each non-consensus base
+     */
+    for (i=0; i<NUM_NONCONS_BASES; i++) {
+        if (0 != noncons_counts[i]) {
+             int errsv;
+             /* reset error state so that a failure of expl() can be
+              * detected */
+             errno = 0;
+             feclearexcept(FE_ALL_EXCEPT);
+
+             pvalue = expl(probvec_tailsum(probvec, noncons_counts[i], max_noncons_count+1));
+
+             errsv = errno;
+             if (errsv || fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW)) {
+                  /* failed expl will set pvalue either to 0 or 1,
+                   * both of which is not wanted here: this function
+                   * should never return 0.0 but only just (LDBL_MIN)
+                   * and 1.0 might create problems with high Bonf/Sig
+                   * factors so we need to return a high value
+                   * (LDBL_MAX)
+                   */
+                 if (pvalue < DBL_EPSILON) {
+                       pvalue = LDBL_MIN;/* to zero but prevent actual 0 value */
+                  } else {
+                       pvalue = LDBL_MAX; /* otherwise set to 1 which might pass filters */
+                  }
+             }
+            snp_pvalues[i] = pvalue;
+#ifdef DEBUG
+            fprintf(stderr, "DEBUG(%s:%s():%d): i=%d noncons_counts=%d max_noncons_count=%d pvalue=%Lg\n",
+                    __FILE__, __FUNCTION__, __LINE__,
+                    i, noncons_counts[i], max_noncons_count, pvalue);
+#endif
+        }
+    }
+
+ free_and_exit:
+    /* probvec is still NULL if we bailed out before poissbin() */
+    if (NULL != probvec) {
+        free(probvec);
+    }
+
+    return 0;
+}
+/* snpcaller() */
+
+
+#ifdef SNPCALLER_MAIN
+
+
+/*
+ * gcc -pedantic -Wall -g -std=gnu99 -O2 -DSNPCALLER_MAIN -o snpcaller snpcaller.c utils.c log.c
+ * newer versions need the convoluted
+ * gcc -Wall -g -std=gnu99 -O2 -DSNPCALLER_MAIN [-DUSE_SNPCALLER] -o snpcaller -I../uthash/ -I../libbam/ snpcaller.c utils.c log.c   plp.c samutils.c ../libbam/libbam.a -lm -lz -lpthread -DNDEBUG
+ *
+
+ Could use poibin for testing but parameter choice there is unclear
+
+ library(poibin)
+ # if pnorm is missing also do library(stats)
+
+ pp=c(0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001, 0.001)
+ nerrs = 1
+ # convert to success probabilities
+ pp=1-pp
+ > dpoibin(kk=length(pp)-nerrs, pp=pp)
+ [1] 0.009910359
+ > ppoibin(kk=length(pp)-nerrs, pp=pp)
+ [1] 0.00995512
+ # no approximation seems to work better:
+ > ppoibin(kk=length(pp)-nerrs, pp=pp, method="NA")
+ [1] 4.732391e-07
+
+ ./snpcaller 10 1 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001 0.001num_trials=10 num_errs=1
+ 0.00896408
+
+ ?!
+
+
+
+ $ ./snpcaller 957 9  $(cat eprobs.txt)
+ num_trials=957 num_errs=9
+ 1.77668e-05
+
+ p = read.table('scratch/errprobs')
+ pp = c(p)$V1
+ nerrs = 9
+ > ppoibin(kk=957-9, pp=1-pp)
+ [1] 0.000262769
+ > ppoibin(kk=957-9, pp=1-pp, method="NA")
+ [1] 1.162356e-05
+
+ ?!
+
+ *
+ */
+/* Test driver: expects the number of trials, the number of errors
+ * and one error probability per trial on the command line and prints
+ * the resulting pvalue(s). Returns 0 on success; exits with a
+ * non-zero status on invalid arguments or if memory runs out. */
+int main(int argc, char *argv[]) {
+     int num_trials;
+     int num_errs;
+     double *err_probs;
+     int i;
+     const float bonf = 1.0 ;
+     const float sig = 1.0 ;
+
+     verbose = 1;
+
+     if (argc<4) {
+          LOG_FATAL("%s\n", "need: num_trials num_errs p_e1 ... p_en");
+          return -1;
+     }
+
+     num_trials = atoi(argv[1]);
+     num_errs = atoi(argv[2]);
+     if (argc-3 != num_trials) {
+          LOG_FATAL("number of trials (%d) doesn't match number of error probabilities (%d)\n", num_trials, argc-3);
+          exit(1);
+     }
+     /* the probability vector is indexed with num_errs and
+      * num_errs-1 and only filled up to the number of trials */
+     if (num_errs < 1 || num_errs > num_trials) {
+          LOG_FATAL("number of errors (%d) has to be between 1 and the number of trials (%d)\n", num_errs, num_trials);
+          exit(1);
+     }
+     err_probs = malloc(sizeof(double) * num_trials);
+     if (NULL == err_probs) {
+          LOG_FATAL("%s\n", "couldn't allocate memory for error probabilities");
+          exit(1);
+     }
+     for (i=3; i<argc; i++) {
+          err_probs[i-3] = atof(argv[i]);
+     }
+     LOG_VERBOSE("num_trials=%d num_errs=%d\n", num_trials, num_errs);
+
+
+#ifdef PSEUDO_BINOMIAL
+     /* NOTE(review): pvalue, num_success and succ_prob are not
+      * declared in this function, so this branch looks like it cannot
+      * compile as is - confirm intended arguments before enabling */
+     {
+          if (-1 == pseudo_binomial(&pvalue,
+                                    num_success, num_trials, succ_prob)) {
+               LOG_ERROR("%s\n", "pseudo_binomial() failed");
+               return -1;
+          }
+          printf("pseudo_binomial: %g\n", pvalue);
+     }
+#endif
+
+
+#ifdef USE_SNPCALLER
+     {
+          long double snp_pvalues[NUM_NONCONS_BASES];
+          int noncons_counts[NUM_NONCONS_BASES];
+          noncons_counts[0] = num_errs;
+          noncons_counts[1] = num_errs-1;
+          noncons_counts[2] = num_errs-2;
+
+          snpcaller(snp_pvalues, err_probs, num_trials, noncons_counts, bonf, sig);
+          printf("prob from snpcaller(): (.. -2:%Lg .. -1:%Lg ..) = %Lg\n", snp_pvalues[2], snp_pvalues[1], snp_pvalues[0]);
+     }
+#else
+     {
+          double *probvec;
+          long double pvalue;
+          probvec = poissbin(&pvalue, err_probs, num_trials,
+                             num_errs, bonf, sig);
+          printf("%Lg\n", pvalue);
+          free(probvec);
+     }
+#endif
+
+     free(err_probs);
+     return 0;
+}
+#endif
diff --git a/src/lofreq/snpcaller.h b/src/lofreq/snpcaller.h
new file mode 100644
index 0000000..117505c
--- /dev/null
+++ b/src/lofreq/snpcaller.h
@@ -0,0 +1,103 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef SNPCALLER_H
+#define SNPCALLER_H
+
+#include "vcf.h"
+#include "plp.h"
+#include "defaults.h"
+
+
+
+/* Settings for variant calling.
+ * NOTE(review): the meaning of most fields is not visible in this
+ * header; the notes below are derived from the field names only and
+ * need to be confirmed against init_varcall_conf() and the callers.
+ */
+typedef struct {
+     /* presumably base quality settings (minimum, minimum for
+      * alternate bases, default for alternate bases) - TODO confirm */
+     int min_bq;
+     int min_alt_bq;
+     int def_alt_bq;
+
+     /* same triple for "jq" - TODO confirm which quality this is */
+     int min_jq;
+     int min_alt_jq;
+     int def_alt_jq;
+
+     int bonf_dynamic; /* boolean: incr bonf as we go along. eventual
+                        * filtering of all has to be done by
+                        * caller! */
+     int min_cov;
+     long long int bonf_subst; /* warning: changed dynamically ! */
+     long long int bonf_indel;
+     float sig;
+     vcf_file_t vcf_out;
+     int flag; /* FIXME doc? */
+
+     /* FIXME the following two logically don't belong here but
+      * would require a new structure */
+     int only_indels; 
+     int no_indels; 
+
+} varcall_conf_t;
+
+
+double
+merge_srcq_baseq_and_mapq(const int sq, const int bq, const int mq);
+
+double
+merge_srcq_baseq_mapq_and_alnq(const int sq, const int bq, const int mq, const int aq);
+
+void
+plp_to_errprobs(double **err_probs, int *num_err_probs, 
+                int *alt_bases, int *alt_counts, int *alt_raw_counts,
+                const plp_col_t *p, varcall_conf_t *conf);
+void 
+plp_to_ins_errprobs(double **err_probs, int *num_err_probs, 
+                    const plp_col_t *p, varcall_conf_t *conf,
+                    char key[MAX_INDELSIZE]);
+
+void 
+plp_to_del_errprobs(double **err_probs, int *num_err_probs, 
+                    const plp_col_t *p, varcall_conf_t *conf,
+                    char key[MAX_INDELSIZE]);
+
+void
+init_varcall_conf(varcall_conf_t *c);
+
+void
+dump_varcall_conf(const varcall_conf_t *c, FILE *stream) ;
+
+
+extern double *
+poissbin(long double *pvalue, const double *err_probs,
+         const int num_err_probs, const int num_failures, 
+         const long long int bonf, const double sig);
+extern int
+snpcaller(long double *snp_pvalues, const double *err_probs,
+          const int num_err_probs, const int *noncons_counts,
+          const long long int bonf_factor,
+          const double sig_level);
+
+
+#endif
diff --git a/src/lofreq/utils.c b/src/lofreq/utils.c
new file mode 100644
index 0000000..c923be3
--- /dev/null
+++ b/src/lofreq/utils.c
@@ -0,0 +1,708 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <limits.h>
+#include <dirent.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <libgen.h>
+#include <ctype.h>
+
+#include "log.h"
+#include "utils.h"
+
+#ifndef SIZE_MAX
+#define SIZE_MAX ((size_t)-1)
+#endif
+
+#define DIR_SEP "/"
+
+
+
+/* qsort() style comparison of two ints that cannot overflow.
+ *
+ * The tempting shortcut of returning the difference of both values
+ * can in theory overflow, see
+ * http://stackoverflow.com/questions/6103636/c-qsort-not-working-correctly
+ *
+ * Returns -1, 0 or 1 if the first value is smaller than, equal to or
+ * larger than the second.
+ */
+int int_cmp(const void *a, const void *b)
+{
+     const int lhs = *(const int *)a;
+     const int rhs = *(const int *)b;
+
+     /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
+     return (lhs > rhs) - (lhs < rhs);
+}
+
+
+/* qsort() style comparison of two doubles. Returns -1, 0 or 1 if the
+ * first value is smaller than, equal to or larger than the second.
+ *
+ * Values are compared exactly: treating values closer than
+ * DBL_EPSILON as equal would make all tiny values (e.g. pvalues
+ * below 1e-16) compare equal and result in an ordering that is not
+ * transitive, which qsort() cannot deal with.
+ */
+int dbl_cmp(const void *a, const void *b)
+{
+     const double da = *(const double *)a;
+     const double db = *(const double *)b;
+
+     return da<db ? -1 : da>db? 1 : 0;
+}
+
+
+/* qsort() style comparison for arrays of C strings: both arguments
+ * point to a string pointer; the strings are compared with strcmp() */
+int str_cmp(const void *a, const void *b)
+{
+    const char *lhs = *(char * const *)a;
+    const char *rhs = *(char * const *)b;
+
+    return strcmp(lhs, rhs);
+}
+
+
+/* Returns the index of the largest of the n doubles in arr. The
+ * lowest index wins on tie. Returns 0 if n is smaller than 2. */
+int argmax_d(const double *arr, const int n)
+{
+  int best = 0;
+  int pos;
+
+  /* element 0 is the initial candidate, so start comparing at 1 */
+  for (pos=1; pos<n; pos++) {
+       if (arr[pos] > arr[best]) {
+            best = pos;
+       }
+  }
+  return best;
+}
+
+
+/* Releases the values stored in a and resets all of its fields. The
+ * structure itself is not freed. */
+void int_varray_free(int_varray_t *a) 
+{
+    assert(NULL != a);
+
+    free(a->data); /* safe even if a->data==NULL */
+
+    a->grow_by_size = 0;
+    a->alloced = 0;
+    a->n = 0;
+    a->data = NULL;
+}
+
+/* Turns a into an empty array without any allocated memory that
+ * will grow according to grow_by_size once values are added. */
+void int_varray_init(int_varray_t *a, 
+                     const size_t grow_by_size)
+{
+    assert(NULL != a);
+
+    a->data = NULL;
+    a->alloced = 0;
+    a->n = 0;
+    a->grow_by_size = grow_by_size;
+}
+
+/* Appends value to a and grows the underlying buffer if there is no
+ * room left for it.
+ *
+ * a->alloced counts bytes. With a grow_by_size of 0 or 1 the buffer
+ * is doubled, otherwise it grows by grow_by_size bytes, rounded up
+ * to a multiple of sizeof(int) so that a full element always fits.
+ *
+ * Logs a fatal error and exits if memory runs out.
+ */
+void int_varray_add_value(int_varray_t *a, const int value)
+{
+    assert(NULL != a);
+
+    if ((a->n + 1) * sizeof(int) > a->alloced) {
+        size_t size_to_alloc;
+        int *grown;
+
+        if (1 >=  a->grow_by_size) {
+             assert(SIZE_MAX - a->alloced > a->alloced);
+             size_to_alloc = 0==a->alloced ? sizeof(int) : a->alloced*2;
+        } else {
+             /* a step that is not a multiple of sizeof(int) would
+              * leave the new element partly outside the buffer */
+             size_t step;
+             assert(SIZE_MAX - a->grow_by_size >= sizeof(int));
+             step = ((a->grow_by_size + sizeof(int) - 1) / sizeof(int)) * sizeof(int);
+             assert(SIZE_MAX - a->alloced > step);
+             size_to_alloc = a->alloced + step;
+        }
+
+        /* keep the old pointer until we know that realloc worked */
+        grown = realloc(a->data, size_to_alloc);
+        if (NULL == grown) {
+             LOG_FATAL("%s\n", "couldn't allocate memory for array");
+             exit(1);
+        }
+        a->data = grown;
+        a->alloced = size_to_alloc;
+    }
+    a->data[a->n] = value;
+    a->n++;
+}
+
+
+/* returns 1 if path is a directory, otherwise 0 if it's anything else
+ * or if there's a permission problem (or any other reason for stat()
+ * to fail).
+ *
+ * The file type is tested with S_ISDIR(): a plain bit test against
+ * S_IFDIR would also succeed for other file types whose type bits
+ * include the directory bit (e.g. block devices).
+ */
+int is_dir(const char *path)
+{
+     struct stat s;
+
+     if (stat(path, &s) != 0) {
+          /* error: could check set errno for more details */
+          return 0;
+     }
+     return S_ISDIR(s.st_mode) ? 1 : 0;
+}
+
+/* also exists in htslib. see http://en.wikipedia.org/wiki/Weak_symbol */
+#pragma weak file_exists
+/* Returns 1 if access() reports that fname exists and 0 otherwise.
+ *
+ * from
+ * http://stackoverflow.com/questions/230062/whats-the-best-way-to-check-if-a-file-exists-in-c-cross-platform
+ */
+int file_exists(const char *fname) 
+{
+     const int rc = access(fname, F_OK);
+
+     return (-1 == rc) ? 0 : 1;
+}
+
+
+/* from http://www.anyexample.com/programming/c/how_to_load_file_into_memory_using_plain_ansi_c_language.xml
+ *
+ * Reads the whole file into a newly allocated, null-terminated
+ * buffer which is stored in result and has to be freed by the
+ * caller.
+ *
+ * returns file size (number of bytes) on success or negative number
+ * on error:
+ *  -1 file could not be opened
+ *  -2 memory allocation failed
+ *  -3 file size could not be determined, file is too large for the
+ *     int return value or reading failed
+ * On error result is set to NULL.
+ *
+ * warnings:
+ * Function ae_load_file_to_memory returns loaded data size which does not take into account last null-terminate symbol.
+ * If you want to use this function to process string data, note that it may work incorrectly with multibyte encodings.
+ */
+int ae_load_file_to_memory(const char *filename, char **result) 
+{ 
+     long fsize;
+     size_t size;
+     FILE *f;
+
+     *result = NULL;
+
+     f = fopen(filename, "rb");
+     if (f == NULL) {
+          return -1; /* -1 means file opening fail */
+     }
+
+     /* determine file size by seeking to the end */
+     if (0 != fseek(f, 0, SEEK_END)) {
+          fclose(f);
+          return -3;
+     }
+     fsize = ftell(f);
+     if (fsize < 0 || fsize >= INT_MAX || 0 != fseek(f, 0, SEEK_SET)) {
+          fclose(f);
+          return -3;
+     }
+     size = (size_t)fsize;
+
+     *result = (char *)malloc(size+1);
+     if (NULL == *result) {
+          fclose(f);
+          return -2; /* -2 means memory allocation fail */
+     }
+
+     if (size != fread(*result, sizeof(char), size, f)) {
+          free(*result);
+          *result = NULL; /* don't leave a dangling pointer behind */
+          fclose(f);
+          return -3; /* -3 means file reading fail  */
+     }
+     fclose(f);
+
+     (*result)[size] = 0;
+     return (int)size;
+}
+
+/* count number of lines. advice from
+ * http://stackoverflow.com/questions/8689344/portable-end-of-line-newline-in-c:
+ * open in binary mode and count \n. in text mode \n is replaced by a
+ * platform specific EOL.
+ *
+ * Returns value <0 on failure (-1 if the file could not be opened,
+ * -2 if the counter would overflow). Otherwise line count.
+ */
+long int
+count_lines(const char *filename)
+{
+    int c;
+    long int count = 0;
+    FILE *f = fopen(filename, "rb");
+
+    if (f == NULL) {
+        return -1; /* -1 means file opening fail */
+    }
+    while (EOF != (c=getc(f))) {
+        if ('\n'==c) {
+            if (count==LONG_MAX) {
+                LOG_FATAL("%s\n", "count overflow!");
+                fclose(f); /* don't leak the handle on error */
+                return -2;
+            }
+            count++;
+        }
+    }
+    fclose(f);
+    return count;
+}
+/* count_lines */
+
+
+/* Lists the entries of directory path whose name contains pattern
+ * (all entries if pattern is NULL). Each match is stored as
+ * "path/name" in a newly allocated string; the array of these
+ * strings is stored in matches. Matches are sorted with str_cmp() if
+ * sort_lexi is non-zero.
+ *
+ * returns -1 on error, otherwise number of matches. caller has to
+ * free matches (each string as well as the array). On error nothing
+ * needs to be freed and matches is set to NULL. */
+int
+ls_dir(char ***matches, const char *path, const char *pattern,
+       const int sort_lexi)
+{
+    DIR* d = opendir(path);
+    struct dirent *sd = NULL;
+    int num_matches = 0;
+    int i;
+
+    (*matches) = NULL;
+
+    if (d == NULL) {
+        LOG_ERROR("Couldn't open path %s\n", path);
+        return -1;
+    }
+
+    while (NULL != (sd = readdir(d))) {/* readdir not thread safe */
+        char **grown;
+        size_t entry_size;
+
+        if (pattern && NULL == strstr(sd->d_name, pattern)) {
+            continue;
+        }
+
+        /* keep the old array until we know that realloc worked,
+         * otherwise the matches found so far could not be freed */
+        grown = realloc((*matches), (num_matches+1)*sizeof(char*)); /* FIXME inefficient one by one allocation */
+        if (NULL == grown) {
+            LOG_ERROR("%s\n", "Realloc failed");
+            goto fail;
+        }
+        (*matches) = grown;
+
+        entry_size = strlen(path) + 1 /*/*/ + strlen(sd->d_name) + 1 /*\0*/;
+        (*matches)[num_matches] = calloc(entry_size, sizeof(char));
+        if (NULL == (*matches)[num_matches]) {
+            LOG_ERROR("%s\n", "Calloc failed");
+            goto fail;
+        }
+        snprintf((*matches)[num_matches], entry_size, "%s/%s", path, sd->d_name);
+        num_matches += 1;
+    }
+    closedir(d);
+
+    /* nothing to sort for less than two matches (also avoids handing
+     * a NULL array to qsort) */
+    if (sort_lexi && num_matches > 1) {
+        qsort((*matches), num_matches, sizeof(char*), *str_cmp);
+    }
+    return num_matches;
+
+fail:
+    for (i=0; i<num_matches; i++) {
+        free((*matches)[i]);
+    }
+    free(*matches);
+    (*matches) = NULL;
+    closedir(d);
+    return -1;
+}
+
+
+
+/* appends dir p2 to p1 and canonicalizes the pathname with
+ * realpath(). returns NULL on error (missing argument, out of
+ * memory) or if normalized path doesn't exist; p1 is left untouched
+ * in that case. On success the memory p1 points to is resized as
+ * needed to hold the result, which is also returned.
+ */
+char * 
+join_paths(char **p1, const char *p2) {
+     size_t bufsize;
+     char *buf;
+     char *buf_resolved;
+     char *grown;
+
+     if (NULL == p1 || NULL == *p1 || NULL == p2) {
+          return NULL;
+     }
+
+     bufsize = strlen(*p1) + strlen(DIR_SEP) + strlen(p2) + 1;
+     if (bufsize < PATH_MAX) {
+          bufsize = PATH_MAX; /* realpath requirement */
+     }
+     buf = malloc(bufsize * sizeof(char));
+     buf_resolved = malloc(bufsize * sizeof(char));
+     if (NULL == buf || NULL == buf_resolved) {
+          free(buf_resolved);
+          free(buf);
+          return NULL;
+     }
+
+     (void) strcpy(buf, *p1);
+     (void) strcat(buf, DIR_SEP);
+     (void) strcat(buf, p2);
+     if (NULL == realpath(buf, buf_resolved)) {
+#if 0
+          LOG_WARN("Couldn't normalize %s: %s\n",
+                   buf, strerror(errno));
+#endif
+          free(buf_resolved);
+          free(buf);
+          return NULL;
+     } 
+
+     /* only replace p1 once we know that realloc worked */
+     grown = realloc(*p1, (strlen(buf_resolved)+1)*sizeof(char));
+     if (NULL == grown) {
+          free(buf_resolved);
+          free(buf);
+          return NULL;
+     }
+     *p1 = grown;
+     (void) strcpy(*p1, buf_resolved);
+
+     free(buf_resolved);
+     free(buf);
+
+     return *p1;
+}
+
+
+
+/* taken from
+ * http://www.delorie.com/gnu/docs/glibc/libc_279.html
+ * needed because if readlink's 'return value equals size, you cannot
+ * tell whether or not there was room to return the entire name'.
+ *
+ * Returns the target of symlink filename as newly allocated,
+ * null-terminated string (caller has to free) or NULL on error
+ * (readlink() failed or out of memory).
+ */
+char *
+readlink_malloc(const char *filename)
+{
+     size_t size = 100;
+     char *buffer = NULL;
+     
+     while (1) {
+          ssize_t nchars;
+          char *grown;
+
+          /* the buffer has to exist before readlink() can write to
+           * it. keep the old one until we know that realloc worked */
+          grown = (char *)realloc(buffer, size);
+          if (NULL == grown) {
+               free(buffer);
+               return NULL;
+          }
+          buffer = grown;
+
+          nchars = readlink(filename, buffer, size);
+          if (nchars < 0) {
+               free(buffer);
+               return NULL;
+          }
+          if ((size_t)nchars < size) {
+               /* readlink() does not terminate the string */
+               buffer[nchars] = '\0';
+               return buffer;
+          }
+
+          /* name might have been truncated: retry with more room */
+          if (size > ((size_t)-1) / 2) {
+               free(buffer);
+               return NULL;
+          }
+          size *= 2;
+     }
+}
+
+
+/* follows symlinks until resolved and returns realpath. returns NULL
+ * on error, otherwise true path. caller has to free.
+ *
+ * Relative link targets are resolved by temporarily changing into
+ * the directory of the link; the original working directory is
+ * restored before returning, on error as well.
+ */
+char *
+resolved_path(const char *path)
+{
+     char *resolved, *tmp_path;
+     char orig_wd[PATH_MAX];
+
+     if (NULL == path) {
+          return NULL;
+     }
+     if (NULL == getcwd(orig_wd, PATH_MAX)) {
+          return NULL;
+     }
+
+     resolved = strdup(path);
+     /* a NULL here (out of memory) ends the loop right away */
+     while (NULL != resolved) {
+          char realpath_buf[PATH_MAX];
+          struct stat stat_buf;
+
+          if (lstat(resolved, &stat_buf)) {
+               /*LOG_WARN("%s\n", "lstat() failed");*/
+               free(resolved);
+               resolved = NULL;
+               break;
+          }
+
+          /* done if not a link */
+          if (! S_ISLNK(stat_buf.st_mode)) {
+               /*LOG_FIXME("no more link: %s\n", resolved);*/
+               break;
+          }
+
+          /* read link and change to dirname of link */
+          if (NULL == (tmp_path = readlink_malloc(resolved))) {
+               LOG_ERROR("%s\n", "readlink() failed.");
+               free(resolved);
+               resolved = NULL;
+               break;
+          }
+          if (-1 == chdir(dirname(resolved))) {
+               LOG_ERROR("%s\n", "chdir() failed.");
+               free(tmp_path);
+               free(resolved);
+               resolved = NULL;
+               /* an earlier iteration might have changed the working
+                * directory, so restore it below */
+               break;
+          }
+          /*LOG_FIXME("Now in %s\n", dirname(resolved));*/
+
+          if (NULL == realpath(tmp_path, realpath_buf)) {
+               LOG_ERROR("realpath failed on %s\n", tmp_path);
+               free(tmp_path);
+               free(resolved);
+               resolved = NULL;
+               break;
+          }
+                    
+          free(tmp_path);
+          free(resolved);
+          resolved = strdup(realpath_buf);
+     }
+
+     if (-1 == chdir(orig_wd)) {
+          LOG_ERROR("%s\n", "chdir() failed. Trying to continue...");
+     }
+     /*LOG_FIXME("resolved path is now %s\n", resolved);*/
+
+     return resolved;
+}
+
+/* Returns the median of the size values in data, computed on a
+ * sorted copy so that data itself is left untouched. For an even
+ * number of values the mean of the two middle values is returned,
+ * with any fraction cut off. Returns 0 if there are no values.
+ *
+ * Logs a fatal error and exits if memory runs out.
+ *
+ * FIXME use wirth's method instead for larger arrays 
+ * FIXME Make malloc optional in case input data can be sorted
+ */
+int
+int_median(int data[], int size)
+{
+     int ret;
+     int *sdata;
+
+     if (size <= 0) {
+          return 0;
+     }
+     sdata = malloc(sizeof(int) * size);
+     if (NULL == sdata) {
+          LOG_FATAL("%s\n", "couldn't allocate memory for median computation");
+          exit(1);
+     }
+     memcpy(sdata, data, sizeof(int) * size);
+     qsort(sdata, size, sizeof(int), int_cmp);
+     if (size%2 == 0) {
+          /* even number: return mean of the two elements in the
+           * middle. sum up as double: the sum of two ints might not
+           * fit into an int */
+          ret = ((double)sdata[size/2] + (double)sdata[size/2 - 1]) / 2.0;
+     } else {
+          /* odd number: return element in middle */
+          ret = sdata[size/2];
+     }
+
+     free(sdata);
+     return ret;
+}
+
+
+
+/* Returns the median of the size values in data, computed on a
+ * sorted copy so that data itself is left untouched. For an even
+ * number of values the mean of the two middle values is returned.
+ * Returns 0.0 if there are no values.
+ *
+ * Logs a fatal error and exits if memory runs out.
+ *
+ * FIXME use wirth's method instead for larger arrays
+ * FIXME Make malloc optional in case input data can be sorted */
+double
+dbl_median(double data[], int size)
+{
+     double ret;
+     double *sdata;
+
+     if (size <= 0) {
+          return 0.0;
+     }
+     sdata =  malloc(sizeof(double) * size);
+     if (NULL == sdata) {
+          LOG_FATAL("%s\n", "couldn't allocate memory for median computation");
+          exit(1);
+     }
+     memcpy(sdata, data, sizeof(double) * size);
+     qsort(sdata, size, sizeof(double), dbl_cmp);
+     if (size%2 == 0) {
+          /* even number: return mean of the two elements in the middle */
+          ret = (sdata[size/2] + sdata[size/2 - 1]) / 2.0;
+     } else {
+          /* odd number: return element in middle */
+          ret = sdata[size/2];
+     }
+     free(sdata);
+     return ret;
+}
+
+
+/* Removes all trailing newline and carriage return characters from s
+ * in place. Does nothing if s is NULL. */
+void chomp(char *s)
+{
+     int idx;
+
+     if (NULL == s) {
+          return;
+     }
+     /* walk backwards from the last character until something other
+      * than a line ending shows up */
+     for (idx = strlen(s)-1; idx >= 0; idx--) {
+          if (s[idx]!='\n' && s[idx]!='\r') {
+               break;
+          }
+          s[idx]='\0';
+     }
+}
+
+
+
+/* Meant to remove whitespace from both ends of str in place.
+ *
+ * Currently disabled: the function prints a FIXME message and exits
+ * the program before doing anything else, so none of the code after
+ * the first statement is reached.
+ */
+void
+strstrip(char *str)
+{
+     size_t size;
+
+     /* guard: untested, see above */
+     fprintf(stderr, "FIXME untested function\n"); exit(1);
+     if (! str) {
+          return;
+     }
+
+     size = strlen(str);
+     if (!size) {
+          return;
+     }
+
+     /* rstrip */
+     while (size>0 && isspace(str[size-1])) {
+          str[--size] = 0;
+     }
+     /* lstrip */
+     /* NOTE(review): this only advances the local copy of the
+      * pointer. neither the caller's pointer nor the string content
+      * is changed, so leading whitespace is not removed */
+     while (*str && isspace(*str)) {
+          str++;
+     }
+}
+
+
+/* Compares the modification times of two regular files.
+ *
+ * returns 1 if p1 is newer (mtime) than p2, 0 if not and -1 on error
+ * (missing argument, stat() failed or not a regular file) */
+int
+is_newer(const char *p1, const char *p2)
+{
+     struct stat first;
+     struct stat second;
+
+     if (NULL == p1 || NULL == p2) {
+          return -1;
+     }
+
+     /* both have to exist and have to be regular files */
+     if (0 != stat(p1, &first) || !S_ISREG(first.st_mode)) {
+          return -1;
+     }
+     if (0 != stat(p2, &second) || !S_ISREG(second.st_mode)) {
+          return -1;
+     }
+
+     if (first.st_mtime > second.st_mtime) {
+          return 1;
+     }
+     return 0;
+}
+
+/* Registers one observation of insertion sequence seq in the hash
+ * head_ins_counts points to. If seq is already known its count, its
+ * sum of qualities and its strand counter (index fw_rv) are
+ * incremented, otherwise a new event is created. In both cases the
+ * four qualities are appended to the corresponding arrays of the
+ * event.
+ *
+ * Logs a fatal error and exits if memory runs out.
+ */
+void add_ins_sequence(ins_event **head_ins_counts, char seq[], 
+     int ins_qual, int ins_aln_qual, int ins_map_qual, int ins_source_qual, 
+     int fw_rv) {
+     ins_event *it = NULL;
+     int seq_length = strlen(seq);
+
+     HASH_FIND(hh_ins, *head_ins_counts, seq, seq_length, it);
+     if (it) {
+          it->count += 1;
+          it->cons_quals += ins_qual;
+          
+          it->fw_rv[fw_rv] += 1;
+          
+          int_varray_add_value(& it->ins_quals, ins_qual);
+          int_varray_add_value(& it->ins_aln_quals, ins_aln_qual);
+          int_varray_add_value(& it->ins_map_quals, ins_map_qual);
+          int_varray_add_value(& it->ins_source_quals, ins_source_qual);
+
+     } else {
+          /* calloc instead of malloc: strncpy() does not terminate
+           * the key if seq has MAX_INDELSIZE-1 or more characters.
+           * with zeroed memory whatever follows the copied
+           * characters is 0 */
+          it = calloc(1, sizeof(ins_event));
+          if (NULL == it) {
+               LOG_FATAL("%s\n", "couldn't allocate memory for insertion event");
+               exit(1);
+          }
+          strncpy((char *)it->key, seq, MAX_INDELSIZE-1);
+          it->count = 1;
+          it->cons_quals = ins_qual;
+          
+          it->fw_rv[0] = it->fw_rv[1] = 0;
+          it->fw_rv[fw_rv] += 1;
+
+          int_varray_init(& it->ins_quals, 0);
+          int_varray_init(& it->ins_aln_quals, 0);
+          int_varray_init(& it->ins_map_quals, 0);
+          int_varray_init(& it->ins_source_quals, 0);
+          
+          int_varray_add_value(& it->ins_quals, ins_qual);
+          int_varray_add_value(& it->ins_aln_quals, ins_aln_qual);
+          int_varray_add_value(& it->ins_map_quals, ins_map_qual);
+          int_varray_add_value(& it->ins_source_quals, ins_source_qual);
+
+          HASH_ADD_KEYPTR(hh_ins, *head_ins_counts, it->key, seq_length, it);
+     }
+}
+
+/* Look up seq (over its full strlen) in the hash *head_ins_counts using
+ * the hh_ins handle. Returns the matching ins_event, or NULL if there
+ * is none. */
+ins_event * find_ins_sequence(ins_event *const *head_ins_counts, char seq[]) {
+     ins_event *it = NULL;
+     HASH_FIND(hh_ins, *head_ins_counts, seq, strlen(seq), it);
+     return it;
+}
+
+/* Empty the hash *head_ins_counts: every ins_event is removed from the
+ * hash, its four quality arrays are released and the event itself is
+ * freed. */
+void destruct_ins_event_counts(ins_event **head_ins_counts) {
+     ins_event *it_ins, *it_tmp;
+     HASH_ITER(hh_ins, *head_ins_counts, it_ins, it_tmp) {
+          /* unlink from the hash before the element is freed */
+          HASH_DELETE(hh_ins, *head_ins_counts, it_ins);
+          int_varray_free(& it_ins->ins_quals);
+          int_varray_free(& it_ins->ins_aln_quals);
+          int_varray_free(& it_ins->ins_map_quals);
+          int_varray_free(& it_ins->ins_source_quals);
+          free(it_ins);
+     }
+}
+
+/* Record one observation of the deletion sequence seq in the hash
+ * *head_del_counts (uthash handle hh_del).
+ *
+ * If an event with the same key exists, its count, quality sum and
+ * strand counter are updated and the four quality values are appended
+ * to its arrays. Otherwise a new del_event is allocated, initialised
+ * with this observation and added to the hash.
+ *
+ * The key buffer holds MAX_INDELSIZE bytes, so only the first
+ * MAX_INDELSIZE-1 characters of seq are used as key. fw_rv indexes the
+ * two-element fw_rv counter array and therefore has to be 0 or 1.
+ * Exits the program if no memory is available for a new event.
+ */
+void add_del_sequence(del_event **head_del_counts, char seq[], 
+     int del_qual, int del_aln_qual, int del_map_qual, int del_source_qual, 
+     int fw_rv) {
+     del_event *it = NULL;
+     int seq_length = strlen(seq);
+
+     /* never copy or hash more than the key buffer can hold */
+     if (seq_length > MAX_INDELSIZE-1) {
+          seq_length = MAX_INDELSIZE-1;
+     }
+
+     HASH_FIND(hh_del, *head_del_counts, seq, seq_length, it);
+     if (it) {
+          it->count += 1;
+          it->cons_quals += del_qual;
+          
+          it->fw_rv[fw_rv] += 1;
+          
+          int_varray_add_value(& it->del_quals, del_qual);
+          int_varray_add_value(& it->del_aln_quals, del_aln_qual);
+          int_varray_add_value(& it->del_map_quals, del_map_qual);
+          int_varray_add_value(& it->del_source_quals, del_source_qual);
+     
+     } else {
+          it = malloc(sizeof(del_event));
+          if (NULL == it) {
+               fprintf(stderr, "FATAL: couldn't allocate memory for deletion event\n");
+               exit(1);
+          }
+          /* explicit terminator: the struct comes uninitialised from malloc */
+          memcpy(it->key, seq, seq_length);
+          it->key[seq_length] = '\0';
+          it->count = 1;
+          it->cons_quals = del_qual;
+          
+          it->fw_rv[0] = it->fw_rv[1] = 0;
+          it->fw_rv[fw_rv] += 1;
+
+          int_varray_init(& it->del_quals, 0);
+          int_varray_init(& it->del_aln_quals, 0);
+          int_varray_init(& it->del_map_quals, 0);
+          int_varray_init(& it->del_source_quals, 0);
+
+          int_varray_add_value(& it->del_quals, del_qual);
+          int_varray_add_value(& it->del_aln_quals, del_aln_qual);
+          int_varray_add_value(& it->del_map_quals, del_map_qual);
+          int_varray_add_value(& it->del_source_quals, del_source_qual);
+
+          HASH_ADD_KEYPTR(hh_del, *head_del_counts, it->key, seq_length, it);
+     }
+}
+
+/* Look up seq (over its full strlen) in the hash *head_del_counts using
+ * the hh_del handle. Returns the matching del_event, or NULL if there
+ * is none. */
+del_event * find_del_sequence(del_event *const *head_del_counts, char seq[]) {
+     del_event *it = NULL;
+     HASH_FIND(hh_del, *head_del_counts, seq, strlen(seq), it);
+     return it;
+}
+
+/* Empty the hash *head_del_counts: every del_event is removed from the
+ * hash, its four quality arrays are released and the event itself is
+ * freed. */
+void destruct_del_event_counts(del_event **head_del_counts) {
+     del_event *it_del, *it_tmp;
+     HASH_ITER(hh_del, *head_del_counts, it_del, it_tmp) {
+          /* unlink from the hash before the element is freed */
+          HASH_DELETE(hh_del, *head_del_counts, it_del);
+          int_varray_free(& it_del->del_quals);
+          int_varray_free(& it_del->del_aln_quals);
+          int_varray_free(& it_del->del_map_quals);
+          int_varray_free(& it_del->del_source_quals);
+          free(it_del);
+     }
+}
+
+/* Convert every character of s to upper case in place.
+ * A NULL pointer is ignored. */
+void strtoupper(char *s) {
+     if (NULL == s) {
+          return;
+     }
+     for (; *s != '\0'; s++) {
+          /* toupper() is only defined for unsigned char values and EOF */
+          *s = (char) toupper((unsigned char) *s);
+     }
+}
+
+
+/* gcc -o utils utils.c log.c -DMEDIAN_MAIN -Wall -ansi -pedantic
+ * (use -DNEWER_MAIN instead for the is_newer test driver) */
+#ifdef MEDIAN_MAIN
+/* Test driver: prints each number given on the command line and then
+ * their median as computed by dbl_median(). */
+int main(int argc, char **argv)
+{
+     int i;
+     double *data;
+
+     if (argc < 2) {
+          fprintf(stderr, "Usage: utils number [number ...]\n");
+          return 1;
+     }
+     data = malloc((argc-1) * sizeof(double));
+     if (NULL == data) {
+          fprintf(stderr, "FATAL: couldn't allocate memory\n");
+          return 1;
+     }
+     for (i=1; i<argc; i++) {
+          data[i-1] = atof(argv[i]);
+          printf("%f\n", data[i-1]);
+     }
+     printf("median = %f\n", dbl_median(data, argc-1));
+     free(data);
+     return 0;
+}
+#endif
+
+#ifdef  NEWER_MAIN
+/* Test driver: prints the result of is_newer() for the two paths given
+ * on the command line. */
+int main(int argc, char **argv)
+{
+     if (argc < 3) {
+          /* argv[1] and argv[2] must not be passed to %s when missing */
+          fprintf(stderr, "Usage: utils file1 file2\n");
+          return 1;
+     }
+     printf("is_newer %s %s = %d\n", argv[1], argv[2], is_newer(argv[1], argv[2]));
+     return 0;
+}
+#endif
diff --git a/src/lofreq/utils.h b/src/lofreq/utils.h
new file mode 100644
index 0000000..f3a31d6
--- /dev/null
+++ b/src/lofreq/utils.h
@@ -0,0 +1,141 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef UTILS_H
+#define UTILS_H
+
+#include <limits.h>
+#include <float.h>
+#include <math.h>
+#include <uthash.h>
+
+#define MAX_INDELSIZE 256
+
+/* true if the string f is longer than ".gz" and ends with it.
+ * note: f is evaluated more than once */
+#define HAS_GZIP_EXT(f)  (strlen(f)>3 && 0==strncmp(&(f)[strlen(f)-3], ".gz", 3))
+
+
+/* INT_MAX is the value PROB_TO_PHREDQUAL_SAFE yields for prob<=0 */
+#define PHREDQUAL_TO_PROB(phred) ((phred)==INT_MAX ? DBL_MIN : pow(10.0, -1.0*(phred)/10.0))
+
+/* requires that prob, as it comes out of our functions, is never zero! */
+#define PROB_TO_PHREDQUAL(prob) ((int)(-10.0 * log10l(prob)))
+#define PROB_TO_PHREDQUAL_SAFE(prob) ((prob)<=0.0 ? INT_MAX : (int)(-10.0 * log10l(prob)))
+
+#define BASECALLQUAL_VALID_RANGE(phred) ((phred)>=0 && (phred)<100)
+
+/* part of x after the last '/', or x itself if it contains none.
+ * fully parenthesised so that it can be used inside larger expressions */
+#define BASENAME(x) (strrchr((x), '/') ? strrchr((x), '/')+1 : (x))
+
+int file_exists(const char *fname);
+int  is_dir(const char *path);
+int ae_load_file_to_memory(const char *filename, char **result);
+int int_cmp(const void *a, const void *b);
+int dbl_cmp(const void *a, const void *b);
+int argmax_d(const double *arr, const int n);
+long int count_lines(const char *filename);
+
+/* Growable array of int values. Set up with int_varray_init(), filled
+ * with int_varray_add_value() and released with int_varray_free(). */
+typedef struct {
+     unsigned long int n; /* number of elements stored */
+     int *data; /* actual array of data */
+
+     size_t grow_by_size; /* if needed grow array by this value. will double previous size if <=1 */
+     size_t alloced; /* actually allocated size for data */
+} int_varray_t;
+
+void int_varray_add_value(int_varray_t *a, const int value);
+void int_varray_free(int_varray_t *a);
+void int_varray_init(int_varray_t *a, 
+                     const size_t grow_by_size);
+
+int
+ls_dir(char ***matches, const char *path, const char *pattern,
+       const int sort_lexi);
+
+char *
+join_paths(char **p1, const char *p2);
+
+void
+chomp(char *s);
+
+char *
+readlink_malloc(const char *filename);
+
+char *
+resolved_path(const char *path);
+
+double
+dbl_median(double data[], int size);
+
+int
+int_median(int data[], int size);
+void
+strstrip(char *str);
+int
+is_newer(const char *p1, const char *p2);
+
+/* utility hash functions for indel calling */
+
+/* One distinct insertion sequence together with everything recorded
+ * for it by add_ins_sequence(). Elements are kept in a uthash table. */
+typedef struct {
+  char key[MAX_INDELSIZE]; /* the inserted sequence; serves as hash key */
+  int count; /* how many times this sequence was added */
+  int cons_quals; /* sum of the ins_qual values of all additions */
+  int_varray_t ins_quals; /* ins_qual of each addition */
+  int_varray_t ins_aln_quals; /* ins_aln_qual of each addition */
+  int_varray_t ins_map_quals; /* ins_map_qual of each addition */
+  int_varray_t ins_source_quals; /* ins_source_qual of each addition */
+  long int fw_rv[2]; /* number of additions per value of the fw_rv argument (0 or 1) */
+  UT_hash_handle hh_ins; /* uthash handle used by all ins_event hash macros */
+} ins_event;
+
+void add_ins_sequence(ins_event **head_ins_count, char seq[], 
+  int ins_qual, int ins_aln_qual, int ins_map_qual, int ins_source_qual, 
+  int fw_rv);
+ins_event *find_ins_sequence(ins_event *const *head_ins_counts, char seq[]);
+void destruct_ins_event_counts(ins_event **head_ins_counts);
+
+/* One distinct deletion sequence together with everything recorded
+ * for it by add_del_sequence(). Elements are kept in a uthash table. */
+typedef struct {
+  char key[MAX_INDELSIZE]; /* the deleted sequence; serves as hash key */
+  int count; /* how many times this sequence was added */
+  int cons_quals; /* sum of the del_qual values of all additions */
+  int_varray_t del_quals; /* del_qual of each addition */
+  int_varray_t del_aln_quals; /* del_aln_qual of each addition */
+  int_varray_t del_map_quals; /* del_map_qual of each addition */
+  int_varray_t del_source_quals; /* del_source_qual of each addition */
+  long int fw_rv[2]; /* number of additions per value of the fw_rv argument (0 or 1) */
+  UT_hash_handle hh_del; /* uthash handle used by all del_event hash macros */
+} del_event;
+
+void add_del_sequence(del_event **head_del_counts, char seq[], 
+  int del_qual, int del_aln_qual, int del_map_qual, int del_source_qual, 
+  int fw_rv);
+del_event * find_del_sequence(del_event *const *head_del_counts, char seq[]);
+void destruct_del_event_counts(del_event **head_del_counts);
+
+void
+strtoupper(char *s);
+
+
+#endif
diff --git a/src/lofreq/vcf.c b/src/lofreq/vcf.c
new file mode 100644
index 0000000..33632ad
--- /dev/null
+++ b/src/lofreq/vcf.c
@@ -0,0 +1,941 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+
+/* NOTE: this is by no means a generic vcf parser, since many
+ * functions depend on the properties/format of your variants. Here,
+ * we only implement whatever is needed inside LoFreq.
+ */
+
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include "htslib/bgzf.h"
+#include "htslib/kstring.h"
+#include "htslib/kseq.h"
+#include "htslib/tbx.h"
+
+#include "uthash.h"
+
+#include "log.h"
+#include "utils.h"
+#include "vcf.h"
+#include "defaults.h"
+
+/* size of the line buffers; parenthesised so that it expands safely
+ * inside larger expressions (e.g. LINE_BUF_SIZE-1) */
+#define LINE_BUF_SIZE (1<<12)
+
+
+/* this is the actual header. all the other stuff is actually called meta-info 
+ * note, newline character is missing here
+ */
+const char *VCF_HEADER = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";
+
+
+
+/* Free all elements of the hash table var_hash: each element is removed
+ * from the table and handed to var_hash_free_elem(). A NULL table is
+ * ignored. The table pointer is passed by value, so the caller's own
+ * pointer is not changed by this function. */
+void 
+var_hash_free_table(var_hash_t *var_hash)
+{
+     var_hash_t *cur, *tmp;
+     if (NULL == var_hash) {
+          return;
+     }
+     HASH_ITER(hh, var_hash, cur, tmp) {
+#ifdef TRACE
+          /* only compiled in with -DTRACE; logged through LOG_ERROR */
+          LOG_ERROR("Freeing %s\n", cur->key);
+#endif
+          /* unlink from the table before the element is freed */
+          HASH_DEL(var_hash, cur);
+          var_hash_free_elem(cur);
+     }
+}
+
+
+/* Free one hash element together with the variant and the key it
+ * holds. The element is not removed from its table here; callers have
+ * to do that first. A NULL pointer is ignored. */
+void
+var_hash_free_elem(var_hash_t *hash_elem_ptr) 
+{
+     if (NULL == hash_elem_ptr) {
+          return;
+     }
+     vcf_free_var(& hash_elem_ptr->var);
+     free(hash_elem_ptr->key);
+     free(hash_elem_ptr);
+}
+
+
+/* Add var under key to the hash *var_hash.
+ *
+ * key and var are NOT copied: the new element stores both pointers,
+ * and var_hash_free_elem() frees them later. If an element with the
+ * same key already exists, nothing is added, the old element is kept
+ * and key and var remain untouched (and owned by the caller).
+ * Exits the program if no memory is available.
+ */
+void
+var_hash_add(var_hash_t **var_hash, char *key, var_t *var)
+{
+    var_hash_t *vh_elem = NULL;
+    var_hash_t *match = NULL;
+
+    HASH_FIND_STR((*var_hash), key, match);
+    if (match) {
+         LOG_DEBUG("Already got a variant match for key '%s'. Will keep the old one.\n", key);
+         return;
+    }
+
+    vh_elem = (var_hash_t *) malloc(sizeof(var_hash_t));
+    if (NULL == vh_elem) {
+         LOG_FATAL("%s\n", "insufficient memory");
+         exit(1);
+    }
+    vh_elem->key = key;
+    vh_elem->var = var;
+    /* existence of the key was ruled out by the lookup above */
+    HASH_ADD_KEYPTR(hh, (*var_hash), vh_elem->key, strlen(vh_elem->key), vh_elem);
+}
+
+
+/* Build the lookup key "<chrom> <pos+1> <ref> <alt>" for var.
+ * key is allocated here and has to be freed by the caller. chrom, ref
+ * and alt must be set. The buffer is sized from the complete formatted
+ * key so that long ref/alt alleles are never cut off.
+ * Exits the program if the key cannot be created.
+ */
+void
+vcf_var_key(char **key, var_t *var)
+{
+     int len;
+
+     assert(var->chrom && var->ref && var->alt);
+     /* first pass only measures the required length */
+     len = snprintf(NULL, 0, "%s %ld %s %s", var->chrom, var->pos+1, 
+                    var->ref, var->alt);
+     if (len >= 0) {
+          (*key) = malloc((len+1) * sizeof(char));
+     }
+     if (len < 0 || NULL == (*key)) {
+          LOG_FATAL("%s\n", "couldn't create variant key");
+          exit(1);
+     }
+     snprintf(*key, len+1, "%s %ld %s %s", var->chrom, var->pos+1, 
+              var->ref, var->alt);
+     /* pos+1 make terminal output easier */
+}
+
+/* Build the lookup key "<chrom> <pos+1>" for var, i.e. a key that
+ * ignores the alleles. key is allocated here and has to be freed by
+ * the caller. The buffer is sized from the complete formatted key.
+ * Exits the program if the key cannot be created.
+ */
+void
+vcf_var_key_pos_only(char **key, var_t *var)
+{
+     int len;
+
+     /* first pass only measures the required length */
+     len = snprintf(NULL, 0, "%s %ld", var->chrom, var->pos+1);
+     if (len >= 0) {
+          (*key) = malloc((len+1) * sizeof(char));
+     }
+     if (len < 0 || NULL == (*key)) {
+          LOG_FATAL("%s\n", "couldn't create variant key");
+          exit(1);
+     }
+     snprintf(*key, len+1, "%s %ld", var->chrom, var->pos+1);
+     /* pos+1 make terminal output easier */     
+}
+
+
+/* printf-like output to a vcf file, plain or bgzip-compressed.
+ *
+ * The text is formatted into a stack buffer first; output that does not
+ * fit (e.g. a very long header) is formatted again into a heap buffer
+ * of the exact size instead of being truncated.
+ *
+ * Returns the result of bgzf_write() for bgzip files and of fputs()
+ * otherwise, or -1 if formatting or the allocation failed.
+ */
+int vcf_printf(vcf_file_t *f, char *fmt, ...)
+{
+     /* sadly there is no gzvprintf */
+     char stack_buf[64000];/* large enough for all but huge headers */
+     char *buf = stack_buf;
+     va_list args;
+     va_list args_retry;
+     int len;
+     int rc;
+
+     va_start(args, fmt);
+     /* a va_list can only be walked once; keep a copy for a second pass */
+     va_copy(args_retry, args);
+     len = vsnprintf(stack_buf, sizeof(stack_buf), fmt, args);
+     va_end(args);
+
+     if (len < 0) {
+          va_end(args_retry);
+          LOG_WARN("%s\n", "Formatting failed in vcf_printf");
+          return -1;
+     }
+     if ((size_t)len >= sizeof(stack_buf)) {
+          buf = malloc((size_t)len + 1);
+          if (NULL == buf) {
+               va_end(args_retry);
+               LOG_FATAL("%s\n", "insufficient memory");
+               return -1;
+          }
+          vsnprintf(buf, (size_t)len + 1, fmt, args_retry);
+     }
+     va_end(args_retry);
+
+     if (f->is_bgz) {
+          rc = bgzf_write(f->fh_bgz, buf, strlen(buf));
+     } else {
+          rc = fputs(buf, f->fh);
+     }
+     if (buf != stack_buf) {
+          free(buf);
+     }
+     return rc;
+}
+
+/* Set the position of f: forwards to fseek() for plain files and to
+ * bgzf_seek() for bgzip files and returns the result of that call. */
+int
+vcf_file_seek(vcf_file_t *f, long int offset, int whence) 
+{
+     if (! f->is_bgz) {
+          return fseek(f->fh, offset, whence);
+     }
+     return bgzf_seek(f->fh_bgz, offset, whence);
+}
+
+
+/* Open the vcf file path for reading (mode 'r') or writing (mode 'w')
+ * and initialise f.
+ *
+ * A path starting with '-' selects stdin/stdout; this is not supported
+ * together with bgzip. For reading, path has to exist and must not be
+ * a directory. f->path is a copy of path which vcf_file_close() frees.
+ *
+ * Returns 0 on success and non-zero otherwise. On failure no memory
+ * stays allocated; if opening itself failed f->path is reset to NULL.
+ */
+int
+vcf_file_open(vcf_file_t *f, const char *path, const int bgzip, char mode) 
+{
+     if (mode!='r' && mode!='w') {
+          LOG_FATAL("Internal error: unknown mode %c\n", mode);
+          return -1;
+     }
+
+     if (path[0] != '-' && mode=='r') {
+          if (! file_exists(path) || is_dir(path)) {
+               LOG_ERROR("VCF file %s does not exist\n", path);
+               return -1;
+          }
+     }
+
+     /* reject before anything is allocated, otherwise the path copy leaks */
+     if (bgzip && path[0] == '-') {
+          LOG_FIXME("%s\n", "bgzip support for stdin/stdout not implemented yet");
+          return -1;
+     }
+
+     f->path = strdup(path);
+     if (NULL == f->path) {
+          LOG_FATAL("%s\n", "insufficient memory");
+          return -1;
+     }
+     f->mode =mode;
+     f->fh = NULL;
+     f->fh_bgz = NULL;
+     
+     if (bgzip) {
+          f->is_bgz = 1;
+          f->fh_bgz = bgzf_open(path, mode=='r' ? "rb" : "wb");
+     } else {
+          f->is_bgz = 0;
+          if (path[0] == '-') {
+               f->fh = (mode=='r') ? stdin : stdout;
+          } else {
+               f->fh = fopen(path, mode=='r' ? "r" : "w");
+          }
+     }     
+
+     if (! f->fh && ! f->fh_bgz) {
+          /* opening failed: don't leave the path copy behind */
+          free(f->path);
+          f->path = NULL;
+          return -1;
+     }
+     return 0;
+}
+
+
+/* Close f and free f->path.
+ *
+ * bgzip files opened for writing are tabix-indexed after a successful
+ * close; a failing index build is logged and its result is returned.
+ * stdout is never closed. The handles and the path of f are reset to
+ * NULL afterwards so that stale pointers cannot be used again.
+ *
+ * Returns the result of the close (or of the index build), 0 meaning
+ * success.
+ */
+int
+vcf_file_close(vcf_file_t *f) 
+{
+     int rc = 0;
+     if (f->is_bgz) {          
+          rc = bgzf_close(f->fh_bgz);
+          f->fh_bgz = NULL;
+          if (rc==0 && f->mode=='w' && f->path && f->path[0] != '-') {
+               int min_shift = -1;
+               tbx_conf_t conf = tbx_conf_vcf;
+               rc = tbx_index_build(f->path, min_shift, &conf);
+               if (rc) {
+                    LOG_WARN("indexing of %s failed\n", f->path);
+               }
+          }
+     } else if (f->fh && f->fh!=stdout) {
+          rc = fclose(f->fh);
+     }
+     f->fh = NULL;
+     free(f->path);
+     f->path = NULL;
+     return rc;
+}
+
+
+/* fgets-like line reader for plain and bgzip vcf files.
+ *
+ * Reads the next line from f into line, which has room for len bytes,
+ * and returns line. Returns NULL on error or EOF. As with fgets the
+ * newline is kept. For bgzip files a line that does not fit is cut to
+ * len-2 characters plus newline (the rest of it is dropped), and an
+ * empty line is reported like EOF.
+ */
+char *
+vcf_file_gets(vcf_file_t *f, int len, char *line) 
+{
+     if (f->is_bgz) {
+          kstring_t str = {0, 0, 0};
+          size_t n;
+
+          /* will get errors like
+             [E::get_intv] failed to parse TBX_VCF, was wrong -p [type] used?
+             The offending line was: "19,0,1"
+             on just gzipped data. not sure how to catch this. the str.l
+             test is a paranoia check
+          */
+          if (len < 2 || bgzf_getline(f->fh_bgz, '\n', &str) <= 0 || str.l < 1) {
+               free(str.s); /* may have been allocated even on failure */
+               return NULL;
+          }
+
+          /* leave room for newline and terminator */
+          n = str.l;
+          if (n > (size_t)len - 2) {
+               n = (size_t)len - 2;
+          }
+          memcpy(line, str.s, n);
+          /* behave like fgets and keep newline */
+          line[n] = '\n';
+          line[n+1] = '\0';
+
+          free(str.s);
+          return line;
+
+     } else {
+          return fgets(line, len, f->fh);
+     }
+}
+
+
+/* Returns 0 if var has no filter entry, the missing value or "PASS" in
+ * its filter field, and 1 for anything else. */
+int vcf_var_filtered(const var_t *var)
+{
+     const char *flt = var->filter;
+
+     if (NULL == flt
+         || 0 == strcmp(flt, VCF_MISSING_VAL_STR)
+         || 0 == strcmp(flt, "PASS")) {
+          return 0;
+     }
+     return 1;
+}
+
+/* Returns 1 if ref or alt of var is longer than one character or if
+ * its info field contains the INDEL key, and 0 otherwise. */
+int vcf_var_is_indel(const var_t *var)
+{
+     /* the info field is only searched if both alleles are short */
+     return (strlen(var->ref)>1
+             || strlen(var->alt)>1
+             || vcf_var_has_info_key(NULL, var, "INDEL")) ? 1 : 0;
+}
+
+/* Check whether the info field of var contains key.
+ *
+ * Returns 1 if one of the ';'-separated info entries is exactly key
+ * (a flag) or starts with "key=", compared case-insensitively, and 0
+ * otherwise. Only the whole key matches: looking for "DP" does not
+ * find "DP4=...".
+ *
+ * If value is not NULL, *value is set to NULL first and, if the entry
+ * found has the form key=val, to a copy of val which the caller must
+ * free. Exits the program if memory runs out.
+ */
+int
+vcf_var_has_info_key(char **value, const var_t *var, const char *key) {
+     const char field_delimiter[] = ";";
+     char *token;
+     char *info;
+     char *info_ptr;
+     size_t key_len;
+     int found = 0;
+
+     if (value) {
+          (*value) = NULL;
+     }
+
+     if (! var->info || ! key) {
+          return 0;
+     }
+     key_len = strlen(key);
+     if (strlen(var->info)<2 || 0 == key_len) {
+          return 0;
+     }
+     /* strsep modifies its input, so work on a copy */
+     info = strdup(var->info);
+     if (! info) {
+          LOG_FATAL("%s\n", "insufficient memory");
+          exit(1);
+     }
+     info_ptr = info;
+     while (NULL != (token = strsep(&info_ptr, field_delimiter))) {
+          /*fprintf(stderr, "token=%s key=%s\n", token, key);*/
+          if (0 != strncasecmp(key, token, key_len)) {
+               continue;
+          }
+          /* token starts with key; accept only if the key ends here too */
+          if (token[key_len] != '=' && token[key_len] != '\0') {
+               continue;
+          }
+          if (value && token[key_len] == '=') {
+               (*value) = strdup(token + key_len + 1);
+               if (! (*value)) {
+                    LOG_FATAL("%s\n", "insufficient memory");
+                    exit(1);
+               }
+          }
+          found = 1;
+          break;
+     }
+
+     free(info);
+     return found;
+}
+
+
+/* Allocate a new variant and store it in *var. All string fields and
+ * the sample list start out as NULL, pos and qual as -1 and
+ * num_samples as 0. Release with vcf_free_var().
+ * Exits the program if no memory is available. */
+void vcf_new_var(var_t **var)
+{
+     (*var) = malloc(sizeof(var_t));
+     if (NULL == (*var)) {
+          LOG_FATAL("%s\n", "insufficient memory");
+          exit(1);
+     }
+     (*var)->chrom = NULL;
+     (*var)->pos = -1;
+     (*var)->id = NULL;
+     (*var)->ref = NULL;
+     (*var)->alt = NULL;
+     (*var)->qual = -1; /* -1 == missing */
+     (*var)->filter = NULL;
+     (*var)->info = NULL;
+
+     (*var)->format = NULL;
+     (*var)->num_samples = 0;
+     (*var)->samples = NULL;
+}
+
+
+/* Free the variant *var with all its fields and samples and reset *var
+ * to NULL, so that the caller is not left with a dangling pointer.
+ * Does nothing if var or *var is NULL. */
+void vcf_free_var(var_t **var)
+{
+     int i;
+
+     if (NULL == var || NULL == (*var)) {
+          return;
+     }
+
+     free((*var)->chrom);
+     free((*var)->id);
+     free((*var)->ref);
+     free((*var)->alt);
+     free((*var)->filter);
+     free((*var)->info);
+
+     free((*var)->format);
+     for (i=0; i<(*var)->num_samples; i++) {
+          free((*var)->samples[i]);
+     }
+     free((*var)->samples);
+
+     free(*var);
+     (*var) = NULL;
+}
+
+/* Create a deep copy of src in *dest. The copy is allocated with
+ * vcf_new_var() and has to be released with vcf_free_var(). String
+ * fields that are NULL in src stay NULL in the copy.
+ * Exits the program if the sample list cannot be allocated. */
+void vcf_cp_var(var_t **dest, var_t *src)
+{
+     int i;
+     vcf_new_var(dest);
+     if (src->chrom) {
+          (*dest)->chrom = strdup(src->chrom);
+     }
+     (*dest)->pos = src->pos;
+     if (src->id) {
+          (*dest)->id = strdup(src->id);
+     }
+     if (src->ref) {
+          (*dest)->ref = strdup(src->ref);
+     }
+     if (src->alt) {
+          (*dest)->alt = strdup(src->alt);
+     }
+     (*dest)->qual = src->qual;
+     if (src->filter) {
+          (*dest)->filter = strdup(src->filter);
+     }
+     if (src->info) {
+          (*dest)->info = strdup(src->info);
+     }
+     if (src->format) {
+          (*dest)->format = strdup(src->format);
+     }
+     (*dest)->num_samples = src->num_samples;
+     if (src->num_samples>0) {
+          (*dest)->samples = malloc(src->num_samples * sizeof(char*));
+          if (NULL == (*dest)->samples) {
+               LOG_FATAL("%s\n", "insufficient memory");
+               exit(1);
+          }
+          for (i=0; i<src->num_samples; i++) {
+               (*dest)->samples[i] = src->samples[i] ? strdup(src->samples[i]) : NULL;
+          }
+     }
+}
+
+/* Write var as one vcf line to vcf_file: chrom, pos+1, id, ref, alt,
+ * qual, filter and info and, if a format is set, the format followed
+ * by all samples. String fields that are NULL and a negative qual are
+ * written as the missing value. */
+void vcf_write_var(vcf_file_t *vcf_file, const var_t *var)
+{
+     /* in theory all values are optional */
+
+     vcf_printf(vcf_file, "%s\t%ld\t%s\t%s\t%s\t",
+             NULL == var->chrom ? VCF_MISSING_VAL_STR : var->chrom,
+             var->pos + 1,
+             NULL == var->id ? VCF_MISSING_VAL_STR : var->id,
+             NULL == var->ref ? VCF_MISSING_VAL_STR : var->ref,
+             NULL == var->alt ? VCF_MISSING_VAL_STR : var->alt);
+     if (var->qual>-1) {
+          vcf_printf(vcf_file, "%d", var->qual);
+     } else {
+          vcf_printf(vcf_file, "%c", VCF_MISSING_VAL_CHAR);
+     }
+
+     vcf_printf(vcf_file, "\t%s\t%s",
+             var->filter ? var->filter : VCF_MISSING_VAL_STR,
+             var->info ? var->info : VCF_MISSING_VAL_STR);
+
+     if (var->format) {
+          int i=0;
+          vcf_printf(vcf_file, "\t%s", var->format);
+          for (i=0; i<var->num_samples; i++) {
+               vcf_printf(vcf_file, "\t%s", var->samples[i]);
+          }
+     }
+     vcf_printf(vcf_file, "\n");
+}
+
+
+/* Append info_str to the info field of var.
+ *
+ * An info field that is unset, empty or the missing value is replaced
+ * by info_str; otherwise info_str is appended after a ';'.
+ *
+ * Returns the new info string, or NULL if an argument is NULL or no
+ * memory is available. In the latter case var->info is left unchanged.
+ */
+char *
+vcf_var_add_to_info(var_t *var, const char *info_str)
+{
+     char *grown;
+     size_t old_len;
+     int had_info;
+
+     if (!var || !info_str) {
+          return NULL;
+     }
+     had_info = (NULL != var->info);
+     old_len = had_info ? strlen(var->info) : 0;
+     /* keep the old pointer until we know that realloc succeeded */
+     grown = realloc(var->info,
+                     (old_len + strlen(info_str)
+                      + 1/*;*/ + 1/*\0*/) * sizeof(char));
+     if (!grown) {
+          return NULL;
+     }
+     if (!had_info) {
+          grown[0] = '\0'; /* fresh memory: make it a valid empty string */
+     }
+     var->info = grown;
+     if (strlen(var->info)) {
+          if (0 == strcmp(var->info, VCF_MISSING_VAL_STR)) {
+               var->info[0] = '\0';
+          } else {
+               (void) strcat(var->info, ";");
+          }
+     }
+     (void) strcat(var->info, info_str);
+     return var->info;
+}
+
+/* Append filter_name to the filter field of var.
+ *
+ * A filter field that is unset, "PASS" or starts with the missing
+ * value character is replaced by filter_name; otherwise filter_name
+ * is appended after a ';'.
+ *
+ * Returns the new filter string, or NULL if an argument is NULL or no
+ * memory is available.
+ */
+char *
+vcf_var_add_to_filter(var_t *var, const char *filter_name)
+{
+     char *grown;
+     size_t old_len = 0;
+
+     if (! filter_name || ! var) {
+          return NULL;
+     }
+     if (var->filter) {
+          /* clear field, if PASSED or missing  */
+          if (0 == strcmp(var->filter, "PASS")
+              ||
+              var->filter[0] == VCF_MISSING_VAL_CHAR) {
+               free(var->filter);
+               var->filter = NULL;
+          }
+     }
+     if (var->filter) {
+          old_len = strlen(var->filter);
+     }
+
+     /* realloc (acts as malloc if the field is unset or was cleared);
+      * keep the old pointer until we know that it succeeded */
+     grown = realloc(var->filter,
+                     (old_len + strlen(filter_name)
+                      + 1/*;*/ + 1/*\0*/) * sizeof(char));
+     if (! grown) {
+          fprintf(stderr, "FATAL: couldn't allocate memory at %s:%s():%d\n",
+                  __FILE__, __FUNCTION__, __LINE__);
+          return NULL;
+     }
+     if (0 == old_len) {
+          grown[0] = '\0';
+     }
+     var->filter = grown;
+
+     /* add */
+     if (strlen(var->filter)) {
+          (void) strcat(var->filter, ";");
+     }
+     (void) strcat(var->filter, filter_name);
+
+     return var->filter;
+}
+
+
+/* Parse the four comma-separated DP4 counts from the info field of var
+ * into dp4 (order: ref_fw, ref_rv, alt_fw, alt_rv).
+ *
+ * Returns 0 on success. Returns 1 if there is no DP4 key, if it has no
+ * value or if the value does not consist of exactly four fields; dp4
+ * is then filled with -1 bytes to mark the error.
+ */
+int vcf_get_dp4(dp4_counts_t *dp4, var_t *var)
+{
+     const char delimiter[] = ",";
+     char *token;
+     char *dp4_char = NULL;
+     char *dp4_ptr;
+     int i = 0;
+
+     /* the key may be present without a value, which leaves dp4_char NULL */
+     if ( ! vcf_var_has_info_key(&dp4_char, var, "DP4") || NULL == dp4_char) {
+          memset(dp4, -1, sizeof(dp4_counts_t)); /* -1 = error */
+          return 1;
+     }
+
+     /* note: strsep modifies ptr, so iterate with a second pointer and
+      * keep dp4_char for free() */
+     dp4_ptr = dp4_char;
+     while (NULL != (token = strsep(& dp4_ptr, delimiter))) {
+          int val = strtol(token, (char **) NULL, 10); /* = atoi */
+          if (i==0) {
+               dp4->ref_fw = val;
+          } else if (i==1) {
+               dp4->ref_rv = val;
+          } else if (i==2) {
+               dp4->alt_fw = val;
+          } else if (i==3) {
+               dp4->alt_rv = val;
+          }
+          i += 1;
+     }
+     free(dp4_char);
+     if (i != 4) {
+          memset(dp4, -1, sizeof(dp4_counts_t)); /* -1 = error */
+          return 1;
+     }
+     return 0;
+}
+
+
+/* Set var->info to "DP=..;AF=..;SB=..;DP4=..,..,..,..", followed by
+ * ";INDEL;HRUN=<hrun>" if indel is non-zero and by ";CONSVAR" if
+ * consvar is non-zero.
+ *
+ * var->info is allocated here and the caller has to free it. A
+ * previous value of var->info is overwritten without being freed.
+ */
+void vcf_var_sprintf_info(var_t *var,
+                          const int dp, const float af, const int sb,
+                          const dp4_counts_t *dp4,
+                          const int indel, const int hrun, 
+                          const int consvar)
+{
+     char buf[LINE_BUF_SIZE];
+     const size_t base_size = sizeof(buf)-32; /* leave some for INDEL and other flags below */
+     int used;
+
+     used = snprintf(buf, base_size,
+              "DP=%d;AF=%f;SB=%d;DP4=%d,%d,%d,%d",
+              dp, af, sb, dp4->ref_fw, dp4->ref_rv, dp4->alt_fw, dp4->alt_rv);
+     if (used < 0) {
+          used = 0;
+          buf[0] = '\0';
+     } else if ((size_t)used >= base_size) {
+          used = base_size-1; /* output was truncated */
+     }
+     /* append at the current end. printing buf into itself
+      * (sprintf(buf, "%s...", buf)) is undefined behaviour */
+     if (indel) {
+          used += snprintf(buf+used, sizeof(buf)-used, ";INDEL;HRUN=%d", hrun);
+     }
+     if (consvar) {
+          snprintf(buf+used, sizeof(buf)-used, "%s", ";CONSVAR");
+     }
+
+     var->info = strdup(buf);
+
+     /* FIXME format and samples not supported */
+}
+
+
+/* Write header to vcf_file exactly as given; nothing (not even a
+ * newline) is added. */
+void vcf_write_header(vcf_file_t *vcf_file, const char *header)
+{
+#if 0
+     /* disabled debugging output: dumps the header and the state of vcf_file */
+     fprintf(stderr, "TMP DEBUG: writing header %s", header);
+     fprintf(stderr, "TMP DEBUG: vcf_file path = %s\n", vcf_file->path);
+     fprintf(stderr, "TMP DEBUG: vcf_file is_bgz = %d\n", vcf_file->is_bgz);
+     fprintf(stderr, "TMP DEBUG: vcf_file fh = %p\n", vcf_file->fh);
+     fprintf(stderr, "TMP DEBUG: vcf_file fh_bgz = %p\n", vcf_file->fh_bgz);
+     fprintf(stderr, "TMP DEBUG: vcf_file mode = %c\n", vcf_file->mode);
+#endif
+     /* passed as argument, not as format, so '%' in header is harmless */
+     vcf_printf(vcf_file, "%s", header);
+}
+
+
+/* Write a complete new vcf header to vcf_file: file format, today's
+ * date, the optional source and reference lines, the INFO definitions
+ * and finally the column header line.
+ *
+ * src can either be the program or the command (that's at least what
+ * the vcftools folks do as well). src and reffa may be NULL, in which
+ * case the corresponding line is left out.
+ */
+void vcf_write_new_header(vcf_file_t *vcf_file, const char *src, const char *reffa)
+{
+     /* INFO definitions, written in this order. none contains a '%' */
+     static char *info_lines[] = {
+          "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw Depth\">\n",
+          "##INFO=<ID=AF,Number=1,Type=Float,Description=\"Allele Frequency\">\n",
+          "##INFO=<ID=SB,Number=1,Type=Integer,Description=\"Phred-scaled strand bias at this position\">\n",
+          "##INFO=<ID=DP4,Number=4,Type=Integer,Description=\"Counts for ref-forward bases, ref-reverse, alt-forward and alt-reverse bases\">\n",
+          "##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">\n",
+          "##INFO=<ID=CONSVAR,Number=0,Type=Flag,Description=\"Indicates that the variant is a consensus variant (as opposed to a low frequency variant).\">\n",
+          "##INFO=<ID=HRUN,Number=1,Type=Integer,Description=\"Homopolymer length to the right of report indel position\">\n"
+     };
+     const int num_info_lines = sizeof(info_lines)/sizeof(info_lines[0]);
+     char date_str[9]; /* YYYYMMDD + '\0' */
+     struct tm now_tm;
+     time_t now = time(0);
+     int i;
+
+     localtime_r(&now, &now_tm);
+     strftime(date_str, sizeof(date_str), "%Y%m%d", &now_tm);
+
+     vcf_printf(vcf_file, "##fileformat=VCFv4.0\n");
+     vcf_printf(vcf_file, "##fileDate=%s\n", date_str);
+     if (src) {
+          vcf_printf(vcf_file, "##source=%s\n", src);
+     }
+     if (reffa) {
+          vcf_printf(vcf_file, "##reference=%s\n", reffa);
+     }
+     for (i=0; i<num_info_lines; i++) {
+          vcf_printf(vcf_file, info_lines[i]);
+     }
+     vcf_printf(vcf_file, "%s\n", VCF_HEADER);
+}
+
+
+/* Parse the header, i.e. all meta-info lines up to and including the
+ * column header line (VCF_HEADER), from vcf_file.
+ *
+ * *header is allocated here in every case and the caller has to free
+ * it. Returns 0 on success, in which case *header holds all lines
+ * read. Returns -1 if the input ended, or more than MAX_HEADER_LEN
+ * lines were read, before the column header line was seen; *header
+ * is then set to a minimal default header and the caller should
+ * rewind the file. Exits the program if memory runs out.
+ */
+int vcf_parse_header(char **header, vcf_file_t *vcf_file)
+{
+     char line[LINE_BUF_SIZE];
+     const int MAX_HEADER_LEN = 10000; /* counted in lines */
+     const size_t vcf_header_len = strlen(VCF_HEADER);
+     size_t header_len = 0; /* tracked to avoid rescanning header per line */
+     int line_no = 0;
+     char *grown;
+
+     /* make sure header is a valid string even if nothing is read */
+     (*header) = malloc(sizeof(char));
+     if (NULL == (*header)) {
+          LOG_FATAL("%s\n", "insufficient memory");
+          exit(1);
+     }
+     (*header)[0] = '\0';
+
+     while (1) {
+          size_t line_len;
+          char *rc = vcf_file_gets(vcf_file, sizeof(line), line);
+          if (++line_no>MAX_HEADER_LEN) {
+               break;
+          }
+          if (NULL == rc) {
+               break;
+          }
+#if 0
+          fprintf(stderr, "Got line %s\n", line);
+#endif
+          line_len = strlen(line);
+          grown = realloc((*header), (header_len + line_len + 1 /* '\0' */) * sizeof(char));
+          if (NULL == grown) {
+               LOG_FATAL("%s\n", "insufficient memory");
+               exit(1);
+          }
+          (*header) = grown;
+          memcpy((*header) + header_len, line, line_len + 1);
+          header_len += line_len;
+          if (line_len >= vcf_header_len
+              && 0 == strncmp(line, VCF_HEADER, vcf_header_len)) {
+               return 0;
+          }
+     }
+     /* failed. set default header */
+     grown = realloc((*header), (vcf_header_len + 1 + 1 /* \n+\0 */) * sizeof(char));
+     if (NULL == grown) {
+          LOG_FATAL("%s\n", "insufficient memory");
+          exit(1);
+     }
+     (*header) = grown;
+     (void) strcpy(*header, VCF_HEADER);
+     (void) strcat(*header, "\n");
+
+     return -1;
+}
+
+
+/* Skip the header of vcf_file so that the next read returns the first
+ * variant. If no header could be parsed the file is rewound to its
+ * start instead. Returns 0 on success and -1 if rewinding failed. */
+int vcf_skip_header(vcf_file_t *vcf_file)
+{
+     char *vcf_header = NULL;
+     int parse_rc = vcf_parse_header(&vcf_header, vcf_file);
+
+     /* the header is not needed here. vcf_parse_header allocates one
+      * even when it fails (a default header), so always free it */
+     free(vcf_header);
+
+     if (0 != parse_rc) {
+          if (vcf_file_seek(vcf_file, 0, SEEK_SET)) {
+               LOG_FATAL("%s\n", "Couldn't rewind file to parse variants"
+                         " after header parsing failed");
+               return -1;
+          }
+     }
+     return 0;
+}
+
+
+/* Parse one tab-separated vcf line into var.
+ *
+ * line is modified (line ending removed, tabs replaced). var has to be
+ * initialised, e.g. by vcf_new_var(), because samples are appended to
+ * its sample list. Fields are taken in the order chrom, pos, id, ref,
+ * alt, qual, filter, info, format; all further fields are samples.
+ * pos is stored zero-based.
+ *
+ * At least five fields are required; a missing filter or info field
+ * is set to the missing value. Returns 0 on success and -1 if the line
+ * has fewer than five fields; fields parsed up to then stay set in
+ * var. Exits the program if memory runs out.
+ */
+int vcf_parse_var_from_line(char *line, var_t *var)
+{
+     const char delimiter[] = "\t";
+     char *token;
+     char *line_ptr;
+     int field_no = 0;
+     char *line_backup;
+
+     chomp(line);
+     line_ptr = line;
+     /* strsep cuts line into pieces; keep a copy for the warning below */
+     line_backup = strdup(line);
+
+     /* note: strsep modifies line_ptr */
+     while (NULL != (token = strsep(&line_ptr, delimiter))) {
+          field_no+=1;
+          if (1 == field_no) {
+               var->chrom = strdup(token);
+
+          } else if (2 == field_no) {
+               var->pos = atol(token)-1;
+
+          } else if (3 == field_no) {
+               var->id = strdup(token);
+
+          } else if (4 == field_no) {
+               var->ref = strdup(token);
+
+          } else if (5 == field_no) {
+               var->alt = strdup(token);
+
+          } else if (6 == field_no) {
+               if (token[0]==VCF_MISSING_VAL_CHAR) {
+                    var->qual = -1;
+               } else {
+                    var->qual = atoi(token);
+               }
+
+          } else if (7 == field_no) {
+               var->filter = strdup(token);
+
+          } else if (8 == field_no) {
+               var->info = strdup(token);
+          } else if (9 == field_no) {
+               var->format = strdup(token);
+
+          } else if (field_no > 9) {
+               char **grown;
+               assert(field_no-10 == var->num_samples);
+               grown = realloc(var->samples, (var->num_samples+1) * sizeof(char*));
+               if (NULL == grown) {
+                    LOG_FATAL("%s\n", "insufficient memory");
+                    exit(1);
+               }
+               var->samples = grown;
+               var->samples[var->num_samples] = strdup(token);
+               var->num_samples += 1;
+          }
+     }
+     if (field_no<5) {
+          LOG_WARN("Parsing of variant incomplete. Only got %d fields. Need at least 5 (line=%s)\n", field_no, line_backup ? line_backup : "");
+          free(line_backup);
+          return -1;
+     }
+     free(line_backup);
+
+     /* allow lenient parsing and fill in missing values. a field that
+      * was parsed is never replaced (qual keeps the value var had) */
+     if (field_no<7) {
+          var->filter = calloc(2, sizeof(char));
+          if (NULL == var->filter) {
+               LOG_FATAL("%s\n", "insufficient memory");
+               exit(1);
+          }
+          var->filter[0] = VCF_MISSING_VAL_CHAR;
+     }
+     if (field_no<8) {
+          var->info = calloc(2, sizeof(char));
+          if (NULL == var->info) {
+               LOG_FATAL("%s\n", "insufficient memory");
+               exit(1);
+          }
+          var->info[0] = VCF_MISSING_VAL_CHAR;
+     }
+
+     return 0;
+}
+
+
+/* Read the next line from vcf_file and parse it into var.
+ * Multi-allelic entries are not treated specially.
+ * Returns -1 if no line could be read (error or EOF); otherwise the
+ * return value of vcf_parse_var_from_line() is passed on, which is
+ * non-zero on error.
+ */
+int vcf_parse_var(vcf_file_t *vcf_file, var_t *var)
+{
+     char buf[LINE_BUF_SIZE];
+
+     if (NULL == vcf_file_gets(vcf_file, sizeof(buf), buf)) {
+          return -1;
+     }
+
+     return vcf_parse_var_from_line(buf, var);
+}
+
+
+/* Parse all remaining variants from vcf_file into a newly allocated
+ * array stored in (*vars). The caller owns the array and its elements
+ * (release each element with vcf_free_var() and the array with free()).
+ *
+ * If only_passed is 1, variants for which vcf_var_filtered() returns
+ * non-zero are dropped.
+ *
+ * Parsing stops at the first line that cannot be read or parsed; EOF
+ * and parse errors are not distinguished.
+ *
+ * Returns the number of stored variants, or -1 if memory allocation
+ * fails, in which case everything allocated here is released again
+ * and (*vars) is set to NULL.
+ */
+int vcf_parse_vars(var_t ***vars, vcf_file_t *vcf_file, int only_passed)
+{
+     int num_vars = 0;
+
+     (*vars) = malloc(1 * sizeof(var_t*));
+     if (NULL == (*vars)) {
+          LOG_FATAL("%s\n", "Out of memory while allocating variant list");
+          return -1;
+     }
+
+     while (1) {
+          var_t *var;
+          var_t **grown;
+
+          vcf_new_var(&var);
+          if (vcf_parse_var(vcf_file, var)) {
+               /* would be nice to distinguish between eof and error */
+               /* an incompletely parsed line may already have stored
+                * strdup()'ed fields in var, so release it through
+                * vcf_free_var() instead of a plain free().
+                * NOTE(review): assumes vcf_free_var() copes with
+                * fields that were never set - confirm. */
+               vcf_free_var(&var);
+               break;
+          }
+
+          if (only_passed==1 && vcf_var_filtered(var)) {
+               vcf_free_var(&var);
+               continue;
+          }
+
+          grown = realloc((*vars), (num_vars+1) * sizeof(var_t*));
+          if (NULL == grown) {
+               int i;
+               LOG_FATAL("%s\n", "Out of memory while growing variant list");
+               vcf_free_var(&var);
+               for (i=0; i<num_vars; i++) {
+                    vcf_free_var(&(*vars)[i]);
+               }
+               free(*vars);
+               (*vars) = NULL;
+               return -1;
+          }
+          (*vars) = grown;
+          (*vars)[num_vars] = var;
+          num_vars += 1;
+
+          if (verbose && num_vars%1000000==0) {
+               LOG_VERBOSE("Still alive and happily parsing var %d\n", num_vars);
+          }
+#if 0
+          LOG_DEBUG("(*vars)[num_vars-1 = %d] = \n", num_vars-1);
+          vcf_write_var(stderr, (*vars)[num_vars-1]);
+#endif
+     }
+
+     return num_vars;
+}
+
+
+/* Insert info into *header immediately before the column header line
+ * (VCF_HEADER). info needs to be terminated with a newline character.
+ *
+ * *header must be a NUL-terminated, heap-allocated string. It is
+ * realloc()'ed here, so the pointer stored in *header may change.
+ * Everything from the first occurrence of VCF_HEADER onwards is
+ * replaced by info, followed by VCF_HEADER and a newline.
+ *
+ * If VCF_HEADER is not found or memory cannot be allocated, a warning
+ * is logged and *header is left untouched.
+ */
+void vcf_header_add(char **header, const char *info)
+{
+     char *token;
+     char *grown;
+     size_t pos;
+     size_t info_len;
+     size_t hdr_len;
+
+     /* make sure to insert before VCF_HEADER */
+
+     token = strstr(*header, VCF_HEADER);
+     if (! token) {
+          LOG_WARN("%s\n", "Can't add info to empty header, because header line is missing");
+          return;
+     }
+     pos = (size_t)(token - (*header));
+     info_len = strlen(info);
+     hdr_len = strlen(VCF_HEADER);
+
+     /* size the buffer from what is written below (prefix + info +
+      * VCF_HEADER + newline + NUL) rather than from the old string
+      * length: the latter is one byte short if the old header ends
+      * right after VCF_HEADER without a newline */
+     grown = realloc(*header, (pos + info_len + hdr_len + 2) * sizeof(char));
+     if (NULL == grown) {
+          LOG_WARN("%s\n", "Can't add info to header: out of memory");
+          return;
+     }
+
+     /* token may be stale after realloc, hence the offset */
+     memcpy(grown + pos, info, info_len);
+     memcpy(grown + pos + info_len, VCF_HEADER, hdr_len);
+     grown[pos + info_len + hdr_len] = '\n';
+     grown[pos + info_len + hdr_len + 1] = '\0';
+
+     *header = grown;
+     return;
+}
+
+
+
+#ifdef VCF_MAIN
+
+/*
+gcc  -Wall -g -std=gnu99 -O2 -DVCF_MAIN -o vcf_main vcf.c utils.c log.c -lz
+valgrind --tool=memcheck --leak-check=full --show-reachable=yes --track-origins=yes ./vcf_main example.vcf
+*/
+/* Test driver (only built with -DVCF_MAIN): reads the header and all
+ * variants from the VCF file argv[1] and writes the variants below a
+ * newly generated header to argv[2]. Returns 0 on success.
+ */
+int main(int argc, char *argv[]) {
+     char *header = NULL; /* NULL so that free() is safe if parsing fails */
+     var_t **vars = NULL;
+     vcf_file_t vcf_file_in, vcf_file_out;
+     int num_vars = 0;
+     int i;
+     char *path_in, *path_out;
+     int gzip_in, gzip_out;
+#if 0
+     debug = 1;
+     verbose = 1;
+#endif
+
+
+     if (argc < 3) {
+          LOG_FATAL("%s\n", "Need two args: vcf-in vcf-out");
+          return 1;
+     }
+     path_in = argv[1];
+     path_out = argv[2];
+
+     gzip_in = HAS_GZIP_EXT(path_in) ? 1 : 0;
+     LOG_INFO("Using %s (%s gzipped)\n", path_in, gzip_in ? "is" : "not");
+     if (vcf_file_open(& vcf_file_in, path_in, gzip_in, 'r')) {
+          LOG_FATAL("%s\n", "vcf_file_open() failed");
+          exit(1);
+     }
+
+     gzip_out = HAS_GZIP_EXT(path_out) ? 1 : 0;
+     LOG_INFO("Using %s (%s gzipped)\n", path_out, gzip_out ? "is" : "not");
+     if (vcf_file_open(& vcf_file_out, path_out, gzip_out, 'w')) {
+          LOG_FATAL("%s\n", "vcf_file_open() failed");
+          exit(1);
+     }
+
+
+     if (0 !=  vcf_parse_header(&header, & vcf_file_in)) {
+          LOG_FATAL("%s\n", "vcf_parse_header() failed");
+          free(header);
+          return 1;
+     }
+     vcf_write_new_header(& vcf_file_out, NULL, NULL);
+     free(header);
+
+
+     /* argument order is (vars, file, only_passed); 0 keeps filtered
+      * variants as well */
+     num_vars = vcf_parse_vars(&vars, & vcf_file_in, 0);
+     if (num_vars < 0) {
+          LOG_FATAL("%s\n", "vcf_parse_vars() failed");
+          vcf_file_close(& vcf_file_in);
+          vcf_file_close(& vcf_file_out);
+          return 1;
+     }
+     for (i=0; i<num_vars; i++) {
+          vcf_write_var(& vcf_file_out, vars[i]);
+     }
+
+     for (i=0; i<num_vars; i++) {
+          vcf_free_var(& vars[i]);
+     }
+     free(vars);
+
+     vcf_file_close(& vcf_file_in);
+     vcf_file_close(& vcf_file_out);
+
+     LOG_VERBOSE("%s\n", "successful exit");
+
+     return 0;
+}
+#endif
diff --git a/src/lofreq/vcf.h b/src/lofreq/vcf.h
new file mode 100644
index 0000000..52d6087
--- /dev/null
+++ b/src/lofreq/vcf.h
@@ -0,0 +1,130 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef VCF_H
+#define VCF_H
+
+#include <stdarg.h>
+
+#include "htslib/bgzf.h"
+/*#include "zlib.h"*/
+#include "uthash.h"
+
+
+/* Handle for a VCF file which carries both a stdio stream and an
+ * htslib BGZF handle. */
+typedef struct {
+     char *path;
+     int is_bgz; /* NOTE(review): presumably selects fh_bgz over fh - confirm in vcf.c */
+     FILE *fh; /* stdio stream */
+     BGZF *fh_bgz; /* htslib BGZF handle */
+     char mode; /* NOTE(review): looks like the mode character handed to vcf_file_open() - confirm */
+} vcf_file_t;
+
+/* One variant, i.e. one data line of a VCF file. The members mirror
+ * the VCF columns CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT
+ * and the per-sample columns. */
+typedef struct {
+     char *chrom;
+     long int pos; /* zero offset */
+     char *id;
+     char *ref;
+     char *alt;
+     int qual; /* the parser stores -1 for a missing value */
+     char *filter;
+     char *info;
+
+     /* genotyping info (not used in lofreq) */
+     char *format;
+     int num_samples; /* number of entries in samples */
+     char **samples;
+} var_t;
+
+/* Four counts as filled in by vcf_get_dp4().
+ * NOTE(review): presumably reference/alternate allele counts on the
+ * forward (fw) and reverse (rv) strand - confirm against vcf.c */
+typedef struct {
+     int ref_fw;
+     int ref_rv;
+     int alt_fw;
+     int alt_rv;
+} dp4_counts_t;
+
+
+/* uthash entry associating a string key with a variant */
+typedef struct {
+     char *key; /* according to uthash doc this should be const but then we can't free it */
+     var_t *var;
+     UT_hash_handle hh; /* bookkeeping handle required by uthash */
+} var_hash_t;
+
+
+void var_hash_add(var_hash_t **var_hash, char *key, var_t *var);
+void var_hash_free_elem(var_hash_t *hash_elem_ptr);
+void  var_hash_free_table(var_hash_t *var_hash);
+
+
+#define VCF_MISSING_VAL_STR "."
+#define VCF_MISSING_VAL_CHAR VCF_MISSING_VAL_STR[0]
+
+
+/* True if the filter field of v starts with the missing value
+ * character or with "PASS". Only the first four characters are
+ * compared, so any filter string beginning with "PASS" matches.
+ * Uses strncmp(), i.e. users of this macro need <string.h>. */
+#define VCF_VAR_PASSES(v) ((v)->filter[0]==VCF_MISSING_VAL_CHAR || 0==strncmp((v)->filter, "PASS", 4))
+
+
+
+int
+vcf_file_seek(vcf_file_t *f, long int offset, int whence);
+int
+vcf_file_open(vcf_file_t *f, const char *path, const int gzip, const char mode);
+int
+vcf_file_close(vcf_file_t *f);
+char *
+vcf_file_gets(vcf_file_t *f, int len, char *line);
+int
+vcf_printf(vcf_file_t *f, char *fmt, ...);
+
+int vcf_get_dp4(dp4_counts_t *dp4, var_t *var);
+
+void vcf_new_var(var_t **var);
+void vcf_free_var(var_t **var);
+void vcf_cp_var(var_t **dest, var_t *src);
+
+void vcf_var_key(char **key, var_t *var);
+void vcf_var_key_pos_only(char **key, var_t *var);
+
+int vcf_parse_header(char **header, vcf_file_t *vcf_file);
+int vcf_skip_header(vcf_file_t *vcf_file);
+int vcf_parse_var_from_line(char *line, var_t *var);
+int vcf_parse_var(vcf_file_t *vcf_file, var_t *var);
+int vcf_parse_vars(var_t ***vars, vcf_file_t *vcf_file, int only_passed);
+
+int vcf_var_is_indel(const var_t *var);
+int vcf_var_has_info_key(char **value, const var_t *var, const char *key);
+int vcf_var_filtered(const var_t *var);
+char *vcf_var_add_to_filter(var_t *var, const char *filter_name);
+char *vcf_var_add_to_info(var_t *var, const char *info_str);
+void vcf_var_sprintf_info(var_t *var,
+                          const int dp, const float af, const int sb,
+                          const dp4_counts_t *dp4,
+                          const int is_indel, const int hrun, const int is_consvar);
+void vcf_write_var(vcf_file_t *vcf_file, const var_t *var);
+void vcf_write_header(vcf_file_t *vcf_file, const char *header);
+void vcf_write_new_header(vcf_file_t *vcf_file, const char *srcprog, const char *reffa);
+void vcf_header_add(char **header, const char *info);
+#endif
diff --git a/src/lofreq/viterbi.c b/src/lofreq/viterbi.c
new file mode 100644
index 0000000..ebf3dc1
--- /dev/null
+++ b/src/lofreq/viterbi.c
@@ -0,0 +1,358 @@
+/* -*- c-file-style: "k&r"; indent-tabs-mode: nil; -*- */
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "viterbi.h"
+#include "utils.h"
+
+#define PHRED_TO_SANGERQUAL(i) ((char)(i)+33)
+#define SANGERQUAL_TO_PHRED(c) ((int)(c)-33)
+#define SANGERQUAL_TO_PROB(c)  (pow(10.0, -0.1*SANGERQUAL_TO_PHRED(c)))
+
+const int BQ2_DEFAULT = 20;
+
+#ifdef PACBIO_REALN
+static int pacbio_msg_printed = 0;
+#endif
+
+/* Left-align the indels of a pairwise alignment and derive the
+ * corresponding state string.
+ *
+ * sref and squery are the two gapped, NUL-terminated alignment rows,
+ * with '*' marking a gap. Both are expected to be slen characters
+ * long. They are not modified; the shifting happens on local copies.
+ *
+ * If new_state_seq is non-NULL, it receives slen state characters
+ * plus a terminating NUL, so it must provide at least slen+1 bytes:
+ * 'I' where the reference row has a gap, 'D' where the query row has
+ * a gap and 'M' otherwise.
+ *
+ * Always returns 0.
+ */
+int left_align_indels(char *sref, char *squery, int slen, char *new_state_seq) {
+
+     char ref[slen+1];
+     char query[slen+1];
+     int i;
+
+     /* bounded copies: never write more than slen characters, no
+      * matter how long the inputs really are */
+     strncpy(ref, sref, slen);
+     ref[slen] = '\0';
+     strncpy(query, squery, slen);
+     query[slen] = '\0';
+
+     i = 0;
+     // FIXME: can be further optimized
+     while (i < slen-1) {
+          if (ref[i] != '*' && query[i] != '*') {
+               if (ref[i+1] == '*') {
+                    /* gap run in ref starts right after column i */
+                    int ilen = 0;
+                    while (ref[i+1+ilen] == '*') { ilen++; }
+                    if (query[i+ilen] == ref[i]) {
+                         /* move the gap run one column to the left */
+                         ref[i+ilen] = ref[i];
+                         ref[i] = '*';
+                         /* re-examine the previous column, but never
+                          * step in front of the buffers: at i == 0
+                          * the old code went on with index -1 */
+                         if (i > 0) { i--; }
+                         continue;
+                    }
+               } else if (query[i+1] == '*') {
+                    /* gap run in query starts right after column i */
+                    int dlen = 0;
+                    while (query[i+1+dlen] == '*') { dlen++; }
+                    if (query[i] == ref[i+dlen]) {
+                         query[i+dlen] = query[i];
+                         query[i] = '*';
+                         if (i > 0) { i--; }
+                         continue;
+                    }
+               }
+          }
+          i++;
+     }
+
+     if (new_state_seq) {
+          for (i = 0; i < slen; i++) {
+               if (ref[i] == '*') { new_state_seq[i] = 'I'; }
+               else if (query[i] == '*') { new_state_seq[i] = 'D'; }
+               else { new_state_seq[i] = 'M'; }
+          }
+          new_state_seq[slen] = '\0';
+     }
+
+     return 0;
+}
+
+/* bqual is the base quality phred score representation as string. so use SANGERQUAL_TO_PROB for conversion */
+/* Align query against ref with a Viterbi recursion over start (S),
+ * match (M), insertion (I) and deletion (D) states and report the
+ * left-aligned state string of the best path.
+ *
+ * ref, query: NUL-terminated sequences.
+ * bqual:      base qualities in Sanger encoding (phred+33), one
+ *             character per query base.
+ * quality:    phred value used in place of any base quality equal
+ *             to 2.
+ * aln:        if non-NULL, receives the state string produced by
+ *             left_align_indels() ('M', 'I', 'D'). It holds at most
+ *             strlen(ref)+strlen(query) characters plus the NUL; the
+ *             caller has to provide the buffer.
+ *
+ * Returns the reference index k at which the traceback stopped
+ * (NOTE(review): presumably the zero-based reference offset at which
+ * the aligned query starts - confirm with callers), or -1 if the
+ * traceback hits an unknown back-pointer.
+ */
+int viterbi(char *ref, char *query, char *bqual, char *aln, int quality)
+{
+     //printf("inside viterbi\n");
+     int qlen = strlen(query)+1;
+     int rlen = strlen(ref)+1;
+
+     double *V_start;
+     double **V_match;
+     double **V_ins;
+     double **V_del;
+
+     char **ptr_match;
+     char **ptr_ins;
+     char **ptr_del;
+
+     // Define transition probabilities
+     // FIXME: define globally to speed up
+#ifdef PACBIO_REALN
+     double alpha = 0.1;
+     if (! pacbio_msg_printed) {
+          fprintf(stderr, "WARN(%s|%s): Using pacbio viterbi params\n", __FILE__, __FUNCTION__);
+          pacbio_msg_printed = 1;
+     }
+#else
+     double alpha = 0.00001;
+#endif
+     double beta = 0.4;
+
+     double L = (double)rlen;
+     double gamma = 1/(2.*L);
+     int i, k;
+     double ep_ins = log10(.25); // Insertion emission probability
+     // log10 transition table; state order is M, I, D, S, E
+     double tp[5][5] = {{0}};
+
+     tp[0][0] = log10((1 - 2*alpha)*(1 - gamma)); // M->M
+     tp[0][1] = log10(alpha*(1 - gamma)); // M->I
+     tp[0][2] = log10(alpha*(1 - gamma)); // M->D
+     tp[0][4] = log10(gamma); // M->E
+     tp[1][0] = log10((1 - beta)*(1 - gamma)); // I->M
+     tp[1][1] = log10(beta*(1 - gamma)); // I->I
+     tp[1][4] = log10(gamma); // I->E
+     tp[2][0] = log10(1- beta); // D->M
+     tp[2][2] = log10(beta); // D->D
+     tp[3][0] = log10((1 - alpha)/L); // S->M
+     tp[3][1] = log10(alpha/L); // S->I
+
+
+     // Initialize
+     // score tables are indexed [reference position k][query position i]
+     V_start = malloc(qlen * sizeof(double));
+     V_match = malloc(rlen * sizeof(double*));
+     V_ins = malloc(rlen * sizeof(double*));
+     V_del = malloc(rlen * sizeof(double*));
+     for (i = 0; i < rlen; i++) {
+          V_match[i] = calloc(qlen, sizeof(double));
+          V_ins[i] = calloc(qlen, sizeof(double));
+          V_del[i] = calloc(qlen, sizeof(double));
+     }
+
+     // INT_MIN serves as "impossible" log score
+     for (i = 0; i < qlen; i++) {
+          V_start[i] = INT_MIN;
+     }
+     for (k = 0; k < rlen; k++) {
+          V_match[k][0] = INT_MIN;
+          V_ins[k][0] = INT_MIN;
+          V_del[k][0] = INT_MIN;
+     }
+     for (i = 0; i < qlen; i++) {
+          V_match[0][i] = INT_MIN;
+          V_ins[0][i] = INT_MIN;
+          V_del[0][i] = INT_MIN;
+     }
+     V_start[0] = 0;
+
+     // back-pointer tables: state the best path came from
+     ptr_match = malloc(rlen * sizeof(char*));
+     ptr_ins = malloc(rlen * sizeof(char*));
+     ptr_del = malloc(rlen * sizeof(char*));
+     for (i=0; i<rlen; i++) {
+          ptr_match[i] = calloc(qlen, sizeof(char));
+          ptr_ins[i] = calloc(qlen, sizeof(char));
+          ptr_del[i] = calloc(qlen, sizeof(char));
+     }
+
+     // Recursion
+     double bp;
+     for (i = 1; i < qlen; i++) {
+          double ep_match;
+          double ep_match_not;
+
+          // Define emission probabilities
+          if ( SANGERQUAL_TO_PHRED(bqual[i-1]) == 2) {
+               bp = SANGERQUAL_TO_PROB(PHRED_TO_SANGERQUAL(quality));
+          } else {
+               bp = SANGERQUAL_TO_PROB(bqual[i-1]);
+          }
+          ep_match = log10(1-bp);
+          ep_match_not = log10(bp/3.);
+
+          for (k = 1; k < rlen; k++) {
+               int index;
+
+               // V_Mk(i) = log(e_Mk(x_i)) + max( S_0(i-1) + log(a_(S_0,M_k)),
+               //                                 M_k-1(i-1) + log(a_(M_k-1,M_k)),
+               //                                 I_k-1(i-1) + log(a_(I_k-1,M_k)),
+               //                                 D_k-1(i-1) + log(a_(D_k-1,M_k)) )
+               double mterms[4] = {V_start[i-1] + tp[3][0],
+                                   V_match[k-1][i-1] + tp[0][0],
+                                   V_ins[k-1][i-1] + tp[1][0],
+                                   V_del[k-1][i-1] + tp[2][0]};
+               index = argmax_d(mterms, 4);
+               ptr_match[k][i] = "SMID"[index];
+               if (query[i-1] == ref[k-1]) {
+                    V_match[k][i] = ep_match + mterms[index];
+               } else {
+                    V_match[k][i] = ep_match_not + mterms[index];
+               }
+
+               // V_Ik(i) = log(e_Ik(x_i)) + max( S_0(i-1) + log(a_(S_0,I_k)),
+               //                                 M_k(i-1) + log(a_(M_k,I_k)),
+               //                                 I_k(i-1) + log(a_(I_k,I_k)) )
+
+               double iterms[3] = {V_start[i-1] + tp[3][1],
+                                   V_match[k][i-1] + tp[0][1],
+                                   V_ins[k][i-1] + tp[1][1]};
+               index = argmax_d(iterms, 3);
+               ptr_ins[k][i] = "SMI"[index];
+               V_ins[k][i] = ep_ins + iterms[index];
+
+               // V_Dk(i) = max( M_k-1(i) + log(a_(M_k-1,D_k)),
+               //                D_k-1(i) + log(a_(D_k-1,D_k)) )
+               double dterms[2] = {V_match[k-1][i] + tp[0][2],
+                                   V_del[k-1][i] + tp[2][2]};
+               index = argmax_d(dterms, 2);
+               ptr_del[k][i] = "MD"[index];
+               V_del[k][i] = dterms[index];
+
+               //fprintf(stderr, "k:%d, i:%d, %f, %f, %f\n", k, i,
+               //                 V_match[k][i], V_ins[k][i], V_del[k][i]);
+          }
+     }
+
+     // Termination
+     // max[M_L(N), I_L(N)] over all reference positions; note that
+     // deletion states are not considered as end state here
+     char end_state = '!';
+     double best_score = INT_MIN;
+     int best_index = 0;
+     for (k = 0; k < rlen; k++) {
+          if (V_match[k][qlen-1] > best_score) {
+               end_state = 'M';
+               best_score = V_match[k][qlen-1];
+               best_index = k;
+          }
+          if (V_ins[k][qlen-1] > best_score) {
+               end_state = 'I';
+               best_score = V_ins[k][qlen-1];
+               best_index = k;
+          }
+     }
+     //fprintf(stderr, "ended on %c, best_score is %f, best_index is %d\n",
+     //     end_state, best_score, best_index);
+     for (i = 0; i < rlen; i++) {
+          free(V_match[i]);
+          free(V_ins[i]);
+          free(V_del[i]);
+     }
+     free(V_match);
+     free(V_ins);
+     free(V_del);
+     free(V_start);
+
+     // Trace-back
+     // the three tmp_ buffers are filled from the back (index si)
+     i = qlen - 1;
+     k = best_index;
+     int maxslen = qlen+rlen;
+     char current_ptr = end_state;
+     char tmp_state_seq[maxslen], tmp_ref[maxslen], tmp_query[maxslen];
+     tmp_state_seq[qlen+rlen-1] = tmp_ref[qlen+rlen-1] = tmp_query[qlen+rlen-1] = '\0';
+     int si = qlen+rlen-2;
+     int bad_ptr = 0;
+
+     while (i != 0 && k != 0) {
+          tmp_state_seq[si] = current_ptr;
+          if (current_ptr == 'S') {
+               break;
+          } else if (current_ptr == 'M') {
+               tmp_ref[si] = ref[k-1];
+               tmp_query[si] = query[i-1];
+               current_ptr = ptr_match[k][i];
+               i -= 1;
+               k -= 1;
+          } else if (current_ptr == 'I') {
+               tmp_ref[si] = '*';
+               tmp_query[si] = query[i-1];
+               current_ptr = ptr_ins[k][i];
+               i -= 1;
+          } else if (current_ptr == 'D') {
+               tmp_ref[si] = ref[k-1];
+               tmp_query[si] = '*';
+               current_ptr = ptr_del[k][i];
+               k -= 1;
+          } else {
+               // unknown back-pointer: give up, but only return after
+               // the back-pointer tables have been released below
+               bad_ptr = 1;
+               break;
+          }
+          si--;
+     }
+     for (i=0; i<rlen; i++) {
+          free(ptr_match[i]);
+          free(ptr_ins[i]);
+          free(ptr_del[i]);
+     }
+     free(ptr_match);
+     free(ptr_ins);
+     free(ptr_del);
+
+     if (bad_ptr) {
+          return -1;
+     }
+
+     {
+          // the traced path starts right behind the last unused slot
+          char *state_seq = tmp_state_seq+si+1;
+          char *new_ref = tmp_ref+si+1;
+          char *new_query = tmp_query+si+1;
+          //fprintf(stderr, "ref:%s, query:%s, state_seq:%s\n", ref+1, query+1, state_seq);
+          int state_seq_len = strlen(state_seq);
+          char *new_state_seq = malloc(sizeof(char)*(state_seq_len+1));
+          left_align_indels(new_ref, new_query, state_seq_len, new_state_seq);
+
+          if (aln) {
+               strcpy(aln, new_state_seq);
+          }
+
+          free(new_state_seq);
+     }
+
+    return k;
+
+}
+
+/* Smoke test: runs viterbi() and left_align_indels() on a few
+ * hard-coded examples. The results are not checked. Always returns 0.
+ */
+int viterbi_test()
+{
+     struct {
+          char *ref_row;
+          char *query_row;
+          int len;
+     } gapped[] = {
+          {"CCATATGG", "CCAT**GG", 8},
+          {"CCAT**GG", "CCATATGG", 8},
+          {"CCATATGG*CC", "CCAT**GGGCC", 11}
+     };
+     size_t c;
+
+     fprintf(stderr, "Testing viterbi realignment...\n");
+     viterbi("CCATATGG", "CCATGG", "??????", NULL, 20);
+
+     fprintf(stderr, "Testing left-alignment of indels...\n");
+     for (c = 0; c < sizeof(gapped)/sizeof(gapped[0]); c++) {
+          left_align_indels(gapped[c].ref_row, gapped[c].query_row,
+                            gapped[c].len, NULL);
+     }
+
+     return 0;
+}
+
+//test to figure out quality calculation
+#ifdef VITERBI_MAIN
+int main(int argc, char *argv[])
+{
+  int i;
+  char *bq = "#5?";
+  for (i=0; i<strlen(bq); i++) {
+    char c = bq[i];
+    printf("#%d: %c=%g=%d (cast %d)\n", i+1, c, SANGERQUAL_TO_PROB(c), SANGERQUAL_TO_PHRED(c), (int)c);
+  }
+  return 0;
+}
+#endif
diff --git a/src/lofreq/viterbi.h b/src/lofreq/viterbi.h
new file mode 100644
index 0000000..c724678
--- /dev/null
+++ b/src/lofreq/viterbi.h
@@ -0,0 +1,33 @@
+/*********************************************************************
+* The MIT License (MIT)
+* 
+* Copyright (c) 2013,2014 Genome Institute of Singapore
+* 
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation files
+* (the "Software"), to deal in the Software without restriction,
+* including without limitation the rights to use, copy, modify, merge,
+* publish, distribute, sublicense, and/or sell copies of the Software,
+* and to permit persons to whom the Software is furnished to do so,
+* subject to the following conditions:
+* 
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+* 
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+*
+************************************************************************/
+
+#ifndef VITERBI_H
+#define VITERBI_H
+int left_align_indels(char *sref, char *squery, int slen, char *res);
+int viterbi(char *ref, char *query, char *bqual, char *aln, int quality);
+int viterbi_test();
+#endif
diff --git a/src/scripts/Makefile.am b/src/scripts/Makefile.am
new file mode 100644
index 0000000..0adedaf
--- /dev/null
+++ b/src/scripts/Makefile.am
@@ -0,0 +1,4 @@
+dist_bin_SCRIPTS = lofreq2_somatic.py lofreq2_call_pparallel.py
+EXTRA_DIST = lofreq2_local.py
+
+
diff --git a/src/scripts/README b/src/scripts/README
new file mode 100644
index 0000000..96d9e3f
--- /dev/null
+++ b/src/scripts/README
@@ -0,0 +1 @@
+Directory for LoFreq's entirely independent scripts 
\ No newline at end of file
diff --git a/src/scripts/lofreq2_call_pparallel.py b/src/scripts/lofreq2_call_pparallel.py
new file mode 100755
index 0000000..6161ac9
--- /dev/null
+++ b/src/scripts/lofreq2_call_pparallel.py
@@ -0,0 +1,742 @@
+#!/usr/bin/env python
+"""Parallel wrapper for 'lofreq call': Runs one thread per seq/chrom
+listed in header (used as region to make use of indexing feature) and
+bed file (if given) and combines results at the end.
+"""
+
+__author__ = "Andreas Wilm"
+__email__ = "wilma at gis.a-star.edu.sg"
+__copyright__ = "2013, 2014 Genome Institute of Singapore"
+__license__ = "The MIT License"
+
+
+#--- standard library imports
+#
+import sys
+import logging
+import subprocess
+import multiprocessing
+import tempfile
+import shutil
+import os
+import gzip
+from collections import namedtuple
+
+#--- third-party imports
+#
+#/
+
+#--- project specific imports
+#
+# sets PATH so that scripts/binaries present in the src dir are used
+# first, i.e. stuff can be run without installing it
+try:
+    import lofreq2_local
+except ImportError:
+    pass
+
+
+Region = namedtuple('Region', ['chrom', 'start', 'end'])
+# coordinates in Python-slice / bed format, i.e. zero-based half-open
+
+
+# global logger
+LOG = logging.getLogger("")
+logging.basicConfig(level=logging.WARN,
+                    format='%(levelname)s [%(asctime)s]: %(message)s')
+
+BIN_PER_THREAD = 2
+
+
+def prob_to_phredqual(prob):
+    """WARNING: near-identical copy from utils.py. copied here to make
+    script independent.
+
+    Turns an error probability into a phred value
+
+    >>> prob_to_phredqual(0.01)
+    20
+    """
+
+    from math import log10
+    #MAX_INT = 2147483647
+
+    assert prob >= 0.0 and prob <= 1.0, (
+        "Probability must be >=0 and <=1, but got %f" % prob)
+    try:
+        return int(round(-10.0 * log10(prob)))
+    except ValueError:
+        # prob is zero
+        return sys.maxint
+        #return MAX_INT
+
+def split_region_(start, end):
+    """split region (given in zero-based half-open start and end
+    coordinates) in two halves
+    """
+    l = end - start
+    assert l > 1, ("Region too small to be split: %d--%d" % (start, end))
+    m = l//2# explicit integer divison
+    return ((start, start+m), (start+m, end))
+
+
+def region_length(reg):
+    return reg.end-reg.start
+
+
+def split_region(reg):
+    """split region (given in zero-based half-open start and end
+    coordinates) in two halves
+    """
+    return [Region(reg.chrom, x[0], x[1])
+            for x in split_region_(reg.start, reg.end)]
+
+
+def read_bed_coords(fbed):
+    """Fault-resistant reading of coordinates from bed file. Yields
+    regions as chrom, start, end tuple with zero-based half-open
+    coordinates. Based on the implementation in LoFreq 0.6.0
+    """
+
+    with open(fbed, 'r') as fh:
+        for line in fh:
+            if line.startswith('#') or len(line.strip()) == 0:
+                continue
+            # bed should use tab as delimiter. use whitespace if tab fails.
+            if len(line.split('\t')) >= 3:
+                (chrom, start, end) = line.split("\t")[0:3]
+            elif len(line.split()) >= 3:
+                (chrom, start, end) = line.split()[0:3]
+            else:
+                start = end = "NAN"# caught later
+            try:
+                # float conversion for support of scientific notation
+                (start, end) = [int(float(x)) for x in [start, end]]
+            except ValueError:
+                if line.startswith('browser') or line.startswith('track'):
+                    continue
+                else:
+                    #import pdb; pdb.set_trace()
+                    raise ValueError, (
+                        "Couldn't parse the following line"
+                        " from bed-file %s: %s" % (fbed, line))
+
+            if end <= start or end < 0 or start < 0:
+                LOG.fatal("Invalid coordinates start=%d end=%d read from %s" % (
+                    start, end, fbed))
+                raise ValueError
+
+            yield (chrom, start, end)
+
+
+def total_num_tests_from_logs(log_files):
+    """Extract number of performed tests from all log files and
+    returns their sum (for multiple testing correction)
+    """
+
+    total_num_snv_tests = 0
+    total_num_indel_tests = 0
+    for f in log_files:
+        fh = open(f, 'r')
+        num_snv_tests_found = False
+        num_indel_tests_found = False
+
+        for line in fh:
+            if line.startswith('Number of substitution tests performed'):
+                num_snv_tests = int(line.split(':')[1])
+                total_num_snv_tests += num_snv_tests
+                num_snv_tests_found = True
+            if line.startswith('Number of indel tests performed'):
+                num_indel_tests = int(line.split(':')[1])
+                total_num_indel_tests += num_indel_tests
+                num_indel_tests_found = True
+        if not num_snv_tests_found:
+            LOG.fatal("Didn't find number of snv tests in log-file %s" % (f))
+            return (-1, -1)
+        if not num_indel_tests_found:
+            LOG.fatal("Didn't find number of indel tests in log-file %s" % (f))
+            return (-1, -1)
+
+        fh.close()
+
+    return (total_num_snv_tests, total_num_indel_tests)
+
+
+def concat_vcf_files(vcf_files, vcf_out, source=None):
+    """FIXME source unused
+    """
+
+    assert not os.path.exists(vcf_out)
+
+    cmd = ['lofreq', 'vcfset', '-a', 'concat', '-o', vcf_out, '-1']
+    cmd.extend(vcf_files)
+    try:
+        subprocess.check_call(cmd)
+    except subprocess.CalledProcessError as e:
+        LOG.fatal("The following command failed with return code %d: %s" % (
+            e.returncode, ' '.join(cmd)))
+        sys.exit(1)
+
+
+def sq_list_from_bam_samtools(bam):
+    """Extract SQs listed in BAM head using samtools
+
+    Elements of returned list is a 2-tuple with sq, length.
+    """
+
+    assert os.path.exists(bam), ("BAM file %s does not exist" % bam)
+    cmd = 'samtools view -H %s' % (bam)
+    LOG.debug("cmd=%s" % cmd)
+    process = subprocess.Popen(cmd.split(),
+                               shell=False,
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE)
+    (stdoutdata, stderrdata) = process.communicate()
+
+    retcode = process.returncode
+    if retcode != 0:
+        LOG.fatal("%s exited with error code '%d'." \
+                  " Command was '%s'. stderr was: '%s'" % (
+                      cmd.split()[0], retcode, cmd, stderrdata))
+        raise OSError
+    stdout_lines = str.splitlines(stdoutdata)
+
+    sq_list = []
+
+    for line in stdout_lines:
+        if not line.startswith("@SQ"):
+            continue
+        line_split = line.rstrip().split()
+        sn_field = [x for x in line_split if x.startswith("SN:")][0]
+        sq = sn_field[3:]
+        ln_field = [x for x in line_split if x.startswith("LN:")][0]
+        ln = int(ln_field[3:])
+        sq_list.append((sq, ln))
+
+    if len(sq_list) == 0:
+        LOG.error("No mapping reads in index for %s found."
+                  " Reindexing should solve this."
+                  " Trying samtools instead" % (bam))
+        sys.exit(1)
+
+    return sq_list
+
+
+def sq_list_from_bam(bam):
+    """Extract the sequences (SQs) listed for a BAM file by running
+    'lofreq idxstats' on it.
+
+    Args:
+        bam: path to an existing BAM file.
+
+    Returns:
+        List of 3-tuples (sq, length, number of mapped reads), in the
+        order reported by idxstats. The '*' entry is left out.
+
+    Raises:
+        AssertionError: if bam does not exist.
+        OSError: if 'lofreq idxstats' exits with a non-zero status.
+    """
+
+    assert os.path.exists(bam), ("BAM file %s does not exist" % bam)
+    # Hand the command over as an argument list. Formatting a string
+    # and splitting it on whitespace would tear apart BAM paths that
+    # contain spaces.
+    cmd_args = ['lofreq', 'idxstats', bam]
+    cmd = ' '.join(cmd_args)# for logging only
+    LOG.debug("cmd=%s" % cmd)
+    process = subprocess.Popen(cmd_args,
+                               shell=False,
+                               stdout=subprocess.PIPE,
+                               stderr=subprocess.PIPE)
+    (stdoutdata, stderrdata) = process.communicate()
+
+    retcode = process.returncode
+    if retcode != 0:
+        LOG.fatal("%s exited with error code '%d'." \
+                  " Command was '%s'. stderr was: '%s'" % (
+                      cmd_args[0], retcode, cmd, stderrdata))
+        raise OSError
+
+    # orig src: sq_list_from_header()
+    sq_list = []
+    for line in stdoutdata.splitlines():
+        # tolerate blank lines instead of failing on the unpack below
+        if not line.strip():
+            continue
+        # chrom len #mapped #unmapped
+        (sq, sqlen, n_mapped, n_unmapped) = line.rstrip().split()
+        (sqlen, n_mapped, n_unmapped) = [int(x) for x in [
+            sqlen, n_mapped, n_unmapped]]
+        if sq != '*':
+            sq_list.append((sq, sqlen, n_mapped))
+
+    return sq_list
+
+
+def bins_from_bamheader(bam):
+    """Return regions/bins determined by the chromosomes listed for
+    the BAM file. Chromosomes with no reads mapped are skipped.
+
+    Args:
+        bam: path to the BAM file.
+
+    Returns:
+        List of (chrom, 0, length) tuples.
+
+    Exits with status 1 if no chromosome with mapped reads is found.
+    """
+
+    sq_list = sq_list_from_bam(bam)
+
+    # get list of chromosome and their length. if supported also get
+    # number of mapped reads to remove chromosome with no reads mapped
+    #
+    # have three elements and 3rd is #reads
+    #
+    # Guard against an empty list first: indexing sq_list[0] would
+    # raise IndexError and the fatal message below would never be
+    # reached.
+    if sq_list and len(sq_list[0]) == 3:
+        # remove those with no reads mapped
+        sq_list = [x for x in sq_list if x[2] > 0]
+        if len(sq_list) == 0:
+            LOG.warning("Looks like the index for %s is a bit old"
+                        " (idxstats reports no reads mapped). Reindexing"
+                        " should solve this. Will continue by calling samtools,"
+                        " so no need to worry for now though." % (bam))
+            sq_list = sq_list_from_bam_samtools(bam)
+            if len(sq_list) == 0:
+                LOG.fatal("Sorry, samtools failed as well :(")
+                sys.exit(1)
+
+    if len(sq_list) == 0:
+        LOG.fatal("Oops. Found no chromsomes in header of %s"
+                  " that have any reads mapped!?" % bam)
+        sys.exit(1)
+
+    return [(x[0], 0, x[1]) for x in sq_list]
+
+
+def lofreq_cmd_per_bin(lofreq_call_args, bins, tmp_dir):
+    """Yield one 'lofreq call' shell command string per bin.
+
+    Commands are yielded longest bin first, but output files are
+    named after the position of the bin in the input list, so that
+    the results only need to be concatenated by number afterwards to
+    come out in input order.
+
+    Args:
+        lofreq_call_args: list of arguments, starting with the
+            program name, shared by all commands.
+        bins: regions with chrom, start and end attributes (start is
+            incremented by one for the region string).
+        tmp_dir: directory receiving <index>.vcf.gz and <index>.log.
+
+    The strings are meant to be run through a shell, therefore every
+    argument and path is shell-quoted. Values without special
+    characters are left untouched by the quoting.
+    """
+
+    # shlex.quote exists on Python 3 only; pipes.quote is the
+    # Python 2 equivalent
+    try:
+        from shlex import quote as shell_quote
+    except ImportError:
+        from pipes import quote as shell_quote
+
+    # longest bins first, but keep input order as index so that we can
+    # use this as file name and only need to concatenate later and
+    # output will be sorted by input order
+
+    enum_bins = sorted(enumerate(bins),
+                       key=lambda eb: region_length(eb[1]), reverse=True)
+
+    # identical for every bin, hence built only once
+    base_cmd = ' '.join([shell_quote(a) for a in lofreq_call_args])
+    base_cmd += ' --no-default-filter'# needed here whether user-arg or not
+
+    for (i, b) in enum_bins:
+        LOG.debug("length sorted bin keeping input index #%d: %s" % (i, b))
+        # maintain region order by using index
+        reg_str = "%s:%d-%d" % (b.chrom, b.start+1, b.end)
+        vcf_out = shell_quote("%s/%d.vcf.gz" % (tmp_dir, i))
+        log_out = shell_quote("%s/%d.log" % (tmp_dir, i))
+        cmd = base_cmd + ' -r "%s" -o %s > %s 2>&1' % (
+            reg_str, vcf_out, log_out)
+        yield cmd
+
+
+def work(cmd):
+    """Run a single shell command; used as the worker function for
+    the multiprocessing pool.
+
+    Returns the exit status of the command. A non-zero status is
+    logged but not raised, so that the caller can inspect the
+    results of all commands.
+    """
+
+    status = subprocess.call(cmd, shell=True)
+    if status != 0:
+        # only report here; exiting (sys.exit) is not possible from
+        # within a pool worker
+        LOG.fatal("Following command failed with status %d: %s" % (status, cmd))
+    return status
+
+
+def main():
+    """Entry point of the parallel 'lofreq call' wrapper.
+
+    Works directly on sys.argv:
+
+    1. Strips the wrapper specific options (--pp-threads,
+       --pp-verbose, --pp-debug, --pp-dryrun). Everything else is
+       passed down to 'lofreq call' without syntax check.
+    2. Rejects options the wrapper needs for itself (-r/--region,
+       --plp-summary-only).
+    3. Derives bins from the BAM file (or the bed-file), splits them
+       until they are reasonably balanced and runs one 'lofreq call'
+       per bin in a multiprocessing pool. Per-bin output goes into a
+       temporary directory.
+    4. Concatenates the per-bin vcf files in order and then either
+       runs 'lofreq filter' on the result or copies it to the final
+       destination.
+
+    Exits with status 1 on errors, after printing the usage (-h) and
+    after a dry run.
+    """
+
+    orig_argv = list(sys.argv[1:])
+
+    #
+    # 1. parse pparallel specific args: get and remove from list
+    #
+
+    # poor man's usage
+    #
+    if '-h' in orig_argv:
+        sys.stderr.write(__doc__ + "\n")
+        sys.stderr.write("All arguments except --pp-threads (mandatory),"
+                         " --pp-debug, --pp-verbose\nand --pp-dryrun will"
+                         " be passed down to 'lofreq call'. Make sure that"
+                         " the\nremaining args are valid 'lofreq call'"
+                         " args as no syntax check will be\nperformed.\n")
+        sys.exit(1)
+
+    # NOTE(review): verbose starts out as True, which makes
+    # --pp-verbose a no-op (INFO level is always set). Looks
+    # unintended, but kept since changing it alters default logging.
+    verbose = True
+    try:
+        idx = orig_argv.index('--pp-verbose')
+        orig_argv = orig_argv[0:idx] +  orig_argv[idx+1:]
+        verbose = True
+    except (IndexError, ValueError):
+        pass
+    if verbose:
+        LOG.setLevel(logging.INFO)
+
+    debug = False
+    try:
+        idx = orig_argv.index('--pp-debug')
+        orig_argv = orig_argv[0:idx] +  orig_argv[idx+1:]
+        debug = True
+    except (IndexError, ValueError):
+        pass
+    if debug:
+        LOG.setLevel(logging.DEBUG)
+
+    dryrun = False
+    try:
+        idx = orig_argv.index('--pp-dryrun')
+        orig_argv = orig_argv[0:idx] +  orig_argv[idx+1:]
+        dryrun = True
+    except (IndexError, ValueError):
+        pass
+
+    # number of threads
+    #
+    num_threads = -1
+    try:
+        idx = orig_argv.index('--pp-threads')
+        num_threads = int(orig_argv[idx+1])
+        orig_argv = orig_argv[0:idx] +  orig_argv[idx+2:]
+    except (ValueError, IndexError):
+        LOG.fatal("Parallel wrapper requires --pp-threads"
+                  " [int] as arg (number of threads)")
+        sys.exit(1)
+    if num_threads < 1:
+        # would otherwise end in a division by zero or a pool error
+        LOG.fatal("Number of threads (--pp-threads) must be at least 1")
+        sys.exit(1)
+    if num_threads > multiprocessing.cpu_count():
+        LOG.fatal("Requested number of threads (%d) higher than number"
+                  " of CPUs (%d). Please lower --pp-threads" % (
+                      num_threads, multiprocessing.cpu_count()))
+        sys.exit(1)
+
+
+    # reference. if not indexed, multiple threads might want to index it
+    # before actually running call, which creates a race condition
+    idx = -1
+    for arg in ['-f', '--ref']:
+        if arg in orig_argv:
+            idx = orig_argv.index(arg)
+            break
+    if idx == -1 or idx+1 >= len(orig_argv):
+        # without this check orig_argv[0] would silently be taken as
+        # the reference
+        LOG.fatal("Couldn't determine reference from argument list"
+                  " (use -f or --ref)")
+        sys.exit(1)
+    ref = orig_argv[idx+1]
+    if not os.path.exists(ref + ".fai"):
+        LOG.fatal("Index for reference %s missing. Use samtools or lofreq faidx %s" % (ref, ref))
+        sys.exit(1)
+
+
+    # Doh!
+    if 'call' in orig_argv:
+        LOG.fatal("argument 'call' not needed")
+        sys.exit(1)
+
+    lofreq_call_args = list(orig_argv)
+    #LOG.warn("lofreq_call_args = %s" % ' '.join(lofreq_call_args))
+
+
+    #
+    # 2. check for disallowed args
+    #
+
+    # using region ourselves
+    #
+    # FIXME (re-) use of region could easily be merged into main logic
+    # by turning it into a region and intersecting with the rest
+    #
+    for disallowed_arg in ['--plp-summary-only', '-r', '--region']:
+        if disallowed_arg in lofreq_call_args:
+            LOG.fatal("%s not allowed in pparallel mode" % disallowed_arg)
+            sys.exit(1)
+
+
+    #
+    # 3. modify args that we use
+    #
+
+    # get final/original output file name and remove arg
+    #
+    final_vcf_out = "-"# default
+    idx = -1
+    for arg in ['-o', '--out']:
+        if arg in lofreq_call_args:
+            idx = lofreq_call_args.index(arg)
+            break
+    if idx != -1:
+        final_vcf_out = lofreq_call_args[idx+1]
+        if os.path.exists(final_vcf_out):
+            LOG.fatal("Cowardly refusing to overwrite already existing"
+                      " VCF output file %s" % final_vcf_out)
+            sys.exit(1)
+        lofreq_call_args = lofreq_call_args[0:idx] +  lofreq_call_args[idx+2:]
+
+    # bed-file
+    #
+    bed_file = None
+    idx = -1
+    for arg in ['-l', '--bed']:
+        if arg in lofreq_call_args:
+            idx = lofreq_call_args.index(arg)
+            break
+    if idx != -1:
+        bed_file = lofreq_call_args[idx+1]
+        if not os.path.exists(bed_file):
+            LOG.fatal("Bed-file %s does not exist" % bed_file)
+            sys.exit(1)
+        lofreq_call_args = lofreq_call_args[0:idx] +  lofreq_call_args[idx+2:]
+
+
+    # parse significance and bonf factor. needed for final filtering
+    # if bonf is computed dynamically.
+    #
+    # determine bonf option
+    #
+    bonf_opt = 'dynamic'# NOTE: needs to be default is in lofreq call
+    idx = -1
+    for arg in ['-b', '--bonf']:
+        if arg in lofreq_call_args:
+            idx = lofreq_call_args.index(arg)
+            break
+    if idx != -1:
+        bonf_opt = lofreq_call_args[idx+1]
+
+    if bonf_opt == 'auto':
+        raise NotImplementedError(
+            'FIXME bonf "auto" handling not implemented')
+
+    sig_opt = 0.01# WARN: needs to be default is in lofreq call
+    idx = -1
+    for arg in ['-a', '--sig']:
+        if arg in lofreq_call_args:
+            idx = lofreq_call_args.index(arg)
+            break
+    if idx != -1:
+        sig_opt = float(lofreq_call_args[idx+1])
+
+    # determine whether no-default-filter was given
+    #
+    no_default_filter = False
+    if '--no-default-filter' in lofreq_call_args:
+        no_default_filter = True
+
+
+    # prepend actual lofreq command
+    #
+    # lofreq2_local makes automatically sure we call the correct binary
+    lofreq_call_args.insert(0, 'lofreq')
+    lofreq_call_args.insert(1, 'call')
+
+    bam = None
+    for arg in lofreq_call_args:
+        ext = os.path.splitext(arg)[1].lower()
+        if ext in [".bam", ".sam"] and os.path.exists(arg):
+            bam = arg
+            break
+    if not bam:
+        LOG.fatal("Couldn't determine BAM file from argument list"
+                  " or file doesn't exist")
+        sys.exit(1)
+
+    # now use one thread per region. output is numerated per thread
+    # (%d.log and %d.vcf.gz) and goes into tmp_dir
+    #
+    tmp_dir = tempfile.mkdtemp(prefix='lofreq2_call_parallel')
+    LOG.debug("tmp_dir = %s" % tmp_dir)
+    LOG.debug("bonf_opt = %s" % bonf_opt)
+    LOG.debug("sig_opt = %s" % sig_opt)
+    LOG.debug("final_vcf_out = %s" % final_vcf_out)
+    LOG.debug("num_threads = %s" % num_threads)
+    LOG.debug("no_default_filter = %s" % (no_default_filter))
+    LOG.debug("lofreq_call_args = %s" % (lofreq_call_args))
+    #import pdb; pdb.set_trace()
+
+    LOG.info("Using %d threads with following basic args: %s\n" % (
+            num_threads, ' '.join(lofreq_call_args)))
+
+
+    # At this stage all basic args are known. In theory there are
+    # three major variables that determine the splitting logic:
+    #
+    # - bed file
+    # - region arg
+    # - sq+length from bam header
+    #
+    # We should in theory use intersection of all three, bed, region and sq
+    # (e.g. using pybedtools or a lightweight alternative)
+    # but for now we disallow regions (need it for ourselves; see above)
+
+    bam_bins = [Region._make(x) for x in bins_from_bamheader(bam)]
+    if bed_file:
+        bed_bins = [Region._make(x) for x in read_bed_coords(bed_file)]
+
+        # if the number of regions is huge and they are scattered
+        # across all major chromosomes/sequences, then it's much
+        # faster to use the bam_bins as region and bed as extra
+        # argument
+        bam_sqs = set([b[0] for b in bam_bins])
+        bed_sqs = set([b[0] for b in bed_bins])
+        if len(bed_bins) > 100*len(bam_bins) and len(bed_sqs) > len(bam_sqs)/10.0:
+            bins = [b for b in bam_bins if b[0] in bed_sqs]
+            lofreq_call_args.extend(['-l', bed_file])
+        else:
+            bins = bed_bins
+    else:
+        bins = bam_bins
+
+    if not bins:
+        # the splitting loop below would fail on bins[-1] otherwise
+        LOG.fatal("No regions to process. Bed-file empty or its"
+                  " sequence names don't match the BAM file?")
+        shutil.rmtree(tmp_dir)
+        sys.exit(1)
+
+    for (i, b) in enumerate(bins):
+        LOG.debug("initial bins: #%d %s %d %d len %d" % (
+            i, b.chrom, b.start, b.end, region_length(b)))
+
+    # split greedily into bins such that nregions ~ 2*threads:
+    # keep more bins than threads to make up for differences in regions
+    # even after split
+    #
+    total_length = sum([region_length(b) for b in bins])
+    while True:
+        #  inefficient but doesn't matter in practice: should split
+        #  max and insert new elements
+        # intelligently to avoid sorting whole list.
+        bins = sorted(bins, key=lambda b: region_length(b))
+        biggest = bins[-1]
+        biggest_length = region_length(biggest)
+
+        LOG.debug("biggest_length=%d total_length/(%d*num_threads)=%f" % (
+            biggest_length, BIN_PER_THREAD, total_length/(BIN_PER_THREAD*num_threads)))
+        if biggest_length < total_length/(BIN_PER_THREAD*num_threads):
+            break
+        elif biggest_length < 100:
+            LOG.warn("Regions getting too small to be efficiently processed")
+            break
+
+        biggest = bins.pop()
+        (b1, b2) = split_region(biggest)
+        bins.extend([b1, b2])
+
+    for (i, b) in enumerate(bins):
+        LOG.debug("bins after splitting: #%d %s %d %d len %d" % (
+            i, b.chrom, b.start, b.end, region_length(b)))
+
+
+    # need to make sure bins are order as chromosome order in BAM
+    # header (might not be the case in bed-file either which samtools
+    # parses the whole BAM when given a bed), otherwise output will
+    # not be sorted since it just concatenates
+    #
+    # sort first by start position and then by predefined chromsome
+    # order
+    bins = sorted(bins, key=lambda b: b.start)
+    sq_list = sq_list_from_bam(bam)
+    LOG.debug("sq_list  %s" % sq_list)
+    sdict = dict()
+    for (i, sq) in enumerate(sq_list):
+        sdict[sq[0]] = i
+    # report sequences unknown to the BAM file properly instead of
+    # failing with a KeyError in the sort below
+    unknown_sqs = sorted(set([b.chrom for b in bins if b.chrom not in sdict]))
+    if unknown_sqs:
+        LOG.fatal("Regions refer to sequences missing from %s: %s" % (
+            bam, ', '.join(unknown_sqs)))
+        shutil.rmtree(tmp_dir)
+        sys.exit(1)
+    bins = sorted(bins, key=lambda b: sdict[b.chrom])
+
+    for (i, b) in enumerate(bins):
+        LOG.debug("bins after chrom ordering: #%d %s %d %d len %d" % (
+            i, b.chrom, b.start, b.end, region_length(b)))
+
+    #bins = [Region('chr22', 0, 50000000)]# TMPDEBUG
+    cmd_list = list(lofreq_cmd_per_bin(lofreq_call_args, bins, tmp_dir))
+    #FIXME assert len(cmd_list) > 1, (
+    #    "Oops...did get %d instead of multiple commands to run on BAM: %s" % (len(cmd_list), bam))
+    LOG.info("Adding %d commands to mp-pool" % len(cmd_list))
+
+    #import pdb; pdb.set_trace()
+    LOG.debug("cmd_list = %s" % cmd_list)
+    if dryrun:
+        for cmd in cmd_list:
+            sys.stdout.write("%s\n" % (cmd))
+        LOG.critical("dryrun ending here")
+        sys.exit(1)
+
+    #def mycallback(x):
+    #    if x:
+    #        LOG.warn("multiprocessing.Pool call result: %s" % x)
+    #        pool.terminate()
+
+    pool = multiprocessing.Pool(processes=num_threads)
+    results = pool.map(work, cmd_list, chunksize=1)
+    #results = pool.map_async(work, cmd_list, chunksize=1, callback=mycallback)
+    pool.close()# not adding any more
+    pool.join()# wait until all done
+
+    if any(results):
+        # rerrors printed in work()
+        LOG.fatal("Some commands in pool failed. Can't continue")
+        sys.exit(1)
+
+    # concat the output by number
+    #
+    vcf_concat = os.path.join(tmp_dir, "concat.vcf.gz")
+    # maintain order
+    vcf_files = [os.path.join(tmp_dir, "%d.vcf.gz" % no)
+                 for no in range(len(cmd_list))]
+    if not all([os.path.exists(f) for f in vcf_files]):
+        LOG.fatal("Missing some vcf output from threads")
+        sys.exit(1)
+    concat_vcf_files(vcf_files, vcf_concat,
+                     "##source=%s" % ' '.join(sys.argv))
+
+    # filtering
+    #
+    log_files = [os.path.join(tmp_dir, "%d.log" % no)
+                 for no in range(len(cmd_list))]
+    num_snv_tests, num_indel_tests = total_num_tests_from_logs(log_files)
+    if num_snv_tests == -1 or num_indel_tests == -1:
+        sys.exit(1)
+    # same as in lofreq_call.c and used by lofreq2_somatic.py
+    sys.stderr.write("Number of substitution tests performed: %d\n" % num_snv_tests)
+    sys.stderr.write("Number of indel tests performed: %d\n" % num_indel_tests)
+
+    cmd = ['lofreq', 'filter', '-i', vcf_concat, '-o', final_vcf_out]
+    if no_default_filter:
+        cmd.append('--no-defaults')
+
+    if bonf_opt == 'dynamic':
+        # if bonf was computed dynamically, use bonf sum
+        sub_bonf = num_snv_tests
+        indel_bonf = num_indel_tests
+        if sub_bonf == 0:
+            sub_bonf = 1
+        if indel_bonf == 0:
+            indel_bonf = 1
+        sub_phredqual = prob_to_phredqual(sig_opt/float(sub_bonf))
+        indel_phredqual = prob_to_phredqual(sig_opt/float(indel_bonf))
+        cmd.extend(['--snvqual-thresh', "%s" % sub_phredqual])
+        cmd.extend(['--indelqual-thresh', "%s" % indel_phredqual])
+
+    elif bonf_opt == 'auto':
+        raise NotImplementedError
+
+    if bonf_opt not in ['auto', 'dynamic'] and no_default_filter:
+        # if bonf_opt was a fixed int, then it was already used properly and
+        # there's no need to filter against snv qual. if furthermore,
+        # --no-defaults is given we then don't filter at all
+        #
+        LOG.info("Copying concatenated vcf file to final destination")
+        LOG.debug("vcf_concat=%s final_vcf_out=%s" % (vcf_concat, final_vcf_out))
+
+        if final_vcf_out == "-":
+            fh_in = gzip.open(vcf_concat, 'r')
+            fh_out = sys.stdout
+            shutil.copyfileobj(fh_in, fh_out)
+            fh_in.close()
+        else:
+            # check again if final output doesn't exist, just to be sure
+            if os.path.exists(final_vcf_out):
+                LOG.fatal("Cowardly refusing to overwrite %s with %s" % (
+                    final_vcf_out, vcf_concat))
+                sys.exit(1)
+
+            shutil.copy(vcf_concat, final_vcf_out)
+
+            # try to copy index as well if exists
+            tidx_src = vcf_concat + ".tbi"
+            if os.path.exists(tidx_src):
+                tidx_dst = final_vcf_out + ".tbi"
+                shutil.copy(tidx_src, tidx_dst)
+
+    else:
+        # run the argument list directly instead of a joined string
+        # through a shell, so that paths containing spaces or shell
+        # metacharacters survive
+        cmd_str = ' '.join(cmd)# for logging only
+        LOG.info("Executing %s\n" % (cmd_str))
+        try:
+            retcode = subprocess.call(cmd)
+        except OSError:
+            # e.g. lofreq binary not found; 127 is what a shell
+            # would have reported
+            retcode = 127
+        if retcode:
+            LOG.fatal("Final filtering command failed."
+            " Commmand was %s" % (cmd_str))
+            sys.exit(1)
+
+    # remove temp files/dir
+    shutil.rmtree(tmp_dir)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/scripts/lofreq2_local.py b/src/scripts/lofreq2_local.py
new file mode 100644
index 0000000..ea270a8
--- /dev/null
+++ b/src/scripts/lofreq2_local.py
@@ -0,0 +1,31 @@
+# add local dir to path to make source dir, i.e. not installed scripts
+# work straight-away
+
+import sys
+import os
+
+# Set sys.path/PYTHONPATH such that we find the local source dir first
+# by using: from lofreq_star import ...
+#d = os.path.normpath(os.path.join(
+#    os.path.dirname(sys.argv[0]), '..'))
+#if os.path.exists(os.path.join(d, "lofreq_star")):
+#    #sys.stderr.write("NOTE: Adding local dir %s to PYTHONPATH\n" % d)
+#    sys.path.insert(0, d)
+
+# Prepend the directory holding the locally built lofreq binary (if
+# there is one next to the script dir) to PATH, so it is found first
+_script_dir = os.path.dirname(sys.argv[0])
+d = os.path.normpath(os.path.join(_script_dir, '../lofreq'))
+if os.path.exists(os.path.join(d, 'lofreq')):
+    #sys.stderr.write("NOTE: Adding local dir %s to PATH\n" % d)
+    os.environ["PATH"] = os.pathsep.join([d, os.environ["PATH"]])
+
+# In theory we also need to find the scripts, because the main binary knows about them. However, there are circular cases where a script calls the binary, which then can't find the scripts again (e.g. in the parallel wrapper), so:
+#
+#d = os.path.normpath(os.path.join(
+#    os.path.dirname(sys.argv[0]), '../tools/scripts'))
+#if os.path.exists(d):
+#    #sys.stderr.write("NOTE: Adding local dir %s to PATH\n" % d)
+#    os.environ["PATH"] = d + os.pathsep + os.environ["PATH"]
+    
+
+        
diff --git a/src/scripts/lofreq2_somatic.py b/src/scripts/lofreq2_somatic.py
new file mode 100755
index 0000000..5e5e44a
--- /dev/null
+++ b/src/scripts/lofreq2_somatic.py
@@ -0,0 +1,844 @@
+#!/usr/bin/env python
+"""LoFreq* Somatic SNV Caller: Predict somatic variants from a paired
+normal/disease sample.
+
+The script will produce several output files using the prefix specified.
+"""
+
+__author__ = "Andreas Wilm"
+__email__ = "wilma at gis.a-star.edu.sg"
+__copyright__ = "2013,2014 Genome Institute of Singapore"
+__license__ = "The MIT License"
+
+
+#--- standard library imports
+#
+import sys
+import logging
+import os
+import argparse
+import subprocess
+import tempfile
+from socket import gethostname
+
+#--- third-party imports
+#
+#/
+
+#--- project specific imports
+#
+# sets PATH so that local scripts/binary is used if present, i.e.
+# stuff can be run without installing it
+#
+try:
+    import lofreq2_local
+except ImportError:
+    pass
+
+
+
+#global logger
+# http://docs.python.org/library/logging.html
+LOG = logging.getLogger("")
+logging.basicConfig(level=logging.WARN,
+                    format='%(levelname)s [%(asctime)s]: %(message)s')
+
+
+
+def bam_index_exists(bam):
+    """Tell whether an index file exists for the given BAM file.
+
+    Two naming schemes are probed: the BAM name with '.bai' appended
+    and the BAM name with its extension replaced by '.bai'.
+    """
+    candidates = (bam + ".bai", os.path.splitext(bam)[0] + ".bai")
+    return any(os.path.exists(idx_file) for idx_file in candidates)
+
+
+
+class SomaticSNVCaller(object):
+    """Somatic SNV caller using LoFreq
+    """
+
+    VCF_NORMAL_RLX_EXT = "normal_relaxed.vcf.gz"
+    VCF_NORMAL_RLX_LOG_EXT = "normal_relaxed.log"
+    VCF_NORMAL_STR_EXT = "normal_stringent.snvs.vcf.gz"
+    VCF_INDELS_NORMAL_STR_EXT = "normal_stringent.indels.vcf.gz"
+    #
+    VCF_TUMOR_RLX_EXT = "tumor_relaxed.vcf.gz"
+    VCF_TUMOR_RLX_LOG_EXT = "tumor_relaxed.log"
+    VCF_TUMOR_STR_EXT = "tumor_stringent.snvs.vcf.gz"
+    VCF_INDELS_TUMOR_STR_EXT = "tumor_stringent.indels.vcf.gz"
+    #
+    VCF_SOMATIC_RAW_EXT = "somatic_raw.snvs.vcf.gz"
+    VCF_INDELS_SOMATIC_RAW_EXT = "somatic_raw.indels.vcf.gz"
+    VCF_SOMATIC_FINAL_EXT = "somatic_final.snvs.vcf.gz"
+    VCF_INDELS_SOMATIC_FINAL_EXT = "somatic_final.indels.vcf.gz"
+    VCF_SOMATIC_FINAL_WO_DBSNP_EXT = "somatic_final_minus-dbsnp.snvs.vcf.gz"
+    VCF_INDELS_SOMATIC_FINAL_WO_DBSNP_EXT = "somatic_final_minus-dbsnp.indels.vcf.gz"
+    #
+    VCF_GERMLINE_EXT = "germline.snvs.vcf.gz"
+    VCF_GERMLINE_INDELS_EXT = "germline.indels.vcf.gz"
+
+    LOFREQ = 'lofreq'
+
+    # call parameters for relaxed calls in normal and tumor
+    DEFAULT_ALPHA_N = 0.10
+    DEFAULT_ALPHA_T = 0.01
+    # tumor only
+    DEFAULT_SRC_QUAL_ON = True
+    DEFAULT_SRC_QUAL_IGN_VCF = None
+    DEFAULT_MIN_COV = 7
+    DEFAULT_USE_ORPHAN = False# always on for normal
+    DEFAULT_BAQ_OFF = False# always off for normal
+
+    # stringent parameters for tumor
+    DEFAULT_MTC_T = 'bonf'
+    DEFAULT_MTC_ALPHA_T = 1
+    DEFAULT_INDEL_MTC_T = 'bonf'
+    DEFAULT_INDEL_MTC_ALPHA_T = 0.01# conservative value reduces dep on dbsnp
+
+    # stringent parameters for normal (only used for sq)
+    DEFAULT_MTC_N = 'fdr'
+    DEFAULT_MTC_ALPHA_N = 0.01
+
+    # uniq parameters
+    DEFAULT_SNV_UNIQ_MTC = 'fdr'
+    DEFAULT_SNV_UNIQ_MTC_ALPHA = 0.001
+    DEFAULT_INDEL_UNIQ_MTC = 'fdr'
+    DEFAULT_INDEL_UNIQ_MTC_ALPHA = 0.0001
+
+    # misc
+    DEFAULT_CALL_INDELS = False
+    DEFAULT_NUM_THREADS = 1
+    DEFAULT_DO_GERMLINE = False
+    DEFAULT_SB_MTC_ALPHA = 0.001
+    DEFAULT_MAX_COV = 100000
+
+    def __init__(self, bam_n, bam_t, ref, outprefix,
+                 bed=None, dbsnp=None, continue_interrupted=False):
+        """Set up input files, output file names and default parameters.
+
+        Args:
+            bam_n: BAM file of the normal sample (must exist).
+            bam_t: BAM file of the tumor sample (must exist).
+            ref: reference file (must exist).
+            outprefix: prefix prepended to all output file names.
+            bed: optional bed file (must exist if given).
+            dbsnp: optional dbsnp file (must exist if given).
+            continue_interrupted: if False, none of the output files
+                may exist yet. If True, existing files are accepted.
+
+        Raises:
+            AssertionError: on missing mandatory arguments, missing
+                input files or (unless continue_interrupted is set)
+                already existing output files.
+        """
+
+        assert all([bam_n, bam_t, ref, outprefix]), (
+            "Missing mandatory arguments")
+
+        # make sure infiles exist
+        #
+        infiles = [bam_n, bam_t, ref]
+        if bed:
+            infiles.append(bed)
+        if dbsnp:
+            infiles.append(dbsnp)
+        for f in infiles:
+            assert os.path.exists(f), (
+                "File %s does not exist" % f)
+
+        self.bam_n = bam_n
+        self.bam_t = bam_t
+        self.ref = ref
+        self.bed = bed
+        self.dbsnp = dbsnp
+        self.outprefix = outprefix
+
+        # continue interrupted program. use with caution. existing
+        # files will be treated as coming from a successfully
+        # completed process run with the same options as here
+        self.continue_interrupted = continue_interrupted
+
+        # setup output files
+        #
+        self.vcf_n_rlx = self.outprefix + self.VCF_NORMAL_RLX_EXT
+        self.vcf_n_rlx_log = self.outprefix + self.VCF_NORMAL_RLX_LOG_EXT
+        self.vcf_n_str = self.outprefix + self.VCF_NORMAL_STR_EXT
+        self.vcf_indels_n_str = self.outprefix + self.VCF_INDELS_NORMAL_STR_EXT
+        #
+        self.vcf_t_rlx = self.outprefix + self.VCF_TUMOR_RLX_EXT
+        self.vcf_t_rlx_log = self.outprefix + self.VCF_TUMOR_RLX_LOG_EXT
+        self.vcf_t_str = self.outprefix + self.VCF_TUMOR_STR_EXT
+        self.vcf_indels_t_str = self.outprefix + self.VCF_INDELS_TUMOR_STR_EXT
+        #
+        self.vcf_som_raw = self.outprefix + self.VCF_SOMATIC_RAW_EXT
+        self.vcf_indels_som_raw = self.outprefix + self.VCF_INDELS_SOMATIC_RAW_EXT
+        self.vcf_som_fin = self.outprefix + self.VCF_SOMATIC_FINAL_EXT
+        self.vcf_indels_som_fin = self.outprefix + self.VCF_INDELS_SOMATIC_FINAL_EXT
+        self.vcf_som_fin_wo_dbsnp = self.outprefix + self.VCF_SOMATIC_FINAL_WO_DBSNP_EXT
+        self.vcf_indels_som_fin_wo_dbsnp = self.outprefix + self.VCF_INDELS_SOMATIC_FINAL_WO_DBSNP_EXT
+        #
+        self.vcf_germl = self.outprefix + self.VCF_GERMLINE_EXT
+        self.vcf_germl_indels = self.outprefix + self.VCF_GERMLINE_INDELS_EXT
+
+        self.call_rlx_extra_args = None
+
+        # make sure output files don't exist if we are not in
+        # 'continue' mode
+        #
+        self.outfiles = [self.vcf_n_rlx, self.vcf_n_rlx_log, self.vcf_n_str, self.vcf_indels_n_str,
+                         self.vcf_t_rlx, self.vcf_t_rlx_log, self.vcf_t_str, self.vcf_indels_t_str,
+                         self.vcf_som_raw, self.vcf_som_fin,
+                         self.vcf_indels_som_raw, self.vcf_indels_som_fin,
+                         self.vcf_som_fin_wo_dbsnp, self.vcf_indels_som_fin_wo_dbsnp,
+                         self.vcf_germl, self.vcf_germl_indels]
+        if not self.continue_interrupted:
+            for f in self.outfiles:
+                assert not os.path.exists(f), (
+                    "Cowardly refusing to overwrite already existing file %s" % f)
+
+        # other params
+        self.alpha_n = self.DEFAULT_ALPHA_N
+        self.alpha_t = self.DEFAULT_ALPHA_T
+
+        self.mtc_t = self.DEFAULT_MTC_T
+        self.mtc_alpha_t = self.DEFAULT_MTC_ALPHA_T
+        # indels have their own defaults; these used to be copied
+        # from the SNV ones (DEFAULT_MTC_T/DEFAULT_MTC_ALPHA_T), which
+        # left the dedicated DEFAULT_INDEL_* constants unused
+        self.indel_mtc_t = self.DEFAULT_INDEL_MTC_T
+        self.indel_mtc_alpha_t = self.DEFAULT_INDEL_MTC_ALPHA_T
+
+        # stringent normal (SQ ign. only)
+        self.mtc_n = self.DEFAULT_MTC_N
+        self.mtc_alpha_n = self.DEFAULT_MTC_ALPHA_N
+
+        self.snv_uniq_mtc = self.DEFAULT_SNV_UNIQ_MTC
+        self.snv_uniq_mtc_alpha = self.DEFAULT_SNV_UNIQ_MTC_ALPHA
+        self.indel_uniq_mtc = self.DEFAULT_INDEL_UNIQ_MTC
+        self.indel_uniq_mtc_alpha = self.DEFAULT_INDEL_UNIQ_MTC_ALPHA
+
+        self.src_qual_on = self.DEFAULT_SRC_QUAL_ON
+        self.src_qual_ign_vcf = self.DEFAULT_SRC_QUAL_IGN_VCF
+        self.min_cov = self.DEFAULT_MIN_COV
+        self.use_orphan = self.DEFAULT_USE_ORPHAN
+        self.baq_off = self.DEFAULT_BAQ_OFF
+        self.num_threads = self.DEFAULT_NUM_THREADS
+        self.call_indels = self.DEFAULT_CALL_INDELS
+        self.do_germline = self.DEFAULT_DO_GERMLINE
+        self.sb_mtc_alpha = self.DEFAULT_SB_MTC_ALPHA
+        self.max_cov = self.DEFAULT_MAX_COV
+
+
+    @staticmethod
+    def subprocess_wrapper(cmd, close_tmp=True):
+        """Wrapper for subprocess.check_call
+
+        stdout and stderr of the command are collected in temporary
+        files.
+
+        Args:
+            cmd: command as list of arguments.
+            close_tmp: if True, the temporary files are closed here
+                and (None, None) is returned. If False, the (rewound)
+                file handles for stdout and stderr are returned and
+                the caller has to close them, upon which the files
+                will be deleted automatically.
+
+        Raises:
+            subprocess.CalledProcessError: if the command exits with
+                a non-zero status (its stderr is echoed first).
+            OSError: if the command could not be executed.
+
+        On failure the temporary files are closed before the
+        exception is passed on.
+        """
+
+        assert isinstance(cmd, list)
+        fh_stdout = tempfile.TemporaryFile()
+        fh_stderr = tempfile.TemporaryFile()
+
+        try:
+            LOG.info("Executing %s", ' '.join(cmd))
+            subprocess.check_call(cmd, stdout=fh_stdout, stderr=fh_stderr)
+        except subprocess.CalledProcessError as e:
+            LOG.fatal("The following command failed with code %d: %s" % (
+                e.returncode, ' '.join(cmd)))
+            # echoing stderr is best effort only: never let it mask
+            # the actual failure
+            try:
+                fh_stderr.seek(0)
+                LOG.fatal("Received the following on stderr:")
+                for line in fh_stderr:
+                    # lines still carry their newline; avoid doubling it
+                    sys.stderr.write(line.rstrip("\n") + "\n")
+            except Exception:
+                pass
+            # nobody will get hold of the handles: don't leak them
+            fh_stdout.close()
+            fh_stderr.close()
+            raise e
+        except OSError as e:
+            LOG.fatal("The following command failed: %s (%s)" % (
+                ' '.join(cmd), str(e)))
+            LOG.fatal("Maybe the lofreq binary is not in your PATH")
+            fh_stdout.close()
+            fh_stderr.close()
+            raise
+
+        if close_tmp:
+            fh_stdout.close()
+            fh_stderr.close()
+            return (None, None)
+        else:
+            # will be destroyed upon closing, i.e. caller has to close!
+            fh_stdout.seek(0)
+            fh_stderr.seek(0)
+            return (fh_stdout, fh_stderr)
+
+
+    @staticmethod
+    def num_tests_from_log(stream):
+        """Parse the number of performed tests from log lines.
+
+        Iterates over stream until both the substitution and the
+        indel test count were seen. Returns the tuple
+        (num_subst_tests, num_indel_tests), where a count that was
+        not found is -1. Raises ValueError if neither was found.
+        """
+
+        subst_tag = 'Number of substitution tests performed'
+        indel_tag = 'Number of indel tests performed'
+
+        n_subst = -1
+        n_indel = -1
+        for entry in stream:
+            if entry.startswith(subst_tag):
+                n_subst = int(entry.split(':')[1])
+            elif entry.startswith(indel_tag):
+                n_indel = int(entry.split(':')[1])
+            # stop reading as soon as both values are known
+            if -1 not in (n_subst, n_indel):
+                break
+
+        if n_subst == -1 and n_indel == -1:
+            LOG.error("Couldn't parse number of tests from reused log")
+            raise ValueError
+        return (n_subst, n_indel)
+
+
+    def call_rlx(self, sample_type):
+        """Relaxed calling of variants in normal or tumor. Calls indels and
+        substitutions! Can be prevented by setting call_rlx_extra_args
+        accordingly.
+
+        Args:
+            sample_type: either 'normal' or 'tumor'.
+
+        Returns:
+            Tuple (num_subst_tests, num_indel_tests) as parsed by
+            num_tests_from_log() from the stderr of the call, or from
+            the existing log file if continue_interrupted is set and
+            the output vcf already exists (no call is made then).
+
+        Side effects:
+            Writes the relaxed vcf and a log file holding the command
+            as well as its stderr and stdout.
+        """
+
+        assert sample_type in ['normal', 'tumor']
+
+        # shared arguments for both sample types
+        #
+        if self.num_threads < 2:
+            cmd = [self.LOFREQ, 'call']
+        else:
+            cmd = [self.LOFREQ, 'call-parallel',
+                   '--pp-threads', "%d" % self.num_threads]
+        cmd.extend(['-d', "%d" % int(self.max_cov*1.01)])
+        cmd.extend(['-f', self.ref])
+        cmd.append('--verbose')
+        cmd.append('--no-default-filter')# we filter later explicitely
+        cmd.extend(['-b', "%d" % 1])# bonferroni factor 1
+        if self.bed:
+            cmd.extend(['-l', self.bed])
+
+        if self.call_indels:
+            cmd.append("--call-indels")
+        if self.call_rlx_extra_args:
+            cmd.extend(self.call_rlx_extra_args)
+
+        # sample type specific arguments
+        #
+        if sample_type == "normal":
+            cmd.extend(['-a', "%f" % self.alpha_n])
+            cmd.append('--use-orphan')
+            cmd.append('-B')# BAQ off
+            cmd.append('-N')# MQ off
+            cmd.append('-A')# IDAQ off
+
+            out_vcf = self.vcf_n_rlx
+            out_log = self.vcf_n_rlx_log
+            cmd.append(self.bam_n)
+
+        elif sample_type == "tumor":
+            cmd.extend(['-a', "%f" % self.alpha_t])
+            if self.use_orphan:
+                cmd.append('--use-orphan')
+            if self.baq_off:
+                cmd.append('-B')
+            cmd.extend(['-C', "%d" % self.min_cov])
+            if self.src_qual_on:
+                cmd.append('-s')
+            if self.src_qual_ign_vcf:
+                cmd.extend(['-S', self.src_qual_ign_vcf])
+            out_vcf = self.vcf_t_rlx
+            out_log = self.vcf_t_rlx_log
+            cmd.append(self.bam_t)
+
+        else:
+            # only reachable if assertions are disabled
+            raise ValueError(sample_type)
+
+        # out_vcf now set
+        cmd.extend(['-o', out_vcf])
+
+
+        # before we actually do anything check existance of output
+        # files and whether we should reuse them
+        #
+        if self.continue_interrupted:
+            if os.path.exists(out_vcf):
+                assert os.path.exists(out_log), (
+                    "%s exists but %s is missing." % (out_vcf, out_log))
+                LOG.info("Skipping rlx call on %s" % sample_type)
+
+                LOG.info("Parsing number of tests from log file %s" % out_log)
+                # strip the prefix added when the log was written
+                with open(out_log, 'r') as fh:
+                    elines = [l.replace("stderr: ", "") for l in fh.readlines()]
+                return self.num_tests_from_log(elines)
+            else:
+                assert not os.path.exists(out_log)
+
+        (o, e) = self.subprocess_wrapper(cmd, close_tmp=False)
+        # the temporary files vanish on close: read them first and
+        # make sure they get closed even if reading fails
+        try:
+            olines = o.readlines()
+            elines = e.readlines()
+        finally:
+            o.close()
+            e.close()
+
+        with open(out_log, 'w') as fh:
+            fh.write('# %s\n' % ' '.join(cmd))
+            for l in elines:
+                fh.write("stderr: %s" % l)
+                LOG.info("cmd stderr: %s" % l.rstrip())
+            for l in olines:
+                fh.write("stdout: %s" % l)
+
+        return self.num_tests_from_log(elines)
+
+
+    def rlx_to_str(self, sample_type, (num_snv_tests, num_indel_tests)):
+        """Filter the relaxed calls of sample_type ('normal' or 'tumor') into
+        stringent SNV and indel vcfs with one 'lofreq filter' run each, using
+        the given test counts for the snvqual/indelqual multiple testing
+        correction. Existing outputs are reused if continue_interrupted."""
+        assert sample_type in ['normal', 'tumor']
+
+        # pick in/out files and MTC settings (normal reuses SNV MTC for indels)
+        if sample_type == "normal":
+            vcf_rlx = self.vcf_n_rlx
+            vcf_str = self.vcf_n_str
+            vcf_indels_str = self.vcf_indels_n_str
+
+            mtc = self.mtc_n
+            mtc_alpha = self.mtc_alpha_n
+            indel_mtc = mtc
+            indel_mtc_alpha = mtc_alpha
+
+        elif sample_type == "tumor":
+            vcf_rlx = self.vcf_t_rlx
+            vcf_str = self.vcf_t_str
+            vcf_indels_str = self.vcf_indels_t_str
+
+            mtc = self.mtc_t
+            mtc_alpha = self.mtc_alpha_t
+            indel_mtc = self.indel_mtc_t
+            indel_mtc_alpha = self.indel_mtc_alpha_t
+        else:
+            raise ValueError(sample_type)
+
+        # filter indels and snvs separately
+        #
+        filter_base_cmd = [
+            self.LOFREQ, 'filter', '-i', vcf_rlx,
+            '--sb-mtc', 'fdr', '--sb-alpha', '%f' % self.sb_mtc_alpha,
+            '--cov-max', "%d" % self.max_cov,
+            '--cov-min', '%d' % self.min_cov]
+        filter_snv_cmd = filter_base_cmd + [
+            '--only-snvs',
+            '--snvqual-mtc', "%s" % mtc,
+            '--snvqual-alpha', '%f' % mtc_alpha,
+            '--snvqual-ntests', '%d' % num_snv_tests]
+        filter_indel_cmd = filter_base_cmd + [
+            '--only-indels',
+            '--indelqual-mtc', "%s" % indel_mtc,
+            '--indelqual-alpha', '%f' % indel_mtc_alpha,
+            '--indelqual-ntests', '%d' % num_indel_tests]
+
+        for (vcf_out, cmd) in [(vcf_str, filter_snv_cmd),
+                               (vcf_indels_str, filter_indel_cmd)]:
+            if self.continue_interrupted and os.path.exists(vcf_out):
+                LOG.info('Reusing %s' % (vcf_out))
+            else:
+                cmd = cmd + ["-o", vcf_out]
+                self.subprocess_wrapper(cmd)
+
+
+    def call_germline(self):
+        """Call germline variants by taking the intersection between
+        the stringent tumor and relaxed normal calls (SNVs and indels
+        are intersected separately, one 'lofreq vcfset' run each).
+
+        WARNING this is ad-hoc. There is no further downstream
+        filtering and we're using the meta-info from the vcf_n_rlx
+        entries.
+        """
+        # one vcfset run per variant type; the SNV command used to be built
+        # but was overwritten by the indel one before it was ever executed
+        for (vcf_t, vcf_out) in [(self.vcf_t_str, self.vcf_germl),
+                                 (self.vcf_indels_t_str,
+                                  self.vcf_germl_indels)]:
+            cmd = [self.LOFREQ, 'vcfset',
+                   '-a', 'intersect',
+                   '-1', self.vcf_n_rlx, '-2', vcf_t, '-o', vcf_out]
+            self.subprocess_wrapper(cmd)
+
+
+    def remove_normal(self):
+        """Produce complement of tumor and normal variants and add SOMATIC tag
+        """
+
+        vcfset_base_cmd = [self.LOFREQ, 'vcfset', '-a', 'complement',
+                            '-2', self.vcf_n_rlx, '--add-info', 'SOMATIC']
+        vcfset_snv_cmd = vcfset_base_cmd + [
+            '--only-snvs', '-1', self.vcf_t_str, '-o', self.vcf_som_raw]
+        vcfset_indels_cmd = vcfset_base_cmd + [
+            '--only-indels', '--only-pos', '-1', self.vcf_indels_t_str,
+            '-o', self.vcf_indels_som_raw]
+
+        for (vcf_out, cmd) in [(self.vcf_som_raw, vcfset_snv_cmd),
+                               (self.vcf_indels_som_raw, vcfset_indels_cmd)]:
+            if self.continue_interrupted and os.path.exists(vcf_out):
+                # report the file actually being reused (SNV or indel)
+                LOG.info('Reusing %s' % vcf_out)
+                continue
+            assert not os.path.exists(vcf_out), (
+                "%s already exists. Please remove or run me with"
+                " --continue if you want to reuse this file" % vcf_out)
+            self.subprocess_wrapper(cmd)
+
+
+    def uniq(self):
+        """Run LoFreq uniq as final check on somatic variants
+        """
+
+        uniq_base_cmd = [self.LOFREQ, 'uniq', '--uni-freq', "0.5"]
+
+        # SNVs and indels are checked separately with their own MTC settings
+        uniq_snv_cmd = uniq_base_cmd + [
+            "-v", self.vcf_som_raw,
+            '--uniq-mtc', self.snv_uniq_mtc,
+            '--uniq-alpha', "%s" % self.snv_uniq_mtc_alpha]
+        uniq_indels_cmd = uniq_base_cmd + [
+            "-v", self.vcf_indels_som_raw,
+            '--uniq-mtc', self.indel_uniq_mtc,
+            '--uniq-alpha', "%s" % self.indel_uniq_mtc_alpha]
+
+        for (vcf_out, cmd) in [(self.vcf_som_fin, uniq_snv_cmd),
+                               (self.vcf_indels_som_fin, uniq_indels_cmd)]:
+
+            if self.continue_interrupted and os.path.exists(vcf_out):
+                LOG.info('Reusing %s' % vcf_out)
+                continue
+            else:
+                assert not os.path.exists(vcf_out), (
+                    "%s already exists. Please remove or run me with"
+                    " --continue if you want to reuse this file" % vcf_out)
+
+            cmd.extend(['-o', vcf_out])
+            cmd.append(self.bam_n)
+
+            (o, e) = self.subprocess_wrapper(cmd, close_tmp=False)
+            for l in e.readlines():
+                LOG.warn("uniq stderr: %s" % l.rstrip())
+            o.close()
+            e.close()
+
+
+    def remove_dbsnp(self):
+        """Remove dbSNP from 'final' somatic calls
+        """
+
+        complement_base_cmd = [self.LOFREQ, 'vcfset',
+                               '-a', 'complement',
+                               '-2', self.dbsnp]
+        complement_snv_cmd = complement_base_cmd + [
+            '-1', self.vcf_som_fin, '--only-snvs']
+        complement_indels_cmd = complement_base_cmd + [
+            '-1', self.vcf_indels_som_fin, '--only-pos', '--only-indels']
+
+        for (vcf_out, cmd) in [(self.vcf_som_fin_wo_dbsnp, complement_snv_cmd),
+                               (self.vcf_indels_som_fin_wo_dbsnp, complement_indels_cmd)]:
+
+            if self.continue_interrupted and os.path.exists(vcf_out):
+                LOG.info('Reusing %s' % vcf_out)
+                # only skip this output; the other one may still be missing
+                continue
+            assert not os.path.exists(vcf_out), (
+                "%s already exists. Please remove or"
+                " run me with --continue if you want to reuse this file" % vcf_out)
+
+            cmd.extend(["-o", vcf_out])
+            self.subprocess_wrapper(cmd)
+
+
+    def run(self):
+        """Run the whole somatic SNV calling pipeline
+
+        Will raise an exception on error
+        """
+
+        LOG.info("Running on %s" % gethostname())
+
+        # sanity checks
+        #
+        for b in [self.bam_n, self.bam_t]:
+            if not bam_index_exists(b):
+                LOG.fatal("BAM file %s is not indexed."
+                          " Please create the index first"
+                          " with e.g. samtools index (or use lofreq)" % (b))
+                raise ValueError("BAM file %s is not indexed" % b)
+
+        if self.src_qual_ign_vcf and not self.src_qual_on:
+            LOG.fatal("ign-vcf file was provided, but src-qual is off")
+            raise ValueError("ign-vcf given but src-qual is off")
+
+
+        for (k, v) in [(x, self.__getattribute__(x)) for x in dir(self)
+                       if not x.startswith('_')]:
+            if callable(v):
+                continue
+            LOG.debug("%s %s" % (k, v))
+        #import pdb; pdb.set_trace()
+
+
+        # relaxed calls first, then stringent filtering of those calls;
+        # the number of tests reported by the relaxed call is needed for
+        # the multiple testing correction in the filtering step.
+        # any exception simply propagates to the caller
+        (num_subst_tests, num_indel_tests) = self.call_rlx("normal")
+        self.rlx_to_str("normal", (num_subst_tests, num_indel_tests))
+
+        (num_subst_tests, num_indel_tests) = self.call_rlx("tumor")
+        self.rlx_to_str("tumor", (num_subst_tests, num_indel_tests))
+
+        self.remove_normal()
+        self.uniq()
+        if self.dbsnp:
+            self.remove_dbsnp()
+
+        if self.do_germline:
+            self.call_germline()
+
+        # FIXME add source line (sys.argv) in final outputs
+
+
+
+def cmdline_parser():
+    """Returns the argparse parser for 'lofreq somatic' (basic, advanced and expert option groups)
+    """
+
+    # http://docs.python.org/dev/howto/argparse.html
+    parser = argparse.ArgumentParser(prog="lofreq somatic",
+                                     description=__doc__)
+
+    basic = parser.add_argument_group('Basic Options')
+
+    basic.add_argument("-v", "--verbose",
+                        action="store_true",
+                        help="Be verbose")
+    basic.add_argument("-n", "--normal",
+                        required=True,
+                        help="Normal BAM file")
+    basic.add_argument("-t", "--tumor",
+                        required=True,
+                        help="Tumor BAM file")
+    basic.add_argument("-o", "--outprefix",
+                        required=True,
+                        help="Prefix for output files")
+    basic.add_argument("-f", "--ref",
+                        required=True,
+                        help="Reference fasta file")
+    basic.add_argument("-l", "--bed",
+                        help="BED file listing regions to restrict analysis to")
+    basic.add_argument("-d", "--dbsnp",
+                        help="vcf-file (bgzipped and indexed with tabix)"
+                       " containing known germline variants (e.g. dbsnp for human)")
+
+    default = SomaticSNVCaller.DEFAULT_NUM_THREADS
+    basic.add_argument("--threads",
+                        type=int,
+                        default=default,
+                        dest="num_threads",
+                        help="Use this many threads for each call")
+
+    ###
+
+    advanced = parser.add_argument_group('Advanced Options (PLEASE read the documentation before changing any of these)')
+
+    default = SomaticSNVCaller.DEFAULT_MTC_T
+    choices = ['bonf', 'holm-bonf', 'fdr']
+    advanced.add_argument("--tumor-mtc",
+                        #required=True,
+                        default=default,
+                        choices=choices,
+                        help="Type of multiple testing correction for tumor"
+                        " (default: %s)" % default)
+
+    default = SomaticSNVCaller.DEFAULT_MTC_ALPHA_T
+    advanced.add_argument("--tumor-mtc-alpha",
+                        #required=True,
+                        default=default,
+                        type=float,
+                        help="Multiple testing correction alpha for tumor"
+                        " (default: %f)" % default)
+
+    default = SomaticSNVCaller.DEFAULT_INDEL_MTC_T
+    choices = ['bonf', 'holm-bonf', 'fdr']
+    advanced.add_argument("--indel-tumor-mtc",
+                        #required=True,
+                        default=default,
+                        choices=choices,
+                        help="Type of multiple testing correction for indels in tumor"
+                        " (default: %s)" % default)
+
+    default = SomaticSNVCaller.DEFAULT_INDEL_MTC_ALPHA_T
+    advanced.add_argument("--indel-tumor-mtc-alpha",
+                        #required=True,
+                        default=default,
+                        type=float,
+                        help="Multiple testing correction alpha for indels in tumor"
+                        " (default: %f)" % default)
+
+    advanced.add_argument("--call-indels",
+                        action="store_true",
+                        help="Also call indels (see documentation  on how to preprocess your BAM files)")
+
+
+    default = SomaticSNVCaller.DEFAULT_MIN_COV
+    advanced.add_argument("--min-cov",
+                        type=int,
+                        default=default,
+                        help="Minimum coverage for somatic calls"
+                        " (default: %d)" % default)
+
+    advanced.add_argument("--germline",
+                        action="store_true",
+                        help="Also list germline calls in separate file")
+
+
+    ###
+
+    experts = parser.add_argument_group('Experts (PLEASE do not use/change these, unless you know exactly what you are doing and'
+                                        ' if you change them nevertheless, light a candle first)')
+
+    default = SomaticSNVCaller.DEFAULT_ALPHA_N
+    experts.add_argument("--normal-alpha",
+                        #required=True,
+                        default=default,
+                        type=float,
+                        help=argparse.SUPPRESS,
+                        #help="Significance threshold (alpha) for SNV pvalues"
+                        #"  in (relaxed) normal vcf"
+                        #" (default: %f)" % default
+                     )
+    default = SomaticSNVCaller.DEFAULT_ALPHA_T
+    experts.add_argument("--tumor-alpha",
+                         #required=True,
+                         default=default,
+                         type=float,
+                         help=argparse.SUPPRESS,
+                         #help="Significance threshold (alpha) for SNV pvalues"
+                         #"  in (relaxed) tumor vcf"
+                         #" (default: %f)" % default
+                     )
+    default = "normal"
+    experts.add_argument("-S", "--ign-vcf",
+                        default=default,
+                        help="Ignore variants in this vcf-file for source"
+                        " quality computation in tumor (collides with "
+                        " --no-src-qual). Default is to use (stringently"
+                         " filtered) predictions in normal sample")
+
+    experts.add_argument("--use-orphan",
+                              action="store_true",
+                              help="Use orphaned/anomalous reads from pairs"
+                              " in all samples")
+    experts.add_argument("--baq-off",
+                              action="store_true",
+                              help="Switch use of BAQ off in all samples")
+    experts.add_argument("--call-rlx-extra-args",
+                              dest="call_rlx_extra_args",
+                              help="Extra arguments to call_rlx (replace dashes with @)")
+
+    experts.add_argument("--no-src-qual",
+                        action="store_true",
+                        help="Disable use of source quality in tumor (see also -S)")
+    experts.add_argument("--debug",
+                          action="store_true",
+                          help="Enable debugging")
+    experts.add_argument("--continue",
+                              dest="continue_interrupted",
+                              action="store_true",
+                              help="continue interrupted run. Will reuse"
+                              " existing files, assuming they are complete"
+                              " and created with identical options!")
+
+    return parser
+
+
+
+def main():
+    """The main function
+
+    Parses the command line, checks the input files, configures a
+    SomaticSNVCaller and runs it. Exits with status 1 if a check or the run fails.
+    """
+
+    parser = cmdline_parser()
+    args = parser.parse_args()
+
+    if args.verbose:
+        LOG.setLevel(logging.INFO)
+    if args.debug:
+        LOG.setLevel(logging.DEBUG)
+
+    for (in_file, descr) in [(args.normal, "BAM file for normal tissue"),
+                             (args.tumor, "BAM file for tumor tissue")]:
+        if not in_file:
+            LOG.error("%s input file argument missing." % descr)
+            #parser.print_help()
+            sys.exit(1)
+        if not os.path.exists(in_file): # and in_file != "-":
+            LOG.error("file '%s' does not exist.\n" % in_file)
+            #parser.print_help()
+            sys.exit(1)
+
+    LOG.debug("args = %s" % args)
+
+    # check if outdir exists
+    outdir = os.path.dirname(args.outprefix)
+    if outdir != "" and not os.path.exists(outdir):
+        LOG.error("The directory part of the given output prefix points"
+                  " to a non-existing directory: '%s').\n" % (outdir))
+        sys.exit(1)
+
+    if not args.dbsnp:
+        LOG.warn("No dbsnp file given. Using dbsnp is highly recommended"
+                 " when dealing with human data.")
+    elif not os.path.exists(args.dbsnp + ".tbi"):
+        LOG.warn("Looks like dbsnp was not indexed. Please run bgzip and tabix"
+                 " on your dbsnp vcf if 'lofreq somatic' fails and rerun with"
+                 " --continue")
+    try:
+        somatic_snv_caller = SomaticSNVCaller(
+            bam_n=args.normal, bam_t=args.tumor, ref=args.ref,
+            outprefix=args.outprefix, bed=args.bed, dbsnp=args.dbsnp,
+            continue_interrupted=args.continue_interrupted)
+    except AssertionError as e:
+        LOG.fatal("%s" % str(e))
+        sys.exit(1)
+
+    # hand the command line settings over to the caller
+    somatic_snv_caller.alpha_n = args.normal_alpha
+    somatic_snv_caller.alpha_t = args.tumor_alpha
+    somatic_snv_caller.mtc_t = args.tumor_mtc
+    somatic_snv_caller.mtc_alpha_t = args.tumor_mtc_alpha
+    somatic_snv_caller.indel_mtc_t = args.indel_tumor_mtc
+    somatic_snv_caller.indel_mtc_alpha_t = args.indel_tumor_mtc_alpha
+    somatic_snv_caller.num_threads = args.num_threads
+    somatic_snv_caller.min_cov = args.min_cov
+    # store_true options are plain booleans already
+    somatic_snv_caller.baq_off = args.baq_off
+    somatic_snv_caller.use_orphan = args.use_orphan
+    if args.call_indels:
+        somatic_snv_caller.call_indels = True
+    if args.call_rlx_extra_args:
+        # dashes are passed in as '@' (see option help); convert them back
+        extra_args = args.call_rlx_extra_args.replace('@', '-').split(" ")
+        somatic_snv_caller.call_rlx_extra_args = extra_args
+
+    if args.no_src_qual:
+        somatic_snv_caller.src_qual_on = False
+    else:
+        somatic_snv_caller.src_qual_on = True
+        if args.ign_vcf:
+            if args.ign_vcf == "normal":
+                somatic_snv_caller.src_qual_ign_vcf = ",".join([
+                    somatic_snv_caller.vcf_n_str,
+                    somatic_snv_caller.vcf_indels_n_str
+                    ])
+            else:
+                somatic_snv_caller.src_qual_ign_vcf = args.ign_vcf
+
+    somatic_snv_caller.do_germline = args.germline
+
+    try:
+        somatic_snv_caller.run()
+    except Exception as e:
+        # report why we failed instead of swallowing the exception silently
+        LOG.fatal("Somatic SNV caller failed (%s). Exiting" % e)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
+    LOG.info("Successful program exit")
diff --git a/src/tools/.gitignore b/src/tools/.gitignore
new file mode 100644
index 0000000..c739b43
--- /dev/null
+++ b/src/tools/.gitignore
@@ -0,0 +1,2 @@
+./build
+setup_conf.py
diff --git a/src/tools/Makefile.am b/src/tools/Makefile.am
new file mode 100644
index 0000000..77a0684
--- /dev/null
+++ b/src/tools/Makefile.am
@@ -0,0 +1,32 @@
+# This is really just a wrapper to setup.py since that script takes
+# care of building and installing the Python stuff
+#
+# Python scripts and other source has to be added manually here to
+# make sure they are incorporated when calling 'make dist'
+#
+# How to get this list:
+# source=$(python setup.py --dry-run sdist 2>/dev/null | grep 'hard linking'  | cut -f 3 -d ' ');
+# source="$source setup_conf.py $(ls ./utils/*py)";
+# echo $source | sed -e 's, , \\\n,g' | sed -e 's,^,\t,';
+#
+EXTRA_DIST = setup.py \
+	lofreq_star/__init__.py \
+	lofreq_star/utils.py \
+	scripts/lofreq2_cluster.py \
+	scripts/lofreq2_indel_ovlp.py \
+	scripts/lofreq2_local.py \
+	scripts/lofreq2_vcfplot.py \
+	setup_conf.py.in
+
+all:
+	$(PYTHON) setup.py build
+
+# make sure to actually install stuff via python's setup.py
+install-exec-hook:
+	$(PYTHON) setup.py install --prefix '$(prefix)'
+# FIXME tell users where stuff was installed and how to set PYTHONPATH
+
+# local clean target: call setup.py and remove ./build/
+clean-local:
+	$(PYTHON) setup.py clean
+	rm -rf ./build
diff --git a/src/tools/README b/src/tools/README
new file mode 100644
index 0000000..7badc5c
--- /dev/null
+++ b/src/tools/README
@@ -0,0 +1,2 @@
+Directory for all non-essential utility scripts installed via Python's setup
+tools that depend on project specific or third party modules
diff --git a/src/tools/lofreq_star/__init__.py b/src/tools/lofreq_star/__init__.py
new file mode 100644
index 0000000..334db32
--- /dev/null
+++ b/src/tools/lofreq_star/__init__.py
@@ -0,0 +1,2 @@
+import sys
+assert sys.version_info.major == 2 and sys.version_info.minor == 7, ("Need Python 2.7")
diff --git a/src/tools/lofreq_star/fdr.py b/src/tools/lofreq_star/fdr.py
new file mode 100644
index 0000000..de3d92b
--- /dev/null
+++ b/src/tools/lofreq_star/fdr.py
@@ -0,0 +1,56 @@
+"""FDR routines
+"""
+
+
+__author__ = "Grace Hui Ting Yeo"
+__email__ = "yeohtg at gis.a-star.edu.sg"
+__copyright__ = "2014 Genome Institute of Singapore"
+__license__ = "The MIT License"
+
+
+
+#--- standard library imports
+#
+from itertools import izip
+
+#--- third-party imports
+#
+# /
+
+#--- project specific imports
+#
+# /
+
+
+def fdr(pvals, a=0.05, n=None):
+    """
+    Implementation of the Benjamini-Hochberg procedure.
+    Takes a list of p-values and returns a list of the indices of those p-values that pass.
+    Does not adjust p-values.
+    a is the significance level (alpha) and n the total number of tests,
+    which defaults to len(pvals) and must not be smaller than that.
+    See http://sas-and-r.blogspot.sg/2012/05/example-931-exploring-multiple-testing.html
+    for pseudocode.
+
+    Test data from : http://udel.edu/~mcdonald/statmultcomp.html
+    >>> import random
+    >>> pvals = [0.6, 0.07, 0.49, 0.2, 0.48, 0.74, 0.68, 0.01, 0.97, 0.38, 0.032, 0.07]
+    >>> random.shuffle(pvals)
+    >>> sorted([pvals[i] for i in fdr(pvals, a=0.20)])
+    [0.01, 0.032]
+    >>> fdr([])
+    []
+    >>> fdr([1])
+    []
+    """
+
+    if n is not None:
+        assert n>=len(pvals)
+    else:
+        n=len(pvals)
+
+    sorted_pvals_indices = sorted(xrange(len(pvals)), key=lambda k:pvals[k])
+    t = next((rank for rank, spi in izip(xrange(len(pvals), 0, -1),
+                                         reversed(sorted_pvals_indices))
+              if pvals[spi] < rank*a/n), None)
+    return sorted_pvals_indices[:t] if t else []
diff --git a/src/tools/lofreq_star/multiple_testing.py b/src/tools/lofreq_star/multiple_testing.py
new file mode 100644
index 0000000..87f7ab1
--- /dev/null
+++ b/src/tools/lofreq_star/multiple_testing.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python
+"""Commonly used multiple correction routines
+
+Original source: multiple_testing.py from goatools (see below).
+https://github.com/tanghaibao/goatools
+f75455067a7f7aad66f5b229ab514977b70c34d9
+
+AW:
+- Modified to get rid of numpy dependence.
+- Added n argument (for input of clipped pvalues)
+
+Original Authors:
+- Haibao Tang (tanghaibao),
+- Brent Pedersen (brentp),
+- Aurelien Naldi (aurelien-naldi)
+Email: tanghaibao at gmail.com
+License: BSD
+"""
+
+__author__ = "Haibao Tang, Brent Pedersen, Aurelien Naldi"
+__email__ = "tanghaibao at gmail.com"
+#__copyright__ = ""
+__license__ = "BSD"
+
+from itertools import groupby
+
+
+class AbstractCorrection(object):
+    """Base class: stores the p-values, alpha (a) and number of tests (n)"""
+    def __init__(self, pvals, a=.05, n=None):
+        self.pvals = list(pvals)
+        # separate copy, so in-place corrections leave self.pvals untouched
+        self.corrected_pvals = list(self.pvals)
+        # number of multiple tests (n may exceed len(pvals), never undercut)
+        if n:
+            assert n >= len(self.pvals)
+            self.n = n
+        else:
+            self.n = len(self.pvals)
+        # type-1 error cutoff for each test
+        self.a = a
+
+        self.set_correction()
+
+    def set_correction(self):
+        # hook for subclasses, which compute self.corrected_pvals here
+        pass
+
+
+    
+class Bonferroni(AbstractCorrection):
+    """http://en.wikipedia.org/wiki/Bonferroni_correction
+    >>> ["%.4f" % v for v in Bonferroni([0.01, 0.01, 0.03, 0.05, 0.005], a=0.05).corrected_pvals]
+    ['0.0500', '0.0500', '0.1500', '0.2500', '0.0250']
+    """
+    
+    def set_correction(self):
+        self.corrected_pvals = [pv * self.n
+                                for pv in self.corrected_pvals]
+
+
+        
+class Sidak(AbstractCorrection):
+    """http://en.wikipedia.org/wiki/Bonferroni_correction
+    >>> ["%.8f" % v for v in Sidak([0.01, 0.01, 0.03, 0.05, 0.005], a=0.05).corrected_pvals]
+    ['0.04898974', '0.04898974', '0.14696923', '0.24494871', '0.02449487']
+    """
+    def set_correction(self):
+        if self.n != 0:
+            correction = self.a * 1. / (1 - (1 - self.a) ** (1. / self.n))
+        else:
+            correction = 1
+        self.corrected_pvals = [pv * correction
+                                for pv in self.corrected_pvals]
+
+
+        
+class HolmBonferroni(AbstractCorrection):
+    """http://en.wikipedia.org/wiki/Holm-Bonferroni_method
+    given a list of pvals, perform the Holm-Bonferroni correction
+    and return the indexes from original list that are significant.
+    (cant use p-value as that may be repeated.)
+    >>> ["%.4f" % v for v in HolmBonferroni([0.01, 0.01, 0.03, 0.05, 0.005], a=0.05).corrected_pvals]
+    ['0.0400', '0.0400', '0.0600', '0.0500', '0.0250']
+    """
+
+    def set_correction(self):
+        if len(self.pvals):
+            for (i, c) in self.generate_significant():
+                self.corrected_pvals[i] *= c
+        
+    def generate_significant(self):
+        pvals = self.pvals
+        pvals_idxs = zip(pvals, xrange(len(pvals)))
+        pvals_idxs.sort()
+
+        #lp = len(self.pvals)
+        lp = self.n
+
+        for pval, idxs in groupby(pvals_idxs, lambda x: x[0]):
+            idxs = list(idxs)
+            for p, i in idxs:
+                if p * 1. / lp < self.a:
+                    yield (i, lp)
+            lp -= len(idxs) 
+
+# also in the original file, but removed here:
+#class FDR
+#def calc_qval
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()        
diff --git a/src/tools/lofreq_star/multiple_testing.py.README b/src/tools/lofreq_star/multiple_testing.py.README
new file mode 100644
index 0000000..b5e2c5e
--- /dev/null
+++ b/src/tools/lofreq_star/multiple_testing.py.README
@@ -0,0 +1,12 @@
+Original source: multiple_testing.py from goatools (see below).
+https://github.com/tanghaibao/goatools
+f75455067a7f7aad66f5b229ab514977b70c34d9
+
+Modified to get rid of numpy dependence.
+
+Original Authors:
+- Haibao Tang (tanghaibao),
+- Brent Pedersen (brentp),
+- Aurelien Naldi (aurelien-naldi)
+Email: tanghaibao at gmail.com
+License: BSD
diff --git a/src/tools/lofreq_star/multiple_testing.py.org b/src/tools/lofreq_star/multiple_testing.py.org
new file mode 100644
index 0000000..5f15199
--- /dev/null
+++ b/src/tools/lofreq_star/multiple_testing.py.org
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""
+A list of commonly used multiple correction routines
+"""
+
+import sys
+import random
+import fisher
+import numpy as np
+import go_enrichment
+
+
+class AbstractCorrection(object):
+    
+    def __init__(self, pvals, a=.05):
+        self.pvals = self.corrected_pvals = np.array(pvals)
+        self.n = len(self.pvals)    # number of multiple tests
+        self.a = a                  # type-1 error cutoff for each test
+
+        self.set_correction()
+
+    def set_correction(self):
+        # the purpose of multiple correction is to lower the alpha
+        # instead of the canonical value (like .05)
+        pass
+
+
+class Bonferroni(AbstractCorrection):
+
+    """
+    >>> Bonferroni([0.01, 0.01, 0.03, 0.05, 0.005], a=0.05).corrected_pvals
+    array([ 0.05 ,  0.05 ,  0.15 ,  0.25 ,  0.025])
+    """
+    def set_correction(self):
+        self.corrected_pvals *= self.n
+
+
+class Sidak(AbstractCorrection):
+    
+    """http://en.wikipedia.org/wiki/Bonferroni_correction
+    >>> Sidak([0.01, 0.01, 0.03, 0.05, 0.005], a=0.05).corrected_pvals
+    array([ 0.04898974,  0.04898974,  0.14696923,  0.24494871,  0.02449487])
+    """
+    def set_correction(self):
+        if self.n != 0:
+            correction = self.a * 1. / (1 - (1 - self.a) ** (1. / self.n))
+        else:
+            correction = 1
+        self.corrected_pvals *= correction
+
+
+class HolmBonferroni(AbstractCorrection):
+
+    """http://en.wikipedia.org/wiki/Holm-Bonferroni_method
+    given a list of pvals, perform the Holm-Bonferroni correction
+    and return the indexes from original list that are significant.
+    (cant use p-value as that may be repeated.)
+    >>> HolmBonferroni([0.01, 0.01, 0.03, 0.05, 0.005], a=0.05).corrected_pvals
+    array([ 0.04 ,  0.04 ,  0.06 ,  0.05 ,  0.025])
+    """
+    def set_correction(self):
+        if len(self.pvals):
+            idxs, correction = zip(*self.generate_significant())
+            idxs = list(idxs)
+            self.corrected_pvals[idxs] *= correction
+
+    def generate_significant(self):
+
+        pvals = self.pvals
+        pvals_idxs = zip(pvals, xrange(len(pvals)))
+        pvals_idxs.sort()
+
+        lp = len(self.pvals)
+
+        from itertools import groupby
+        for pval, idxs in groupby(pvals_idxs, lambda x: x[0]):
+            idxs = list(idxs)
+            for p, i in idxs:
+                if p * 1. / lp < self.a:
+                    yield (i, lp)
+            lp -= len(idxs) 
+
+
+class FDR(object):
+    def __init__(self, p_val_distribution, results, a=.05):
+        self.corrected_pvals = fdr = []
+        for rec in results:
+            q = sum(1 for x in p_val_distribution if x < rec.p_uncorrected) \
+                    * 1./len(p_val_distribution)
+            fdr.append(q)
+
+
+
+"""
+Generate a p-value distribution based on re-sampling, as described in:
+http://www.biomedcentral.com/1471-2105/6/168
+"""
+#class FalseDiscoveryRate(AbstractCorrection):
+def calc_qval(study_count, study_n, pop_count, pop_n, pop, assoc, term_pop, obo_dag):
+    print >>sys.stderr, "generating p-value distribution for FDR calculation " \
+            "(this might take a while)"
+    T = 1000 # number of samples
+    distribution = []
+    for i in xrange(T):
+        new_study = random.sample(pop, study_n)
+        new_term_study = go_enrichment.count_terms(new_study, assoc, obo_dag)
+
+        smallest_p = 1
+        for term, study_count in new_term_study.items():
+            pop_count = term_pop[term]
+            p = fisher.pvalue_population(study_count, study_n, pop_count, pop_n)
+            if p.two_tail < smallest_p: smallest_p = p.two_tail
+
+        distribution.append(smallest_p)
+        print >>sys.stderr, i, smallest_p
+    return distribution
+
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()
diff --git a/src/tools/lofreq_star/utils.py b/src/tools/lofreq_star/utils.py
new file mode 100644
index 0000000..9317b81
--- /dev/null
+++ b/src/tools/lofreq_star/utils.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+"""Generic utils for LoFreq
+"""
+
+
+__author__ = "Andreas Wilm"
+__email__ = "wilma at gis.a-star.edu.sg"
+__copyright__ = "2011 Genome Institute of Singapore"
+__license__ = "The MIT License"
+
+
+
+#--- standard library imports
+#
+from math import log10, log
+import sys
+from time import strftime
+import string
+
+MAX_INT = 2147483647
+# instead of sys.maxint
+
+#--- third-party imports
+#
+from scipy.stats import chi2
+
+
+#--- project specific imports
+#
+# nothing should go here by definition
+
+
+
+
+#def mean_and_stdv(x):
+#    """
+#    Calculate mean and standard deviation of data x[]:
+#    mean = {\sum_i x_i \over n}
+#    std = sqrt(\sum_i (x_i - mean)^2 \over n-1)
+# 
+#    Based on
+#    http://www.physics.rutgers.edu/~masud/computing/WPark_recipes_in_python.html
+#    """
+# 
+#    num = len(x)
+#    assert num != 0
+#    if num == 1:
+#        return (x[0], 0.0)
+#        
+#    mean = sum(x)/float(num)
+#    std = sum([(a-mean)**2 for a in x])
+#    std = sqrt(std / float(num-1))
+# 
+#    return mean, std
+
+
+
+def now():
+    """Return the local time formatted as YYYY-MM-DD HH:MM:SS"""
+    return strftime("%Y-%m-%d %H:%M:%S")
+
+
+def fisher_comb(pv1, pv2):
+    """Combine two p-values with Fisher's method and return the result
+
+    The statistic -2*(ln(pv1) + ln(pv2)) is referred to a chi-square
+    distribution with 4 degrees of freedom. Returns 0.0 if either input
+    is 0 (see http://stats.stackexchange.com/questions/58537/fishers-method-when-p-value-0).
+
+    See http://en.wikipedia.org/wiki/Fisher's_method and
+    breseq-0.18b:polymorphism_statistics.r
+    """
+
+    if pv1 == 0 or pv2 == 0:
+        # log(0) is undefined; returning 0 here is the author's best guess
+        return 0.0
+
+    comb_log = -2.0 * (log(pv1) + log(pv2))
+    # use the survival function rather than 1.0 - cdf, which cancels
+    # to exactly 0.0 once the combined p-value drops below ~1e-16
+    return chi2.sf(comb_log, 4)
+
+
+
+def complement(strand, na_type='DNA'):
+    """Return the base-wise complement of a nucleic acid sequence
+    (the result is not reversed). Case is preserved.
+
+    Input U and T both map to A; A maps to T for na_type 'DNA' and to U
+    for 'RNA'. Any other na_type raises ValueError. Adapted from Nadia Alramli:
+    http://stackoverflow.com/questions/1738633/more-pythonic-way-to-find-a-complementary-dna-strand
+    """
+
+    if na_type == 'DNA':
+        to_bases = 'AATCGaatcg'
+    elif na_type == 'RNA':
+        to_bases = 'AAUCGaaucg'
+    else:
+        raise ValueError("Unknown NA type %s" % na_type)
+    return strand.translate(string.maketrans('UTAGCutagc', to_bases))
+
+
+
+def prob_to_phredqual(prob):
+    """
+    Turns an error probability into a rounded, integer phred value;
+    a probability of zero yields MAX_INT
+
+    >>> prob_to_phredqual(0.01)
+    20
+    """
+
+    assert prob >= 0.0, (
+        "Probability can't be smaller than 0 but got %f" % prob)
+    try:
+        phred = int(round(-10.0 * log10(prob)))
+    except ValueError:
+        # log10 rejects a probability of zero: cap the score
+        phred = MAX_INT
+    return phred
+
+
+    
+def phredqual_to_prob(phredqual):
+    """
+    Turns an integer phred quality into an error probability,
+    i.e. 10 to the power of -phredqual/10
+
+    >>> '%.2f' % phredqual_to_prob(20)
+    '0.01'
+    """
+
+    assert isinstance(phredqual, int)
+    # negative values are not rejected; phredqual=0 gives 1.0
+    exponent = -phredqual / 10.0
+    return 10 ** exponent
+
+    
+if __name__ == '__main__':
+    import doctest  # run as a script: execute this module's doctests
+    doctest.testmod()        
+            
diff --git a/src/tools/phased_out/lofreq2_filter.py b/src/tools/phased_out/lofreq2_filter.py
new file mode 100755
index 0000000..b1439d8
--- /dev/null
+++ b/src/tools/phased_out/lofreq2_filter.py
@@ -0,0 +1,577 @@
+#!/usr/bin/env python
+"""Apply number of filters to given list of SNVs.
+
+Each filter is applied to all SNVs, i.e. not just the previously
+PASSED ones!
+"""
+
+
+__author__ = "Andreas Wilm"
+__email__ = "wilma at gis.a-star.edu.sg"
+__copyright__ = "2013 Genome Institute of Singapore"
+__license__ = "GPL2"
+
+
+#--- standard library imports
+#
+import sys
+import logging
+import os
+# optparse deprecated from Python 2.7 on. need optparse here to mess
+# with the default options if needed.
+from optparse import OptionParser#, SUPPRESS_HELP
+import gzip
+
+
+#--- third-party imports
+#
+#/
+
+#--- project specific imports
+#
+try:
+    import lofreq2_local
+except ImportError:
+    pass
+
+try:
+    from lofreq_star import vcf
+except ImportError:  # PYTHONPATH may be unset, hence .get() below
+    sys.stderr.write("FATAL(%s): Couldn't find LoFreq's vcf module."
+                     " Are you sure your PYTHONPATH is set correctly (= %s)?\n" % (
+                         (sys.argv[0], os.environ.get('PYTHONPATH', '<unset>'))))
+    sys.exit(1)
+from lofreq_star import multiple_testing
+from lofreq_star import fdr
+from lofreq_star.utils import prob_to_phredqual, phredqual_to_prob, MAX_INT
+
+
+# invocation of ipython on exceptions
+#import sys, pdb
+#from IPython.core import ultratb
+#sys.excepthook = ultratb.FormattedTB(mode='Verbose',
+#                                     color_scheme='Linux', call_pdb=1)
+
+
+# root logger (name ""); starts at WARN, main() may lower the threshold
+# http://docs.python.org/library/logging.html
+LOG = logging.getLogger("")
+logging.basicConfig(level=logging.WARN,
+                    format='%(levelname)s [%(asctime)s]: %(message)s')
+
+
+
+def win_filter(snvs_on_cur_chrom, win_size, vcf_info_id):
+    """Mark each snv's INFO[vcf_info_id] with 0 if its nearest snv at a
+    different position (on either side) is at most win_size away,
+    otherwise with 1. Snvs sharing the current position are skipped
+    when looking for neighbours.
+
+    The list is not sorted here (lofreq output follows the sorted BAM);
+    order and a common chromosome are asserted while scanning.
+    """
+
+    num_snvs = len(snvs_on_cur_chrom)
+    for (ci, cur_snv) in enumerate(snvs_on_cur_chrom):
+
+        # closest preceding snv with a different position
+        prev_snv = None
+        for pi in xrange(ci-1, -1, -1):
+            cand = snvs_on_cur_chrom[pi]
+            assert cand.POS <= cur_snv.POS
+            assert cand.CHROM == cur_snv.CHROM
+            if cand.POS != cur_snv.POS:
+                prev_snv = cand
+                break
+
+        # closest following snv with a different position
+        next_snv = None
+        for ni in xrange(ci+1, num_snvs):
+            cand = snvs_on_cur_chrom[ni]
+            assert cand.POS >= cur_snv.POS
+            assert cand.CHROM == cur_snv.CHROM
+            if cand.POS != cur_snv.POS:
+                next_snv = cand
+                break
+
+        LOG.debug("prev_snv=%d cur_snv=%d next_snv=%d" % (
+            prev_snv.POS if prev_snv else -1,
+            cur_snv.POS,
+            next_snv.POS if next_snv else -1))
+
+        near_prev = (prev_snv != None and
+                     cur_snv.POS-prev_snv.POS <= win_size)
+        near_next = (next_snv != None and
+                     next_snv.POS-cur_snv.POS <= win_size)
+
+        # 1 = pass, 0 = another snv within the window
+        cur_snv.INFO[vcf_info_id] = 0 if (near_prev or near_next) else 1
+            
+                
+    
+def cmdline_parser():
+    """
+    Creates the OptionParser instance describing all filter options
+    """
+
+    # http://docs.python.org/library/optparse.html
+    usage = "%prog [Options]\n" \
+      + "\n" + __doc__
+    parser = OptionParser(usage=usage)
+
+    parser.add_option("-v", "--verbose",
+                      action="store_true",
+                      dest="verbose",
+                      help="be verbose")
+    parser.add_option("", "--debug",
+                      action="store_true",
+                      dest="debug",
+                      help="enable debugging")
+    parser.add_option("-i", "--vcf_in",
+                      dest="vcf_in",
+                      help="Input vcf file (gzip supported; - for stdin).")
+    default = "-"
+    parser.add_option("-o", "--outfile",
+                      dest="vcf_out",
+                      default=default,
+                      help="Output vcf file (gzip supported; - for stdout)."
+                      " (default = %s)" % default)
+
+    parser.add_option("-p", "--pass-only",
+                      action="store_true",
+                      dest="pass_only",
+                      help="Only print PASSed variants")
+
+    default = "holm-bonf"
+    parser.add_option("", "--strandbias",
+                      default=default,
+                      help="Filter variants with strandbias."
+                      " Valid values are 'bonf' (Bonferroni),"
+                      " 'holm-bonf' (Holm-Bonferroni), an integer value"
+                      " or 'off'. If 'bonf' or 'holm-bonf', variants with"
+                      " accordingly corrected strand-bias pvalue"
+                      " < strandbias-alpha will be filtered. If an int"
+                      " was given, variants with strand-bias phred-scores"
+                      " larger than this value will be filtered."
+                      " (default: %s)" % default)
+    default = 0.05
+    parser.add_option("", "--strandbias-alpha",
+                      default=default,
+                      type="float",
+                      help="Alpha/significance-level for strandbias testing."
+                      " (applies only to 'bonf' and 'holm-bonf'; "
+                      " default: %s)" % default)
+
+    default = 10
+    parser.add_option("", "--min-cov",
+                      dest="min_cov",
+                      type='int',
+                      default=default,
+                      help="Filter variant if coverage is"
+                      " below this value (int; default = %d)" % default)
+    parser.add_option("", "--max-cov",
+                      dest="max_cov",
+                      type='int',
+                      help="Filter variant if coverage is"
+                      " above this cap (int)")
+    parser.add_option("", "--min-af",
+                      dest="min_af",
+                      type="float",
+                      help="Filter if (allele) freq is"
+                      " below this threshold (float)")
+
+    parser.add_option("", "--snv-qual",
+                      help="Filter variants based on quality. Valid values"
+                      " are 'fdr', 'bonf', 'holm-bonf' or an integer value."
+                      " If 'fdr' Benjamini-Hochberg correction will be used."
+                      " If 'bonf' Bonferroni- and if 'holm-bonf'"
+                      " Holm-Bonferroni-correction will be used."
+                      " If an int was given, variants with a phred-score"
+                      " below this value will be filtered")
+    parser.add_option("", "--snv-qual-alpha",
+                      type="float",
+                      help="Alpha/significance threshold for multiple testing"
+                      " correction routines during SNV quality filtering."
+                      " Only applies to 'bonf', 'holm-bonf' and 'fdr'")
+    parser.add_option("", "--snv-qual-numtests",
+                      type="int",
+                      help="Set number of tests for multiple testing"
+                      " correction routines during SNV quality filtering."
+                      " Only applies to 'fdr', 'bonf' and 'holm-bonf'."
+                      " Defaults to number of pvalues.")
+
+    parser.add_option("", "--window-size",
+                      dest="window_size",
+                      type='int',
+                      help='Ignore variants, if another'
+                      ' variant is present within a window of this size'
+                      ' (ignoring multi-allelic vars at same pos).')
+
+    #parser.add_option("--force",
+    #                  #help=SUPPRESS_HELP,
+    #                  dest="force_overwrite", action="store_true")
+
+    return parser
+
+
+
+def main():
+    """Read a vcf file, tag every SNV failing one of the requested filters
+    and write the result (see cmdline_parser() for the options)"""
+
+    tmp_vcf_markup = []
+
+    parser = cmdline_parser()
+
+    # WARNING: undocumented arg to remove all defaults (and the reason
+    # why we have to use OptParse)
+    if '--no-defaults' in sys.argv:
+        for (k, v) in parser.defaults.items():
+            parser.defaults[k] = None
+        sys.argv = [x for x in sys.argv if x != "--no-defaults"]
+
+    (opts, args) = parser.parse_args()
+
+    if len(args):
+        parser.error("Unrecognized arguments found: %s." % (
+            ' '.join(args)))
+        sys.exit(1)
+
+
+    if opts.verbose:
+        LOG.setLevel(logging.INFO)
+    if opts.debug:
+        LOG.setLevel(logging.DEBUG)
+
+    for (in_file, descr) in [(opts.vcf_in, "VCF")]:
+        if not in_file:
+            parser.error("%s input file argument missing." % descr)
+            sys.exit(1)
+        if not os.path.exists(in_file) and in_file != "-":
+            sys.stderr.write(
+                "file '%s' does not exist.\n" % in_file)
+            sys.exit(1)
+
+    for (out_file, descr) in [(opts.vcf_out, "VCF output file")]:
+        if not out_file:
+            parser.error("%s output file argument missing." % descr)
+            sys.exit(1)
+        if os.path.exists(out_file) and out_file!="-":
+            sys.stderr.write("Cowardly refusing to overwrite existing"
+                             " output file '%s'.\n" % out_file)
+            sys.exit(1)
+
+
+    if opts.vcf_in == '-':
+        vcf_reader = vcf.VCFReader(sys.stdin)
+    else:
+        if opts.vcf_in[-3:] == '.gz':
+            vcf_reader = vcf.VCFReader(gzip.open(opts.vcf_in,'r'))
+        else:
+            vcf_reader = vcf.VCFReader(open(opts.vcf_in,'r'))
+    snvs = [r for r in vcf_reader]
+    LOG.info("Parsed %d SNVs from %s" % (len(snvs), opts.vcf_in))
+
+
+
+    # list of tuples: first element is a filter func, which takes a
+    # snv and a filter-id as input. second is the filter id. variant
+    # will be marked as filtered if func returns True
+    filters = []
+
+
+    if opts.min_af != None:
+        vcf_filter = vcf._Filter(
+            id=("minaf%f" % opts.min_af).rstrip('0'),
+            desc="Minimum allele frequency")
+        vcf_reader.filters[vcf_filter.id] = vcf_filter# reader serves as template for writer
+
+        filters.append((
+            lambda s, f_id: f_id if s.INFO['AF'] < opts.min_af else None,
+            vcf_filter.id
+            ))
+
+
+    if opts.max_cov != None:
+        if not all([s.INFO.has_key('DP') for s in snvs]):
+            LOG.error("At least one SNV was not annotated with depth info (DP)"
+                      " (was this file produced with LoFreq?).")
+            sys.exit(1)
+
+        vcf_filter = vcf._Filter(
+            id="maxcov%d" % opts.max_cov,
+            desc="Maximum coverage")
+        vcf_reader.filters[vcf_filter.id] = vcf_filter# reader serves as template for writer
+
+        filters.append((
+            lambda s, f_id: f_id if s.INFO['DP'] > opts.max_cov else None,
+            vcf_filter.id
+            ))
+
+
+    if opts.min_cov != None:
+        if not all([s.INFO.has_key('DP') for s in snvs]):
+            LOG.error("At least one SNV was not annotated with depth info (DP)"
+                      " (was this file produced with LoFreq?).")
+            sys.exit(1)
+
+        vcf_filter = vcf._Filter(
+            id="mincov%d" % opts.min_cov,
+            desc="Minimum coverage")
+        vcf_reader.filters[vcf_filter.id] = vcf_filter# reader serves as template for writer
+
+        filters.append((
+            lambda s, f_id: f_id if s.INFO['DP'] < opts.min_cov else None,
+            vcf_filter.id
+            ))
+
+    # structured as opts.snv_qual filtering, but keeps corrected
+    # values.
+    if opts.strandbias != None:
+
+        if opts.strandbias in ['bonf', 'holm-bonf']:
+            if not opts.strandbias_alpha:
+                LOG.fatal("Need alpha/significance threshold for strandbias"
+                          " multiple testing correction")
+                sys.exit(1)
+
+            vcf_filter = vcf._Filter(
+                id="strandbias%s" % opts.strandbias.replace("-", ""),
+                desc="Strand-bias filter (%s corrected < %g)" % (
+                    opts.strandbias, opts.strandbias_alpha))
+            vcf_reader.filters[vcf_filter.id] = vcf_filter# reader serves as template for writer
+
+            if opts.strandbias == 'bonf':
+                vcf_info_id = "SBBC"
+            elif opts.strandbias == 'holm-bonf':
+                vcf_info_id = "SBHBC"
+            else:
+                raise ValueError
+            vcf_info = vcf._Info(
+                id=vcf_info_id, num=1, type='Integer',
+                desc="Strand-bias %s corrected" % opts.strandbias)
+            vcf_reader.infos[vcf_info.id] = vcf_info
+            # eager list: a generator would only fail after this try block
+            try:
+                pvals = [phredqual_to_prob(s.INFO['SB']) for s in snvs]
+            except (KeyError, AssertionError) as e:
+                LOG.error("At least one SNV was not annotated properly with"
+                          " strandbias info (SB)"
+                          " (was this file produced with LoFreq?)"
+                          " You will need to switch strandbias filtering off")
+                sys.exit(1)
+
+            if opts.strandbias == 'bonf':
+                corr_pvals = multiple_testing.Bonferroni(
+                    pvals).corrected_pvals
+            elif opts.strandbias == 'holm-bonf':
+                corr_pvals = multiple_testing.HolmBonferroni(
+                    pvals).corrected_pvals
+            else:
+                raise ValueError
+            for (cp, s) in zip(corr_pvals, snvs):
+                s.INFO[vcf_info.id] = prob_to_phredqual(cp)
+                if s.INFO[vcf_info.id] > MAX_INT:
+                    s.INFO[vcf_info.id] = MAX_INT
+
+            filters.append((
+                lambda s, f_id: f_id if s.INFO[vcf_info.id] > prob_to_phredqual(opts.strandbias_alpha) else None,
+                vcf_filter.id
+                ))
+
+        # int
+        elif opts.strandbias != 'off':
+            try:
+                max_strandbias_phred = int(opts.strandbias)
+                assert max_strandbias_phred >= 0
+            except (ValueError, AssertionError) as e:
+                LOG.fatal("Invalid strandbias argument: %s" % (opts.strandbias))
+                sys.exit(1)
+            # the validated threshold is a local, not an attribute of opts
+            vcf_filter = vcf._Filter(
+                id="sbp%d" % max_strandbias_phred,
+                desc="Phred-based strand-bias filter"
+                " (max)")
+            vcf_reader.filters[vcf_filter.id] = vcf_filter# reader serves as template for writer
+
+            filters.append((
+                lambda s, f_id: f_id if float(s.INFO['SB']) > max_strandbias_phred else None,
+                vcf_filter.id
+                ))
+
+
+    # structured as opts.strandbias filtering, but doesn't keep
+    # corrected values.
+    if opts.snv_qual != None:
+
+        if opts.snv_qual in ['bonf', 'holm-bonf', 'fdr']:
+            if not opts.snv_qual_alpha:
+                LOG.fatal("Need alpha/significance threshold for snv quality"
+                          " multiple testing correction")
+                sys.exit(1)
+
+            vcf_filter = vcf._Filter(
+                id="snvqual%s" % opts.snv_qual.replace("-", ""),
+                desc="SNV quality filter (%s corrected < %g)" % (
+                    opts.snv_qual, opts.snv_qual_alpha))
+            vcf_reader.filters[vcf_filter.id] = vcf_filter# reader serves as template for writer
+
+            vcf_info_id = "SNVQUALPASS" # tmp markup
+            tmp_vcf_markup.append(vcf_info_id)
+
+            pvals = []
+            pidx = []
+            for (i, s) in enumerate(snvs):
+                # if qual is not NA, convert to pvalue, else don't
+                # use filter (set filter to NA)
+                if s.QUAL != '.':
+                    pvals.append(phredqual_to_prob(s.QUAL))
+                    pidx.append(i)
+                    s.INFO[vcf_info_id] = 0
+                else:
+                    s.INFO[vcf_info_id] = '.'
+
+            if opts.snv_qual == 'bonf':
+                for (i, p) in enumerate(
+                        multiple_testing.Bonferroni(
+                            pvals, n=opts.snv_qual_numtests).corrected_pvals):
+                    if p <= opts.snv_qual_alpha:
+                        snvs[pidx[i]].INFO[vcf_info_id] = 1
+
+            elif opts.snv_qual == 'holm-bonf':
+                for (i, p) in enumerate(
+                        multiple_testing.HolmBonferroni(
+                            pvals, n=opts.snv_qual_numtests).corrected_pvals):
+                    if p <= opts.snv_qual_alpha:
+                        snvs[pidx[i]].INFO[vcf_info_id] = 1
+
+            elif opts.snv_qual == 'fdr':
+                for i in fdr.fdr(pvals, a=opts.snv_qual_alpha,
+                                 n=opts.snv_qual_numtests):
+                    snvs[pidx[i]].INFO[vcf_info_id] = 1
+
+            else:
+                raise ValueError
+            # bind the key now: vcf_info_id is reassigned by the window filter
+            filters.append((
+                lambda s, f_id, key=vcf_info_id: f_id if s.INFO[key] != '.' and s.INFO[key] == 0 else None,
+                vcf_filter.id
+                ))
+
+        elif opts.snv_qual != 'off':
+            try:
+                min_qual = int(opts.snv_qual)
+                assert min_qual >= 0
+            except (ValueError, AssertionError) as e:
+                LOG.fatal("Invalid snv quality argument: %s" % (opts.snv_qual))
+                sys.exit(1)
+
+            vcf_filter = vcf._Filter(
+                id="minqual%d" % min_qual,
+                desc="Minimum SNV quality")
+            vcf_reader.filters[vcf_filter.id] = vcf_filter# reader serves as template for writer
+
+            filters.append((
+                lambda s, f_id: f_id if s.QUAL != '.' and s.QUAL < min_qual else None,
+                vcf_filter.id
+                ))
+
+
+    if opts.window_size != None:
+        vcf_filter = vcf._Filter(
+            id="snvwin%d" % opts.window_size,
+            desc="SNV window filter (SNVs within %d bp distance)" % (
+                opts.window_size))
+        vcf_reader.filters[vcf_filter.id] = vcf_filter# reader serves as template for writer
+
+        vcf_info_id = "SNVWINPASS" # tmp markup
+        tmp_vcf_markup.append(vcf_info_id)
+
+        snvs_on_cur_chrom = []
+        last_chrom = None
+        seen_chroms = []
+        for (i, cur_snv) in enumerate(snvs): # assumes snvs are sorted by chrom
+            if i == 0:
+                last_chrom = cur_snv.CHROM
+
+            if cur_snv.CHROM != last_chrom:
+                assert cur_snv.CHROM not in seen_chroms, (
+                    "SNV input not ordered by chromosome."
+                    " Sure this file was procuced by LoFreq?")
+                win_filter(snvs_on_cur_chrom, opts.window_size, vcf_info_id)
+                seen_chroms.append(last_chrom)
+                last_chrom = cur_snv.CHROM
+                snvs_on_cur_chrom = [cur_snv]
+
+            else:
+                snvs_on_cur_chrom.append(cur_snv)
+
+        # don't forget last chrom
+        win_filter(snvs_on_cur_chrom, opts.window_size, vcf_info_id)
+
+        # bind the key now, as done for the snv quality filter
+        filters.append((
+            lambda s, f_id, key=vcf_info_id: f_id if s.INFO[key] != '.' and s.INFO[key] == 0 else None,
+            vcf_filter.id
+            ))
+
+
+    # The actual filtering: if filter function returns 1 the
+    # corresponding snv has to be filtered
+    #
+    # FIXME can't this be done easier with map()?
+    #
+    if len(filters) == 0:
+        LOG.error("No filters activated.")
+        sys.exit(1)
+
+    #import pdb; pdb.set_trace()
+    for (filter_func, filter_id) in filters:
+        for (i, s) in enumerate(snvs):
+            f = filter_func(s, filter_id)
+            if f:
+                # just s = s.__replace() can't work
+                if s.FILTER == '.' or s.FILTER == 'PASS':
+                    snvs[i] = s._replace(FILTER=f)
+                else:
+                    snvs[i] = s._replace(FILTER="%s;%s" % (s.FILTER, f))
+
+
+
+    # should all also work if we get already PASSed input
+    # (records that arrive as PASS and stay unfiltered are counted too)
+    n_passed = 0
+    for (i, s) in enumerate(snvs):
+        if s.FILTER in ('.', 'PASS'):
+            snvs[i] = s._replace(FILTER="PASS")
+            n_passed += 1
+    LOG.info("%d SNVs passed all filters." % n_passed)
+
+    # remove temporary markup
+    for tmpkey in tmp_vcf_markup:
+        for s in snvs:
+            if s.INFO.has_key(tmpkey):
+                del s.INFO[tmpkey]
+
+    if opts.pass_only:
+        snvs = (s for s in snvs if s.FILTER == 'PASS')
+
+    if opts.vcf_out == '-':
+        fh_out = sys.stdout
+    else:
+        if opts.vcf_out[-3:] == '.gz':
+            fh_out = gzip.open(opts.vcf_out, 'w')
+        else:
+            fh_out = open(opts.vcf_out, 'w')
+
+    vcf_writer = vcf.VCFWriter(fh_out)
+    vcf_writer.meta_from_reader(vcf_reader)
+    vcf_writer.write(snvs)
+
+    if fh_out != sys.stdout:
+        fh_out.close()
+
+
+if __name__ == "__main__":
+    main()
+    LOG.info("Successful program exit")  # reached only if main() did not exit early
diff --git a/src/tools/phased_out/lofreq2_vcfset.py b/src/tools/phased_out/lofreq2_vcfset.py
new file mode 100755
index 0000000..eaed93e
--- /dev/null
+++ b/src/tools/phased_out/lofreq2_vcfset.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python
+"""Perform 'set' operations on two vcf-files (variant call format).
+
+Two SNV are regarded identical if their chromosome, their position and
+their (ref and) alt base are identical. Note, this definition differs
+from vcftools where the bases are ignored.
+
+The VCF meta information will be copied from first file.
+
+The exact SNV annotation will always be taken from SNV coming from
+first file.
+"""
+
+
+__author__ = "Andreas Wilm"
+__email__ = "wilma at gis.a-star.edu.sg"
+__copyright__ = "2013 Genome Institute of Singapore"
+__license__ = "GPL2"
+
+
+#--- standard library imports
+#
+import sys
+import logging
+import os
+import argparse
+import gzip
+
+#--- third-party imports
+#
+#/
+
+#--- project specific imports
+#
+try:
+    import lofreq2_local
+except ImportError:
+    pass    
+
+try:
+    from lofreq_star import vcf
+except ImportError:  # PYTHONPATH may be unset, hence .get() below
+    sys.stderr.write("FATAL(%s): Couldn't find LoFreq's vcf module."
+                     " Are you sure your PYTHONPATH is set correctly (= %s)?\n" % (
+                         (sys.argv[0], os.environ.get('PYTHONPATH', '<unset>'))))
+    sys.exit(1)
+    
+# invocation of ipython on exceptions
+#import sys, pdb
+#from IPython.core import ultratb
+#sys.excepthook = ultratb.FormattedTB(mode='Verbose',
+#                                     color_scheme='Linux', call_pdb=1)
+
+
+# root logger (name ""); starts at WARN, main() may lower the threshold
+# http://docs.python.org/library/logging.html
+LOG = logging.getLogger("")
+logging.basicConfig(level=logging.WARN,
+                    format='%(levelname)s [%(asctime)s]: %(message)s')
+
+
+def key_for_var(var):
+    """Return identity key 'CHROM POS REF ALT type' (type: INDEL or SNV)"""
+
+    var_type = "INDEL" if var.INFO.has_key('INDEL') else "SNV"
+    return "%s %d %s %s %s" % (var.CHROM, var.POS, var.REF, ''.join(var.ALT), var_type)
+
+
+def get_vcfreader(vcffile):
+    """Return a vcf.VCFReader on vcffile, which is opened via gzip if
+    its name ends in '.gz' and as a plain file otherwise
+    """
+
+    # pick the opener first; both take the same (name, mode) arguments
+    opener = gzip.open if vcffile[-3:] == '.gz' else open
+    return vcf.VCFReader(opener(vcffile, 'r'))
+
+        
+def cmdline_parser():
+    """Build the argparse.ArgumentParser of this script; the module
+    docstring serves as description. vcf1, vcf2 and action are required.
+    """
+
+    # http://docs.python.org/dev/howto/argparse.html
+    parser = argparse.ArgumentParser(description=__doc__)
+
+    # (flags, keyword arguments) in the order shown by --help
+    arg_specs = [
+        (("-v", "--verbose"),
+         dict(action="store_true", help="be verbose")),
+        (("--debug", ),
+         dict(action="store_true", help="enable debugging")),
+        (("--ign-filtered", ),
+         dict(action="store_true",
+              help="only consider passed i.e. un-filtered variants")),
+        (("-1", "--vcf1"),
+         dict(required=True, help="1st vcf file (gzip supported)")),
+        (("-2", "--vcf2"),
+         dict(required=True, help="2nd vcf file (gzip supported)")),
+        (("-a", "--action"),
+         dict(required=True, choices=['intersect', 'complement'],
+              help="Set operation to perform. "
+              " intersect: vcf1 and vcf2."
+              " complement (rel.): vcf1 \ vcf2")),
+        (("-o", "--vcfout"),
+         dict(default="-", help="Output file or '-' for stdout (default)."
+              " Meta-data will be copied from vcf1")),
+    ]
+    for (flags, kwargs) in arg_specs:
+        parser.add_argument(*flags, **kwargs)
+    return parser
+
+
+
+def main():
+    """Write the vcf1 records that are (intersect) or are not (complement)
+    also present in vcf2, comparing records by key_for_var()"""
+
+    parser = cmdline_parser()
+    args = parser.parse_args()
+
+    if args.verbose:
+        LOG.setLevel(logging.INFO)
+    if args.debug:
+        LOG.setLevel(logging.DEBUG)
+
+    if not args.action:
+        LOG.error("Missing action argument")
+        #parser.print_help()
+        sys.exit(1)
+
+    for (in_file, descr) in [(args.vcf1, "1st vcf file"),
+                             (args.vcf2, "2nd vcf file")]:
+        if not in_file:
+            LOG.error("%s input file argument missing." % descr)
+            #parser.print_help()
+            sys.exit(1)
+        if not os.path.exists(in_file): # and in_file != "-":
+            LOG.error("file '%s' does not exist.\n" % in_file)
+            #parser.print_help()
+            sys.exit(1)
+
+    for (out_file, descr) in [(args.vcfout, "VCF output file")]:
+        if not out_file:
+            LOG.error("%s output file argument missing." % descr)
+            #parser.print_help()
+            sys.exit(1)
+        if os.path.exists(out_file) and out_file!="-":
+            LOG.error("Cowardly refusing to overwrite existing"
+                      " output file '%s'.\n" % out_file)
+            sys.exit(1)
+
+    LOG.debug("args = %s" % args)
+
+    # ----------------------------------------------------------------
+    # arg logic check done
+    # ----------------------------------------------------------------
+
+    vcf1_reader = get_vcfreader(args.vcf1)
+    vcf2_reader = get_vcfreader(args.vcf2)
+
+    num_vars_stats = dict()
+    num_vars_stats['vcf1 total'] = 0
+    num_vars_stats['vcf1 pass'] = 0
+    num_vars_stats['vcf2 total'] = 0
+    num_vars_stats['vcf2 pass'] = 0
+    num_vars_stats['vcfout total'] = 0
+
+    if args.vcfout == '-':
+        fh_vcfout = sys.stdout
+    else:
+        fh_vcfout = open(args.vcfout, 'w')
+    vcf_writer = vcf.VCFWriter(fh_vcfout)
+    # meta-data copied from first vcf file
+    vcf_writer.meta_from_reader(vcf1_reader)
+    # FIXME should we add ourselve as source just like the vcftools folks do?
+    vcf_writer.write_metainfo()
+    vcf_writer.write_header()
+
+    #
+    # recipe: read B into memory and parse from A one by one
+    #
+
+    snvs2 = dict()
+    for var in vcf2_reader:
+        num_vars_stats['vcf2 total'] += 1
+        if args.ign_filtered and var.FILTER not in ['.', 'PASS']:
+            continue
+        num_vars_stats['vcf2 pass'] += 1
+
+        assert len(var.ALT) == 1, (
+            "Can't handle more then one alt base"
+            " (doesn't look like this file came from LoFreq)"
+            " and therefore can't process: %s" % str(var))
+        #if var.INFO.has_key('INDEL'):
+        #    assert not var.INFO['INDEL'], (
+        #        "Can't handle indels and therefore can't process"
+        #        " : %s" % str(var))
+
+        k = key_for_var(var)
+        assert not snvs2.has_key(k), (
+            "I'm confused. Looks like I've already seen a SNV with"
+            " key %s" % k)
+        snvs2[k] = var
+
+    for var in vcf1_reader:
+        num_vars_stats['vcf1 total'] += 1
+        if args.ign_filtered and var.FILTER not in ['.', 'PASS']:
+            continue
+        num_vars_stats['vcf1 pass'] += 1
+        k = key_for_var(var)
+
+        if args.action == 'complement':
+            # relative complement : elements in A but not B
+            if not snvs2.has_key(k):
+                vcf_writer.write_rec(var)
+                num_vars_stats['vcfout total'] += 1
+            # keep matched keys in snvs2: deleting them would let a repeated
+            # vcf1 record slip into the complement
+        elif args.action == 'intersect':
+            if snvs2.has_key(k):
+                vcf_writer.write_rec(var)
+                num_vars_stats['vcfout total'] += 1
+        else:
+            raise ValueError
+
+    if fh_vcfout != sys.stdout:
+        fh_vcfout.close()
+
+    for (k, v) in sorted(num_vars_stats.items()):
+        LOG.info("%s: %d" % (k, v))
+        
+if __name__ == "__main__":
+    main()  # calls sys.exit(1) itself on argument errors
+    #LOG.info("Successful program exit")
diff --git a/src/tools/phased_out/vcf.py b/src/tools/phased_out/vcf.py
new file mode 100644
index 0000000..285c464
--- /dev/null
+++ b/src/tools/phased_out/vcf.py
@@ -0,0 +1,650 @@
+#!/usr/bin/env python
+'''A VCFv4.0 parser for Python.
+
+The intent of this module is to mimic the ``csv`` module in the Python stdlib,
+as opposed to more flexible serialization formats like JSON or YAML.  ``vcf``
+will attempt to parse the content of each record based on the data types
+specified in the meta-information lines --  specifically the ##INFO and
+##FORMAT lines.  If these lines are missing or incomplete, it will check
+against the reserved types mentioned in the spec.  Failing that, it will just
+return strings.
+
+There is currently one piece of interface: ``VCFReader``.  It takes a file-like
+object and acts as a reader::
+
+    >>> import contextlib
+    >>> import StringIO
+    >>> import textwrap
+    >>> import vcf
+    >>> buff = EXAMPLE_VCF_STR
+    >>> with contextlib.closing(StringIO.StringIO(textwrap.dedent(buff))) as sock:
+    ...    #vcf_reader = vcf.VCFReader(open('example.vcf', 'rb'))
+    ...    vcf_reader = vcf.VCFReader(sock)
+    ...    record = vcf_reader.next()
+    ...    print record
+    Record(CHROM='20', POS=14370, ID='rs6054257', REF='G', ALT=['A'], QUAL=29, FILTER='PASS', INFO={'H2': True, 'NS': 3, 'DB': True, 'DP': 14, 'AF': [0.5]}, FORMAT='GT:GQ:DP:HQ', samples=[{'GT': '0|0', 'HQ': [51, 51], 'DP': [1], 'GQ': [48], 'name': 'NA00001'}, {'GT': '1|0', 'HQ': [51, 51], 'DP': [8], 'GQ': [48], 'name': 'NA00002'}, {'GT': '1/1', 'HQ': ['.', '.'], 'DP': [5], 'GQ': [43], 'name': 'NA00003'}])
+
+This produces a great deal of information, but it is conveniently accessed.
+The attributes of a Record are the 8 fixed fields from the VCF spec plus two
+more.  That is:
+
+    * ``Record.CHROM``
+    * ``Record.POS``
+    * ``Record.ID``
+    * ``Record.REF``
+    * ``Record.ALT``
+    * ``Record.QUAL``
+    * ``Record.FILTER``
+    * ``Record.INFO``
+
+plus two more attributes to handle genotype information:
+
+    * ``Record.FORMAT``
+    * ``Record.samples``
+
+``samples``, not being the title of any column, is left lowercase.  The format
+of the fixed fields is from the spec.  Comma-separated lists in the VCF are
+converted to lists.  In particular, one-entry VCF lists are converted to
+one-entry Python lists (see, e.g., ``Record.ALT``).  Semicolon-delimited lists
+of key=value pairs are converted to Python dictionaries, with flags being given
+a ``True`` value. Integers and floats are handled exactly as you'd expect::
+
+    ...    record = vcf_reader.next()
+    ...    print record.POS
+    17330
+    ...    print record.ALT
+    ['A']
+    ...   print record.INFO['AF']
+    [0.017]
+
+``record.FORMAT`` will be a string specifying the format of the genotype
+fields.  In case the FORMAT column does not exist, ``record.FORMAT`` is
+``None``.  Finally, ``record.samples`` is a list of dictionaries containing the
+parsed sample column::
+
+    ...    record = vcf_reader.next()
+    ...    for sample in record.samples:
+    ...       print sample['GT']
+    '1|2'
+    '2|1'
+    '2/2'
+
+Metadata regarding the VCF file itself can be investigated through the
+following attributes:
+
+    * ``VCFReader.metadata``
+    * ``VCFReader.infos``
+    * ``VCFReader.filters``
+    * ``VCFReader.formats``
+    * ``VCFReader.samples``
+
+For example::
+
+    ...    vcf_reader.metadata['fileDate']
+    '20090805'
+    ...    vcf_reader.samples
+    ['NA00001', 'NA00002', 'NA00003']
+    ...    vcf_reader.filters
+    {'q10': Filter(id='q10', desc='Quality below 10'), 's50': Filter(id='s50', desc='Less than 50% of samples have data')}
+    ...    vcf_reader.infos['AA'].desc
+    'Ancestral Allele'
+
+'''
+
+
+__author__ = "James Casbon"
+# some modifications by Andreas Wilm
+#__email__ = ""
+__copyright__ = "2011 John Dougherty"
+#__license__ = ""
+
+
+
+import collections
+import re
+import sys
+
+EXAMPLE_VCF_STR = '''\
+##fileformat=VCFv4.0
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=1000GenomesPilot-NCBI36
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Total number of alternate alleles in called genotypes">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tNA00001\tNA00002\tNA00003
+20\t14370\trs6054257\tG\tA\t29\tPASS\tNS=3;DP=14;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:.,.
+20\t17330\t.\tT\tA\t3\tq10\tNS=3;DP=11;AF=0.017\tGT:GQ:DP:HQ\t0|0:49:3:58,50\t0|1:3:5:65,3\t0/0:41:3
+20\t1110696\trs6040355\tA\tG,T\t67\tPASS\tNS=2;DP=10;AF=0.333,0.667;AA=T;DB\tGT:GQ:DP:HQ\t1|2:21:6:23,27\t2|1:2:0:18,2\t2/2:35:4
+20\t1230237\t.\tT\t.\t47\tPASS\tNS=3;DP=13;AA=T\tGT:GQ:DP:HQ\t0|0:54:7:56,60\t0|0:48:4:51,51\t0/0:61:2
+20\t1234567\tmicrosat1\tGTCT\tG,GTACT\t50\tPASS\tNS=3;DP=9;AA=G\tGT:GQ:DP\t./.:35:4\t0/2:17:2\t1/1:40:3
+'''
+
+
+# Metadata parsers/constants
+#
+# Fallback value types for INFO keys: VCFReader._parse_info consults this
+# table when a key is not declared by a ##INFO header line.
+RESERVED_INFO = {
+    'AA': 'String', 'AC': 'Integer', 'AF': 'Float', 'AN': 'Integer',
+    'BQ': 'Float', 'CIGAR': 'String', 'DB': 'Flag', 'DP': 'Integer',
+    'END': 'Integer', 'H2': 'Flag', 'MQ': 'Float', 'MQ0': 'Integer',
+    'NS': 'Integer', 'SB': 'String', 'SOMATIC': 'Flag', 'VALIDATED': 'Flag'
+}
+
+# Fallback value types for per-sample keys: VCFReader._parse_samples consults
+# this table when a key is not declared by a ##FORMAT header line.
+RESERVED_FORMAT = {
+    'GT': 'String', 'DP': 'Integer', 'FT': 'String', 'GL': 'Float',
+    'GQ': 'Float', 'HQ': 'Float'
+}
+
+
+# Parsed representations of one ##INFO, ##FILTER and ##FORMAT header line,
+# as built by the read_info/read_filter/read_format methods below.
+_Info = collections.namedtuple('Info', ['id', 'num', 'type', 'desc'])
+_Filter = collections.namedtuple('Filter', ['id', 'desc'])
+_Format = collections.namedtuple('Format', ['id', 'num', 'type', 'desc'])
+
+
+class _vcf_metadata_parser(object):
+    '''Parse the metadata in the header of a VCF file.
+
+    Every ``read_*`` method takes a single ``##`` header line and returns a
+    ``(key, value)`` tuple. A line that does not match the expected layout
+    raises ``SyntaxError``.
+
+    aggressive -- if true, a non-integer ``Number`` ('.', 'A' or 'G') is
+                  stored as ``None``; otherwise it is stored as '.'.
+    '''
+    def __init__(self, aggressive=False):
+        super(_vcf_metadata_parser, self).__init__()
+        self.aggro = aggressive
+        self.info_pattern = re.compile(r'''\#\#INFO=<
+            ID=(?P<id>[^,]+),
+            Number=(?P<number>\d+|\.|[AG]),
+            Type=(?P<type>Integer|Float|Flag|Character|String),
+            Description="(?P<desc>[^"]*)"
+            >''', re.VERBOSE)
+        self.filter_pattern = re.compile(r'''\#\#FILTER=<
+            ID=(?P<id>[^,]+),
+            Description="(?P<desc>[^"]*)"
+            >''', re.VERBOSE)
+        self.format_pattern = re.compile(r'''\#\#FORMAT=<
+            ID=(?P<id>.+),
+            Number=(?P<number>\d+|\.|[AG]),
+            Type=(?P<type>.+),
+            Description="(?P<desc>.*)"
+            >''', re.VERBOSE)
+        # The key ends at the FIRST '='. A greedy '.+' key would split
+        # structured lines such as ##contig=<ID=20,length=62435964> at
+        # their last '=' and yield a bogus key.
+        self.meta_pattern = re.compile(r'''##(?P<key>[^=]+)=(?P<val>.+)''')
+
+    def _read_number(self, number_str):
+        '''Convert the ``Number`` attribute of an INFO/FORMAT line.
+
+        Returns an int where possible; any other value ('.', 'A', 'G')
+        becomes ``None`` in aggressive mode and '.' otherwise.
+        '''
+        try:
+            return int(number_str)
+        except ValueError:
+            return None if self.aggro else '.'
+
+    def read_info(self, info_string):
+        '''Read a meta-information INFO line.
+
+        Returns ``(id, Info)``; raises SyntaxError for a malformed line.
+        '''
+        match = self.info_pattern.match(info_string)
+        if not match:
+            raise SyntaxError(
+                "One of the INFO lines is malformed: {}".format(info_string))
+
+        info = _Info(match.group('id'),
+                     self._read_number(match.group('number')),
+                     match.group('type'), match.group('desc'))
+
+        return (match.group('id'), info)
+
+    def read_filter(self, filter_string):
+        '''Read a meta-information FILTER line.
+
+        Returns ``(id, Filter)``; raises SyntaxError for a malformed line.
+        '''
+        match = self.filter_pattern.match(filter_string)
+        if not match:
+            raise SyntaxError(
+                "One of the FILTER lines is malformed: {}".format(
+                    filter_string))
+
+        filt = _Filter(match.group('id'), match.group('desc'))
+
+        return (match.group('id'), filt)
+
+    def read_format(self, format_string):
+        '''Read a meta-information FORMAT line.
+
+        Returns ``(id, Format)``; raises SyntaxError for a malformed line.
+        '''
+        match = self.format_pattern.match(format_string)
+        if not match:
+            raise SyntaxError(
+                "One of the FORMAT lines is malformed: {}".format(
+                    format_string))
+
+        form = _Format(match.group('id'),
+                       self._read_number(match.group('number')),
+                       match.group('type'), match.group('desc'))
+
+        return (match.group('id'), form)
+
+    def read_meta(self, meta_string):
+        '''Read a generic ``##key=value`` meta-information line.
+
+        Returns ``(key, value)`` as strings; raises SyntaxError for a line
+        without a ``key=value`` pair, like the other ``read_*`` methods.
+        '''
+        match = self.meta_pattern.match(meta_string)
+        if not match:
+            raise SyntaxError(
+                "One of the meta-information lines is malformed: {}".format(
+                    meta_string))
+        return match.group('key'), match.group('val')
+
+
+# Reader class
+class _meta_info(object):
+    '''Decorator for a property stored in the header info.
+
+    Wraps an accessor named ``foo`` so that a call first checks the
+    owner's ``_foo`` attribute and, if it is still ``None``, asks the owner
+    to run ``_parse_metainfo()`` before the accessor itself is called.
+    '''
+    def __init__(self, func):
+        self.func = func
+
+    def __call__(self, fself):
+        accessor = self.func
+        backing_attr = "_" + accessor.__name__
+        # None marks "header not parsed yet"
+        header_missing = getattr(fself, backing_attr) is None
+        if header_missing:
+            fself._parse_metainfo()
+        return accessor(fself)
+
+    def __repr__(self):
+        '''Return the function's docstring.'''
+        wrapped = self.func
+        return wrapped.__doc__
+
+    def __doc__(self):
+        '''Return the function's docstring.'''
+        wrapped = self.func
+        return wrapped.__doc__
+
+# One parsed VCF data line: the eight fixed columns, then the FORMAT string
+# and the list of per-sample dictionaries (both None when the line has no
+# FORMAT column, see VCFReader.next).
+_Record = collections.namedtuple('Record', [
+    'CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT',
+    'samples'
+])
+
+
+class VCFReader(object):
+    '''Read and parse a VCF v 4.0 file.
+
+    The reader is an iterator: every call to ``next()`` returns one
+    ``Record`` namedtuple. The header is parsed lazily, either when the
+    first record is requested or when one of the header properties
+    (``metadata``, ``infos``, ``filters``, ``formats``, ``samples``) is
+    first accessed.
+
+    fsock      -- line iterator over the VCF content (only ``next()`` is
+                  called on it)
+    aggressive -- if true, missing values ('.') are converted to ``None``,
+                  a FILTER of 'PASS' becomes ``None`` and a './.' sample
+                  value becomes ``None``; otherwise values are kept as read
+    '''
+    def __init__(self, fsock, aggressive=False):
+        super(VCFReader, self).__init__()
+        self.aggro = aggressive
+        # header caches; None means the header has not been parsed yet
+        self._metadata = None
+        self._infos = None
+        self._filters = None
+        self._formats = None
+        self._samples = None
+        self.reader = fsock
+        if aggressive:
+            self._mapper = self._none_map
+        else:
+            self._mapper = self._pass_map
+
+    def __iter__(self):
+        return self
+
+    @property
+    @_meta_info
+    def metadata(self):
+        '''Return the information from lines starting "##"'''
+        return self._metadata
+
+    @property
+    @_meta_info
+    def infos(self):
+        '''Return the information from lines starting "##INFO"'''
+        return self._infos
+
+    @property
+    @_meta_info
+    def filters(self):
+        '''Return the information from lines starting "##FILTER"'''
+        return self._filters
+
+    @property
+    @_meta_info
+    def formats(self):
+        '''Return the information from lines starting "##FORMAT"'''
+        return self._formats
+
+    @property
+    @_meta_info
+    def samples(self):
+        '''Return the names of the genotype fields.'''
+        return self._samples
+
+    def _parse_metainfo(self):
+        '''Parse the information stored in the metainfo of the VCF.
+
+        Consumes all leading ``##`` lines plus the ``#CHROM`` column header
+        from the underlying reader and fills the header caches.
+
+        The end user shouldn't have to use this.  She can access the metainfo
+        directly with ``self.metadata``.'''
+        for attr in ('_metadata', '_infos', '_filters', '_formats'):
+            setattr(self, attr, {})
+
+        # note: the header parser is always created in non-aggressive mode,
+        # independent of self.aggro
+        parser = _vcf_metadata_parser()
+
+        line = self.reader.next()
+        while line.startswith('##'):
+            line = line.strip()
+            if line.startswith('##INFO'):
+                key, val = parser.read_info(line)
+                self._infos[key] = val
+
+            elif line.startswith('##FILTER'):
+                key, val = parser.read_filter(line)
+                self._filters[key] = val
+
+            elif line.startswith('##FORMAT'):
+                key, val = parser.read_format(line)
+                self._formats[key] = val
+
+            else:
+                key, val = parser.read_meta(line.strip())
+                self._metadata[key] = val
+
+            line = self.reader.next()
+
+        # AW check for vcf files without header. could assume default
+        # header but can't easily unget already read variant line
+        assert line.startswith('#CHROM'), (
+            "Can't find VCF header: Doesn't look like VCF format (line was %s)" % line)
+
+        fields = line.split()
+        # AW this used to be 8: which included FORMAT as well
+        self._samples = fields[9:]
+
+    def _none_map(self, func, iterable, bad='.'):
+        '''``map``, but make bad values None.'''
+        return [func(x) if x != bad else None
+                for x in iterable]
+
+    def _pass_map(self, func, iterable, bad='.'):
+        '''``map``, but pass bad values through unconverted.'''
+        return [func(x) if x != bad else bad
+                for x in iterable]
+
+    def _parse_info(self, info_str):
+        '''Parse the INFO field of a VCF entry into a dictionary of Python
+        types.
+
+        The type of each key is taken from the ##INFO header, then from
+        RESERVED_INFO; an undeclared key is a String if it carries a value
+        and a Flag otherwise. Integer and Float values become lists, which
+        are unwrapped to a single element if the header declares Number=1.
+        Flags (and any key given without a value) map to ``True``.
+        '''
+        retdict = {}
+        for entry in info_str.split(';'):
+            # split at the first '=' only, so a value that itself contains
+            # '=' is kept intact
+            entry = entry.split('=', 1)
+            ID = entry[0]
+            has_value = len(entry) > 1
+            try:
+                entry_type = self.infos[ID].type
+            except KeyError:
+                try:
+                    entry_type = RESERVED_INFO[ID]
+                except KeyError:
+                    entry_type = 'String' if has_value else 'Flag'
+
+            if entry_type == 'Flag' or not has_value:
+                # a key without a value can only be treated as a flag
+                val = True
+            elif entry_type == 'Integer':
+                val = self._mapper(int, entry[1].split(','))
+            elif entry_type == 'Float':
+                val = self._mapper(float, entry[1].split(','))
+            else:
+                # 'String' and 'Character' are kept as the raw text
+                val = entry[1]
+
+            try:
+                # only unwrap lists: indexing a string value would cut it
+                # down to its first character
+                if self.infos[ID].num == 1 and isinstance(val, list):
+                    val = val[0]
+            except KeyError:
+                pass
+
+            retdict[ID] = val
+
+        return retdict
+
+    def _parse_samples(self, samples, samp_fmt):
+        '''Parse a sample entry according to the format specified in the FORMAT
+        column.
+
+        samples  -- the raw per-sample column strings
+        samp_fmt -- the raw FORMAT column (colon-separated keys)
+
+        Returns a list with one dictionary per sample, keyed by the FORMAT
+        keys plus 'name' (the sample name from the header).
+        '''
+        samp_data = []
+        samp_fmt = samp_fmt.split(':')
+        for sample in samples:
+            # zip() stops at the shorter list, so trailing keys missing
+            # from a sample are simply absent from its dictionary
+            sampdict = dict(zip(samp_fmt, sample.split(':')))
+            for fmt in sampdict:
+                vals = sampdict[fmt].split(',')
+                try:
+                    entry_type = self.formats[fmt].type
+                except KeyError:
+                    try:
+                        entry_type = RESERVED_FORMAT[fmt]
+                    except KeyError:
+                        entry_type = 'String'
+
+                if entry_type == 'Integer':
+                    sampdict[fmt] = self._mapper(int, vals)
+                elif entry_type == 'Float' or entry_type == 'Numeric':
+                    sampdict[fmt] = self._mapper(float, vals)
+                elif sampdict[fmt] == './.' and self.aggro:
+                    sampdict[fmt] = None
+
+            samp_data.append(sampdict)
+
+        for name, data in zip(self.samples, samp_data):
+            data['name'] = name
+
+        return samp_data
+
+    def next(self):
+        '''Return the next record in the file.
+
+        Parses the header first if that has not happened yet. The end of
+        the input is signalled by the exception raised by the underlying
+        reader's ``next()``.
+        '''
+        if self._samples is None:
+            self._parse_metainfo()
+        # columns are split on any whitespace
+        row = self.reader.next().split()
+        chrom = row[0]
+        pos = int(row[1])
+
+        if row[2] != '.':
+            ID = row[2]
+        else:
+            ID = None if self.aggro else row[2]
+
+        ref = row[3]
+        alt = self._mapper(str, row[4].split(','))
+        #qual = float(row[5]) if '.' in row[5] else int(row[5])
+        if row[5] != '.':
+            qual = float(row[5]) if '.' in row[5] else int(row[5])
+        else:
+            qual = None if self.aggro else row[5]
+        # several filters give a list, a single one stays a plain string
+        filt = row[6].split(';') if ';' in row[6] else row[6]
+        if filt == 'PASS' and self.aggro:
+            filt = None
+        info = self._parse_info(row[7])
+
+        try:
+            fmt = row[8]
+        except IndexError:
+            # no FORMAT column and therefore no samples either
+            fmt = None
+            samples = None
+        else:
+            samples = self._parse_samples(row[9:], fmt)
+
+        record = _Record(chrom, pos, ID, ref, alt, qual, filt, info, fmt,
+                         samples)
+        return record
+
+
+
+    
+
+class VCFWriter(object):
+    """Hack to complement VCFReader.
+
+    Partially LoFreq specific!
+
+    Writes the meta-information lines, the column header and the eight
+    fixed columns of each record to a file-like object. FORMAT and sample
+    columns are not supported and are never written.
+
+    Reader is modelled after csvreader. Writer would therefore best be
+    modelled after csvwriter.
+    """
+
+
+    def __init__(self, handle, metadata=None, infos=None, filters=None,
+                 formats=None, samples=None):
+        """Set up a writer on `handle` (anything with a ``write`` method).
+
+        metadata, infos and filters are dictionaries as provided by the
+        corresponding VCFReader properties; formats and samples are
+        accepted for symmetry only and ignored with a warning.
+        """
+
+        self.handle = handle
+
+        self.metadata = metadata if metadata else dict()
+        self.infos = infos if infos else dict()
+        self.filters = filters if filters else dict()
+
+        if formats or samples:
+            sys.stderr.write("WARN: Will ignore samples and format field in vcf. Module can't handle them\n")
+        self.formats = dict()
+        self.samples = []
+
+
+    def meta_from_reader(self, vcfreader):
+        """Copy metainformation like metadata info and filter from vcfreader template instance
+        """
+
+        assert isinstance(vcfreader, VCFReader)
+        self.metadata = vcfreader.metadata
+        self.infos = vcfreader.infos
+        self.filters = vcfreader.filters
+        # sanity check
+        if len(vcfreader.formats) or len(vcfreader.samples):
+            sys.stderr.write("WARN: Will ignore samples and format field in vcf. Module can't handle them\n")
+
+
+    def write(self, vars):
+        """Write a complete VCF: meta-information lines, the column header
+        and one line for each record in `vars`.
+        """
+
+        self.write_metainfo()
+        self.write_header()
+        for v in vars:
+            self.write_rec(v)
+
+
+    def write_metainfo(self):
+        """Write the ``##`` meta-information lines: generic metadata first,
+        followed by the INFO, FILTER and FORMAT definitions.
+        """
+
+        # metadata
+        #
+        # order doesn't matter in theory, but fileformat usually comes first.
+        # define a number of keys to use first.
+        # FIXME: does fileDate have to be changed?
+        # FIXME: does filtering prog have to be added to source
+        #
+        PRIO_KEYS = ['fileformat', 'fileDate', 'source']
+        for prio_key in PRIO_KEYS:
+            if prio_key in self.metadata:
+                self.handle.write("##%s=%s\n" % (prio_key, self.metadata[prio_key]))
+        for (k, v) in sorted(self.metadata.items()):
+            if k not in PRIO_KEYS:
+                self.handle.write("##%s=%s\n" % (k, v))
+
+        # info
+        # dict with undefined order, therefore sorted here and in write_rec as well
+        for (k, v) in sorted(self.infos.items()):
+            self.handle.write("##INFO=<ID=%s,Number=%s,Type=%s,Description=\"%s\">\n" % (
+                v.id, v.num, v.type, v.desc))
+        # note: v.id == k
+
+        # filters
+        # dict with undefined order, therefore sorted here
+        for (k, v) in sorted(self.filters.items()):
+            self.handle.write("##FILTER=<ID=%s,Description=\"%s\">\n" % (
+                v.id, v.desc))
+
+        # formats
+        # self.formats is a dict which __init__ always leaves empty
+        for (k, v) in self.formats.items():
+            self.handle.write("##FORMAT=<ID=%s,Number=%s,Type=%s,Description=\"%s\">\n" % (
+                v.id, v.num, v.type, v.desc))
+
+
+    def write_header(self):
+        """Write the CHROM... header line
+        """
+
+        #self.handle.write('#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO'
+        self.handle.write('#' + '\t'.join(_Record._fields[:8]) + "\n")
+
+
+    @staticmethod
+    def _atom_to_str(val):
+        """Render one scalar value. None (a missing value as produced by
+        an aggressive VCFReader) is written as '.'.
+        """
+        if val is None:
+            return '.'
+        return "%s" % (val,)
+
+
+    def _info_to_str(self, info):
+        """Render an INFO dictionary as sorted, ';'-separated entries:
+        boolean values as bare flags, lists as ','-joined values.
+        """
+        parts = []
+        for (d_key, d_val) in sorted(info.items()):
+            assert not isinstance(d_val, dict), (
+                "Arghh...don't know how to hand field value %s" % d_key)
+            if isinstance(d_val, bool):
+                # flag: key only
+                parts.append("%s" % d_key)
+            elif isinstance(d_val, list):
+                parts.append("%s=%s" % (
+                    d_key, ','.join([self._atom_to_str(v) for v in d_val])))
+            else:
+                parts.append("%s=%s" % (d_key, self._atom_to_str(d_val)))
+        return ';'.join(parts)
+
+
+    def write_rec(self, rec):
+        """Write one record as a tab-separated line.
+
+        Only the 8 standard fields are written; 'FORMAT' and the extra
+        'samples' attribute of `rec` are ignored.
+        """
+
+        # Example field values as delivered by VCFReader:
+        #CHROM 20
+        #POS 14370
+        #ID rs6054257
+        #REF G
+        #ALT ['A']
+        #QUAL 29
+        #FILTER PASS
+        #INFO {'H2': True, 'NS': 3, 'DB': True, 'DP': 14, 'AF': [0.5]}
+
+        columns = []
+        for field_id in _Record._fields[:8]:
+            field_val = getattr(rec, field_id)
+
+            if field_val is None:
+                # An aggressive VCFReader turns a FILTER of 'PASS' and any
+                # missing ('.') ID or QUAL into None. Undo that here
+                # instead of printing the literal string "None".
+                field_str = 'PASS' if field_id == 'FILTER' else '.'
+
+            elif isinstance(field_val, list):
+                for v in field_val:
+                    assert not isinstance(v, dict) and not isinstance(v, list)
+                # e.g. multiple alleles, which are comma-separated. The
+                # reader splits multiple filters on ';', so they have to
+                # be joined with ';' again.
+                sep = ';' if field_id == 'FILTER' else ','
+                field_str = sep.join(
+                    [self._atom_to_str(v) for v in field_val])
+
+            elif isinstance(field_val, dict):
+                # e.g. info field
+                field_str = self._info_to_str(field_val)
+
+            else:
+                field_str = "%s" % (field_val,)
+
+            columns.append(field_str)
+
+        self.handle.write('\t'.join(columns) + "\n")
+
+
+    writerow = write_rec# as csvwriter
+
+    
+def test_parse():
+    '''Parse the example VCF file from the specification, print every
+    record as it is read and finally write all records back to stdout
+    through a VCFWriter.'''
+    import contextlib
+    import StringIO
+    import textwrap
+    vcf_text = textwrap.dedent(EXAMPLE_VCF_STR)
+    parsed = []
+    with contextlib.closing(StringIO.StringIO(vcf_text)) as sock:
+        reader = VCFReader(sock, aggressive=True)
+        for rec in reader:
+            print rec
+            parsed.append(rec)
+
+    # the header information was cached by the reader while iterating
+    writer = VCFWriter(sys.stdout)
+    writer.meta_from_reader(reader)
+    writer.write(parsed)
+
+    
+# Executed as a script: run the doctests found in this module's docstrings.
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()
diff --git a/src/tools/phased_out/vcf.py.README b/src/tools/phased_out/vcf.py.README
new file mode 100644
index 0000000..3fb2eaf
--- /dev/null
+++ b/src/tools/phased_out/vcf.py.README
@@ -0,0 +1,27 @@
+This is based on James Casbon's PyVCF
+(commit bdd950d7f9f226f17ec8ff6e87ea22be3aa0ee1f)
+See https://github.com/jamescasbon/PyVCF
+
+----------------------------------------------------------------------
+
+Copyright (c) 2011 John Dougherty
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software is furnished to do so,
+subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+
diff --git a/src/tools/scripts/lofreq2_add_sample.py b/src/tools/scripts/lofreq2_add_sample.py
new file mode 100755
index 0000000..70c9c55
--- /dev/null
+++ b/src/tools/scripts/lofreq2_add_sample.py
@@ -0,0 +1,312 @@
+#!/usr/bin/env python
+"""Complement VCF with simple pileup info from BAM files
+"""
+
+__author__ = "Andreas Wilm"
+__email__ = "wilma at gis.a-star.edu.sg"
+__copyright__ = "2014 Genome Institute of Singapore"
+__license__ = "The MIT License"
+
+
+# --- standard library imports
+#
+import sys
+import os
+import argparse
+import logging
+from collections import OrderedDict, namedtuple
+import csv
+import gzip
+
+#--- third-party imports
+#
+import pysam
+
+#--- project specific imports
+#
+# /
+
+
+# global logger
+#
+# Root logger; only warnings and above are shown unless main() lowers the
+# level for --verbose/--debug.
+LOG = logging.getLogger("")
+logging.basicConfig(level=logging.WARN,
+                    format='%(levelname)s [%(asctime)s]: %(message)s')
+
+
+# The eight fixed columns of one VCF data line.
+Variant = namedtuple('Variant',
+    ['CHROM', 'POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO'])
+# all fields except POS (int) are strings and values are preserved as-is
+
+# Description of one ##FORMAT header entry (see fmt_to_line).
+Format = namedtuple('Format',
+                    ['id', 'num', 'type', 'descr'])
+
+def median(x):
+    """Return the middle element of the sorted values in x.
+
+    For an even number of values this is the upper of the two middle
+    elements (no averaging). An empty input gives None.
+    """
+
+    size = len(x)
+    if size == 0:
+        return None
+    # approach taken from
+    # http://stackoverflow.com/questions/10482339/how-to-find-median/10482422#10482422 answer by user3100512
+    ordered = list(x)
+    ordered.sort()
+    return ordered[size // 2]
+
+
+def cmdline_parser():
+    """
+    creates an argparse.ArgumentParser instance for this script
+
+    Options: --verbose, --debug, -i/--vcf-in (required), -o/--vcf-out
+    (defaults to "-", i.e. stdout) and -b/--bam (required, one or more
+    BAM files, stored as the list ``bams``).
+    """
+
+    parser = argparse.ArgumentParser(description=__doc__)
+
+    parser.add_argument("--verbose",
+                      action="store_true",
+                      dest="verbose",
+                      help="be verbose")
+    parser.add_argument("--debug",
+                      action="store_true",
+                      dest="debug",
+                      help="enable debugging")
+    parser.add_argument("-i", "--vcf-in",
+                      dest="vcf_in",
+                      required=True,
+                      help="Input vcf file listing somatic variants"
+                      " (gzip supported; - for stdin).")
+    default = "-"
+    parser.add_argument("-o", "--vcf-out",
+                      dest="vcf_out",
+                      default=default,
+                      help="Output vcf file (gzip supported; - for stdout;"
+                      " default: %s)." % default)
+    # "+" rather than "*": a bare "-b" without any file would otherwise be
+    # accepted and produce a FORMAT column without a single sample column
+    parser.add_argument("-b", "--bam",
+                        dest="bams", nargs="+",
+                        required=True,
+                        help="BAM files, e.g. normal and tumor bam")
+    return parser
+
+
+def fmt_to_line(fmt):
+    """Render a Format description as a ##FORMAT meta-information line
+    (without trailing newline)"""
+    template = "##FORMAT=<ID=%s,Number=%s,Type=%s,Description=\"%s\">"
+    values = (fmt.id, fmt.num, fmt.type, fmt.descr)
+    return template % values
+
+
+def gen_formats():
+    """Return an OrderedDict mapping each FORMAT key to its Format
+    description, in the order in which the values are written per sample.
+
+    Must be in sync with gen_plp_data
+    """
+
+    # (id, num, type, descr), i.e. in Format field order
+    specs = [
+        ('DP', '1', 'Integer', 'Read depth at this position for this sample'),# standard
+        ('NR', '1', 'Integer', 'Number of reference bases'),
+        ('NA', '1', 'Integer', 'Number of alternate bases'),
+        ('OR', '1', 'Integer', 'Number of orphan reads supporting reference bases'),
+        ('OA', '1', 'Integer', 'Number of orphan reads supporting alternate bases'),
+        ('BR', '3', 'Integer', 'Minimum, median and maximum base-qualities for reference bases'),
+        ('BA', '3', 'Integer', 'Minimum, median and maximum base-qualities for alternate bases'),
+        ('MR', '3', 'Integer', 'Minimum, median and maximum mapping-qualities for reference bases'),
+        ('MA', '3', 'Integer', 'Minimum, median and maximum mapping-qualities for alternate bases'),
+    ]
+
+    formats = OrderedDict()
+    for spec in specs:
+        fmt = Format._make(spec)
+        formats[fmt.id] = fmt
+    return formats
+
+
+def gen_plp_data(sam_fh, var):
+    """Collect simple pileup statistics for one variant from one BAM file.
+
+    sam_fh -- object with a pileup(chrom, start, end) method yielding
+              pileup columns (e.g. an opened pysam.Samfile)
+    var    -- variant with CHROM, POS (1-based), REF and ALT attributes
+
+    Returns an OrderedDict of FORMAT key -> string value. Min/median/max
+    triples are "-1,-1,-1" if no base supports the respective allele. A
+    position without any pileup column (no coverage) gives a depth of 0
+    and empty statistics.
+
+    generated data must be in sync with gen_formats()
+    """
+
+    # Defaults for a position without coverage: pileup() then yields no
+    # column for it and the loop below never assigns anything.
+    cov = 0
+    bqs = {'ref': [], 'alt': []}
+    mqs = {'ref': [], 'alt': []}
+    num_orphans = {'ref': 0, 'alt': 0}
+
+    for plp_col in sam_fh.pileup(var.CHROM, var.POS-1, var.POS):
+        # pileup() extracts all reads overlapping that region.
+        # only look at the one of interest
+        if plp_col.pos != var.POS-1:
+            continue
+
+        # taken from the column as is, i.e. before the read filtering below
+        cov = plp_col.n
+
+        for plp_read in plp_col.pileups:
+            aln_read = plp_read.alignment
+            # most minimal filtering
+            if aln_read.is_unmapped or aln_read.is_secondary or \
+               aln_read.is_qcfail or aln_read.is_duplicate:
+                continue
+
+            # orphan: paired read whose mate is unmapped
+            is_orphan = bool(aln_read.is_paired and aln_read.mate_is_unmapped)
+
+            # NOTE(review): reads carrying a deletion at this column are not
+            # skipped; confirm that qpos is meaningful for them
+            base = aln_read.seq[plp_read.qpos]
+            if base == var.REF:
+                k = 'ref'
+            elif base == var.ALT[0]:
+                k = 'alt'
+            else:
+                continue
+
+            # qualities are Phred+33 encoded characters
+            bqs[k].append(ord(aln_read.qual[plp_read.qpos])-33)
+            mqs[k].append(aln_read.mapq)
+            if is_orphan:
+                num_orphans[k] += 1
+
+        # a reference position shows up in at most one pileup column
+        break
+
+    def min_med_max(vals):
+        """format min, median and max of vals; -1 for each if empty"""
+        if not len(vals):
+            return "-1,-1,-1"
+        return "%d,%d,%d" % (min(vals), median(vals), max(vals))
+
+    sample_data = OrderedDict()
+    for (fmt_key, val) in [
+            ('DP', "%d" % cov),
+            ('NR', "%d" % len(bqs['ref'])),
+            ('NA', "%d" % len(bqs['alt'])),
+            ('OR', "%d" % num_orphans['ref']),
+            ('OA', "%d" % num_orphans['alt']),
+            ('BR', min_med_max(bqs['ref'])),
+            ('BA', min_med_max(bqs['alt'])),
+            ('MR', min_med_max(mqs['ref'])),
+            ('MA', min_med_max(mqs['alt']))]:
+        sample_data[fmt_key] = val
+
+    return sample_data
+
+
+def add_plp_to_vcf(vcf_in, vcf_out, bam_files):
+    """process each var in vcf_in and add plp info from each BAM file,
+    writing to vcf_out.
+
+    vcf_in    -- input VCF file name (".gz" is read via gzip; "-" is stdin)
+    vcf_out   -- output VCF file name (".gz" is written via gzip; "-" is
+                 stdout); must not exist yet
+    bam_files -- list of BAM file names; each adds one sample column,
+                 named after the file's basename
+
+    The input must not contain a FORMAT column. Indels and multi-base
+    variants are skipped with a warning and are not written.
+
+    there is no way to edit/add format fields in current
+    versions of pyvcf (as of 2014-06-30). see discussion here
+    https://github.com/jamescasbon/PyVCF/issues/82 for patches and
+    workarounds. chose to use csv module instead for simplicity
+    """
+
+    assert all([os.path.exists(b) for b in bam_files])
+
+    # set up vcf_reader
+    #
+    if vcf_in == '-':
+        fh_in = sys.stdin
+    else:
+        assert os.path.exists(vcf_in)
+        if vcf_in[-3:] == ".gz":
+            fh_in = gzip.open(vcf_in, 'rb')
+        else:
+            fh_in = open(vcf_in, 'rb')
+    vcf_reader = csv.reader(fh_in, delimiter='\t')
+
+
+    # set up vcf_writer/fh_out
+    #
+    if vcf_out == '-':
+        fh_out = sys.stdout
+    else:
+        assert not os.path.exists(vcf_out)
+        if vcf_out[-3:] == ".gz":
+            fh_out = gzip.open(vcf_out, 'wb')
+        else:
+            fh_out = open(vcf_out, 'wb')
+    vcf_writer = csv.writer(fh_out, delimiter='\t',
+                            quotechar='', quoting=csv.QUOTE_NONE,
+                            lineterminator=os.linesep)
+
+    formats = gen_formats()
+
+    sam_fhs = []
+    try:
+        # open every BAM once and reuse the handle for all variants
+        # instead of reopening (and never closing) it for each variant
+        for bam in bam_files:
+            sam_fhs.append(pysam.Samfile(bam))
+
+        for row in vcf_reader:
+
+            if row[0].startswith('#'):
+                if row[0] == "#CHROM":
+                    assert len(row) == 8, (
+                        "variant incomplete or FORMAT column already exists")
+
+                    # before writing header, add our format description.
+                    for fmt in formats.values():
+                        vcf_writer.writerow([fmt_to_line(fmt)])
+
+                    row.append("FORMAT")
+
+                    for bam in bam_files:
+                        row.append(os.path.basename(bam))
+
+                vcf_writer.writerow(row)
+
+            else:
+                assert len(row) == 8, (
+                    "variant incomplete or FORMAT column already exists")
+                var = Variant._make([row[0], int(row[1]), row[2], row[3],
+                                     row[4], row[5], row[6], row[7]])
+
+                # no support for indels
+                if 'INDEL' in var.INFO.split(';') or len(var.REF) > 1 or len(var.ALT) > 1:
+                    LOG.warn("Skipping unsupported variant) %s:%d:%s" % (
+                        var.CHROM, var.POS, var.REF))
+                    continue
+
+                row.append(':'.join(formats.keys()))
+                for sam_fh in sam_fhs:
+                    sample_data = gen_plp_data(sam_fh, var)
+                    assert sample_data.keys() == formats.keys(), (
+                        "sample keys (%s) != format keys (%s)" % (sample_data.keys(), formats.keys()))
+                    row.append(':'.join(sample_data.values()))
+                vcf_writer.writerow(row)
+
+    finally:
+        # release all handles, also if processing failed half-way
+        for sam_fh in sam_fhs:
+            sam_fh.close()
+        if fh_in != sys.stdin:
+            fh_in.close()
+        if fh_out != sys.stdout:
+            fh_out.close()
+
+
+def main():
+    """Entry point: parse the command line, validate the input and output
+    file arguments and run add_plp_to_vcf()
+    """
+
+    parser = cmdline_parser()
+    args = parser.parse_args()
+
+    if args.verbose:
+        LOG.setLevel(logging.INFO)
+    if args.debug:
+        LOG.setLevel(logging.DEBUG)
+
+    # input: has to be given and has to exist, unless it is stdin ("-")
+    vcf_in = args.vcf_in
+    if not vcf_in:
+        parser.error("%s file argument missing." % "VCF input")
+        sys.exit(1)
+    if vcf_in != "-" and not os.path.exists(vcf_in):
+        LOG.fatal("file '%s' does not exist.\n" % vcf_in)
+        sys.exit(1)
+
+    # output: has to be given and must not exist, unless it is stdout ("-")
+    vcf_out = args.vcf_out
+    if not vcf_out:
+        parser.error("%s output file argument missing." % "VCF output")
+        sys.exit(1)
+    if vcf_out != "-" and os.path.exists(vcf_out):
+        LOG.fatal("Cowardly refusing to overwrite existing"
+                  " output file '%s'.\n" % vcf_out)
+        sys.exit(1)
+
+    add_plp_to_vcf(vcf_in, vcf_out, args.bams)
+
+
+
+# Executed as a script: run main() and log the successful exit (shown only
+# if the log level was lowered with --verbose or --debug).
+if __name__ == "__main__":
+    main()
+    LOG.info("Successful program exit")
diff --git a/src/tools/scripts/lofreq2_analyze_somatic_fn.py b/src/tools/scripts/lofreq2_analyze_somatic_fn.py
new file mode 100755
index 0000000..d948d3c
--- /dev/null
+++ b/src/tools/scripts/lofreq2_analyze_somatic_fn.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+"""If you know about false negative somatic calls, find where they were lost along the way
+"""
+
+
+__author__ = "Andreas Wilm"
+__email__ = "wilma at gis.a-star.edu.sg"
+__copyright__ = "2014 Genome Institute of Singapore"
+__license__ = "The MIT License"
+
+
+
+import sys
+import argparse
+
+import vcf
+
+
+
+def cmdline_parser():
+    """
+    creates an OptionParser instance
+    """
+    
+    parser = argparse.ArgumentParser(description=__doc__)
+    
+    parser.add_argument("-v", "--verbose",
+                        action="store_true",
+                        dest="verbose",
+                        help="be verbose")
+    parser.add_argument("--fn",
+                        required=True,
+                        dest="vcf_fn",
+                        help="FN vcf file")
+    parser.add_argument("--n-rlx",
+                        required=True,
+                        dest="vcf_nrlx",
+                        help="Normal relaxed vcf file")
+    parser.add_argument("--n-str",
+                        required=True,
+                        dest="vcf_nstr",
+                        help="Normal stringent vcf file")
+    parser.add_argument("--t-rlx",
+                        required=True,
+                        dest="vcf_trlx",
+                        help="Tumor relaxed vcf file")
+    parser.add_argument("--t-str",
+                        required=True,
+                        dest="vcf_tstr",
+                        help="Tumor stringent vcf file")
+    parser.add_argument("--s-raw",
+                        required=True,
+                        dest="vcf_sraw",
+                        help="Somatic raw vcf file")
+    parser.add_argument("--s-final",
+                        required=True,
+                        dest="vcf_sfinal",
+                        help="Somatic final vcf file")
+    parser.add_argument("--s-final-wo-dbsnp",
+                        required=True,
+                        dest="vcf_sfinal_wo_dbsnp",
+                        help="Somatic final vcf file without dbSNP")
+    return parser
+
+
+
+def main():
+    """main function
+    """
+    
+    vcf_fh = dict()
+    #vcf_files = dict()
+
+    parser = cmdline_parser()
+    args = parser.parse_args()
+        
+    for (k, v) in [
+            ('FN', args.vcf_fn),
+            ('normal_rlx', args.vcf_nrlx),
+            ('normal_str', args.vcf_nstr),
+            ('tumor_rlx', args.vcf_trlx),
+            ('tumor_str', args.vcf_tstr),
+            ('somatic_raw', args.vcf_sraw),
+            ('somatic_final', args.vcf_sfinal),
+            ('somatic_final_minus_dbsnp', args.vcf_sfinal_wo_dbsnp)]:
+        #vcf_files[k] = v
+        try:
+            vcf_fh[k] = vcf.VCFReader(filename=v)
+        except:
+            sys.stderr.write("Reading %s failed\n" % v)
+            raise
+    
+    sys.stderr.write("Analyzing FN %s and friends\n" % vcf_fh['FN'].filename)
+    
+    ORDER = ['normal_rlx', 'normal_str', 'tumor_rlx', 'tumor_str', 'somatic_raw', 'somatic_final', 'somatic_final_minus_dbsnp']
+    
+    
+    print "#CHROM\tPOS\tREF\tALT\t%s" % ('\t'.join(ORDER))
+    for fn in vcf_fh['FN']:
+        present_in = dict()
+        for k in ORDER:
+            present_in[k] = 0
+            for t in vcf_fh[k].fetch(fn.CHROM, fn.POS-1, fn.POS):
+                assert len(fn.REF) == len(t.REF)
+                assert len(fn.ALT)==1
+                assert len(t.ALT)==1            
+                if t.ALT[0] == fn.ALT[0]:
+                    if t.QUAL:
+                        q = t.QUAL
+                    else:
+                        q = "."
+                    try:
+                        present_in[k] = "Q=%s;SB=%s;DP=%d;AF=%f" % (q, t.INFO['SB'], t.INFO['DP'], t.INFO['AF'])
+                    except KeyError:
+                        sys.stderr.write("Key Error. Dropping to debugger\n")
+                        import pdb; pdb.set_trace()
+                    break
+        print "%s\t%s\t%s\t%s\t%s" % (
+            fn.CHROM, fn.POS, fn.REF, fn.ALT[0], '\t'.join(["%s" % present_in[k] for k in ORDER]))
+    
+    
+if __name__ == "__main__":
+    main()    
diff --git a/src/tools/scripts/lofreq2_bias.py b/src/tools/scripts/lofreq2_bias.py
new file mode 100755
index 0000000..6830fff
--- /dev/null
+++ b/src/tools/scripts/lofreq2_bias.py
@@ -0,0 +1,356 @@
+#!/usr/bin/env python
+"""Experimental implementation of various quality bias checks
+"""
+
+__author__ = "Andreas Wilm"
+__email__ = "wilma at gis.a-star.edu.sg"
+__copyright__ = "2014 Genome Institute of Singapore"
+__license__ = "The MIT License"
+
+
+#--- standard library imports
+#
+import sys
+import logging
+import os
+import argparse
+import gzip
+
+#--- third-party imports
+#
+import pysam
+from scipy.stats import mannwhitneyu
+import vcf
+
+#--- project specific imports
+#
+# sets PATH so that local scripts/binary is used if present, i.e.
+# stuff can be run without installing it
+try:
+    import lofreq2_local
+except ImportError:
+    pass
+try:
+    from lofreq_star.utils import prob_to_phredqual, phredqual_to_prob, fisher_comb
+    #from lofreq_star import vcf
+    from lofreq_star import multiple_testing
+    from lofreq_star import fdr
+except ImportError:
+    sys.stderr.write("FATAL(%s): Couldn't find LoFreq modules."
+                     " Are you sure your PYTHONPATH is set correctly?")
+    sys.exit(1)
+
+
+#global logger
+# http://docs.python.org/library/logging.html
+LOG = logging.getLogger("")
+logging.basicConfig(level=logging.WARN,
+                    format='%(levelname)s [%(asctime)s]: %(message)s')
+
+
+DEFAULT_MTC = 'fdr'
+#DEFAULT_MTC = 'bonf'
+#DEFAULT_MTC = 'holmbonf'
+DEFAULT_MTC_ALPHA = 0.001
+DEFAULT_TAG_TO_FILTER = 'BB'
+
+
+def mean(values):
+    """compute mean of (non-empty) list"""
+    size = len(values)
+    if size==0:
+        return ValueError
+    return sum(values)/float(size)
+
+           
+def cmdline_parser():
+    """Returns an argparse instance
+    """
+
+    # http://docs.python.org/dev/howto/argparse.html
+    parser = argparse.ArgumentParser(description=__doc__)
+    
+    parser.add_argument("--verbose",
+                        action="store_true",
+                        help="Be verbose")
+    parser.add_argument("--debug",
+                        action="store_true",
+                        help="Enable debugging")
+    parser.add_argument("-b", "--bam",
+                        required=True,
+                        help="Input BAM file matching vcf")
+    parser.add_argument("-i", "--vcfin",
+                        required=True,
+                        help="Input VCF file containing variants to filter")
+    parser.add_argument("-o", "--vcfout",
+                        default = "-",
+                        help="Output VCF")
+    parser.add_argument("-m", "--mtc",
+                        choices=['bonf', 'holmbonf', 'fdr', 'None'],
+                        default = DEFAULT_MTC,
+                        help="Multiple Testing correction method (default: %s)" % DEFAULT_MTC)
+    parser.add_argument("--mtc-alpha",
+                        type=float,
+                        default = DEFAULT_MTC_ALPHA,
+                        help="Multiple Testing correction alpha (default: %s)" % DEFAULT_MTC_ALPHA)
+    parser.add_argument("-t", "--mtc-tag",
+                        choices=['BB', 'MB', 'CB'],
+                        default = DEFAULT_TAG_TO_FILTER,
+                        help="Which tag to apply multiple testing to (default: %s)" % DEFAULT_TAG_TO_FILTER)
+    default = -1
+    parser.add_argument("--mq-filter",
+                        dest="min_mq",
+                        type=int,
+                        default=default,
+                        help="Ignore reads with mapping quality below this value (default=%d)" % default)
+    default = 6
+    parser.add_argument("--bq-filter",
+                        dest="min_bq",
+                        type=int,
+                        default=default,
+                        help="Ignore bases with quality below this value (default=%d)" % default)
+    parser.add_argument("-a", "--use-orphan",
+                        action="store_true",
+                        help="Don't ignore orphan-reads / anomalous read-pairs")
+    parser.add_argument("-p", "--pass-only",
+                        action="store_true",
+                        help="Don't print filtered variants")
+
+    return parser
+
+
+def skip_read(r):
+    """Decide whether to skip a read
+
+    FIXME identical copy in mutect_alt_allele_in_normal.py
+    """
+    
+    skip_flags = [0x4, 0x100, 0x200, 0x400]
+    skip = False
+    # FIXME combine
+    for f in skip_flags:
+        if r.flag & f:
+            return True
+    return False
+
+
+
+def main():
+    """The main function
+    """
+    
+    parser = cmdline_parser()
+    args = parser.parse_args()
+    
+    if args.verbose:
+        LOG.setLevel(logging.INFO)
+    if args.debug:
+        LOG.setLevel(logging.DEBUG)        
+    
+    assert os.path.exists(args.bam), (
+        "BAM file %s does not exist" % args.bam)
+    samfh = pysam.Samfile(args.bam)
+
+    # setup vcf_reader
+    # 
+    if args.vcfin == '-':
+        vcf_reader = vcf.VCFReader(sys.stdin)
+    else:
+        vcf_reader = vcf.VCFReader(filename=args.vcfin)
+            
+    variants = [r for r in vcf_reader]
+    LOG.info("Loaded %d variants" % len(variants))
+    
+    if args.mtc.lower() != 'None':
+        LOG.info("Will use %s for MTC on %s with alpha %f" % (
+            args.mtc, args.mtc_tag, args.mtc_alpha))
+    else:
+        LOG.info("No multiple testing correction will be done")
+        
+    # setup vcf_writer
+    #
+    if args.vcfout == '-':
+        fh_out = sys.stdout
+    else:
+        if os.path.exists(args.vcfout):
+            LOG.fatal("Cowardly refusing to overwrite already existing file %s" % (args.vcfout))
+            sys.exit(1)
+            
+        if args.vcfout[-3:] == '.gz':
+            fh_out = gzip.open(args.vcfout, 'w')
+        else:
+            fh_out = open(args.vcfout, 'w')
+    # pyvcf needs template as arg to VCFWriter, whereas LoFreq's vcf clone didn't
+    vcf_writer = vcf.VCFWriter(fh_out, vcf_reader, lineterminator=os.linesep)
+    #vcf_writer = vcf.VCFWriter(fh_out)
+    #vcf_writer.meta_from_reader(vcf_reader)
+                                       
+    pvalues = []
+    for (var_no, var) in enumerate(variants):
+        if var_no%500==1:
+            LOG.info("Computing bias for var %d of %d" % (var_no, len(variants)))
+            
+        if var.INFO.has_key('INDEL'):
+            LOG.warn("Skipping unsupported indel variant %s:%d" % (var.CHROM, var.POS))
+            continue
+        
+        reads = list(samfh.fetch(reference=var.CHROM,
+                                 start=var.POS-1, end=var.POS))
+        LOG.debug("%s %d: %d (unfiltered) reads covering position" % (
+           var.CHROM, var.POS, len(reads)))
+
+        ref_mquals = []
+        alt_mquals = []
+        ref_bquals = []
+        alt_bquals = []
+        # only for PE
+        #ref_isize = []
+        #alt_isize = []
+        # following two meant to test
+        #alt_vpos = [] 
+        #rlens = []
+        
+        for r in reads:
+
+            if skip_read(r):
+                continue
+                
+            orphan = (r.flag & 0x1) and not (r.flag & 0x2)
+            if orphan and not args.use_orphan:
+                continue
+
+            if r.mapq < args.min_mq:
+                continue
+        
+            vpos_on_read = [vpos_on_read 
+                            for (vpos_on_read, vpos_on_ref) in r.aligned_pairs 
+                            if vpos_on_ref==var.POS-1]
+            assert len(vpos_on_read)==1
+            vpos_on_read = vpos_on_read[0]
+            if vpos_on_read == None:# skip deletions
+                continue
+
+            #alt_vpos.append(vpos_on_read)
+            #rlens.append(r.rlen)
+            
+            b = r.query[vpos_on_read]
+            bq = ord(r.qqual[vpos_on_read])-33
+            mq = r.mapq
+
+            if bq < args.min_bq:
+                continue
+            
+            assert len(var.REF)==1 and len(var.ALT)==1
+            if b.upper() == var.REF[0].upper():
+                ref_mquals.append(mq)
+                ref_bquals.append(bq)
+                #if not args.use_orphan:
+                #    ref_isize.append(abs(r.tlen))
+            elif b.upper() == str(var.ALT[0]).upper():
+                alt_mquals.append(mq)
+                alt_bquals.append(bq)
+                #if not args.use_orphan:
+                #    alt_isize.append(abs(r.tlen))
+            else:            
+                LOG.debug("Skipping non-ref-alt base %s at %s:%d" % (b, var.CHROM, var.POS))
+                continue
+            
+        LOG.debug("After filtering at %s:%d: %d ref mquals and %d alt mquals" % (
+            var.CHROM, var.POS, len(ref_mquals), len(alt_mquals)))
+        
+        # mannwhitneyu fails if all values the same
+        if len(set(ref_mquals).union(alt_mquals))==1:
+            m_pv = 1.0
+        elif len(ref_mquals)==0 or len(alt_mquals)==0:
+            m_pv = 1.0
+        else:
+            # compute only if alternate quals are smaller on average
+            if mean(alt_mquals) < mean(ref_mquals):
+                ustat = mannwhitneyu(ref_mquals, alt_mquals)
+                m_pv = ustat[1]
+            else:
+                m_pv = 1.0
+
+        # same for bqs
+        if len(set(ref_bquals).union(alt_bquals))==1:
+            b_pv = 1.0
+        elif len(ref_bquals)==0 or len(alt_bquals)==0:
+            b_pv = 1.0
+        else:
+            if mean(alt_bquals) < mean(ref_bquals):
+                ustat = mannwhitneyu(ref_bquals, alt_bquals)
+                b_pv = ustat[1]
+            else:
+                b_pv = 1.0
+        # same for isize-qs
+        #if len(ref_isize) and len(alt_isize):
+        #    if len(set(ref_isize).union(alt_isize))==1:
+        #        i_pv = 1
+        #    else:
+        #        ustat = mannwhitneyu(ref_isize, alt_isize)
+        #        i_pv = ustat[1]
+        #else:
+        #    i_pv = 1
+        
+        c_pv = fisher_comb(m_pv, b_pv)
+            
+        #import pdb; pdb.set_trace()
+        LOG.debug("%s %d: mb %f bb %f cb %f" % (var.CHROM, var.POS, m_pv, b_pv, c_pv))
+
+        var.INFO['MB'] = prob_to_phredqual(m_pv)
+        var.INFO['BB'] = prob_to_phredqual(b_pv)
+        #var.INFO['IB'] = prob_to_phredqual(i_pv)
+        var.INFO['CB'] = prob_to_phredqual(c_pv)
+
+        if args.mtc.lower() != 'none':
+            pvalues.append(phredqual_to_prob(int(var.INFO[args.mtc_tag])))
+                       
+
+    if args.mtc.lower() != 'none':
+    
+        ftag = "%s<%f" % (args.mtc, args.mtc_alpha)
+        rej_idxs = []
+        if args.mtc == 'bonf':
+            rej_idxs = [i for (i, p) in
+                       enumerate(multiple_testing.Bonferroni(pvalues).corrected_pvals) 
+                       if p<args.mtc_alpha]
+            
+        elif args.mtc == 'holmbonf':
+            rej_idxs = [i for (i, p) in
+                       enumerate(multiple_testing.Bonferroni(pvalues).corrected_pvals) 
+                       if p<args.mtc_alpha]
+                    
+        elif args.mtc == 'fdr':
+            rej_idxs = fdr.fdr(pvalues, a=args.mtc_alpha)
+    
+        else:
+            raise ValueError(), ("unknown MTC method %s" % args.mtc)
+
+        for i in rej_idxs:
+            # pyvcf filter is empty if not set. lofreq's vcf clone was . or PASS
+            #if not variants[i].FILTER or variants[i].FILTER in [".", "PASS"]:
+            #    new_f = [ftag]
+            #else:
+            #    new_f = "%s;%s" % (variants[i].FILTER, ftag)
+            #variants[i] = variants[i]._replace(FILTER=new_f)
+            variants[i].FILTER.append(ftag)
+    
+        LOG.info("%d of %d variants didn't pass filter" % (
+            len(rej_idxs), len(variants)))
+
+    # pyvcf doesn't need write_metainfo or write_header
+    #vcf_writer.write_metainfo()
+    #vcf_writer.write_header()
+    for var in variants:
+        filtered = len(var.FILTER)>0 and var.FILTER not in [".", "PASS"]
+        if args.pass_only and filtered:
+            continue
+        # LoFreq's vcf clone called this write_rec()
+        vcf_writer.write_record(var)
+    
+    if fh_out != sys.stdout:
+        fh_out.close()
+                        
+if __name__ == "__main__":
+    main()
+    LOG.info("Successful program exit")
diff --git a/src/tools/scripts/lofreq2_cluster.py b/src/tools/scripts/lofreq2_cluster.py
new file mode 100755
index 0000000..edfd91b
--- /dev/null
+++ b/src/tools/scripts/lofreq2_cluster.py
@@ -0,0 +1,304 @@
+#!/usr/bin/env python
+"""Cluster SNVs based on SNV freqs confidence interval
+"""
+
+
+__author__ = "Andreas Wilm, Niranjan Nagarajan"
+__email__ = "wilma at gis.a-star.edu.sg"
+__copyright__ = "2013,2014 Genome Institute of Singapore"
+__license__ = "The MIT License"
+
+
+
+# --- standard library imports
+#
+import sys
+import logging
+import os
+import argparse
+from math import sqrt
+
+
+#--- third-party imports
+#
+# /
+
+#--- project specific imports
+#
+# legacy snp format
+HAVE_SNP_MODULE = False
+try:
+    from lofreq import snp
+    HAVE_SNP_MODULE = True
+except ImportError:
+    pass
+# vcf format
+HAVE_VCF_MODULE = False
+try:
+    import lofreq2_local
+    #from lofreq_star import vcf
+    import vcf
+    HAVE_VCF_MODULE = True
+except ImportError:
+    pass    
+if HAVE_SNP_MODULE == False and HAVE_VCF_MODULE == False:
+    sys.stderr.write("Couldn't import any of LoFreq SNP format modules\n")
+    sys.exit(1)
+SUPPORTED_FORMATS = []
+if HAVE_SNP_MODULE:
+    SUPPORTED_FORMATS.append('snp')
+if HAVE_VCF_MODULE:
+    SUPPORTED_FORMATS.append('vcf')
+    
+
+
+#global logger
+# http://docs.python.org/library/logging.html
+LOG = logging.getLogger("")
+logging.basicConfig(level=logging.WARN,
+                    format='%(levelname)s [%(asctime)s]: %(message)s')
+
+
+
+# invocation of ipython on exceptions
+#import sys, pdb
+#from IPython.core import ultratb
+#sys.excepthook = ultratb.FormattedTB(mode='Verbose',
+#                                     color_scheme='Linux', call_pdb=1)
+
+class MetaVar(object):
+    """Wrapper for SNV and VCF format variants to look the same
+    """
+    
+    def __init__(self, vcf_var=None, snp_var=None):
+        """
+        """
+
+        if vcf_var and snp_var:
+            raise ValueError, ("Can only take one: vcf- or snp-var")
+        self.repr = None
+        self.coverage = None
+        self.freq = None
+        self.var_count = None
+        self.max_ci = None
+        self.min_ci = None
+        if vcf_var:
+            self.add_vcf_var(vcf_var)
+        elif snp_var:
+            self.add_snp_var(snp_var)
+
+
+    def add_vcf_var(self, vcf_var):
+        """Add variant in vcf format
+        """
+
+        # pyvcf keeps ALT as _Substitution, not string as LoFreq's vcf clone
+        # therefore won't work','.join(vcf_var.ALT),
+        self.repr = "%s %d %s>%s %f" % (vcf_var.CHROM, vcf_var.POS,
+                                        vcf_var.REF, 
+                                        ','.join(["%s" % x for x in vcf_var.ALT]),
+                                        vcf_var.INFO['AF'])
+        self.coverage = vcf_var.INFO['DP']
+        self.freq = vcf_var.INFO['AF']
+        self.var_count = int(self.coverage * self.freq)
+        (self.min_ci, self.max_ci) = self.compute_ci(
+            self.coverage, self.var_count)
+        LOG.info('CI for %s: %f--%f' % (
+            self.repr, self.min_ci, self.max_ci))
+
+        
+    def add_snp_var(self, snp_var):
+        """Add variant in legacy SNP format
+        """
+        self.repr = "%s %d %c>%c %f" % (snp_var.chrom, snp_var.pos+1,
+                                        snp_var.wildtype, snp_var.variant,
+                                        snp_var.freq)
+        self.coverage = int(snp_var.info['coverage'])
+        self.freq = snp_var.freq
+        self.var_count = int(self.coverage * self.freq)
+        (self.min_ci, self.max_ci) = self.compute_ci(
+            self.coverage, self.var_count)
+        LOG.info('CI for %s: %f--%f' % (
+            self.repr, self.min_ci, self.max_ci))
+
+        
+    @staticmethod
+    def compute_ci(coverage, var_count):
+        """Compute confidnce interval:
+        
+        Agresti-Coull Interval at the 0.05 level
+        http://en.wikipedia.org/wiki/Binomial_proportion_confidence_interval#Agresti-Coull_Interval
+        
+        n~ = n + 4
+        p~ = 1/n~ * (X + 4/2)
+        ci: p~ +- 2*sqrt(1/n~ * p~ * (1-p~)
+        """
+        n_t = float(coverage + 4)
+        p_t = (var_count + 2) / n_t
+        ci = 2 * sqrt(p_t * (1-p_t) / n_t)
+        min_ci = p_t - 3*ci
+        if min_ci < 0.0:
+            min_ci = 0.0
+        max_ci = p_t + 3*ci
+
+        return (min_ci, max_ci)
+    
+
+        
+
+def cmdline_parser():
+    """
+    creates an OptionParser instance
+    """
+
+    # http://docs.python.org/dev/howto/argparse.html
+    parser = argparse.ArgumentParser(description=__doc__)
+
+    parser.add_argument("--verbose",
+                      action="store_true", 
+                      dest="verbose",
+                      help="be verbose")
+    parser.add_argument("--debug",
+                      action="store_true", 
+                      dest="debug",
+                      help="enable debugging")
+    parser.add_argument("-i", "--variants",
+                      dest="var_file",
+                      help="variant input file (supported formats: %s)" % (
+                          ', '.join(SUPPORTED_FORMATS)))
+    parser.add_argument("-o", "--out",
+                      dest="cluster_file",
+                      default="-",
+                      help="Cluster output file (- for stdout = default)")
+
+    return parser
+
+
+    
+def main():
+    """Read variants, cluster them by the confidence interval of their
+    frequency and write one line per cluster to the output file
+    """
+    parser = cmdline_parser()
+    args = parser.parse_args()
+
+    # FIXME catch unrecognized args (not just (len(args)
+
+    if args.verbose:
+        LOG.setLevel(logging.INFO)
+    if args.debug:
+        LOG.setLevel(logging.DEBUG)
+
+    # the input file must be given and must exist, unless it is "-"
+    for (in_file, descr) in [(args.var_file, "variant file")]:
+        if not in_file:
+            parser.error("%s input file argument missing." % descr)
+            sys.exit(1)
+        if not os.path.exists(in_file) and in_file != "-":
+            sys.stderr.write(
+                "file '%s' does not exist.\n" % in_file)
+            sys.exit(1)
+    # the output file must be given and must not exist yet, unless it is "-"
+    for (out_file, descr) in [(args.cluster_file, "cluster output file")]:
+        if not out_file:
+            parser.error("%s output file argument missing." % descr)
+            sys.exit(1)
+        if os.path.exists(out_file) and out_file!="-":
+            sys.stderr.write(
+                "Cowardly refusing to overwrite existing"
+                " output file '%s'.\n" % out_file)
+            sys.exit(1)
+
+    # NOTE: vcf is tried first and its parse errors are re-raised (no fallback to snp then)
+    # A lot of code for supporting legacy SNP format.
+    #
+    # FIXME this and MetaVar() should use vcf by default and just
+    # convert snp to vcf
+    #
+    is_vcf = False
+    if HAVE_VCF_MODULE:
+        if args.var_file == '-':
+            vcf_fh = sys.stdin
+        else:
+            vcf_fh = open(args.var_file)
+            # FIXME gzip support
+        vcf_reader = vcf.VCFReader(vcf_fh)
+        try:
+            var_list = [MetaVar(vcf_var=r)
+                        for r in vcf_reader]
+            is_vcf = True
+        except:
+            raise
+        if vcf_fh != sys.stdin:
+            vcf_fh.close()
+    is_snp = False
+    if not is_vcf and HAVE_SNP_MODULE:
+        try:
+            var_list = [MetaVar(snp_var=s)
+                        for s in snp.parse_snp_file(args.var_file)]
+            is_snp = True
+        except IndexError:
+            pass
+
+    if not is_snp and not is_vcf:
+        LOG.error("Can't parse %s. Tried the following formats: %s" % (
+            args.var_file, ', '.join(SUPPORTED_FORMATS)))
+        sys.exit(1)
+
+
+    LOG.info("Parsed %d SNPs from %s" % (len(var_list), args.var_file))
+
+    # highest frequency first: the clustering below walks down the frequencies
+    var_list =  sorted(var_list, key=lambda x: x.freq, reverse=True)
+
+    if args.cluster_file == '-':
+        fh_out = sys.stdout
+    else:
+        fh_out = open(args.cluster_file, 'w')
+
+    # nothing to cluster: say so and exit successfully
+    if len(var_list)==0:
+        fh_out.write("No SNPs <-> no clusters!\n")
+        if fh_out != sys.stdout:
+            print "No SNPs <-> no clusters!"
+            fh_out.close()
+        sys.exit(0)
+    # cluster is keyed by (cluster number, field); 'min' and 'max' are the
+    # CI bounds of the cluster's seed and are not updated as members join
+    cluster = dict()
+    clu_no = 0
+    seed = var_list[0]
+    #cluster[clu_no,'members'] = ["%s %f" % (seed.repr, seed.freq)]
+    cluster[clu_no,'members'] = ["%s" % (seed.repr)]
+    cluster[clu_no,'min'] = seed.min_ci
+    cluster[clu_no,'max'] = seed.max_ci
+    # a variant whose max_ci exceeds the current cluster's min joins it, else it seeds the next
+    for var in var_list[1:]:
+        LOG.debug("checking %s %f: max_ci %f vvar. clu_min %f" % (
+            var.repr, var.freq, var.max_ci, cluster[clu_no,'min']))
+        if var.max_ci > cluster[clu_no,'min']:
+            #cluster[clu_no,'members'].append("%s %f" % (var.repr, var.freq))
+            cluster[clu_no,'members'].append("%s" % (var.repr))
+        else:
+            clu_no += 1
+            seed = var
+            #cluster[clu_no,'members'] = ["%s %f" % (seed.repr, seed.freq)]
+            cluster[clu_no,'members'] = ["%s" % (seed.repr)]
+            cluster[clu_no,'min'] = seed.min_ci
+            cluster[clu_no,'max'] = seed.max_ci
+
+
+    for i in range(clu_no+1):
+        fh_out.write("cluster %d (freq. range: %f - %f): %s\n" % (
+            i+1, cluster[i,'min'], cluster[i,'max'],
+            ', '.join(cluster[i,'members'])))
+
+    if fh_out != sys.stdout:
+        fh_out.close()
+    print "%d clusters found (written to %s)" % (clu_no+1, fh_out.name)
+ 
+
+if __name__ == "__main__":
+    main()
+    LOG.info("Successful program exit")
+        
diff --git a/src/tools/scripts/lofreq2_indel_ovlp.py b/src/tools/scripts/lofreq2_indel_ovlp.py
new file mode 100755
index 0000000..41055e1
--- /dev/null
+++ b/src/tools/scripts/lofreq2_indel_ovlp.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+"""Removes overlapping indels
+"""
+
+__author__ = "Andreas Wilm"
+__email__ = "wilma at gis.a-star.edu.sg"
+__copyright__ = "2014 Genome Institute of Singapore"
+__license__ = "The MIT License"
+
+
+#--- standard library imports
+#
+import sys
+from collections import namedtuple
+import gzip
+
+#--- third-party imports
+#
+#/
+
+
+VCFEntry = namedtuple('VCFEntry', ['chrom', 'pos', 'dbsnpid', 'ref', 'alt', 'qual', 'filter', 'info'])
+
+
+def write_var(var, fh=sys.stdout):    
+    var = var._replace(pos=str(var.pos))
+    fh.write("%s\n" % '\t'.join(var))
+
+    
+def vcf_line_to_var(line):
+    fields = line.rstrip().split('\t')[:8]
+    e = VCFEntry._make(fields)
+    return e._replace(pos=int(e.pos))
+
+
+#def var_len(var):
+#    return abs(len(var.alt)-len(var.ref))
+
+
+def af_from_var(var):
+    for f in var.info.split(';'):
+        if f.startswith('AF='):
+            return float(f[3:]) 
+    return None
+
+
+def qual_from_var(var):
+    """takes care of missing values, int conversion and ties in comparisons
+    """
+    if var.qual==".":
+        return sys.maxint
+    else:
+        # add AF to deal with ties
+        return int(var.qual)+af_from_var(var)
+
+
+def overlap(v1, v2):
+    """determine whether affected positions of two variants overlap
+    """
+
+    #if v1.pos==4589049:
+    #    import pdb; pdb.set_trace()
+    pos1 = set([v1.pos+i for i in range(max([len(v1.ref), len(v1.alt)]))])
+    pos2 = set([v2.pos+i for i in range(max([len(v2.ref), len(v2.alt)]))])
+    return len(pos1.intersection(pos2))>0
+
+def main():
+    if len(sys.argv) != 2:
+        sys.stderr.write("FATAL: Need (one) vcf file as only argument\n")
+        sys.exit(1)
+        
+    vcf = sys.argv[1]
+    if vcf == "-":
+        fh = sys.stdin
+    elif vcf.endswith(".gz"):
+        fh = gzip.open(vcf)
+    else:
+        fh = open(vcf)
+    
+    #pic_best_func = af_from_var
+    pick_best_func = qual_from_var
+
+    prev_vars = []
+    for line in fh:
+        line = line.rstrip()
+        if line.startswith('#'):
+            print line
+            continue
+        
+        cur_var = vcf_line_to_var(line)
+        if False:
+            sys.stderr.write("INFO: looking at %d:%s>%s\n" % (cur_var.pos, cur_var.ref, cur_var.alt))
+            sys.stderr.write("INFO: on stack: %s\n" % (', '.join(["%d:%s>%s" % (v.pos, v.ref, v.alt) for v in prev_vars])))
+        if len(prev_vars):
+            if cur_var.chrom != prev_vars[-1].chrom or not overlap(prev_vars[-1], cur_var):
+                # pick highest qual/af from stack and empty stack
+                picked_var = sorted(prev_vars, key=lambda e: pick_best_func(e), reverse=True)[0]
+                #if len(prev_vars)>1:
+                #    print "picked %s from %s" % (picked_var, prev_vars)
+                write_var(picked_var)
+                prev_vars = []
+        prev_vars.append(cur_var)
+    
+    # don't forget remaining ones
+    picked_var = sorted(prev_vars, key=lambda e: pick_best_func(e), reverse=True)[0]
+    write_var(picked_var)
+    
+        
+    if fh != sys.stdout:
+        fh.close()
+        
+    #print "%d prev_vars left" % (len(prev_vars))
+
+if __name__ == "__main__":
+    main()
diff --git a/src/tools/scripts/lofreq2_local.py b/src/tools/scripts/lofreq2_local.py
new file mode 100644
index 0000000..98026e3
--- /dev/null
+++ b/src/tools/scripts/lofreq2_local.py
@@ -0,0 +1,31 @@
+# Importing this module adds the local source dir to sys.path, so that
+# scripts run from the source dir (i.e. not installed) work straight away.
+
+import sys
+import os
+
+# Set sys.path/PYTHONPATH such that we find the local source dir first
+# by using: from lofreq_star import ...
+d = os.path.normpath(os.path.join(
+    os.path.dirname(sys.argv[0]), '..'))
+if os.path.exists(os.path.join(d, "lofreq_star")):
+    #sys.stderr.write("NOTE: Adding local dir %s to PYTHONPATH\n" % d)
+    sys.path.insert(0, d)
+
+# Set PATH such that we find lofreq binary first
+#d = os.path.normpath(os.path.join(
+#    os.path.dirname(sys.argv[0]), '../../lofreq'))
+#if os.path.exists(os.path.join(d, 'lofreq')):
+#    #sys.stderr.write("NOTE: Adding local dir %s to PATH\n" % d)
+#    os.environ["PATH"] = d + os.pathsep + os.environ["PATH"]
+
+# In theory we need to find the scripts because the main binary knows about them. However, there are circular cases where a script calls the binary, which then can't find the scripts again (e.g. in the parallel wrapper), so:
+#
+#d = os.path.normpath(os.path.join(
+#    os.path.dirname(sys.argv[0]), '../tools/scripts'))
+#if os.path.exists(d):
+#    #sys.stderr.write("NOTE: Adding local dir %s to PATH\n" % d)
+#    os.environ["PATH"] = d + os.pathsep + os.environ["PATH"]
+    
+
+        
diff --git a/src/tools/scripts/lofreq2_vcfplot.py b/src/tools/scripts/lofreq2_vcfplot.py
new file mode 100755
index 0000000..0019d8a
--- /dev/null
+++ b/src/tools/scripts/lofreq2_vcfplot.py
@@ -0,0 +1,613 @@
+#!/usr/bin/env python
+"""Plot characteristics of variants listed in VCF file
+"""
+
+__author__ = "Andreas Wilm"
+__email__ = "wilma at gis.a-star.edu.sg"
+__copyright__ = "2014 Genome Institute of Singapore"
+__license__ = "The MIT License"
+
+
+# --- standard library imports
+#
+
+# imports
+import sys
+import os
+import argparse
+import logging
+from collections import Counter, deque
+import itertools
+
+#--- third-party imports
+#
+import numpy as np
+
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+from matplotlib.backends.backend_pdf import PdfPages
+
+# only for boxplots
+from scipy.stats import gaussian_kde
+
+import vcf
+
+#--- project specific imports
+#
+try:
+    import lofreq2_local
+except ImportError:
+    pass
+
+#try:
+#    #from lofreq_star import vcf
+#except ImportError:
+#    sys.stderr.write("FATAL(%s): Couldn't find LoFreq's vcf module."
+#                     " Are you sure your PYTHONPATH is set correctly (= %s)?\n" % (
+#                         (sys.argv[0], os.environ['PYTHONPATH'])))
+#    sys.exit(1)
+try:
+    from lofreq_star.utils import complement, now
+except ImportError:
+    sys.stderr.write("FATAL: Couldn't find LoFreq modules."
+                     " Are you sure your PYTHONPATH is set correctly?\n")
+    sys.exit(1)
+
+
+# global logger
+#
+LOG = logging.getLogger("")
+logging.basicConfig(level=logging.WARN,
+                    format='%(levelname)s [%(asctime)s]: %(message)s')
+
+
+COLORS = ["b", "g", "r", "c", "m", "y", "k"]
+
+
+def r_ify(axes):
+    '''FIXME:unused
+
+    source:
+    http://stackoverflow.com/questions/14349055/making-matplotlib-graphs-look-like-r-by-default
+    http://messymind.net/2012/07/making-matplotlib-look-like-ggplot/
+
+    Produce R-style Axes properties
+    '''
+    xticks = axes.get_xticks()
+    yticks = axes.get_yticks()
+
+    #remove right and upper spines
+    axes.spines['right'].set_color('none')
+    axes.spines['top'].set_color('none')
+
+    #make the background transparent
+    axes.set_axis_bgcolor('none')
+
+    #allow space between bottom and left spines and Axes
+    axes.spines['bottom'].set_position(('axes', -0.05))
+    axes.spines['left'].set_position(('axes', -0.05))
+
+    #allow plot to extend beyond spines
+    axes.spines['bottom'].set_bounds(xticks[0], xticks[-1])
+    axes.spines['left'].set_bounds(yticks[0], yticks[-1])
+
+    #set tick parameters to be more R-like
+    axes.tick_params(direction='out', top=False, right=False, length=10, pad=12, width=1, labelsize='medium')
+
+    #set x and y ticks to include all but the last tick
+    axes.set_xticks(xticks[:-1])
+    axes.set_yticks(yticks[:-1])
+
+    return axes
+
+def ts_or_tv(b1, b2):
+    """Inspired by https://github.com/yesimon/rosalind/blob/master/TRAN.py
+
+    Returns None if any of the two given bases is not in 'ACGT'
+    """
+    type_map = {
+        frozenset(['A', 'G']): 'ts',
+        frozenset(['C', 'T']): 'ts',
+        frozenset(['A', 'C']): 'tv',
+        frozenset(['G', 'T']): 'tv',
+        frozenset(['A', 'T']): 'tv',
+        frozenset(['C', 'G']): 'tv',
+        }
+    return type_map.get(frozenset([b1, b2]))
+
+
+def ts_tv_ratio(vars):
+    """Computes Ts/Tv ratio. Only works on SNVs (enforced via assert).
+
+    Allele pairs for which ts_or_tv() returns None are not counted.
+    Returns float('nan') if no transversion was counted, because the
+    ratio is undefined then.
+    """
+
+    counts = {'ts': 0, 'tv': 0}
+    for v in vars:
+        assert len(v.REF)==1
+        assert len(v.ALT)==1 and len(v.ALT[0])==1
+        t = ts_or_tv(v.REF.upper(), str(v.ALT[0]).upper())
+        if t:
+            counts[t] += 1
+    if counts['tv'] == 0:
+        return float('nan')
+    return counts['ts']/float(counts['tv'])
+
+            
+def subst_type_str(ref, alt, strand_specific=False):
+    """Return "REF>ALT" or, unless strand_specific, that string and its
+    complement() sorted and joined by '|'. ref/alt must have length 1."""
+
+    # in case we get a list
+    assert len(ref)==1 and len(alt)==1
+    ref = ref[0]
+    alt = alt[0]
+
+    s = "%s>%s" % (ref, alt)
+    if strand_specific:
+        return s
+    else:
+        c = complement(s)
+        return '|'.join(sorted([s, c]))
+
+
+def subst_perc(ax, subst_type_counts):
+    """
+    subst_type_counts should be list of array with type as 1st element and count as 2nd
+    """
+
+    # FIXME sort by transition/transversion type. Add Ts/Tv ratio to plot
+
+    #colors = [cm.jet(1.*i/len(subst_type_counts)) for i in xrange(len(subst_type_counts))]
+    colors = [COLORS[i % len(COLORS)] for i in xrange(len(subst_type_counts))]
+
+    count_sum = sum([x[1] for x in subst_type_counts])
+    percs = [x[1]/float(count_sum) for x in subst_type_counts]
+    ax.bar(xrange(len(subst_type_counts)), percs, color=colors)
+
+    ticks = [x[0] for x in subst_type_counts]
+    ax.set_xticks(xrange(len(ticks))) # forced display of all
+    ax.set_xticklabels(ticks, rotation=45, ha="left")
+    # FIXME rotation=45 doesn't work
+    # FIXME ha="left" doesn't work
+    # ax1.set_xticks(ticks)
+    # FIXME ticks as string won't work
+    ax.set_ylabel('[%]')
+    ax.set_xlabel('Type')
+
+    # prevent clipping of tick-labels
+    #plt.subplots_adjust(bottom=0.15)
+    plt.tight_layout()
+
+
+def calc_dist_left(vars):
+    """Calculates distance to previous variant. Returns -1 for the first variant on a chromosome; 0 means multi-allelic
+
+    Variants need to be sorted (checking via assert here)
+    """
+
+    dists = []
+
+    # group per chromosome
+    processed_chroms = []
+    for (chrom, vars_on_chrom) in itertools.groupby(vars, lambda v: v.CHROM):
+        assert chrom not in processed_chroms
+        processed_chroms.append(chrom)
+
+        prev_var = None
+        for var in vars_on_chrom:
+            if not prev_var:
+                dists.append(-1)
+            else:
+                dists.append(var.POS-prev_var.POS)
+            prev_var = var
+
+    assert len(dists) == len(vars)
+    #print "end at %s" % now()
+
+    return dists
+
+
+
+def calc_dist_min(variants):
+    """Calculated smallest distance to next closest (left or right) variant.
+
+    If a chromosome only contains a single SNV, -1 will be stored as
+    dist as we can't use 0 which would mean multi-allelic position.
+
+    Variants need to be sorted (checking via assert here)
+
+    This is several orders of magnitude faster than calc_dist_to_next
+
+    """
+
+    
+    #print "starting at %s" % now()
+
+    dists = []
+
+    # group per chromosome
+    processed_chroms = []
+    for (chrom, vars_on_chrom) in itertools.groupby(variants, lambda v: v.CHROM):
+        assert chrom not in processed_chroms
+        processed_chroms.append(chrom)
+
+        # use a queue. fill up with max 3 elements at a time. every
+        # time we kick one out report the minimum dist between it and
+        # the snv on the left and right (if any)
+
+        deck = deque(itertools.islice(vars_on_chrom, 3))
+        if len(deck) == 1:
+            dists.append(-1)
+            continue
+
+        left_dist = sys.maxint
+        for elem in vars_on_chrom:
+            right_dist = deck[1].POS - deck[0].POS
+            min_dist = min([left_dist, right_dist])
+            dists.append(min_dist)
+            #print "Popping %s %d with min_dist %d" % (
+            #    deck[0].CHROM, deck[0].POS, min_dist)
+            deck.popleft()
+            deck.append(elem)
+            left_dist = right_dist
+
+        # dismantle. same as above without appending and left_dist
+        # update
+        while len(deck)>1:
+            right_dist = deck[1].POS - deck[0].POS
+            min_dist = min([left_dist, right_dist])
+            dists.append(min_dist)
+            #print "Popping %s %d with min_dist %d" % (
+            #    deck[0].CHROM, deck[0].POS, min_dist)
+            deck.popleft()
+            left_dist = right_dist
+
+        dists.append(left_dist)
+
+    assert len(dists) == len(variants)
+    #print "end at %s" % now()
+
+    return dists
+
+
+
+def violin_plot(ax, data):
+    '''
+    Create violin plots on an axis
+
+    from http://pyinsci.blogspot.sg/2009/09/violin-plot-with-matplotlib.html
+    '''
+
+    # FIXME possible that this needs values between 0 and 1?
+
+    w = min(0.15, 0.5)
+    try:
+        k = gaussian_kde(data) # calculates the kernel density
+    except ValueError:
+        LOG.warn("calculation of kernel density for violin plot failed. skipping...")
+        ax.text(0, 0.8, "gaussian_kde failed", size=14, ha='left', va="top")
+        return
+    m = min(k.dataset) #lower bound of violin
+    M = max(k.dataset) #upper bound of violin
+    try:
+        x = np.arange(m, M, (M-m)/100.) # support for violin
+    except TypeError:
+        # FIXME TypeError: only length-1 arrays can be converted to Python scalars
+        LOG.warn("arange failed in violint plot. skipping...")
+        return
+    v = k.evaluate(x) # violin profile (density curve)
+    if v.max():
+        v = v/v.max()*w # scaling the violin to the available space
+    else:
+        # FIXME LOG.warn("v.max()==0. won't be able to correctly print violin_plot")
+        v = 0
+    p = 0
+    ax.fill_betweenx(x, p, v+p, facecolor='y', alpha=0.3)
+    ax.fill_betweenx(x,p, -v+p, facecolor='y', alpha=0.3)
+    l = w+w*0.1
+    plt.xlim((-l, l))
+    #print "DEBUG", w, k, m, M, x, v
+    ax.set_xticks([])
+    #ax1.set_xticklabels(ticks, rotation=45, ha="left")
+
+
+
+def print_overview(ax, text_list):
+    """Write the strings in text_list, one per line, as text onto ax
+    after switching its axis off via ax.axis('off')."""
+
+    # options:
+    # - annotate () or text()
+    # - tex or text
+
+    # See http://jakevdp.github.io/mpl_tutorial/tutorial_pages/tut4.html
+
+    #matplotlib.rc('text', usetex=True)
+    #table = r'\begin{table} \begin{tabular}{|l|l|l|}  \hline  $\alpha$      & $\beta$        & $\gamma$      \\ \hline   32     & $\alpha$ & 123    \\ \hline   200 & 321    & 50 \\  \hline  \end{tabular} \end{table}'
+
+    ax.axis('off')
+    ax.text(0, 0.8, '\n'.join(text_list), size=14, ha='left', va="top")#, va='center')#, size=50)
+
+    # relative to invisible axes
+    #ax.annotate('\n'.join(text_list), (0, 1), textcoords='data', size=14)# ha='left', va='center')#, size=50)
+
+    #matplotlib.rc('text', usetex=False)
+
+
+def cmdline_parser():
+    """
+    creates an argparse.ArgumentParser instance
+    """
+
+    parser = argparse.ArgumentParser(description=__doc__)
+
+    parser.add_argument("-v", "--verbose",
+                      action="store_true",
+                      dest="verbose",
+                      help="be verbose")
+    parser.add_argument("--debug",
+                      action="store_true",
+                      dest="debug",
+                      help="enable debugging")
+    parser.add_argument("-i", "--vcf",
+                      dest="vcf",
+                      required=False,
+                      help="Input vcf file (gzip supported; - for stdin).")
+    parser.add_argument("--simple",
+                      action="store_true",
+                      dest="simple",
+                      help="Simple plots only - no combinations")
+    parser.add_argument("--ign-filter",
+                      action="store_true",
+                      dest="ign_filter",
+                      help="Use all, not just passed variants")                      
+    parser.add_argument("--maxdp",
+                      dest="maxdp",
+                      type=int,
+                      help="Maximum DP")
+    parser.add_argument("-o", "--outplot",
+                      dest="outplot",
+                      #required=True, not needed if summary only and otherwise tested separately 
+                      help="Output plot (pdf) filename")
+    parser.add_argument("--indels",
+                      action="store_true",
+                      dest="indels_only",
+                      help="Work on indels only and ignore substitutions (default is the reverse)")
+    parser.add_argument("--summary-only",
+                      action="store_true",
+                      help="Don't plot; summarize only")
+    return parser
+
+
+def main():
+    """main function: parses args, loads and filters variants from a VCF
+    and either prints a summary (--summary-only) or writes plots to a PDF"""
+
+    parser = cmdline_parser()
+    args = parser.parse_args()
+
+    if args.verbose:
+        LOG.setLevel(logging.INFO)
+    if args.debug:
+        LOG.setLevel(logging.DEBUG)
+
+    for (in_file, descr) in [(args.vcf, "VCF")]:
+        if not in_file:
+            parser.error("%s input file argument missing." % descr)
+            sys.exit(1)
+        if not os.path.exists(in_file) and in_file != "-":
+            sys.stderr.write(
+                "file '%s' does not exist.\n" % in_file)
+            sys.exit(1)
+
+    # the plot file is only needed (and checked) if we actually plot
+    out_files_and_descr = []
+    if not args.summary_only:
+        out_files_and_descr = [(args.outplot, "plot")]
+    for (out_file, descr) in out_files_and_descr:
+        if not out_file:
+            parser.error("%s output file argument missing." % descr)
+            sys.exit(1)
+        if os.path.exists(out_file) and out_file!="-":
+            sys.stderr.write(
+                "Cowardly refusing to overwrite existing"
+                " output file '%s'.\n" % out_file)
+            sys.exit(1)
+
+    summary_txt = []
+    summary_txt.append("Reading vars from %s" % args.vcf)
+    LOG.info(summary_txt[-1])
+
+    if args.vcf == '-':
+        vcfreader = vcf.VCFReader(sys.stdin)
+    else:
+        vcfreader = vcf.VCFReader(filename=args.vcf)
+
+    vars = [v for v in vcfreader]
+
+    if not args.ign_filter:
+        vars = [v for v in vars if not v.FILTER]
+    summary_txt.append("Loaded %d variants" % (len(vars)))
+    LOG.info(summary_txt[-1])
+
+    if args.indels_only:
+        vars = [v for v in vars if v.is_indel]
+    else:
+        vars = [v for v in vars if not v.is_indel]
+    LOG.info("%d variants left after only keeping %s" % (
+        len(vars), "indels" if args.indels_only else "substitutions"))
+
+    filter_list = []
+    if args.maxdp:
+        filter_list.append((lambda v: v.INFO['DP']<=args.maxdp, "DP<=%d" % args.maxdp))
+    #filter_list.append(lambda v: v.CHROM=='chr1')
+    filtered_vars = vars
+    for (f, n) in filter_list:
+        n_in = len(filtered_vars)
+        try:
+            filtered_vars = [v for v in filtered_vars if f(v)]
+        except:
+            LOG.fatal("Filter %s failed" % n)
+            raise
+        n_out = len(filtered_vars)
+        summary_txt.append("Filter '%s' removed %d (more) vars" % (n, n_in-n_out))
+        LOG.info(summary_txt[-1])
+
+    summary_txt.append("%d vars left after filtering" % (len(filtered_vars)))
+    LOG.info(summary_txt[-1])
+    vars = filtered_vars
+
+    if len(vars)==0:
+        LOG.warn("Nothing to do. Exiting")
+        sys.exit(0)
+
+    summary_txt.append("#vars = %d (of which %d are CONSVARs)" % (
+        len(vars),
+        sum([1 for v in vars if v.INFO.has_key('CONSVAR')])))
+    LOG.info(summary_txt[-1])
+
+    # np.histogram([v.INFO['DP'] for v in vars if v.INFO['DP']<1000], bins=20)
+
+    # setup props we want to check in all possible combinations
+    #
+    props = dict()
+    for t in ['AF', 'DP']:
+        try:
+            props[t] = np.array([v.INFO[t] for v in vars])
+        except KeyError:
+            LOG.critical("Couldn't find %s info tag in all variants"
+            " (is %s a LoFreq file?). Won't plot..." % (t, args.vcf))
+    props['Distance (log10)'] = np.array([np.log10(d) if d>0 else -1 for d in calc_dist_left(vars)])
+    #props['QUAL (non-CONSVARs only)'] = np.array([v.QUAL for v in vars if not v.INFO.has_key('CONSVAR')])
+
+    if args.summary_only:
+        for p in props.keys():
+            x = np.array(props[p])
+            for (name, val) in [("minimum", np.min(x)),
+                                ("1st %ile", np.percentile(x, 1)),
+                                ("25th %ile", np.percentile(x, 25)),
+                                ("median", np.percentile(x, 50)),
+                                ("75th %ile", np.percentile(x, 75)),
+                                ("99th %ile", np.percentile(x, 99)),
+                                ("maximum", np.max(x))]:
+                print "%s\t%s\t%f" % (p, name, val)
+            print "%s\trange-min\trange-max\tcount" % (p)
+            (hist, bin_edges) = np.histogram(x)
+            for (i, val) in enumerate(hist):
+                print "%f\t%f\t%d" % (bin_edges[i], bin_edges[i+1], val)
+        return
+
+    pp = PdfPages(args.outplot)
+
+    # create a summary table
+    #
+    #matplotlib.rc('text', usetex=False)
+    fig = plt.figure()
+    ax = plt.subplot(1,1,1)
+    print_overview(ax, summary_txt)
+    plt.title('Overview')
+    pp.savefig()
+    plt.close()
+
+
+    # boxplots and histograms first
+    #
+    for p in [p for p in props.keys()]:
+        LOG.info("Printing boxplot, histogram and scatter plot for %s" % p)
+
+        # boxplots
+        fig = plt.figure()
+        ax = plt.subplot(1, 1, 1)
+        x = props[p]
+        if len(x) == 0:
+            LOG.warn("No values for %s. Not plotting..." % p)
+            continue
+        ax.boxplot(x, notch=1, positions=[0], vert=1)
+        violin_plot(ax, x)
+        ax.set_ylabel('#SNVs')
+        ax.set_xlabel(p)
+        plt.title('%s Boxplot' % p)
+        pp.savefig()
+        plt.close()
+
+        # histogram
+        fig = plt.figure()
+        ax = plt.subplot(1, 1, 1)
+        x = props[p]
+        ax.hist(x, bins=20)
+        ax.set_xlim([0, plt.xlim()[1]])
+        ax.set_ylabel('#SNVs')
+        ax.set_xlabel(p)
+        plt.title('%s Histogram' % p)
+        pp.savefig()
+        plt.close()
+
+        # scatter plot per positions. assuming snvs are sorted by position!
+        fig = plt.figure()
+        ax = plt.subplot(1, 1, 1)
+        y = props[p]
+        ax.scatter(range(len(y)), y)
+        ax.set_xlim([0, len(y)])
+        ax.set_ylabel(p)
+        ax.set_xlabel("Neighbourhood")
+        #plt.title('%s Histogram' % p)
+        pp.savefig()
+        plt.close()
+
+
+    if not args.indels_only:
+        # substitution types
+        #
+        # FIXME needs percentages
+        subst_type_counts = Counter([subst_type_str(v.REF, v.ALT) for v in vars])
+        # turn into list of tuples sorted by key
+        # subst_type_counts = sorted((k, v/101.0*len(vars)) for (k, v) in subst_type_counts.items())
+        subst_type_counts = sorted(subst_type_counts.items())
+        # FIXME should go to text report
+        #for (k, v) in subst_type_counts:
+        #    print "%s %d" % (k, v)
+        #print
+        fig = plt.figure()
+        ax = plt.subplot(1, 1, 1)
+        subst_perc(ax, subst_type_counts)
+        plt.title('Substitution Types (Ts/Tv=%.2f)' % (ts_tv_ratio(vars)))
+        pp.savefig()
+        plt.close()
+
+
+    if not args.simple:
+        # heatmaps of all combinations
+        #
+        for (x, y) in itertools.combinations(props.keys(), 2):
+            fig = plt.figure()
+            ax = plt.subplot(1, 1, 1)
+
+            try:
+                p = plt.hist2d(props[x], props[y], bins=20)
+                plt.colorbar()
+            except:
+                LOG.warn("Plotting %s (#%d) against %s (#%d) failed" % (
+                    x, len(props[x]), y, len(props[y])))
+
+            ax.set_ylim([0, plt.ylim()[1]])
+            ax.set_xlim([0, plt.xlim()[1]])
+
+            ax.set_xlabel(x)
+            ax.set_ylabel(y)
+            plt.title('%s vs. %s' % (x, y))
+            pp.savefig()
+            plt.close()
+
+
+    # FIXME Put related plots together. See http://blog.marmakoide.org/?p=94"
+
+    pp.close()
+
+
+if __name__ == "__main__":
+    main()
+    LOG.info("Successful program exit")
diff --git a/src/tools/scripts/mutect_alt_allele_in_normal.py b/src/tools/scripts/mutect_alt_allele_in_normal.py
new file mode 100755
index 0000000..81099ca
--- /dev/null
+++ b/src/tools/scripts/mutect_alt_allele_in_normal.py
@@ -0,0 +1,229 @@
+#!/usr/bin/env python
+"""Experimental implementation of Mutect's "observed in control" AKA
+alt_allele_in_normal filter:
+
+From the Cibulskis (2013): Eliminate false positives in the tumor data
+by looking at the control data (typically from the matched normal
+sample) for evidence of the alternate allele beyond what is expected
+from random sequencing error. A candidate is rejected if, in the
+control data, there are (i) >= 2 observations of the alternate allele
+or they represent >= 3% of the reads; and (ii) their sum of quality
+scores is > 20.
+
+Note, this only makes sense if you're working in similar coverage and
+quality ranges.
+
+"""
+
+__author__ = "Andreas Wilm"
+__email__ = "wilma at gis.a-star.edu.sg"
+__copyright__ = "2014 Genome Institute of Singapore"
+__license__ = "The MIT License"
+
+
+#--- standard library imports
+#
+import sys
+import logging
+import os
+import argparse
+import gzip
+
+#--- third-party imports
+#
+import pysam
+import vcf
+
+
+#global logger
+# http://docs.python.org/library/logging.html
+LOG = logging.getLogger("")
+logging.basicConfig(level=logging.WARN,
+                    format='%(levelname)s [%(asctime)s]: %(message)s')
+
+
+
+FILTER_TAG = "alt_allele_in_normal"
+
+
+def cmdline_parser():
+    """Returns an argparse instance
+    """
+
+    # http://docs.python.org/dev/howto/argparse.html
+    parser = argparse.ArgumentParser(description=__doc__)
+
+    parser.add_argument("--verbose",
+                        action="store_true",
+                        help="Be verbose")
+    parser.add_argument("--debug",
+                        action="store_true",
+                        help="Enable debugging")
+    parser.add_argument("-b", "--bam",
+                        required=True,
+                        help="Normal BAM file")
+    parser.add_argument("-v", "--vcfin",
+                        required=True,
+                        help="VCF file containing somatic variant"
+                        " candidates to filter")
+    parser.add_argument("-o", "--vcfout",
+                        default = "-",
+                        help="Output VCF")
+    parser.add_argument("-p", "--pass-only",
+                        action="store_true",
+                        help="Don't print filtered variants")
+
+    return parser
+
+
+def skip_read(r):
+    """Decide whether to skip a read
+
+    Returns True if r.flag has any of the bits 0x4, 0x100, 0x200 or
+    0x400 set (per SAM spec: unmapped, secondary alignment, failed
+    QC, duplicate) and False otherwise.
+
+    FIXME identical copy in lofreq2_bias.py
+    """
+
+    # one combined mask, so a single bit test covers all skip flags
+    skip_mask = 0x4 | 0x100 | 0x200 | 0x400
+    return bool(r.flag & skip_mask)
+
+
+def main():
+    """The main function: tags (or, with --pass-only, drops) variants
+    whose alt allele is also observed in the normal BAM"""
+
+    parser = cmdline_parser()
+    args = parser.parse_args()
+
+    if args.verbose:
+        LOG.setLevel(logging.INFO)
+    if args.debug:
+        LOG.setLevel(logging.DEBUG)
+
+    assert os.path.exists(args.bam), (
+        "BAM file %s does not exist" % args.bam)
+    samfh = pysam.Samfile(args.bam)
+
+    # setup vcf_reader
+    #
+    if args.vcfin[-3:] == '.gz':
+        fh_in = gzip.open(args.vcfin)
+        compressed = True
+    else:
+        compressed = False
+        if args.vcfin == '-':
+            fh_in = sys.stdin
+        else:
+            fh_in = open(args.vcfin)
+    vcf_reader = vcf.VCFReader(fh_in, compressed)
+
+
+    # setup vcf_writer
+    #
+    if args.vcfout == '-':
+        fh_out = sys.stdout
+    else:
+        if os.path.exists(args.vcfout):
+            LOG.fatal("Cowardly refusing to overwrite already existing"
+                      " file %s" % (args.vcfout))
+            sys.exit(1)
+
+        if args.vcfout[-3:] == '.gz':
+            fh_out = gzip.open(args.vcfout, 'w')
+        else:
+            fh_out = open(args.vcfout, 'w')
+
+    # pyvcf needs template as arg to VCFWriter, whereas LoFreq's vcf
+    # clone didn't
+    vcf_writer = vcf.VCFWriter(fh_out, vcf_reader, lineterminator=os.linesep)
+    #vcf_writer = vcf.VCFWriter(fh_out)
+    #vcf_writer.meta_from_reader(vcf_reader)
+    # FIXME should add filter description to header
+
+    for (var_no, var) in enumerate(vcf_reader):
+        if var_no % 500 == 1:
+            LOG.info("Analyzing variant %d" % (var_no))
+
+        if var.INFO.has_key('INDEL'):
+            LOG.warn("Skipping indel %s:%d" % (var.CHROM, var.POS))
+            continue
+        if len(var.REF)>1 or len(var.ALT)>1:
+            LOG.warn("Skipping indel (not tagged as such) %s:%d" % (
+                var.CHROM, var.POS))
+            continue
+
+
+        reads = list(samfh.fetch(reference=var.CHROM,
+                                 start=var.POS-1, end=var.POS))
+        LOG.debug("%s %d: %d (unfiltered) reads covering position" % (
+           var.CHROM, var.POS, len(reads)))
+
+        ref_bquals = []
+        alt_bquals = []
+
+        # FIXME huge code overlap with lofreq2_bias.py
+        for r in reads:
+
+            if skip_read(r):
+                continue
+
+            # determine position on read for variant to then determine
+            # the current base and its basequal
+            #
+            vpos_on_read = [vpos_on_read
+                            for (vpos_on_read, vpos_on_ref) in r.aligned_pairs
+                            if vpos_on_ref==var.POS-1]
+            #if False:
+            #    if len(vpos_on_read)!=1:
+            #        #import pdb; pdb.set_trace()
+            #        from IPython import embed; embed()
+            assert len(vpos_on_read)==1
+            vpos_on_read = vpos_on_read[0]
+            if vpos_on_read is None:# skip deletions
+                continue
+
+            b = r.query[vpos_on_read]
+            bq = ord(r.qqual[vpos_on_read])-33
+
+            assert len(var.REF)==1 and len(var.ALT)==1
+            if b.upper() == var.REF[0].upper():
+                ref_bquals.append(bq)
+            elif b.upper() == str(var.ALT[0]).upper():
+                alt_bquals.append(bq)
+            else:
+                LOG.debug("Skipping non-ref-alt base %s at %s:%d" % (
+                    b, var.CHROM, var.POS))
+                continue
+
+        # " A candidate is rejected if, in the control data, there are
+        # (i) >= 2 observations of the alternate allele or they represent
+        # >= 3% of the reads; and (ii) their sum of quality scores is >
+        # 20."
+        # FIXME set filter var.INFO['AN'] = True
+        print_this_var = True
+        num_alt = len(alt_bquals)
+        num_ref = len(ref_bquals)
+        num_both = num_alt+num_ref
+        if num_both==0:
+            LOG.warn("No alt or ref bases for var %s" % var)
+        else:
+            if (num_alt>=2 or num_alt/float(num_both)>=0.03) and sum(alt_bquals)>20:
+                var.FILTER.append(FILTER_TAG)
+                if args.pass_only:
+                    print_this_var = False
+        if print_this_var:
+            # LoFreq's vcf clone called this write_rec()
+            vcf_writer.write_record(var)
+
+    if fh_in != sys.stdin:
+        fh_in.close()
+    if fh_out != sys.stdout:
+        fh_out.close()
+    samfh.close()
+
+if __name__ == "__main__":
+    main()
+    LOG.info("Successful program exit")
diff --git a/src/tools/setup.py b/src/tools/setup.py
new file mode 100644
index 0000000..f5c6b0d
--- /dev/null
+++ b/src/tools/setup.py
@@ -0,0 +1,56 @@
+from distutils.core import setup
+# see also http://docs.python.org/distutils/setupscript.html
+
+import os
+import sys
+#import subprocess
+
+import setup_conf
+
+DEBUG = False
+#DEBUG = True
+
+
+# checks
+#
+if sys.version_info < (2 , 6):
+    sys.stderr.write("FATAL: sorry, Python versions"
+                     " below 2.6 are not supported\n")
+    sys.exit(1)
+if sys.version_info >= (2 , 8):
+    sys.stderr.write("FATAL: sorry, Python versions"
+                     " above 2.8 are not supported\n")
+    sys.exit(1)
+
+   
+# where modules reside:
+#package_dir = {'': setup_conf.PACKAGE_NAME.lower()}
+#package_dir = {'': ''}
+
+    
+setup(name = setup_conf.PACKAGE_NAME,
+      packages=[setup_conf.PACKAGE_NAME.lower()],
+      version = setup_conf.PACKAGE_VERSION,
+      description="Low frequency variant caller",
+      author="Andreas Wilm",
+      author_email=setup_conf.PACKAGE_BUGREPORT,
+      long_description = """LoFreq-Star is a fast and sensitive variant-caller for inferring single-nucleotide variants (SNVs) from high-throughput sequencing data""",
+      # doesn't seem to work
+      # requires = ['pysam (>=0.7.5)', 'scipy (>=0.12.0)', 'numpy (>=1.7.1)', 'huddel'],
+      #url='https://sourceforge.net/p/lofreq/',
+      scripts = [
+          'scripts/lofreq2_cluster.py',
+          'scripts/lofreq2_vcfplot.py',
+          'scripts/lofreq2_indel_ovlp.py'
+      ],
+      # http://pypi.python.org/pypi?%3Aaction=list_classifiers
+      classifiers=['Environment :: Console',
+                   'Intended Audience :: Science/Research',
+                   'Natural Language :: English',
+                   'Operating System :: Unix',
+                   'Programming Language :: C',
+                   'Programming Language :: Python :: 2.7',
+                   'Topic :: Scientific/Engineering :: Bio-Informatics',
+                   ],
+      keywords='bioinformatics'
+      )
diff --git a/src/tools/setup_conf.py.README b/src/tools/setup_conf.py.README
new file mode 100644
index 0000000..0c8acc2
--- /dev/null
+++ b/src/tools/setup_conf.py.README
@@ -0,0 +1,2 @@
+Automatically generated from setup_conf.py.in by autotools.
+Any changes made here will be overwritten!
diff --git a/src/tools/setup_conf.py.in b/src/tools/setup_conf.py.in
new file mode 100644
index 0000000..2e35b5c
--- /dev/null
+++ b/src/tools/setup_conf.py.in
@@ -0,0 +1,6 @@
+# automagically set by autotools
+PACKAGE_NAME="@PACKAGE_NAME@"
+PACKAGE_TARNAME="@PACKAGE_TARNAME@"
+PACKAGE_VERSION="@PACKAGE_VERSION@"
+PACKAGE_STRING="@PACKAGE_STRING@"
+PACKAGE_BUGREPORT="@PACKAGE_BUGREPORT@"
diff --git a/tests/.gitignore b/tests/.gitignore
new file mode 100644
index 0000000..1269488
--- /dev/null
+++ b/tests/.gitignore
@@ -0,0 +1 @@
+data
diff --git a/tests/af_tests.sh b/tests/af_tests.sh
new file mode 100755
index 0000000..216fecd
--- /dev/null
+++ b/tests/af_tests.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# Test that LoFreq calls the expected indels/SNVs with the expected AFs
+
+source lib.sh || exit 1
+
+KEEP_TMP=0
+REF=./data/af_tests/ref_fasta.fa
+outdir=$(mktemp -d -t "$(basename "$0").XXXXXX") || exit 1
+
+# See ./data/af_tests/README for expected results
+failed=0
+
+echowarn "Only works with AQ off"
+
+
+# del test
+bam=./data/af_tests/test_deletions.bam
+log=$outdir/del_log.txt
+vcf=$outdir/del_out.vcf
+cmd="$LOFREQ call --call-indels --no-default-filter -A -B -f $REF -o $vcf $bam"
+#echodebug "cmd=$cmd"
+if ! eval "$cmd" > "$log" 2>&1; then
+    echoerror "LoFreq failed. Check logfile $log. Command was $cmd"
+    exit 1
+fi
+if ! awk '{if ($2=="1" && $4=="ACG" && $5=="A" && $8 ~ /AF=0.5/) {m=1; exit 0}} END {if (m) {exit 0} else {exit 1}}' "$vcf"; then
+    echoerror "Expected deletion of AF=0.5 not found in $vcf"
+    failed=$((failed+1))
+fi
+if ! awk '{if ($2=="1" && $4=="A" && $5=="T" && $8 ~ /AF=1.0/) {m=1; exit 0}} END {if (m) {exit 0} else {exit 1}}' "$vcf"; then
+    echoerror "Expected SNV of AF=1.0 not found in $vcf"
+    failed=$((failed+1))
+fi
+
+# ins test
+bam=./data/af_tests/test_insertion.bam
+log=$outdir/ins_log.txt
+vcf=$outdir/ins_out.vcf
+cmd="$LOFREQ call --call-indels --no-default-filter -a 0.5 -B -A -f $REF -o $vcf $bam"
+#echodebug "cmd=$cmd"
+if ! eval "$cmd" > "$log" 2>&1; then
+    echoerror "LoFreq failed. Check logfile $log. Command was $cmd"
+    exit 1
+fi
+if ! awk '{if ($2=="2" && $4=="C" && $5=="CAA" && $8 ~ /AF=0.5/) {m=1; exit 0}} END {if (m) {exit 0} else {exit 1}}' "$vcf"; then
+    echoerror "Expected insertion of AF=0.5 not found in $vcf"
+    failed=$((failed+1))
+fi
+if ! awk '{if ($2=="2" && $4=="C" && $5=="G" && $8 ~ /AF=0.25/) {m=1; exit 0}} END {if (m) {exit 0} else {exit 1}}' "$vcf"; then
+    echoerror "Expected SNV of AF=0.25 not found in $vcf"
+    failed=$((failed+1))
+fi
+
+# FIXME check output
+if [ "$KEEP_TMP" -ne 1 ] && [ "$failed" -eq 0 ]; then
+   test -d "$outdir" && rm -rf -- "$outdir"
+fi
+# report failed checks to the caller via the exit status
+[ "$failed" -eq 0 ]
diff --git a/tests/alnqual.sh.FIXME b/tests/alnqual.sh.FIXME
new file mode 100644
index 0000000..ce0211c
--- /dev/null
+++ b/tests/alnqual.sh.FIXME
@@ -0,0 +1,2 @@
+A read with I or D operations in its CIGAR should get baq and a[id] tags;
+a read without them shouldn't.
\ No newline at end of file
diff --git a/tests/bamstats.sh.FIXME b/tests/bamstats.sh.FIXME
new file mode 100644
index 0000000..fd4fb16
--- /dev/null
+++ b/tests/bamstats.sh.FIXME
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Test that 'lofreq bamstats' output is identical to the stored expected output
+
+source lib.sh || exit 1
+
+
+basedir=data/bamstats
+bam=$basedir/bamstats.bam
+reffa=$basedir/bamstats.fa
+truebamstats=$basedir/bamstats.expected.bamstats
+
+outdir=$(mktemp -d -t $(basename $0).XXXXXX)
+outbamstats=$outdir/bamstats.txt
+log=$outdir/log.txt
+
+KEEP_TMP=0
+
+cmd="$LOFREQ bamstats -f $reffa  -o $outbamstats $bam"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+if ! diff -q $outbamstats $truebamstats; then
+    echoerror "Output differs from expected output ($outbamstats differs from $truebamstats)"
+    exit 1
+else
+    echook "Got expected output"
+fi
+
+
+
+if [ $KEEP_TMP -eq 1 ]; then
+    echowarn "Not deleting tmp dir $outdir"
+else 
+    rm  $outdir/*
+    rmdir $outdir
+fi
+
diff --git a/tests/baq-calls-less-than-nobaq.sh b/tests/baq-calls-less-than-nobaq.sh
new file mode 100755
index 0000000..60bd583
--- /dev/null
+++ b/tests/baq-calls-less-than-nobaq.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+# Test that switching BAQ off (-B) yields additional SNVs compared to a run with BAQ on
+
+source lib.sh || exit 1
+
+
+basedir=data/denv2-pseudoclonal
+bam=$basedir/denv2-pseudoclonal.bam
+reffa=$basedir/denv2-pseudoclonal_cons.fa
+bed=$basedir/denv2-pseudoclonal_incl.bed
+#truesnv=$basedir/denv2-pseudoclonal_true-snp.vcf
+
+outdir=$(mktemp -d -t $(basename $0).XXXXXX)
+outraw_nobaq=$outdir/raw_nobaq.vcf
+outraw_baq=$outdir/raw_baq.vcf.gz
+log=$outdir/log.txt
+
+KEEP_TMP=0
+
+cmd="$LOFREQ call -B -f $reffa -l $bed -o $outraw_nobaq $bam"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+cmd="$LOFREQ call -f $reffa -l $bed -o $outraw_baq $bam"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+
+ndiff=$($LOFREQ vcfset -a complement -1 $outraw_nobaq -2 $outraw_baq  | grep -c '^[^#]')
+if [ $ndiff -lt 1 ]; then
+    echoerror "Expected more SNVs with BAQ switched off (check $outraw_nobaq and $outraw_baq)"
+    exit 1
+else
+    echook "Got $ndiff more SNVs if BAQ is off"
+fi
+
+
+
+if [ $KEEP_TMP -eq 1 ]; then
+    echowarn "Not deleting tmp dir $outdir"
+else 
+    rm  $outdir/*
+    rmdir $outdir
+fi
+
diff --git a/tests/bed.sh b/tests/bed.sh
new file mode 100755
index 0000000..cf88954
--- /dev/null
+++ b/tests/bed.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+source lib.sh || exit 1
+
+
+python -c 'import sys; sys.path.insert(0, "../src/scripts/");import lofreq2_call_pparallel; print "\n".join([str(x) for x in lofreq2_call_pparallel.read_bed_coords("data/reg.bed")])' > /dev/null
+if [ $? -eq 0 ]; then
+	echook "bed reading function works"
+else
+	echoerror "bed reading function works"
+	exit 1
+fi
+
diff --git a/tests/bgzf_getline.supp b/tests/bgzf_getline.supp
new file mode 100644
index 0000000..567f1fd
--- /dev/null
+++ b/tests/bgzf_getline.supp
@@ -0,0 +1,22 @@
+{
+   htslib bgzf_getline leak realloc and malloc
+   Memcheck:Leak
+   fun:malloc
+   fun:realloc
+   fun:bgzf_getline
+   fun:tbx_readrec
+   fun:hts_itr_next
+   fun:main_vcfset
+   fun:main
+}
+
+{
+   htslib bgzf_getline leak realloc only
+   Memcheck:Leak
+   fun:realloc
+   fun:bgzf_getline
+   fun:tbx_readrec
+   fun:hts_itr_next
+   fun:main_vcfset
+   fun:main
+}
diff --git a/tests/binom_vs_poisson.FIXME b/tests/binom_vs_poisson.FIXME
new file mode 100644
index 0000000..0503ca5
--- /dev/null
+++ b/tests/binom_vs_poisson.FIXME
@@ -0,0 +1,3 @@
+binom_sf should be the same as poissbin
+Is that true only for small numbers?
+See pseudo_binomial() in snpcaller.c
diff --git a/tests/bonf_auto_vs_dyn.sh b/tests/bonf_auto_vs_dyn.sh
new file mode 100755
index 0000000..17200d7
--- /dev/null
+++ b/tests/bonf_auto_vs_dyn.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+# Call SNVs on a BAM file with full coverage and change bonf settings.
+# Different settings (dynamic and hard-coded; auto is disabled below)
+# should give identical results here.
+
+source lib.sh || exit 1
+
+basedir=data/denv2-pseudoclonal
+bed=$basedir/denv2-pseudoclonal_incl.bed
+bam=$basedir/denv2-pseudoclonal.bam
+reffa=$basedir/denv2-pseudoclonal_cons.fa
+
+outdir=$(mktemp -d -t $(basename $0).XXXXXX)
+# not supported anymore out_auto=$outdir/snv_auto.vcf
+out_dynamic=$outdir/snv_dynamic.vcf.gz
+# bed_len.sh $bed;# = 9909 * 3 = 29727
+out_29727=$outdir/snv_29727.vcf.gz
+log=$outdir/log.txt
+
+KEEP_TMP=0
+
+#cmd="$LOFREQ call -l $bed -b auto -f $reffa -o $out_auto $bam"
+#if ! eval $cmd >> $log 2>&1; then
+#    echoerror "The following command failed (see $log for more): $cmd"
+#    exit 1
+#fi
+
+cmd="$LOFREQ call -l $bed -b dynamic -f $reffa -o $out_dynamic $bam"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+cmd="$LOFREQ call -l $bed -b 29727 -f $reffa -o $out_29727 $bam"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+#echodebug "All calls done. No checking results"
+
+# make sure we got at least some snvs
+# 
+#if [ $(grep -c '^[^#]' $out_auto) -eq 0 ]; then
+#    echoerror "No SNVs predicted"
+#    exit 1
+#fi
+
+#echodebug "out_auto=$out_auto"
+#echodebug "out_dynamic=$out_dynamic"
+#echodebug "out_29727=$out_29727"
+
+#ndiff=$($LOFREQ vcfset -a complement -1 $out_auto -2 $out_dynamic 2>>$log | grep -c '^[^#]')
+#if [ $ndiff -ne 0 ]; then
+#    echoerror "Found differences between bonf auto and bonf dynamic outputs"
+#    exit 1
+#fi
+#ndiff=$($LOFREQ vcfset -a complement -2 $out_dynamic -1 $out_auto 2>>$log | grep -c '^[^#]')
+#if [ $ndiff -ne 0 ]; then
+#    echoerror "Found differences between bonf auto and bonf dynamic outputs"
+#    exit 1
+#fi
+
+#ndiff=$($LOFREQ vcfset -a complement -1 $out_auto -2 $out_29727 2>>$log | grep -c '^[^#]')
+#if [ $ndiff -ne 0 ]; then
+#    echoerror "Found differences between bonf auto and bonf 29727 outputs"
+#    exit 1
+#fi
+
+ndiff=$($LOFREQ vcfset -a complement -2 $out_29727 -1 $out_dynamic 2>>$log | grep -c '^[^#]')
+if [ $ndiff -ne 0 ]; then
+    echoerror "Found differences between bonf dynamic and bonf 29727 outputs"
+    exit 1
+fi
+
+echook "Tests passed"
+
+if [ $KEEP_TMP -eq 1 ]; then
+    echowarn "Not deleting tmp dir $outdir"
+else 
+    rm  $outdir/*
+    rmdir $outdir
+fi
diff --git a/tests/consvar_noqual_filter.sh b/tests/consvar_noqual_filter.sh
new file mode 100755
index 0000000..e9b1250
--- /dev/null
+++ b/tests/consvar_noqual_filter.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# FIXME:add-doc
+
+source lib.sh || exit 1
+
+consvar_vcf=data/vcf/consvar_only.vcf.gz
+# number of variants before and after applying the SNV quality threshold
+n_before=$(zgrep -vc '^#' $consvar_vcf)
+n_after=$($LOFREQ filter --snvqual-thresh 1 --no-defaults -i $consvar_vcf | grep -vc '^#')
+if [ $n_before -ne $n_after ]; then
+    echoerror "Some CONSVARs were filtered by snvqual-thresh."
+else
+    echook "CONSVARs untouched by snvqual-thresh filtering"
+fi
+
diff --git a/tests/denv2-pseudoclonal-source-qual.sh b/tests/denv2-pseudoclonal-source-qual.sh
new file mode 100755
index 0000000..c1aa057
--- /dev/null
+++ b/tests/denv2-pseudoclonal-source-qual.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+# Test that we get the number of expected SNVs on the pseudo-clonal data-set
+
+source lib.sh || exit 1
+
+
+basedir=data/denv2-pseudoclonal
+bam=$basedir/denv2-pseudoclonal.bam
+reffa=$basedir/denv2-pseudoclonal_cons.fa
+bed=$basedir/denv2-pseudoclonal_incl.bed
+truesnv=$basedir/denv2-pseudoclonal_true-snp.vcf.gz
+
+outdir=$(mktemp -d -t $(basename $0).XXXXXX)
+outraw_def=$outdir/raw_def.vcf
+outfinal_def=$outdir/final_def.vcf
+log=$outdir/log.txt
+
+KEEP_TMP=0
+
+cmd="$LOFREQ call -b dynamic -f $reffa -l $bed -o $outraw_def -s -S $truesnv $bam"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+cmd="$LOFREQ filter -i $outraw_def -o $outfinal_def"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+
+ndiff=$($LOFREQ vcfset -a complement -1 $outfinal_def -2 $truesnv  | grep -c '^[^#]')
+if [ $ndiff -ne 0 ]; then
+    echoerror "Found FP SNVs (not part of the list of true SNVs)"
+    exit 1
+fi
+
+ndiff=$($LOFREQ vcfset -a intersect -1 $outfinal_def -2 $truesnv  | grep -c '^[^#]')
+#nexp=229
+nexp=219;# FIXME not sure if this is the exact number but this is what I saw first running src qual on this data-set
+if [ $ndiff -lt $nexp ]; then
+    echoerror "Expected $nexp TP SNVs but got $ndiff"
+    exit 1
+fi
+
+
+echook "Tests passed"
+
+if [ $KEEP_TMP -eq 1 ]; then
+    echowarn "Not deleting tmp dir $outdir"
+else 
+    rm  $outdir/*
+    rmdir $outdir
+fi
+
diff --git a/tests/denv2-pseudoclonal.sh b/tests/denv2-pseudoclonal.sh
new file mode 100755
index 0000000..5e33611
--- /dev/null
+++ b/tests/denv2-pseudoclonal.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Test that we get the number of expected SNVs on the pseudo-clonal data-set
+
+source lib.sh || exit 1
+
+
+basedir=data/denv2-pseudoclonal
+bam=$basedir/denv2-pseudoclonal.bam
+reffa=$basedir/denv2-pseudoclonal_cons.fa
+bed=$basedir/denv2-pseudoclonal_incl.bed
+truesnv=$basedir/denv2-pseudoclonal_true-snp.vcf.gz
+
+outdir=$(mktemp -d -t $(basename $0).XXXXXX)
+outraw_def=$outdir/raw_def.vcf
+outfinal_def=$outdir/final_def.vcf
+log=$outdir/log.txt
+
+KEEP_TMP=0
+
+cmd="$LOFREQ call -b dynamic -f $reffa -l $bed -o $outraw_def $bam"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+cmd="$LOFREQ filter -i $outraw_def -o $outfinal_def"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+
+ndiff=$($LOFREQ vcfset -a complement -1 $outfinal_def -2 $truesnv  | grep -c '^[^#]')
+if [ $ndiff -ne 0 ]; then
+    echoerror "Found FP SNVs (not part of the list of true SNVs). Check $outdir"
+    exit 1
+fi
+
+ndiff=$($LOFREQ vcfset -a intersect -1 $outfinal_def -2 $truesnv  | grep -c '^[^#]')
+nexp=229
+if [ $ndiff -lt $nexp ]; then
+    echoerror "Expected $nexp TP SNVs but got $ndiff. Check $outdir"
+    exit 1
+fi
+
+
+echook "Tests passed"
+
+if [ $KEEP_TMP -eq 1 ]; then
+    echowarn "Not deleting tmp dir $outdir"
+else 
+    rm  $outdir/*
+    rmdir $outdir
+fi
+
diff --git a/tests/denv2-simulation.sh b/tests/denv2-simulation.sh
new file mode 100755
index 0000000..e38d078
--- /dev/null
+++ b/tests/denv2-simulation.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+
+# Call SNVs on simulated data and make sure we got the expected number
+# of SNVs
+
+source lib.sh || exit 1
+
+basedir=data/denv2-simulation
+bam=$basedir/denv2-10haplo.bam
+reffa=$basedir/denv2-refseq.fa
+truesnv=$basedir/denv2-10haplo_true-snp.vcf.gz
+# samtools mpileup $bam | wc -l;# *3
+bonf=32169
+
+outdir=$(mktemp -d -t $(basename $0).XXXXXX)
+outraw_def=$outdir/raw_def.vcf
+outfinal_def=$outdir/final_def.vcf.gz;# bgzip for complement
+outraw_nomq=$outdir/raw_nomq.vcf
+outfinal_nomq=$outdir/final_nomq.vcf.gz;# bgzip for complement
+log=$outdir/log.txt
+
+KEEP_TMP=0
+if [ $KEEP_TMP -eq 1 ]; then
+    echowarn "Keeping tmp dir $outdir"
+fi
+
+cmd="$LOFREQ call -B -b $bonf -f $reffa -o $outraw_def $bam"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+cmd="$LOFREQ filter -i $outraw_def -o $outfinal_def"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+
+cmd="$LOFREQ call -B -b $bonf -f $reffa -o $outraw_nomq -N $bam"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+cmd="$LOFREQ filter -i $outraw_nomq -o $outfinal_nomq"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+
+#nexp=$(grep -v -c '^#' $truesnv)
+#nfinal_def=$(grep -v -c '^#' $outfinal_def)
+#nfinal_nomq=$(grep -v -c '^#' $outfinal_nomq)
+#echodebug "nexp=$nexp nfinal_def=$nfinal_def $nfinal_nomq=$nfinal_nomq"
+
+
+ndiff=$($LOFREQ vcfset -a complement -1 $outfinal_def -2 $truesnv  | grep -c '^[^#]')
+if [ $ndiff -ne 0 ]; then
+    echoerror "Found extra SNVs in default predictions, which are not part of the list of true SNVs"
+    exit 1
+fi
+ndiff=$($LOFREQ vcfset -a complement -2 $outfinal_def -1 $truesnv  | grep -c '^[^#]')
+nexp=15
+# BAQ on: 19
+# BAQ off: 15
+if [ $ndiff -ne $nexp ]; then
+    echoerror "Expected $nexp missing SNVs in default predictions but got $ndiff"
+    exit 1
+fi
+
+
+
+ndiff=$($LOFREQ vcfset -a complement -1 $outfinal_nomq -2 $truesnv  | grep -c '^[^#]')
+if [ $ndiff -ne 0 ]; then
+    echoerror "Found extra SNVs in no-mq predictions, which are not part of the list of true SNVs"
+    exit 1
+fi
+ndiff=$($LOFREQ vcfset -a complement -2 $outfinal_nomq -1 $truesnv  | grep -c '^[^#]')
+nexp=11
+# BAQ on: 14
+# BAQ off: 11
+if [ $ndiff -ne $nexp ]; then
+    echoerror "Expected $nexp missing SNVs in no-mq predictions but got $ndiff"
+    exit 1
+fi
+
+
+# FIXME outfinal should not look different, i.e. filtering shouldn't do much/anything.
+# see /home/wilma/snpcaller/lofreq/lofreq-sourceforge.git/tests/denv2-simulation.sh 
+
+echook "Tests passed"
+
+if [ $KEEP_TMP -ne 1 ]; then
+    rm  $outdir/*
+    rmdir $outdir
+fi
+
diff --git a/tests/denv2-validation.sh b/tests/denv2-validation.sh
new file mode 100755
index 0000000..51fce90
--- /dev/null
+++ b/tests/denv2-validation.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Test that known-present SNV positions show up (and known-absent ones don't) in the intersection of calls on two BAMs
+
+source lib.sh || exit 1
+
+basedir=./data/denv2-dpcr-validated
+bam1=$basedir/CTTGTA_2_remap_razers-i92_peakrem_corr.bam
+bam2=$basedir/GGCTAC_2_remap_razers-i92_peakrem_corr.bam
+reffa=$basedir/consensus.fa
+
+outdir=$(mktemp -d -t $(basename $0).XXXXXX)
+vcfout1=$outdir/$(basename $bam1 .bam).vcf
+vcfout2=$outdir/$(basename $bam2 .bam).vcf.gz;# 2nd file has to be bgzipped for vcfset to work
+vcfinter=$outdir/intersection.vcf
+
+log=$outdir/log.txt
+
+KEEP_TMP=0
+
+# true var 1687 in $bam1 has Q62 which becomes 2% after bonf correction
+# i.e. default -a 0.01 swallows it
+
+cmd="$LOFREQ call -a 0.05 -B -f $reffa -o $vcfout1 $bam1"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+cmd="$LOFREQ call -a 0.05 -B -f $reffa -o $vcfout2 $bam2"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+cmd="$LOFREQ vcfset -a intersect -1 $vcfout1 -2 $vcfout2 -o $vcfinter"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+# anchor each position with a trailing non-digit so that e.g. 598 does not also match 5980
+N_PRESENT=7
+n_present=$(for pos in 5914 6843 598 5025 1687 9941 4828; do grep "^consensus[^0-9]*${pos}[^0-9]" $vcfinter; done | wc -l)
+N_ABSENT=0
+n_absent=$(for pos in 7035 7404; do grep "^consensus[^0-9]*${pos}[^0-9]" $vcfinter; done | wc -l)
+
+if [ $n_present -ne $N_PRESENT ]; then
+    echoerror "Expected $N_PRESENT but got $n_present SNVs (see $outdir)"
+    exit 1
+fi
+
+if [ $n_absent -ne $N_ABSENT ]; then
+    echoerror "Expected $N_ABSENT but got $n_absent SNVs (see $outdir)"
+    exit 1
+fi
+
+echook "Got expected number of present/absent SNVs"
+
+if [ $KEEP_TMP -eq 1 ]; then
+    echowarn "Not deleting tmp dir $outdir"
+else 
+    rm  $outdir/*
+    rmdir $outdir
+fi
+
diff --git a/tests/diff_opts_same_out.sh.OLD b/tests/diff_opts_same_out.sh.OLD
new file mode 100755
index 0000000..b5ce5b1
--- /dev/null
+++ b/tests/diff_opts_same_out.sh.OLD
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+# Test to make sure that results computed in parallel are identical to
+# the ones computed without the parallel option and that reading from
+# stdin also gives the same result
+
+source lib.sh || exit 1
+
+
+echowarn "Better use a less high coverage data-set for faster completion"
+indir=../../lofreq-test-data/denv2-pseudoclonal/
+bam=$indir/denv2-pseudoclonal.bam
+ref=$indir/denv2-pseudoclonal_cons.fa
+bed=$indir/denv2-pseudoclonal_incl.fake.bed
+
+
+CMD[1]="$LOFREQ call -l $bed -f $ref --verbose $bam"
+CMD[2]="$LOFREQ call -l $bed -f $ref --verbose --pseudo-parallel 4 $bam"
+CMD[3]="cat $bam | $LOFREQ call -f $ref -l $bed --verbose -"
+# cannot: CMD[4]="cat $bam | $LOFREQ call -f $ref -l $bed --verbose --pseudo-parallel 4 -"
+for i in $($seq 1 ${#CMD[@]}); do
+    cmd=${CMD[$i]}
+    out=$(mktemp -t $(basename $0).XXXXXX.vcf)
+    log=$(mktemp -t $(basename $0).XXXXXX.log)
+
+    #echodebug "Executing $cmd with output going to $out and $log"
+    # remove source line from vcf which will change depending on call
+    if ! eval $cmd 2>$log | grep -v 'source' >$out ; then
+        echoerror "Executing following command failed (see $log for more info): $cmd"
+        exit 1
+    fi
+
+    # make sure we predicted at least one snv. if output is always
+    # empty tests would be successful otherwise
+    if ! grep -q DP4 $out; then
+        echoerror "No SNVs in output file $out found"
+        exit 1
+    fi
+
+    # compare to output of previous cmd
+    if [ -n "$prevout" ]; then
+        if ! diff -q $out $prevout; then
+            echoerror "Results between runs differed. Commands were:"
+            echoerror " Current cmd:  $cmd"
+            echoerror " Current out:  $out"
+            echoerror " Previous cmd: $prevcmd"
+            echoerror " Previous out: $prevout"
+            exit 1
+        fi
+    fi
+    
+    prevcmd=$cmd
+    test -s "$prevlog" && rm $prevlog
+    test -s "$prevout" && rm $prevout
+    prevlog=$log
+    prevout=$out
+done
+test -s "$prevlog" && rm $prevlog
+test -s "$prevout" && rm $prevout
+
+
+
+
+
+
diff --git a/tests/doctest.sh b/tests/doctest.sh
new file mode 100755
index 0000000..ef07f60
--- /dev/null
+++ b/tests/doctest.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+myname=$(basename $0)
+
+source lib.sh || exit 1
+
+PY_DIRS="../src/tools/lofreq_star"
+files=$(find $PY_DIRS -name \*py -not -name _\*)
+for f in $files; do
+    echo "$myname: testing $f"
+    python $f || echoerror "testing $f failed"
+done
+
+for f in $files; do
+    echo "$myname: testing $f"
+	python -m doctest $f || echoerror "testing $f failed"
+done
diff --git a/tests/ecoli-clone_incl_parallel.sh b/tests/ecoli-clone_incl_parallel.sh
new file mode 100755
index 0000000..0efb5ce
--- /dev/null
+++ b/tests/ecoli-clone_incl_parallel.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Test that we get the number of expected SNVs on a clonal data-set
+# and also check whether running it in parallel (single genome!) works
+# and produces same results
+
+source lib.sh || exit 1
+
+
+basedir=data/ecoli-clone/
+bam=$basedir/clone/EAS20_8.bwamem_pe.viterbi.mdups.realn.recal.bam
+reffa=$basedir/ref/Ecoli_K12_MG1655_NC_000913.fa
+#truesnv=$basedir/denv2-pseudoclonal_true-snp.vcf
+
+outdir=$(mktemp -d -t $(basename $0).XXXXXX)
+outvcf_p=$outdir/$(basename $bam .bam)_parallel.vcf.gz
+outvcf_s=$outdir/$(basename $bam .bam)_single.vcf.gz
+log=$outdir/log.txt
+
+KEEP_TMP=0
+
+cmd="$LOFREQ call-parallel --pp-threads $threads -f $reffa -o $outvcf_p $bam"
+#echodebug "cmd=$cmd"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+
+#MAX_SNVS=20
+MAX_SNVS=50
+
+
+# count the SNVs of the parallel run; its output file is compressed (.vcf.gz),
+# so zgrep is needed to count VCF records instead of lines of compressed data
+nsnvs=$(zgrep -c '^[^#]' $outvcf_p)
+if [ $nsnvs -ge $MAX_SNVS ]; then
+    echoerror "Expected less then $MAX_SNVS on this clonal dataset but got $nsnvs (see $outdir)"
+    exit 1
+else
+    echook "Got $nsnvs SNVs for this clonal dataset which is okay (below limit of $MAX_SNVS)"
+fi
+
+
+
+# run single and compare results
+# 
+cmd="$LOFREQ call -f $reffa -o $outvcf_s $bam"
+#echodebug "cmd=$cmd"
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+
+nus=$($LOFREQ vcfset -a complement -1 $outvcf_s -2 $outvcf_p --count-only)
+nup=$($LOFREQ vcfset -a complement -2 $outvcf_s -1 $outvcf_p --count-only)
+# allowing one border line difference
+if [ $nus -gt 1 ] || [ $nup -gt 1 ]; then
+    echoerror "Observed differences between parallel ($nup unique vars) and single ($nus unique vars) results. Check $outvcf_p and $outvcf_s"
+    exit 1
+fi                                    
+
+
+if [ $KEEP_TMP -eq 1 ]; then
+    echowarn "Not deleting tmp dir $outdir"
+else 
+    rm $outdir/*
+    rmdir $outdir
+fi
diff --git a/tests/ecoli_spikein.sh b/tests/ecoli_spikein.sh
new file mode 100755
index 0000000..e8db6f2
--- /dev/null
+++ b/tests/ecoli_spikein.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+source lib.sh || exit 1
+
+KEEP_TMP=1
+BASEDIR=data/ecoli-clone/
+BAM=$BASEDIR/spike-in/spike-in.bam
+REF=$BASEDIR/ref/Ecoli_K12_MG1655_NC_000913.fa
+TRUTH=$BASEDIR/spike-in/truth.laln.vcf.gz
+EVALUATOR=data/icgc-tcga-dream-support/evaluator.py
+
+for f in $BAM $REF $TRUTH $EVALUATOR; do
+  if [ ! -e $f ]; then
+    echoerror "Required file $f missing"
+    exit 1
+  fi
+done
+
+ 
+outdir=$(mktemp -d -t $(basename $0).XXXXXX)
+log=$outdir/log.txt
+outvcf=$outdir/out.vcf
+num_err=0
+
+cmd="$LOFREQ call-parallel --pp-threads 8 -f $REF -o $outvcf --verbose $BAM"
+# only needed as long as indels are disabled by default
+cmd="$cmd --call-indels"
+echodebug "cmd=$cmd"
+if ! eval $cmd > $log 2>&1; then
+    echoerror "LoFreq failed. Check logfile $log. Command was $cmd"
+    exit 1
+fi
+
+
+# This data set was created by running bamsurgeon addsnv first, followed by addindel.
+# Since variants were dense, bamsurgeon (naively) replaced variants already inserted
+# in the first step, which is why recall is low.
+# Thresholds were taken from the results of v2.1.2a-87-g2d53817-dirty, -1%
+res_ll=$($EVALUATOR -v $outvcf -t $TRUTH -m SNV | awk 'END {print $NF}') || exit 1
+res=$(echo $res_ll | \
+  awk -F, '{prec=$1; rec=$2; if (prec<0.945 || rec<0.664) {status="ERROR"} else {status="OK"} printf "%s: precision=%f recall=%f\n", status, prec, rec}') || exit 1
+if echo $res | grep -q ERROR; then
+   let num_err=num_err+1
+fi
+echo "snvs: $res" 1>&2
+
+
+# based on results for v2.1.2a-69-g5bd5919 -1%
+res_ll=$($EVALUATOR -f $REF -v $outvcf -t $TRUTH -m INDEL | awk 'END {print $NF}') || exit 1
+res=$(echo $res_ll | \
+  awk -F, '{prec=$1; rec=$2; if (prec<0.956 || rec<0.917) {status="ERROR"} else {status="OK"} printf "%s: precision=%f recall=%f\n", status, prec, rec}') || exit 1
+if echo $res | grep -q ERROR; then
+   let num_err=num_err+1
+fi
+echo "indels: $res" 1>&2
+
+
+
+if [ $KEEP_TMP -ne 1 ] && [ $num_err -eq 0 ]; then
+    test -d $outdir && rm -rf $outdir
+else
+    echowarn "Not deleting temporary output directory $outdir"
+fi
+if [ $num_err -ne 0 ]; then
+    exit 1
+fi
diff --git a/tests/exome_in_silico.sh.FIXME b/tests/exome_in_silico.sh.FIXME
new file mode 100644
index 0000000..e69de29
diff --git a/tests/faidx_fetch_seq.supp b/tests/faidx_fetch_seq.supp
new file mode 100644
index 0000000..f197b27
--- /dev/null
+++ b/tests/faidx_fetch_seq.supp
@@ -0,0 +1,12 @@
+{
+  faidx_fetch_seq leak
+  Memcheck:Leak
+  fun:malloc
+  fun:faidx_fetch_seq
+  fun:mplp_func
+  fun:bam_plp_auto
+  fun:bam_mplp_auto
+  fun:mpileup
+  fun:main_call
+  fun:main
+}
diff --git a/tests/fdr.sh b/tests/fdr.sh
new file mode 100755
index 0000000..9fcdb69
--- /dev/null
+++ b/tests/fdr.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+source lib.sh || exit 1
+
+VCF=data/vcf/fdr.vcf
+# 25 simulated variants extracted from mq-demo.
+# converted probabilities given in fdr example in 
+# http://www.biostathandbook.com/multiplecomparisons.html
+# (see also multtest.c) to qualities and replace original values.
+
+
+# expecting the following result, which mimics the one in the link above and in multtest.c
+# expecting 5 significant results
+NEXP=5
+nres=$(cat $VCF  | $LOFREQ filter --no-defaults -q fdr -r 0.25 -i - | grep -vc '^#')
+if [ $nres -ne $NEXP ]; then
+    echoerror "FDR filtering not producing expected results (got $nres instead of $NEXP)"
+    exit 1
+fi
+
+# even after capping and setting #tests
+nres=$(head -n 11 $VCF  | $LOFREQ filter --no-defaults -q fdr -r 0.25 -s 25 -i - | grep -vc '^#')
+if [ $nres -ne $NEXP ]; then
+    echoerror "FDR filtering after capping not producing expected results (got $nres instead of $NEXP)"
+    exit 1
+fi
+echook "FDR filtering produced expected results"
diff --git a/tests/filter.sh b/tests/filter.sh
new file mode 100755
index 0000000..5c58f0f
--- /dev/null
+++ b/tests/filter.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+
+# Test that the number of variants tagged by 'lofreq filter' changes monotonically with alpha and number of tests
+
+source lib.sh || exit 1
+
+
+vcf=data/vcf/CTTGTA_2_remap_razers-i92_peakrem_corr_nodeff.vcf.gz
+#outvcf=$(mktemp -t $(basename $0).XXXXXX)
+
+# FIXME base_cmd="$LOFREQ filter -i $vcf --no-defaults -o -"
+#base_cmd="../src/lofreq/lofreq_filter -i $vcf -o -"
+base_cmd="$LOFREQ filter -i $vcf -o -"
+
+ALPHA_LIST='0.01 0.0001 0.000001 0.00000001'
+NUMTEST_LIST='100 10000 1000000'
+
+
+
+# snv quality with varying alpha
+#
+num_fail=0
+for cor in "bonf" "holm-bonf" "fdr"; do
+    last_no=0
+    for a in $ALPHA_LIST; do
+        #cmd="$base_cmd --snv-qual $cor --snv-qual-alpha $a"
+        cmd="$base_cmd --snvqual-mtc $cor --snvqual-alpha $a"
+        #echodebug "cmd=$cmd"
+        new_no=$(eval $cmd | grep -c 'snvqual.*\(bonf\|fdr\)') || exit 1
+        #echodebug "$cor a=$a: new_no=$new_no last_no=$last_no";# cmd = $cmd"
+        if [ $new_no -lt $last_no ]; then
+            echoerror "snvqual: Got fewer SNVs when filtering with higher alpha (cmd=$cmd)"
+            let num_fail=num_fail+1
+        fi
+        last_no=$new_no
+    done
+done
+if [ $num_fail -eq 0 ]; then
+    echook "snvqual (var alpha): all tests passed"
+else
+    echoerror "snvqual (var alpha): $num_fail tests failed"
+fi
+
+
+# snv quality with varying num_tests
+#
+# fixed alpha
+num_fail=0
+a=0.00000001
+for cor in "bonf" "holm-bonf" "fdr"; do
+    last_no=0
+    for n in $NUMTEST_LIST; do
+        #cmd="$base_cmd --snv-qual $cor --snv-qual-alpha $a --snv-qual-numtests $n"
+        cmd="$base_cmd --snvqual-mtc $cor --snvqual-alpha $a --snvqual-ntests $n"
+        #echodebug "cmd=$cmd"
+        new_no=$(eval $cmd | grep -c 'snvqual.*\(bonf\|fdr\)')
+        #echodebug "$cor a=$a n=$n: new_no=$new_no last_no=$last_no";# cmd = $cmd"
+        if [ $new_no -lt $last_no ]; then
+            echoerror "snvqual: Got fewer SNVs when filtering with higher num-tests (cmd=$cmd)"
+            let num_fail=num_fail+1
+        fi
+        last_no=$new_no
+    done
+done
+if [ $num_fail -eq 0 ]; then
+    echook "snvqual (var num_tests): all tests passed"
+else
+    echoerror "snvqual (var num_tests): $num_fail tests failed"
+fi
+
+
+
+# strandbias quality with varying alpha
+#
+num_fail=0
+for cor in "bonf" "holm-bonf"; do
+    last_no=100000
+    for a in $ALPHA_LIST; do
+        #cmd="$base_cmd --strandbias $cor --strandbias-alpha $a"
+        cmd="$base_cmd --sb-mtc $cor --sb-alpha $a"
+        #echodebug "cmd=$cmd"
+        new_no=$(eval $cmd | grep -c 'sb_bonf')
+        #echodebug "$cor a=$a: new_no=$new_no last_no=$last_no";# cmd = $cmd"
+        if [ $new_no -gt $last_no ]; then
+            echoerror "strandbias: Got more SNVs when filtering with higher alpha (cmd=$cmd)"
+            let num_fail=num_fail+1
+        fi
+        last_no=$new_no
+    done
+done
+if [ $num_fail -eq 0 ]; then
+    echook "strandbias: all tests passed"
+else
+    echoerror "strandbias: $num_fail tests failed"
+fi
+
+# window filter
+# FIXME: not implemented in C version
+#
+#num_fail=0
+#base_cmd="$LOFREQ filter -i $vcf --no-defaults -o -"
+#cmd="$base_cmd --window 10"
+#num_reg=$(eval $cmd | grep '[^0-9,]85' | grep -c snvwin) || exit 1
+#num_exp=4
+#if [ $num_reg -ne $num_exp ]; then
+#    echoerror "window: Got $num_reg but expected $num_exp SNVs (cmd = $cmd)"
+#    let num_fail=num_fail+1
+#fi
+##
+#cmd="$base_cmd --window 1"
+#num_reg=$(eval $cmd | grep '[^0-9,]85' | grep -c snvwin) || exit 1
+#num_exp=2
+#if [ $num_reg -ne $num_exp ]; then
+#    echoerror "window: Got $num_reg but expected $num_exp SNVs (cmd = $cmd)"
+#    let num_fail=num_fail+1
+#fi
+#if [ $num_fail -eq 0 ]; then
+#    echook "window: all tests passed"
+#fi
+#
+
+exit 0
diff --git a/tests/filter_c.sh b/tests/filter_c.sh
new file mode 100755
index 0000000..890aede
--- /dev/null
+++ b/tests/filter_c.sh
@@ -0,0 +1,181 @@
+#!/bin/bash
+
+# Test the filtering options of 'lofreq filter' (AF, DP, SB and SNV quality) on a test VCF
+
+source lib.sh || exit 1
+
+
+VCF=data/vcf/filter_test.vcf.gz
+
+FILTER="$LOFREQ filter --sb-no-compound"
+#FILTER=../src/lofreq/lofreq_filter
+
+# must be ordered
+ALPHA_LIST='0.01 0.00001 0.00000001'
+MTC_TYPES='bonf holmbonf fdr'
+NUMTEST_LIST='10000 1000000'
+
+# number of input variants
+num_in=$(zgrep -vc '^#' $VCF)
+
+# number of failed tests
+num_fails=0
+
+
+# #input == #output with and without filtering
+#
+num_out=$($FILTER --print-all -i $VCF -v 1 -V 2 -a 0.5 -A 0.6 -B 10 -q bonf | grep -vc '^#')
+if [ $num_in -ne $num_out ]; then
+    echoerror "total #input != #output (with filter)"
+    let num_fails=num_fails+1
+fi
+num_out=$($FILTER --print-all -i $VCF | grep -vc '^#')
+if [ $num_in -ne $num_out ]; then
+    echoerror "total #input != #output (without filter)"
+    let num_fails=num_fails+1
+fi
+
+
+# check defaults
+#
+num_filter_tags=$($FILTER --print-all -i $VCF | grep -v '^#' | grep -v PASS | cut -f 7 | tr ';' '\n' | sort -u | wc -l)
+if [ $num_filter_tags -ne 2 ]; then
+    echoerror "was expecting exactly two filter tags coming from default filtering"
+    let num_fails=num_fails+1
+fi
+
+
+# AF filtering
+#
+num_exp=$(zgrep -c 'AF=0.2' $VCF)
+num_out=$($FILTER -i $VCF --no-defaults --af-min 0.2 --af-max 0.3 | grep -vc '^#')
+if [ $num_exp -ne $num_out ]; then
+    echoerror "AF filtering failed"
+    let num_fails=num_fails+1
+fi
+
+
+# DP filtering
+#
+num_exp=$(zgrep -c 'DP=2[0-9][0-9]' $VCF)
+num_out=$($FILTER -i $VCF --no-defaults  --cov-min 200 --cov-max 300 | grep -vc '^#')
+if [ $num_exp -ne $num_out ]; then
+    echoerror "DP filtering failed"
+    let num_fails=num_fails+1
+fi
+
+
+# SB threshold filtering
+#
+num_exp=$(zgrep -c 'SB=[0-9]\($\|;\)' $VCF)
+num_out=$($FILTER -i $VCF --no-defaults  --sb-thresh 9 | grep -vc '^#')
+if [ $num_exp -ne $num_out ]; then
+    echoerror "SB thresholdfiltering failed"
+    let num_fails=num_fails+1
+fi
+
+
+# SB MTC
+#
+num_prev_mtc=100000
+for mtc in $MTC_TYPES; do
+    num_prev_alpha=$($FILTER -i $VCF --sb-mtc $mtc | grep -vc '^#')
+
+    # bonf rejects fewer than holm-bonf than fdr, i.e. in that order
+    # more are significant, i.e. are filtered and therefore fewer pass
+    #
+    if [ $num_prev_alpha -gt $num_prev_mtc ]; then
+        echoerror "SB $mtc produced let more variants pass then previous one"
+        let num_fails=num_fails+1
+        break
+    fi
+    num_prev_mtc=$num_prev_alpha
+    
+    # as alpha goes up, we become more stringent, i.e. fewer are
+    # significant and more pass
+    #
+    for alpha in $ALPHA_LIST; do
+        num_higher_alpha=$($FILTER -i $VCF --sb-mtc $mtc --sb-alpha $alpha | grep -vc '^#')
+        #echodebug "$mtc  $alpha  $num_prev_alpha -> $num_higher_alpha"
+        if [ $num_higher_alpha -lt $num_prev_alpha ]; then
+            echoerror "SB $mtc with next highest alpha ($alpha) produced fewer PASSED variants"
+            let num_fails=num_fails+1
+            break
+        fi
+        num_prev_alpha=$num_higher_alpha
+
+    done
+done
+
+
+
+# SNV qual threshold filtering
+#
+Q=40
+num_exp=$($zcat $VCF | awk -v q=$Q '/^[^#]/ {if ($6=="." || $6>=q) {s+=1}} END {print s}')
+num_out=$($FILTER -i $VCF --no-defaults  --snvqual-thresh $Q | grep -vc '^#')
+if [ $num_exp -ne $num_out ]; then
+    echoerror "SNV quality threshold filtering failed: expected $num_exp but got $num_out"
+    let num_fails=num_fails+1
+fi
+
+if [ $num_fails -gt 0 ];then
+    echoerror "$num_fails tests failed"
+else
+    echook "all tests passed"
+fi
+
+
+# SB MTC
+#
+num_prev_mtc=0
+for mtc in $MTC_TYPES; do
+    num_prev_alpha=$($FILTER -i $VCF --no-defaults --snvqual-mtc $mtc | grep -vc '^#')
+
+    # bonf rejects fewer than holm-bonf than fdr, i.e. in that order
+    # more are significant, i.e. are kept and therefore more pass
+    #
+    if [ $num_prev_alpha -lt $num_prev_mtc ]; then
+        echoerror "SNV qual $mtc produced let fewer variants pass than previous one"
+        let num_fails=num_fails+1
+        break
+    fi
+    num_prev_mtc=$num_prev_alpha
+    
+    # as alpha goes up, we become more stringent, i.e. fewer
+    # significant and fewer pass
+    #
+    for alpha in $ALPHA_LIST; do
+        num_higher_alpha=$($FILTER -i $VCF --no-defaults --snvqual-mtc $mtc --snvqual-alpha $alpha | grep -vc '^#')
+        #echodebug "$mtc  $alpha  $num_prev_alpha -> $num_higher_alpha"
+        if [ $num_higher_alpha -gt $num_prev_alpha ]; then
+            echoerror "SNV qual $mtc with next highest alpha ($alpha) produced more PASSED variants"
+            let num_fails=num_fails+1
+            break
+        fi
+        num_prev_alpha=$num_higher_alpha
+
+    done
+done
+
+
+echo "WARN: manual diff against py impl. missing" 1>&2
+# see:
+# VCF_IN=data/vcf/filter_test.vcf.gz
+# ../src/lofreq/lofreq filter -i $VCF_IN \
+#     --no-defaults --cov-min 10 --cov-max \
+#     --af-min 0.1 \
+#     --sb-mtc bonf --sb-alpha 0.001\
+#     --snvqual-mtc holmbonf --snvqual-alpha 1 --snvqual-ntests 100000 | \
+#     grep -v '^#' | sed -e 's,_dp,cov,' -e 's,_,,g' -e 's,sb,strandbias,' | \
+#     grep -c PASS
+# 
+# ../src/lofreq_python/scripts/lofreq2_filter.py -i $VCF_IN \
+#     --no-defaults -o - \
+#     --min-cov 10 --max-cov 90 \
+#     --min-af 0.1 \
+#     --strandbias bonf --strandbias-alpha 0.001 \
+#     --snv-qual holm-bonf --snv-qual-alpha 1 --snv-qual-numtests 100000 | \
+#     grep -v '^#' | \
+#     grep -c PASS
+# 
diff --git a/tests/filter_only_snvs_or_indels.sh b/tests/filter_only_snvs_or_indels.sh
new file mode 100755
index 0000000..318462d
--- /dev/null
+++ b/tests/filter_only_snvs_or_indels.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# Check that 'lofreq filter --only-snvs' and '--only-indels' together account for every variant in the input VCF.
+source lib.sh || exit 1
+
+vcf=data/icgc-tcga-dream-testproject/strelka-1.0.13_snvs-indels-somatic.vcf
+
+num_total=$(grep -vc '^#' "$vcf")
+num_snvs=$($LOFREQ filter --no-defaults --only-snvs -i "$vcf" | grep -vc '^#')
+num_indels=$($LOFREQ filter --no-defaults --only-indels -i "$vcf" | grep -vc '^#')
+num_sum=$((num_snvs + num_indels))  # shell arithmetic; no need to fork expr
+msg="Number of SNVs ($num_snvs) and indels ($num_indels) extracted by filter"
+if [ "$num_sum" -ne "$num_total" ]; then
+     echoerror "$msg don't add up to total number of variants ($num_total)"
+     exit 1
+else
+     echook "$msg add up to total number of variants ($num_total)"
+fi
diff --git a/tests/icgc-tcga-dream-indel_chr19.sh b/tests/icgc-tcga-dream-indel_chr19.sh
new file mode 100755
index 0000000..004c98d
--- /dev/null
+++ b/tests/icgc-tcga-dream-indel_chr19.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+source lib.sh || exit 1
+
+KEEP_TMP=0
+REF=data/icgc-tcga-dream-support/Homo_sapiens_assembly19.fasta
+NORMAL=data/icgc-tcga-dream-indel_chr19/chr19.normal_didq_aq.bam
+TUMOR=data/icgc-tcga-dream-indel_chr19/chr19.tumor_didq_aq.bam
+BED=data/icgc-tcga-dream-indel_chr19/chr19.bed
+#BED=data/icgc-tcga-dream-indel_chr19/chr19-debug.bed
+DBSNP=data/icgc-tcga-dream-support/00-All.vcf.gz
+EVALUATOR=data/icgc-tcga-dream-support/evaluator.py
+#EVALUATOR=/mnt/pnsg10_projects/wilma/lofreq/somatic/dream-challenge/tools/ICGC-TCGA-DREAM-Mutation-Calling-challenge-tools/evaluator.py
+# for patched version with --classvcf support but no proper arg handling
+TRUTH=data/icgc-tcga-dream-indel_chr19/chr19.truth.vcf.gz
+
+# threads=16; echoinfo "overwriting default threads to $threads"
+
+outdir=$(mktemp -d -t $(basename $0).XXXXXX)
+outpref=$outdir/lofreq_test
+log=$outdir/log.txt
+
+cmd="$LOFREQ somatic -f $REF --threads $threads -n $NORMAL -t $TUMOR -o $outpref -l $BED -d $DBSNP --verbose"
+# only needed as long as indels are disabled by default
+cmd="$cmd --call-indels"
+echodebug "cmd=$cmd"
+if ! eval $cmd > $log 2>&1; then
+    echoerror "LoFreq failed. Check logfile $log. Command was $cmd"
+    exit 1
+fi
+
+num_err=0
+
+title="snvs"
+f=${outpref}somatic_final.snvs.vcf.gz
+res_ll=$($EVALUATOR -v $f -t $TRUTH -m SNV | awk 'END {print $NF}') || exit 1
+res=$(echo $res_ll | \
+  awk -F, '{prec=$1; rec=$2; if (prec<0.98 || rec<0.96) {status="ERROR"} else {status="OK"} printf "%s: precision=%f recall=%f\n", status, prec, rec}') || exit 1
+if echo $res | grep -q ERROR; then
+   let num_err=num_err+1
+fi
+echo "$title: " $res 1>&2
+
+title="snvs after dbsnp removal"
+f=${outpref}somatic_final_minus-dbsnp.snvs.vcf.gz
+res_ll=$($EVALUATOR -v $f -t $TRUTH -m SNV | awk 'END {print $NF}') || exit 1
+res=$(echo $res_ll | \
+  awk -F, '{prec=$1; rec=$2; if (prec<0.96 || rec<0.96) {status="ERROR"} else {status="OK"} printf "%s: precision=%f recall=%f\n", status, prec, rec}') || exit 1
+if echo $res | grep -q ERROR; then
+   let num_err=num_err+1
+fi
+echo "$title: " $res 1>&2
+
+
+# sens/spec limit based on v2.1.2a-54-g52e8097 and with -1% allowance
+
+title="indels"
+f=${outpref}somatic_final.indels.vcf.gz
+res_ll=$($EVALUATOR -v $f -t $TRUTH -m INDEL | awk 'END {print $NF}') || exit 1
+res=$(echo $res_ll | \
+  awk -F, '{prec=$1; rec=$2; if (prec<0.879 || rec<0.484) {status="ERROR"} else {status="OK"} printf "%s: precision=%f recall=%f\n", status, prec, rec}') || exit 1
+if echo $res | grep -q ERROR; then
+   let num_err=num_err+1
+fi
+echo "$title: "$res 1>&2
+
+title="indels after dbsnp removal"
+f=${outpref}somatic_final_minus-dbsnp.indels.vcf.gz
+res_ll=$($EVALUATOR -v $f -t $TRUTH -m INDEL | awk 'END {print $NF}') || exit 1
+res=$(echo $res_ll | \
+  awk -F, '{prec=$1; rec=$2; if (prec<0.952 || rec<0.482) {status="ERROR"} else {status="OK"} printf "%s: precision=%f recall=%f\n", status, prec, rec}') || exit 1
+if echo $res | grep -q ERROR; then
+   let num_err=num_err+1
+fi
+echo "$title: "$res 1>&2
+
+
+
+if [ $KEEP_TMP -ne 1 ] && [ $num_err -eq 0 ]; then
+    test -d $outdir && rm -rf $outdir
+else
+    echowarn "Not deleting temporary output directory $outdir"
+fi
+if [ $num_err -ne 0 ]; then
+    exit 1
+fi
diff --git a/tests/icgc-tcga-dream-testproject.sh b/tests/icgc-tcga-dream-testproject.sh
new file mode 100755
index 0000000..20518dd
--- /dev/null
+++ b/tests/icgc-tcga-dream-testproject.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+source lib.sh || exit 1
+
+KEEP_TMP=0
+BASE=data/icgc-tcga-dream-testproject/
+#BASE=/projects/wilma/SOMATIC/dream-challenge/testproject/
+#BASE=/mnt/userArchive/wilma/projects/somatic/testproject/
+REF=data/icgc-tcga-dream-support/Homo_sapiens_assembly19.fasta
+TUMOR=${BASE}/tumor.chr20.bam
+NORMAL=${BASE}/normal.chr20.bam
+TRUTH=${BASE}/truth.chr20.vcf.gz
+BED=${BASE}/chr20.bed
+#EVALUATOR=/projects/wilma/SOMATIC/dream-challenge/tools/bamsurgeon.git/etc/evaluator.py
+EVALUATOR=data/icgc-tcga-dream-support/evaluator.py
+DEBUG=0
+
+# threads=16; echoinfo "overwriting default threads to $threads"
+
+
+for f in $REF $TUMOR $NORMAL $TRUTH $BED $EVALUATOR; do  # every input used by the somatic call and the evaluation below
+    if [ ! -s $f ]; then
+        echoerror "Essential file $f missing"
+        exit 1
+    fi
+done
+out_pref=$(mktemp -t $(basename $0).XXXXXX)
+log=${out_pref}.exec.log
+vcf_out=${out_pref}somatic_final.snvs.vcf.gz
+if [ $DEBUG -eq 1 ]; then
+    cp ${BASE}/snvs/lofreq/beta-4-8-g7b8b334-dirty_somatic_final.vcf $vcf_out
+else
+    cmd="$LOFREQ somatic -l $BED -n $NORMAL -t $TUMOR -f $REF -o $out_pref --threads $threads"
+    if ! eval $cmd > $log 2>&1; then
+        echoerror "LoFreq failed. Check log $log and files with prefix $out_pref"
+        exit 1
+    fi
+    echoinfo "lofreq somatic run completed. now checking results"
+fi
+
+num_err=0
+# use bamsurgeon evaluator
+#
+# example output
+# alternative to using the evaluator: run lofreq vcfset against a truth file containing only SNVs
+# tpcount, fpcount, subrecs, trurecs:
+# 1389 15 1404 1445
+# precision, recall, F1 score: 0.989316239316,0.96124567474,0.975078975079
+title="snvs before dbsnp removal"
+res_ll=$($EVALUATOR -t $TRUTH -v $vcf_out -m SNV | awk 'END {print $NF}') || exit
+
+res=$(echo $res_ll | \
+    awk -F, '{prec=$1; rec=$2; if (prec<0.98 || rec<0.96) {status="ERROR"} else {status="OK"} printf "%s: precision=%f recall=%f\n", status, prec, rec}') || exit 1
+if echo $res | grep -q ERROR; then
+   let num_err=num_err+1
+fi
+echo "$title: "$res 1>&2
+
+
+if [ $KEEP_TMP -ne 1 ] && [ $num_err -eq 0 ]; then
+    rm -f -- "${out_pref:?}"*  # all outputs share the mktemp prefix $out_pref; :? aborts instead of globbing the cwd if it is empty
+else
+    echowarn "Not deleting temporary output files with prefix $out_pref"
+fi
+if [ $num_err -ne 0 ]; then
+    exit 1
+fi
+
diff --git a/tests/indel_misc.sh b/tests/indel_misc.sh
new file mode 100755
index 0000000..43c027a
--- /dev/null
+++ b/tests/indel_misc.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+source lib.sh || exit 1
+
+KEEP_TMP=0
+REF=data/denv2-dpcr-validated/consensus.fa
+outdir=$(mktemp -d -t $(basename $0).XXXXXX)
+BAM=data/denv2-dpcr-validated/CTTGTA_2_remap_razers-i92_peakrem_corr.bam 
+
+
+log=$outdir/log.txt
+vcf=$outdir/out.vcf
+cmd="$LOFREQ call --no-default-filter --only-indels --call-indels -f $REF -o $vcf $BAM"
+if ! eval $cmd > $log 2>&1; then
+    echoerror "LoFreq failed. Check logfile $log. Command was $cmd"
+    exit 1
+fi
+
+num_indels=$(grep -vc '^#' $vcf)
+if [ "$num_indels" -ne 0 ]; then  # input BAM is expected to be indel free, so any call counts as a failure
+    echoerror "Got indels in indel free bam. See $vcf"
+    exit 1
+else
+    echook "Got no indels from indel free bam."
+fi
+
+if [ $KEEP_TMP -ne 1 ]; then
+	test -d $outdir && rm -rf $outdir
+fi
+
+# FIXME call on ecoli as well or test there
\ No newline at end of file
diff --git a/tests/indel_qual.sh b/tests/indel_qual.sh
new file mode 100755
index 0000000..3254608
--- /dev/null
+++ b/tests/indel_qual.sh
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+source lib.sh || exit 1
+
+set -o pipefail
+
+# test correct use of indel qualities
+
+sam=data/idq/delq_3lq.sam
+ref=data/idq/ref.fa
+res=$(samtools view -bS $sam 2>/dev/null | $LOFREQ call --call-indels -f $ref  -b 1 -a 0.05 --no-default-filter -B -A - 2>&1 || exit 1)
+#echo "$res"
+if ! echo "$res" | grep -q 'ref[[:space:]]1'; then
+	echoerror "Should have called indel at pos 1 but didn't (res was $res)"
+	exit 1
+fi
+if echo "$res" | grep -q 'ref[[:space:]]3'; then
+        echoerror "Shouldn't have called indel at pos 3 but did (res was $res)"
+        exit 1
+fi
+
+sam=data/idq/delq_1lq.sam
+res=$(samtools view -bS $sam 2>/dev/null | $LOFREQ call --call-indels -f $ref  -b 1 -a 0.05 --no-default-filter -B -A - 2>&1 || exit 1)
+#echo "$res"
+if echo "$res" | grep -q 'ref[[:space:]]1'; then
+        echoerror "Shouldn't have called indel at pos 1 but did (res was $res)"
+        exit 1
+fi
+if ! echo "$res" | grep -q 'ref[[:space:]]3'; then
+       echoerror "Should have called indel at pos 3 but didn't (res was $res)"
+       exit 1
+fi
+
+echook "Indels predicted as expected"
\ No newline at end of file
diff --git a/tests/indels.sh.FIXME b/tests/indels.sh.FIXME
new file mode 100644
index 0000000..3f599af
--- /dev/null
+++ b/tests/indels.sh.FIXME
@@ -0,0 +1,3 @@
+Number of indel tests on razers alignments should be zero, because we ran it without indel support 
+Number of indel tests performed: 0
+./src/lofreq/lofreq call -f ../lofreq2-gis.git/tests/data/denv2-dpcr-validated/consensus.fa ../lofreq2-gis.git/tests/data/denv2-dpcr-validated/CTTGTA_2_remap_razers-i92_peakrem_corr.bam -r consensus:10000-11000 --debug --no-default-filter
diff --git a/tests/lewis_known.sh.FIXME b/tests/lewis_known.sh.FIXME
new file mode 100644
index 0000000..e69de29
diff --git a/tests/lib.sh b/tests/lib.sh
new file mode 100755
index 0000000..43e272b
--- /dev/null
+++ b/tests/lib.sh
@@ -0,0 +1,36 @@
+echoerror() {  # print all arguments, prefixed with "ERROR: ", to stderr
+    printf 'ERROR: %s\n' "$*" >&2
+}
+echook() {  # print all arguments, prefixed with "OK: ", to stderr
+    printf 'OK: %s\n' "$*" >&2
+}
+echowarn() {  # print all arguments, prefixed with "WARN: ", to stderr
+    printf 'WARN: %s\n' "$*" >&2
+}
+echoinfo() {  # print all arguments, prefixed with "INFO: ", to stderr
+    printf 'INFO: %s\n' "$*" >&2
+}
+echodebug() {  # print all arguments, prefixed with "DEBUG: ", to stderr
+    printf 'DEBUG: %s\n' "$*" >&2
+}
+
+# md5sum is md5 on mac
+md5=$(which md5sum 2>/dev/null || which md5)
+
+# zcat looks for .Z file on mac
+zcat="gzip -dc"
+
+seq=$(which seq 2>/dev/null || which gseq)
+
+ncpus=$(sysctl -2 hw.ncpu 2>/dev/null || grep -c ^processor /proc/cpuinfo 2>/dev/null || echo 1)
+# use 1/8 of available cpus at max but 4 min for parallel tasks
+threads=$(echo $ncpus | awk '{n=$1/8; if (n<4) {n=4}; print n}')
+
+# if not user defined use local LoFreq
+if [ -z "$LOFREQ" ]; then
+	LOFREQ=../src/lofreq/lofreq
+fi	
+echoinfo "Using $LOFREQ"
+#LOFREQ=../lofreq_star-2.0.0-beta/lofreq/lofreq
+
+
diff --git a/tests/no_snvs_on_cons_indels.sh.FIXME b/tests/no_snvs_on_cons_indels.sh.FIXME
new file mode 100644
index 0000000..0bc1ffc
--- /dev/null
+++ b/tests/no_snvs_on_cons_indels.sh.FIXME
@@ -0,0 +1,5 @@
+NC_000913       3558478 .       G       C       .       .       SB=0,DP4=0,1,2,5,CONSVAR,DP=554,AF=0.012635
+samtools tview  EAS20_8.1k-snvs-1k-indels.postprocessed.viterbi-sorted.mdups.realn.recal.bam -p NC_000913:3558478-3558478 ref/Ecoli_K12_MG1655_NC_000913.fa
+
+
+
diff --git a/tests/not-matching-ref.sh b/tests/not-matching-ref.sh
new file mode 100755
index 0000000..f56a31d
--- /dev/null
+++ b/tests/not-matching-ref.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Test whether we can detect if the wrong reference was given
+
+source lib.sh || exit 1
+
+
+bam=data/denv2-pseudoclonal/denv2-pseudoclonal.bam
+reffa=data/denv2-simulation/denv2-refseq.fa
+
+
+cmd="$LOFREQ call -f $reffa $bed $bam"
+if eval $cmd 2>/dev/null; then
+    echoerror "LoFreq should have failed but didn't. Command was $cmd"
+    exit 1
+else
+    echook "LoFreq detected use of wrong reference"
+fi
+
diff --git a/tests/parallel.sh b/tests/parallel.sh
new file mode 100755
index 0000000..43aa5aa
--- /dev/null
+++ b/tests/parallel.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+# Make sure the parallel wrapper produces the same result as the
+# default
+
+source lib.sh || exit 1
+
+
+
+BAM=data/icgc-tcga-first10kperchrom-syn1/dream-icgc-tcga-first10kperchrom-synthetic.challenge.set1.normal.v2.bam
+# don't bloody gzip your reference even though samtools happily indexes it
+REF=data/icgc-tcga-dream-support/Homo_sapiens_assembly19.fasta
+
+KEEP_TMP=0
+DEBUG=0
+SIMULATE=0
+
+outdir=$(mktemp -d -t $(basename $0).XXXXXX)
+outraw_parallel=$outdir/raw_parallel.vcf.gz
+outraw_single=$outdir/raw_single.vcf.gz
+log=$outdir/log.txt
+
+LOFREQ_PARALLEL="$(dirname $LOFREQ)/../scripts/lofreq2_call_pparallel.py"
+cmd="/usr/bin/time -p $LOFREQ_PARALLEL --pp-threads $threads -f $REF -o $outraw_parallel --verbose $BAM"
+test $SIMULATE -eq 1 && cmd="echo $cmd"
+test $DEBUG -eq 1 && echo "DEBUG: cmd=$cmd" 1>&2
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+
+cmd="/usr/bin/time -p $LOFREQ call -f $REF -o $outraw_single --verbose $BAM"
+test $SIMULATE -eq 1 && cmd="echo $cmd"
+test $DEBUG -eq 1 && echo "DEBUG: cmd=$cmd" 1>&2
+if ! eval $cmd >> $log 2>&1; then
+    echoerror "The following command failed (see $log for more): $cmd"
+    exit 1
+fi
+
+
+if [ $SIMULATE -eq 1 ]; then
+    nup=0
+    nus=0
+else
+    nup=$($LOFREQ vcfset -a complement -1 $outraw_parallel -2 $outraw_single --count-only)
+    nus=$($LOFREQ vcfset -a complement -2 $outraw_parallel -1 $outraw_single --count-only)
+fi
+#if [ $nup -ne 0 ] || [ $nus -ne 0 ] ; then
+# there are occasional differences possible likely due to BAQ effects on region ends
+if [ $nup -gt 1 ] || [ $nus -gt 1 ] ; then
+    echoerror "Observed some difference between parallel and single results. Check $outraw_parallel and $outraw_single"
+    n_parallel=$(zgrep -vc '^#' $outraw_parallel)
+    n_single=$(zgrep -vc '^#' $outraw_single)
+
+    n_overlap=$($LOFREQ vcfset -a intersect -1 $outraw_parallel -2 $outraw_single --count-only)
+    echoerror "$outraw_parallel has $n_parallel and $outraw_single has $n_single SNVS (both overlap by $n_overlap). Make sure these are all right on the --snvqual-thresh value."
+    exit 1
+else
+    echook "Parallel and single run give identical results."
+fi
+
+
+
+if [ $KEEP_TMP -eq 1 ]; then
+    echowarn "Not deleting tmp dir $outdir"
+else 
+    rm  $outdir/*
+    rmdir $outdir
+fi
+
diff --git a/tests/pseudomonas_jade.sh.FIXME b/tests/pseudomonas_jade.sh.FIXME
new file mode 100644
index 0000000..e69de29
diff --git a/tests/pylint.rc b/tests/pylint.rc
new file mode 100644
index 0000000..3de9dd8
--- /dev/null
+++ b/tests/pylint.rc
@@ -0,0 +1,238 @@
+[MASTER]
+
+# Specify a configuration file.
+#rcfile=
+
+# Python code to execute, usually for sys.path manipulation such as
+# pygtk.require().
+#init-hook=
+
+# Profiled execution.
+profile=no
+
+# Add <file or directory> to the black list. It should be a base name, not a
+# path. You may set this option multiple times.
+ignore=CVS
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+
+[MESSAGES CONTROL]
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifier separated by comma (,) or put this option
+# multiple time.
+#enable=
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifier separated by comma (,) or put this option
+# multiple time (only on the command line, not in the configuration file where
+# it should appear only once).
+#disable=
+
+
+[REPORTS]
+
+# Set the output format. Available formats are text, parseable, colorized, msvs
+# (visual studio) and html
+output-format=text
+
+# Include message's id in output
+include-ids=no
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written in a file name "pylint_global.[txt|html]".
+files-output=no
+
+# Tells whether to display a full report or only the messages
+reports=yes
+
+# Python expression which should return a note less than 10 (10 is the highest
+# note). You have access to the variables errors warning, statement which
+# respectively contain the number of errors / warnings messages and the total
+# number of statements analyzed. This is used by the global evaluation report
+# (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Add a comment according to your evaluation note. This is used by the global
+# evaluation report (RP0004).
+comment=no
+
+
+[VARIABLES]
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# A regular expression matching the beginning of the name of dummy variables
+# (i.e. not used).
+dummy-variables-rgx=_|dummy
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid to define new builtins when possible.
+additional-builtins=
+
+
+[BASIC]
+
+# Required attributes for module, separated by a comma
+required-attributes=
+
+# List of builtins function names that should not be used, separated by a comma
+bad-functions=map,filter,apply,input
+
+# Regular expression which should only match correct module names
+module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
+
+# Regular expression which should only match correct module level names
+const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$
+
+# Regular expression which should only match correct class names
+class-rgx=[A-Z_][a-zA-Z0-9]+$
+
+# Regular expression which should only match correct function names
+function-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct method names
+method-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct instance attribute names
+attr-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct argument names
+argument-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct variable names
+variable-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct list comprehension /
+# generator expression variable names
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=i,j,k,ex,Run,_
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=foo,bar,baz,toto,tutu,tata
+
+# Regular expression which should only match functions or classes name which do
+# not require a docstring
+no-docstring-rgx=__.*__
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+
+
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=80
+
+# Maximum number of lines in a module
+max-module-lines=1000
+
+# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
+# tab).
+indent-string='    '
+
+
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=4
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+
+[TYPECHECK]
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# List of classes names for which member attributes should not be checked
+# (useful for classes with attributes dynamically set).
+ignored-classes=SQLObject
+
+# When zope mode is activated, add a predefined set of Zope acquired attributes
+# to generated-members.
+zope=no
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E0201 when accessed.
+generated-members=REQUEST,acl_users,aq_parent
+
+
+[DESIGN]
+
+# Maximum number of arguments for function / method
+max-args=5
+
+# Argument names that match this expression will be ignored. Default to name
+# with leading underscore
+ignored-argument-names=_.*
+
+# Maximum number of locals for function / method body
+max-locals=15
+
+# Maximum number of return / yield for function / method body
+max-returns=6
+
+# Maximum number of branch for function / method body
+max-branchs=12
+
+# Maximum number of statements in function / method body
+max-statements=50
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
+
+# Create a graph of every (i.e. internal and external) dependencies in the
+# given file (report RP0402 must not be disabled)
+import-graph=
+
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled)
+ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled)
+int-import-graph=
+
+
+[CLASSES]
+
+# List of interface methods to ignore, separated by a comma. This is used for
+# instance to not check methods defines in Zope's Interface base class.
+ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,__new__,setUp
diff --git a/tests/pylint.sh b/tests/pylint.sh
new file mode 100755
index 0000000..fee4bda
--- /dev/null
+++ b/tests/pylint.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+source lib.sh || exit 1
+
+#files_to_test=$(grep 'scripts/' ../src/lofreq_python/setup.py | tr -d "[,']" | tr -d '[\t ]' | sed -e 's,^,../,')
+#files_to_test=$(grep '^[^#].*\.py'  ../src/lofreq_python/Makefile.am | grep -v PYTHON | cut -f 2 -d = | tr -d '\\' | tr -d '[\t ]')
+files_to_test=$(find ../src/scripts ../src/tools/scripts ../src/tools/lofreq_star -name \*py)
+PYLINT=$(which pylint 2>/dev/null || which pylint-2.7) || exit 1
+
+echoinfo "Using $PYLINT"
+log=$(mktemp -t pylint.XXXXX)
+for f in $files_to_test; do
+    echoinfo "Testing $f"
+    $PYLINT -E --rcfile pylint.rc $f >> $log
+done 
+if [ -s $log ]; then
+    echoerror "pylint produced errors:"
+    cat $log
+    exit 1
+else
+    echook "pylint produced no errors"
+fi
+rm $log
+
+
diff --git a/tests/run_all.sh b/tests/run_all.sh
new file mode 100755
index 0000000..12b2c4d
--- /dev/null
+++ b/tests/run_all.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+source lib.sh >/dev/null || exit 1
+
+#if hostname | grep -q aquila; then
+if set | grep -q SGE_CLUSTER_NAME; then
+	on_cluster=1
+	threads=8;# overriding default
+	ln -sf /mnt/projects/wilma/lofreq/testing/data .	
+else
+	on_cluster=0
+	ln -sf /mnt/pnsg10_projects/wilma/lofreq/testing/data .
+fi
+
+#mail="-m bes -M wilma at gis.a-star.edu.sg"
+mail=""
+
+
+
+for f in $(ls *sh | grep -v run_all*sh | grep -v lib.sh); do
+	if [ $on_cluster -eq 1 ]; then
+		echo "*** Scheduling $f"
+		name="lf-test-$(basename $f)"
+		log=${f}.$(date +%Y%m%d-%H%M).log		
+#cat<<EOF
+		qsub -N $name -pe OpenMP $threads $mail -l h_vmem=8G -l h_rt=3:00:00 -o $log -j y -V -b y -cwd "bash $f"
+#EOF
+	else
+		echo "*** Running $f";	
+#cat<<EOF
+		./$f || echo "FAILED: $f" ;
+#EOF
+	fi
+	echo
+done
+
+if [ $on_cluster -eq 1 ]; then
+	echo "After all jobs completed check log files per run script with current datetime suffix"
+fi
diff --git a/tests/somatic_CHH966_chr22.sh b/tests/somatic_CHH966_chr22.sh
new file mode 100755
index 0000000..a3b30cc
--- /dev/null
+++ b/tests/somatic_CHH966_chr22.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# FIXME:add-doc
+
+source lib.sh || exit 1
+
+KEEP_TMP=0
+
+BAM_N=./data/somatic_CHH966_chr22/CHH966-normal-100x-100pur-hg19.chr22-bed-only.bam
+BAM_T=./data/somatic_CHH966_chr22//CHH966-tumor-100x-10pur-hg19.chr22-bed-only.bam
+BED=./data/somatic_CHH966_chr22/SeqCap_EZ_Exome_v3_primary_lib_extend_no_overlap_minus300.chr22.bed
+REF=./data/somatic_CHH966_chr22/hg19_chr22.fa
+TRUESNV=./data/somatic_CHH966_chr22/hg19_chr22_true_snv.vcf.gz
+outprefix=$(mktemp -t $(basename $0) 2>/dev/null || mktemp -t $(basename $0).XXXXXX);#XXXXXX needed on linux?
+if [ $KEEP_TMP -eq 1 ]; then
+    echowarn "Keeping tmp files with prefix $outprefix"
+fi
+
+finalout=${outprefix}somatic_final.snvs.vcf.gz
+cmd="$LOFREQ somatic --threads $threads -n $BAM_N -t $BAM_T -f $REF -l $BED -o $outprefix";#--verbose";# --debug"
+#echodebug "cmd = $cmd"
+if ! eval $cmd; then
+    echoerror "The following command failed: $cmd"
+    exit 1
+fi
+n_intersect=$($LOFREQ vcfset -1 $TRUESNV -2 $finalout -a intersect | grep -vc '^#')
+if [ "$n_intersect" -lt 2 ]; then
+    echoerror "Expected at least two true predictions but got $n_intersect (compare $finalout and $TRUESNV)"
+    exit 1
+else
+    echook "Got $n_intersect true predictions"
+    if [ $KEEP_TMP -ne 1 ]; then
+	    rm ${outprefix}*vcf*
+    fi
+fi
+
+
+
diff --git a/tests/uniq.sh b/tests/uniq.sh
new file mode 100755
index 0000000..70c7e18
--- /dev/null
+++ b/tests/uniq.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+# FIXME:add-doc
+
+source lib.sh || exit 1
+
+# test vs self prediction should give zero results
+
+bam=data/denv2-simulation/denv2-10haplo.bam
+vcf_in=data/denv2-simulation/denv2-10haplo_true-snp.vcf.gz
+vcf_out=$(mktemp -t $(basename $0).XXXXXX.vcf)
+rm $vcf_out
+
+$LOFREQ uniq -v $vcf_in $bam -o $vcf_out || exit 1
+num_snvs=$(grep -cv '^#' $vcf_out)
+if [ "$num_snvs" -ne 0 ]; then
+    echoerror "Expected zero SNVs when checking variants predicted from same BAM but got $num_snvs"
+    exit 1
+else
+    echook "Got zero SNVs during self-comparison, as expected"
+fi
+rm $vcf_out
+
+
+vcf_in=data/vcf/denv2-10haplo-fake-filter-only.vcf.gz
+$LOFREQ uniq -v $vcf_in $bam -o $vcf_out || exit 1
+num_snvs=$(grep -cv '^#' $vcf_out)
+if [ "$num_snvs" -ne 0 ]; then
+    echoerror "Expected zero SNVs when checking against indels and filtered variants only but got $num_snvs"
+    exit 1
+else
+    echook "Got zero SNVs when checking indels and filtered variants only"
+fi
+rm $vcf_out
+
+
+
+
+# no indels!
+vcf_in=data/vcf/CTTGTA_2_remap_razers-i92_peakrem_corr_nodeff.vcf.gz
+bam=data/denv2-dpcr-validated/GGCTAC_2_remap_razers-i92_peakrem_corr.bam
+
+# in == out with detlim
+#
+num_in=$(zgrep -cv '^#' $vcf_in)
+cmd="$LOFREQ uniq -v $vcf_in $bam --use-det-lim -o -"
+num_out=$(eval $cmd | grep -vc '^#') || exit 1
+if [ "$num_in" -ne "$num_out" ]; then
+    echoerror "Expected same number of in and output vars when using --use-det-lim but got $num_in and $num_out resp. (cmd was $cmd)"; exit 1
+fi
+
+# UQ= present even with --output-all
+cmd="$LOFREQ uniq -v $vcf_in $bam --output-all -o -"
+eval $cmd | grep -q 'UQ=' || { echoerror "No UQ markup found"; exit 1; }  # fail the test like the other checks in this script
+
+# in gt out in default mode
+num_in=$(zgrep -cv '^#' $vcf_in)
+cmd="$LOFREQ uniq -v $vcf_in $bam -o -"
+num_out=$(eval $cmd | grep -vc '^#') || exit 1
+if [ "$num_in" -le "$num_out" ]; then
+    echoerror "Expected fewer number of vars in default output due to filtering but got $num_in and $num_out resp. (cmd was $cmd)"; exit 1
+fi
+
+
+vcf_in=data/somatic_CHH966_chr22/hg19_chr22_true_snv.vcf.gz
+bam=data/somatic_CHH966_chr22/CHH966-tumor-100x-10pur-hg19.chr22-bed-only.bam
+$LOFREQ uniq -v $vcf_in $bam -o $vcf_out || exit 1
+# previously 4, but now 2 true snvs in vcf_in, which both should be unique
+num_snvs=$(grep -cv '^#' $vcf_out)
+if [ "$num_snvs" -ne 2 ]; then
+    echoerror "Expected two SNVs from somatic check but got $num_snvs"
+    exit 1
+else
+    echook "Got expected number of SNVs from somatic check"
+fi
+
+rm $vcf_out
+#echo $vcf_out
+
+
diff --git a/tests/valgrind_call.sh b/tests/valgrind_call.sh
new file mode 100755
index 0000000..32e440a
--- /dev/null
+++ b/tests/valgrind_call.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+# running valgrind on call incl indel calls on ecoli spikein
+
+source lib.sh || exit 1
+
+KEEP_TMP=0
+BASEDIR=data/ecoli-clone/
+BAM=$BASEDIR/spike-in/spike-in.bam
+REF=$BASEDIR/ref/Ecoli_K12_MG1655_NC_000913.fa
+
+for f in $BAM $REF; do
+  if [ ! -e $f ]; then
+      echoerror "Required file $f missing"
+      exit 1
+  fi
+done
+            
+
+outdir=$(mktemp -d -t $(basename $0).XXXXXX)
+log=$outdir/log.txt
+valgrindlog=$outdir/valgrind.log
+vcf_out=$outdir/out.vcf
+
+
+# how to get a region with true SNVs and indels close-by
+#ipython
+#import vcf
+#vcfr = vcf.Reader(filename="truth.vcf.gz")
+#vars = [v for v in vcfr]
+#indel_highq = [v for v in vars_highq if v.is_indel]
+#snv_highq = [v for v in vars_highq if v.is_snp]
+#def argmin(iterable):
+#        return min(enumerate(iterable), key=lambda x: x[1])[0]
+#def closest(v, cmp_list):
+#    dists = [abs(v.POS-c.POS) for c in cmp_list]
+#    return argmin(dists)
+#for i in indel_highq:
+#    c = closest(i, snv_highq)
+#    print i, snv_highq[c]
+# and check that both are present in truth and lofreq prediction
+
+valgrind --suppressions=faidx_fetch_seq.supp --leak-check=full --tool=memcheck --log-file=$valgrindlog \
+  $LOFREQ call --call-indels -f $REF $BAM -r 'NC_000913:2000-2600' -o $vcf_out >$log 2>&1 || exit 1
+
+for pos in 2000 2032 2214 2514 2572; do  # positions that must show up in the VCF for the called region
+  if ! grep -q -w $pos $vcf_out; then
+    echoerror "Expected variant position $pos not found in vcf $vcf_out"
+    exit 1
+  fi
+done
+echook "All expected variant positions found"
+
+
+num_err=$(grep 'ERROR SUMMARY' $valgrindlog | grep -cv ': 0 errors')
+if [ "$num_err" -ne 0 ]; then
+    echoerror "Found errors in Valgrind output $valgrindlog"
+    exit 1
+else
+    echook "No errors found in Valgrind output"
+fi
+
+lost_bytes=$(grep 'lost' $valgrindlog | grep -cv ': 0 bytes in 0 blocks')
+if [ "$lost_bytes" -ne 0 ]; then
+    echoerror "Found lost bytes in Valgrind output $valgrindlog" || exit 1
+    exit 1
+else
+    echook "No lost bytes found in Valgrind output"
+fi
+
+if [ $KEEP_TMP -ne 1 ] && [ $num_err -eq 0 ]; then
+    test -d $outdir && rm -rf $outdir
+else
+    echowarn "Not deleting temporary output directory $outdir"
+fi
diff --git a/tests/valgrind_uniq.sh b/tests/valgrind_uniq.sh
new file mode 100755
index 0000000..c2bd55e
--- /dev/null
+++ b/tests/valgrind_uniq.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# FIXME:add-doc
+
+source lib.sh || exit 1
+
+valgrind_log=$(mktemp -t $(basename $0).XXXXXX.valgrind)
+vcf_out=$(mktemp -t $(basename $0).XXXXXX.vcf)
+rm $vcf_out $valgrind_log
+
+# FIXME better to use somatic SNVs
+bam=data/denv2-simulation/denv2-10haplo.bam
+vcf=data/denv2-simulation/denv2-10haplo_true-snp.vcf.gz
+
+# use only head. otherwise too slow
+$zcat $vcf | head | valgrind  --log-file=$valgrind_log --tool=memcheck \
+    $LOFREQ uniq -v - $bam -o $vcf_out || exit 1
+ 
+
+num_err=$(grep 'ERROR SUMMARY' $valgrind_log | grep -cv ': 0 errors')
+if [ "$num_err" -ne 0 ]; then
+    echoerror "Found errors in Valgrind output $valgrind_log"
+    exit 1
+else
+    echook "No errors found in Valgrind output"
+fi
+
+lost_bytes=$(grep 'lost' $valgrind_log | grep -cv ': 0 bytes in 0 blocks')
+if [ "$lost_bytes" -ne 0 ]; then
+    echoerror "Found lost bytes in Valgrind output $valgrind_log" || exit 1
+    exit 1
+else
+    echook "No lost bytes found in Valgrind output"
+fi
+
+
+
+rm $vcf_out $valgrind_log
diff --git a/tests/valgrind_vcfset.sh b/tests/valgrind_vcfset.sh
new file mode 100755
index 0000000..e1820af
--- /dev/null
+++ b/tests/valgrind_vcfset.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# FIXME:add-doc
+
+source lib.sh || exit 1
+
+valgrind_log=$(mktemp -t $(basename $0).XXXXXX.valgrind)
+vcf_in=data/vcf/CTTGTA_2_remap_razers-i92_peakrem_corr_nodeff.vcf.gz
+
+# htslib's (1.1) bgzf_getline() always seems to leak even though we free the used memory. suppress errors here
+valgrind --suppressions=bgzf_getline.supp --log-file=$valgrind_log --tool=memcheck --leak-check=full $LOFREQ vcfset -a complement -1 $vcf_in -2 $vcf_in >/dev/null || exit 1
+
+test -s $valgrind_log || exit 1
+
+num_err=$(grep 'ERROR SUMMARY' $valgrind_log | grep -cv ': 0 errors')
+if [ "$num_err" -ne 0 ]; then
+    echoerror "Found errors in Valgrind output $valgrind_log"
+    exit 1
+else
+    echook "No errors found in Valgrind output"
+fi
+
+lost_bytes=$(grep 'lost' $valgrind_log | grep -cv ': 0 bytes in 0 blocks')
+if [ "$lost_bytes" -ne 0 ]; then
+    echoerror "Found lost bytes in Valgrind output $valgrind_log" || exit 1
+    exit 1
+else
+    echook "No lost bytes found in Valgrind output"
+fi
+
diff --git a/tests/valid_vcf_output.sh b/tests/valid_vcf_output.sh
new file mode 100755
index 0000000..8318090
--- /dev/null
+++ b/tests/valid_vcf_output.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Run 'lofreq call' on a test BAM and validate the resulting VCF with vcf-validator.
+source lib.sh || exit 1
+
+bam=./data/denv2-pseudoclonal/denv2-pseudoclonal.bam
+reffa=./data/denv2-pseudoclonal/denv2-pseudoclonal_cons.fa
+bed=./data/denv2-pseudoclonal/denv2-pseudoclonal_incl.bed
+vcf=$(mktemp -t "$(basename "$0").XXXXXX.vcf") || exit 1
+rm -f "$vcf"
+
+# index bam if necessary
+test -s "${bam}.bai" || samtools index "$bam"
+
+# this tests 'filter' as well as it's part of call
+$LOFREQ call -f "$reffa" -l "$bed" -o "$vcf" "$bam" || exit 1
+if vcf-validator "$vcf"; then
+    echook "Got valid VCF output"
+    rm -f "$vcf"
+else
+    echoerror "Invalid VCF output"
+    exit 1
+fi
diff --git a/tests/vcf_setop.sh b/tests/vcf_setop.sh
new file mode 100755
index 0000000..f19fb8d
--- /dev/null
+++ b/tests/vcf_setop.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+
+# FIXME:add-doc
+
+source lib.sh || exit 1
+
+
+vcf_t=data/vcf/CHH966-tumor-100x-100pur-hg19.bwa_6431925.vcf.gz
+vcf_n=data/vcf/CHH966-normal-100x-100pur-hg19.bwa.renamed_6431925.vcf.gz
+#vcf_t=data/vcf/CHH966-tumor-100x-100pur-hg19.bwa_6431925.vcf
+#vcf_n=data/vcf/CHH966-normal-100x-100pur-hg19.bwa.renamed_6431925.vcf
+vcf_out=$(mktemp -t $(basename $0).XXXXXX.vcf)
+
+cmd="$LOFREQ vcfset -1 $vcf_t -2 $vcf_n -a complement -o -"
+
+#echodebug "cmd=$cmd"
+eval $cmd | cut -f 1-7 > $vcf_out
+
+num_diffs=$(gzip -dc data/vcf/CHH966-tumor-only.f-7.vcf.gz | \
+    diff -u $vcf_out - | grep -v '##' | grep '^[\+\-]' | wc -l)
+exp_diffs=10
+#--- test.vcf	2013-04-03 22:12:53.000000000 +0800
+#+++ -	2013-04-03 22:22:06.000000000 +0800
+#-chr12	30805918	.	C	G	23	.
+#-chr13	107516488	.	T	G	22	.
+#-chr16	69170707	.	G	C	23	.
+#-chr17	8738690	.	T	G	23	.
+#-chr2	42513376	.	G	C	23	.
+#-chr4	186560162	.	C	G	22	.
+#-chr6	42571331	.	T	A	24	.
+#-chr6	106553829	.	G	A	26	.
+#
+# All diffs expected. vcf-isec only looks at chrom and pos, not the 
+
+if [ $num_diffs -ne $exp_diffs ]; then
+    echoerror "Expected $exp_diffs but got $num_diffs (keeping $vcf_out for your reference)."
+else
+    echook "Larger complement test produced expected results."
+    rm $vcf_out
+fi
+
+
+
+
+
+set_vcf=data/vcf/vcf_set.vcf.gz
+set_vcf_allfiltered=data/vcf/vcf_set_allfiltered.vcf.gz
+
+# A file's complement against itself must contain no records
+cmd="$LOFREQ vcfset -1 $set_vcf -2 $set_vcf -a complement -o -"
+num_compl=$(eval $cmd | grep -vc '^#')
+if [ $num_compl -eq 0 ]; then
+    echook "Complement against self returned 0"
+else
+    echoerror "Complement against self should give 0"
+fi
+
+# A file's intersection with itself must reproduce every record;
+# the non-header lines of output and input are compared via their $md5 digests
+cmd="$LOFREQ vcfset -1 $set_vcf -2 $set_vcf -a intersect -o -"
+got_md5=$(eval $cmd | grep -v '^#' | $md5)
+want_md5=$(zgrep -v '^#' $set_vcf | $md5)
+#echodebug "got_md5 = $got_md5"
+#echodebug "want_md5 = $want_md5"
+if [ "$got_md5" = "$want_md5" ]; then
+    echook "Intersect against self gave results identical to input"
+else
+    echoerror "Intersect against self should give results identical to input (cmd: $cmd)"
+fi
+
+# With --only-passed, intersecting with the all-filtered file must be empty
+cmd="$LOFREQ vcfset -1 $set_vcf -2 $set_vcf_allfiltered -a intersect --only-passed -o -"
+num_inter=$(eval $cmd | grep -vc '^#')
+if [ $num_inter -eq 0 ]; then
+    echook "intersect (only-passed) with all filtered returned 0"
+else
+    echoerror "Intersect (only-passed) with all filtered should give 0 (but gave $num_inter; cmd = $cmd)"
+fi
+
+# With --only-passed, the complement with the all-filtered file must equal
+# the input's non-header lines that contain 'PASS'
+cmd="$LOFREQ vcfset -1 $set_vcf -2 $set_vcf_allfiltered -a complement -o - --only-passed"
+got_md5=$(eval $cmd | grep -v '^#' | $md5)
+want_md5=$(zgrep -v '^#' $set_vcf | grep 'PASS' | $md5)
+#echodebug "$cmd test=$got_md5 org=$want_md5"
+if [ "$got_md5" = "$want_md5" ]; then
+    echook "only-passed complement with all filtered gave results identical to input"
+else
+    echoerror "only-passed complement with all filtered should give results identical to input (cmd = $cmd)"
+fi
+
+
+#
+vcf_org=data/vcf/vcf_set.vcf.gz
+vcf_baseswap=data/vcf/vcf_set_altrefswap.vcf.gz
+cmd="$LOFREQ vcfset -1 $vcf_org -2 $vcf_baseswap -a intersect -o -"
+num_out=$(eval $cmd | grep -cv '^#')
+if [ $num_out -ne 0 ]; then
+    echoerror "intersection with base swapped file did not return any variants"    
+else
+    echook "intersection with base swapped file return variants"    
+fi
+
+cmd="$cmd --only-pos"
+num_out=$(eval $cmd | grep -cv '^#')
+if [ $num_out -eq 0 ]; then
+    echoerror "intersection with base swapped file when using bases did not return zero variants"
+else
+    echook "intersection with base swapped file return zero variants"    
+fi
+
diff --git a/tests/viterbi.sh b/tests/viterbi.sh
new file mode 100755
index 0000000..70d14e3
--- /dev/null
+++ b/tests/viterbi.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Check that 'lofreq viterbi' turns reads with broken CIGAR strings into 75M.
+source lib.sh || exit 1
+
+BASEDIR=data/viterbi/
+REF=$BASEDIR/NC_011770.fa
+BAM=$BASEDIR/pseudomonas_pair_screwed_up_cigar.bam
+
+# input contains two reads with near random cigar strings
+# that are in fact perfect matches
+# grep -c exits non-zero when it counts 0, so its status is deliberately not
+# tested here: a count of 0 has to reach the error message below
+ncorr=$($LOFREQ viterbi -f "$REF" "$BAM" | samtools view - 2>/dev/null | grep -cw 75M)
+if [ "$ncorr" != "2" ]; then
+    echoerror "Expected two fixed input reads but got $ncorr"
+    exit 1
+fi
+echook "All reads correctly realigned"

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/lofreq.git



More information about the debian-med-commit mailing list