[med-svn] [Git][med-team/delly][upstream] New upstream version 0.8.7

Andreas Tille gitlab at salsa.debian.org
Sun Jan 17 14:36:36 GMT 2021



Andreas Tille pushed to branch upstream at Debian Med / delly


Commits:
faeb27c1 by Andreas Tille at 2021-01-17T15:27:31+01:00
New upstream version 0.8.7
- - - - -


21 changed files:

- .github/workflows/docker.yml
- + R/cnv.R
- R/rd.R
- README.md
- src/assemble.h
- src/bolog.h
- + src/classify.h
- src/cnv.h
- src/coral.h
- src/delly.cpp
- src/delly.h
- src/genotype.h
- src/junction.h
- src/merge.h
- src/modvcf.h
- src/msa.h
- src/split.h
- src/tags.h
- src/tegua.h
- src/util.h
- src/version.h


Changes:

=====================================
.github/workflows/docker.yml
=====================================
@@ -2,19 +2,24 @@ name: Docker CI
 
 on:
   push:
-    branches: [ master ]
-  pull_request:
-    branches: [ master ]
+    branches: master
 
 jobs:
-  build:
-
+  main:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout at v2
-    - uses: docker/build-push-action at v1
-      with:
-        username: ${{ secrets.DOCKER_USERNAME }}
-        password: ${{ secrets.DOCKER_PASSWORD }}
-        repository: dellytools/delly
-        tags: latest
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action at v1
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action at v1
+      - name: Login to DockerHub
+        uses: docker/login-action at v1
+        with:
+          username: ${{ secrets.DOCKER_USERNAME }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+      - name: Build and push
+        id: docker_build
+        uses: docker/build-push-action at v2
+        with:
+          push: true
+          tags: dellytools/delly:latest


=====================================
R/cnv.R
=====================================
@@ -0,0 +1,32 @@
+library(ggplot2)
+library(reshape2)
+library(scales)
+
+args = commandArgs(trailingOnly=TRUE)
+x = read.table(args[1], header=F)
+
+colnames(x)[1] = c("cnv")
+x = melt(x, id.vars=c("cnv"))
+x$cn = round(x$value)
+x[x$cn > 9,]$cn = 9
+x$cn = factor(x$cn, levels=0:9)
+nsamples = length(unique(x$variable))
+nbins = sqrt(nsamples)
+if (nbins < 30) { nbins = 30; }
+
+# Plot CNVs
+for (CNV in unique(x$cnv)) {
+  print(CNV)
+  df = x[x$cnv == CNV,]
+  p = ggplot(data=df, aes(x=value))
+  for(i in 0:9) {
+    p = p + geom_histogram(data=subset(df, cn == i), aes(fill=cn), bins=nbins)
+  }
+  p = p + xlab("Copy-number")
+  p = p + ylab("Count")
+  p = p + scale_x_continuous(breaks=0:10, labels=comma)
+#  p = p + scale_fill_manual(values=c("#a6cee3","#1f78b4","#b2df8a","#33a02c","#fb9a99","#e31a1c","#fdbf6f","#ff7f00","#cab2d6","#6a3d9a"), drop=F)
+  p = p + scale_fill_manual(values=c("#ff7f00", "#1f78b4","#33a02c","#e31a1c","#6a3d9a", "#fdbf6f", "#a6cee3", "#b2df8a", "#fb9a99", "#cab2d6"), drop=F)
+  p = p + ggtitle(CNV)
+  ggsave(p, file=paste0(CNV, ".png"), width=24, height=6)
+}


=====================================
R/rd.R
=====================================
@@ -9,18 +9,28 @@ chrNamesShort = c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","
 args = commandArgs(trailingOnly=TRUE)
 x = read.table(args[1], header=T)
 maxCN = 8
+seg = data.frame()
+if (length(args)>1) {
+   seg = read.table(args[2], header=F, sep="\t")
+   colnames(seg) = c("chr", "start", "end", "id", "cn")
+}
 
 # Fix chromosome ordering
 if (sum(x$chr %in% chrNamesLong) > sum(x$chr %in% chrNamesShort)) { chrs = chrNamesLong; } else { chrs = chrNamesShort; }
 x = x[x$chr %in% chrs,]
 x$chr = factor(x$chr, levels=chrs)
+if (nrow(seg) > 0) {
+ seg = seg[seg$chr %in% chrs,]
+ seg$chr = factor(seg$chr, levels=chrs)
+}
 
 # Whole genome
 p = ggplot(data=x, aes(x=start, y=x[,6]))
-p = p + geom_point(pch=21, size=0.5)
+p = p + geom_point(pch=21, color="black", fill="black", size=0.5)
 p = p + xlab("Chromosome")
 p = p + ylab("Copy-number")
 p = p + scale_x_continuous(labels=comma)
+if (nrow(seg)) { p = p + geom_segment(data=seg, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2); }
 p = p + facet_grid(. ~ chr, scales="free_x", space="free_x")
 p = p + ylim(0, maxCN)
 p = p + theme(axis.text.x = element_text(angle=45, hjust=1))
@@ -31,10 +41,12 @@ print(warnings())
 for(chrname in unique(x$chr)) {
  print(chrname)
  sub = x[x$chr == chrname,]
+ sl = seg[seg$chr == chrname,]
  p = ggplot(data=sub, aes(x=start, y=sub[,6]))
- p = p + geom_point(pch=21, size=0.5)
+ p = p + geom_point(pch=21, color="black", fill="black", size=0.5)
  p = p + ylab("Copy-number") + xlab(chrname)
  p = p + scale_x_continuous(labels=comma, breaks = scales::pretty_breaks(n=20))
+ if (nrow(sl)) { p = p + geom_segment(data=sl, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2); }
  p = p + ylim(0, maxCN)
  p = p + theme(axis.text.x = element_text(angle=45, hjust=1))
  ggsave(p, file=paste0("plot.", chrname, ".png"), width=24, height=6)


=====================================
README.md
=====================================
@@ -12,7 +12,7 @@
 [![GitHub license](https://img.shields.io/badge/License-BSD%203--Clause-blue.svg)](https://github.com/dellytools/delly/blob/master/LICENSE)
 [![GitHub Releases](https://img.shields.io/github/release/dellytools/delly.svg)](https://github.com/dellytools/delly/releases)
 
-Delly is an integrated structural variant (SV) prediction method that can discover, genotype and visualize deletions, tandem duplications, inversions and translocations at single-nucleotide resolution in short-read massively parallel sequencing data. It uses paired-ends, split-reads and read-depth to sensitively and accurately delineate genomic rearrangements throughout the genome. Structural variants can be visualized using [Delly-maze](https://github.com/dellytools/maze) and [Delly-suave](https://github.com/dellytools/suave).
+Delly is an integrated structural variant (SV) prediction method that can discover, genotype and visualize deletions, tandem duplications, inversions and translocations at single-nucleotide resolution in short-read massively parallel sequencing data. It uses paired-ends, split-reads and read-depth to sensitively and accurately delineate genomic rearrangements throughout the genome. Structural variants can be annotated using [Delly-sansa](https://github.com/dellytools/sansa) and visualized using [Delly-maze](https://github.com/dellytools/maze) or [Delly-suave](https://github.com/dellytools/suave).
 
 
 Installing Delly
@@ -54,16 +54,6 @@ Delly needs a sorted, indexed and duplicate marked bam file for every input samp
 `bcftools view delly.bcf > delly.vcf`
 
 
-Delly for long reads from PacBio or ONT (experimental)
-------------------------------------------------------
-
-Delly also has a long-read SV discovery mode.
-
-`delly lr -y ont -g hg19.fa -x hg19.excl input.bam`
-
-`delly lr -y pb -g hg19.fa -x hg19.excl input.bam`
-
-
 Somatic SV calling
 ------------------
 
@@ -111,6 +101,16 @@ Germline SV calling
 `delly filter -f germline -o germline.bcf merged.bcf`
 
 
+Delly for long reads from PacBio or ONT
+---------------------------------------
+
+Delly also has a long-read (lr) SV discovery mode.
+
+`delly lr -y ont -g hg19.fa -x hg19.excl input.bam`
+
+`delly lr -y pb -g hg19.fa -x hg19.excl input.bam`
+
+
 Read-depth profiles
 -------------------
 
@@ -118,21 +118,86 @@ You can generate read-depth profiles with delly. This requires a mappability map
 
 [Mappability Maps](https://gear.embl.de/data/delly/)
 
-The command to count reads in 10kbp windows and normalize the coverage is:
+The command to count reads in 10kbp mappable windows and normalize the coverage is:
 
-`delly rd -a -g hg19.fa -m hg19.map input.bam`
+`delly cnv -a -g hg19.fa -m hg19.map input.bam`
 
-The output file can be plotted using R to generate normalized copy-number profiles:
+The output file `out.cov.gz` can be plotted using [R](https://www.r-project.org/) to generate normalized copy-number profiles:
 
 `Rscript R/rd.R out.cov.gz`
 
-The GC bias can be visualized using the stats output.
 
-`delly rd -s stats.gz -g hg19.fa -m hg19.map input.bam`
+Copy-number segmentation
+------------------------
+
+Read-depth profiles can also be segmented at the same time.
+
+`delly cnv -a -u -g hg19.fa -m hg19.map input.bam`
+
+The segmentation is in VCF format but you can extract a BED-like file using bcftools.
+
+`bcftools query -f "%CHROM\t%POS\t%INFO/END\t%ID\t[%RDCN]\n" cnv.bcf > segmentation.bed`
+
+Plotting:
+
+`Rscript R/rd.R out.cov.gz segmentation.bed`
+
+Germline CNV calling
+--------------------
+
+Delly uses GC and mappability fragment correction to call CNVs. This requires a [mappability map](https://gear.embl.de/data/delly/).
+
+* Call CNVs for each sample and optionally refine breakpoints using delly SV calls
+
+`delly cnv -o c1.bcf -g hg19.fa -m hg19.map -l delly.sv.bcf input.bam`
+
+* Merge CNVs into a unified site list
+
+`delly merge -e -p -o sites.bcf -m 1000 -n 100000 c1.bcf c2.bcf ... cN.bcf`
+
+* Genotype CNVs for each sample
+
+`delly cnv -u -v sites.bcf -g hg19.fa -m hg19.map -o geno1.bcf input.bam`
+
+* Merge genotypes using [bcftools](https://github.com/samtools/bcftools)
+
+`bcftools merge -m id -O b -o merged.bcf geno1.bcf ... genoN.bcf`
+
+* Filter for germline CNVs
+
+`delly classify -f germline -o filtered.bcf merged.bcf`
+
+* Optional: Plot copy-number distribution for large number of samples (>>100)
+
+`bcftools query -f "%ID[\t%RDCN]\n" filtered.bcf > plot.tsv`
+
+`Rscript R/cnv.R plot.tsv`
+
+
+Somatic copy-number alterations (SCNAs)
+---------------------------------------
+
+* For somatic copy-number alterations, delly first segments the tumor genome (`-u` is required). Depending on the coverage, tumor purity and heterogeneity you can adapt parameters `-z`, `-t` and `-x` which control the sensitivity of SCNA detection.
+
+`delly cnv -u -z 10000 -o tumor.bcf -c tumor.cov.gz -g hg19.fa -m hg19.map tumor.bam`
+
+* Then these tumor SCNAs are genotyped in the control sample (`-u` is required).
+
+`delly cnv -u -v tumor.bcf -o control.bcf -g hg19.fa -m hg19.map control.bam`
+
+* The VCF IDs are matched between tumor and control. Thus, you can merge both files using [bcftools](https://github.com/samtools/bcftools).
+
+`bcftools merge -m id -O b -o tumor_control.bcf tumor.bcf control.bcf`
+
+* Somatic filtering requires a tab-delimited sample description file where the first column is the sample id (as in the VCF/BCF file) and the second column is either tumor or control.
+
+`delly classify -p -f somatic -o somatic.bcf -s samples.tsv tumor_control.bcf`
+
+* Optional: Plot the SCNAs using bcftools and R.
 
-`zgrep "^GC" stats.gz  > gc.table`
+`bcftools query -s tumor -f "%CHROM\t%POS\t%INFO/END\t%ID\t[%RDCN]\n" somatic.bcf > segmentation.bed`
 
-`Rscript R/gcbias.R gc.table`
+`Rscript R/rd.R tumor.cov.gz segmentation.bed`
 
 
 FAQ
@@ -141,10 +206,10 @@ FAQ
 This depends on the sharpness of the insert size distribution. For an insert size of 200-300bp with a 20-30bp standard deviation, Delly starts to call reliable SVs >=300bp. Delly also supports calling of small InDels using soft-clipped reads only, the smallest SV size called is 15bp.
 
 * Can Delly be used on a non-diploid genome?  
-Yes and no. The SV site discovery works for any ploidy. However, Delly's genotyping model assumes diploidy (hom. reference, het. and hom. alternative).
+Yes and no. The SV site discovery works for any ploidy. However, Delly's genotyping model assumes diploidy (hom. reference, het. and hom. alternative). The CNV calling allows to set the baseline ploidy on the command-line.
 
 * Delly is running too slowly what can I do?    
-You should exclude telomere and centromere regions and also all unplaced contigs. Delly ships with such an exclude list for human and mouse samples. In addition, you can filter input reads more stringently using -q 20 and -s 15.
+You should exclude telomere and centromere regions and also all unplaced contigs. Delly ships with such an exclude list for human and mouse samples. In addition, you can filter input reads more stringently using -q 20 and -s 15. Lastly, `-z` can be set to 5 for high-coverage data.
 
 * Are non-unique alignments, multi-mappings and/or multiple split-read alignments allowed?  
 Delly expects two alignment records in the bam file for every paired-end, one for the first and one for the second read. Multiple split-read alignment records of a given read are allowed if and only if one of them is a primary alignment whereas all others are marked as secondary or supplementary (flag 0x0100 or flag 0x0800). This is the default for bwa mem.
@@ -158,7 +223,7 @@ There is a delly discussion group [delly-users](http://groups.google.com/d/forum
 * Docker/Singularity support?            
 There is a dockerized delly available [here](https://hub.docker.com/r/dellytools/delly/) and singularity containers (*.sif files) are part of the [delly release](https://github.com/dellytools/delly/releases).
 
-* Bioconda support?             
+* Bioconda support?              
 Delly is available via [bioconda](http://bioconda.github.io/recipes/delly/README.html).
 
 


=====================================
src/assemble.h
=====================================
@@ -17,6 +17,7 @@ namespace torali
     int32_t inslen;
     int32_t qual;  // Only required for junction count map
 
+    SeqSlice() : svid(-1), sstart(-1), inslen(-1), qual(-1) {}
     SeqSlice(int32_t const sv, int32_t const sst, int32_t const il, int32_t q) : svid(sv), sstart(sst), inslen(il), qual(q) {}
   };
 
@@ -31,7 +32,6 @@ namespace torali
 
     // SV consensus done
     std::vector<bool> svcons(svs.size(), false);
-    uint32_t maxReadPerSV = 20;
 
     // Open file handles
     typedef std::vector<samFile*> TSamFile;
@@ -76,7 +76,7 @@ namespace torali
 	      //std::cerr << svs[svid].svStart << ',' << svs[svid].svEnd << ',' << svs[svid].svt << ',' << svid << " SV" << std::endl;
 	      //std::cerr << seed << '\t' << srStore[seed][ri].svid << '\t' << srStore[seed][ri].sstart << '\t' << srStore[seed][ri].inslen << '\t' << sv[srStore[seed][ri].svid].srSupport << '\t' << sv[srStore[seed][ri].svid].svt << std::endl;
 
-	      if (!svcons[svid]) {
+	      if ((!svcons[svid]) && (seqStore[svid].size() < c.maxReadPerSV)) {
 		// Get sequence
 		std::string sequence;
 		sequence.resize(rec->core.l_qseq);
@@ -95,18 +95,23 @@ namespace torali
 		if (sPos < 0) sPos = 0;
 		if (ePos > (int32_t) readlen) ePos = readlen;
 		// Min. seq length and max insertion size, 10kbp?
-		if (((ePos - sPos) > window) && ((ePos - sPos) <= 10000)) {
+		if (((ePos - sPos) > window) && ((ePos - sPos) <= (10000 + window))) {
 		  std::string seqalign = sequence.substr(sPos, (ePos - sPos));
+		  if ((svs[svid].svt == 5) || (svs[svid].svt == 6)) {
+		    if (svs[svid].chr == refIndex) reverseComplement(seqalign);
+		  }
 		  seqStore[svid].insert(seqalign);
-	      
+
 		  // Enough split-reads?
 		  if ((!_translocation(svs[svid].svt)) && (svs[svid].chr == refIndex)) {
-		    if ((seqStore[svid].size() == maxReadPerSV) || ((int32_t) seqStore[svid].size() == svs[svid].srSupport)) {
+		    if ((seqStore[svid].size() == c.maxReadPerSV) || ((int32_t) seqStore[svid].size() == svs[svid].srSupport)) {
 		      bool msaSuccess = false;
 		      if (seqStore[svid].size() > 1) {
 			//std::cerr << svs[svid].svStart << ',' << svs[svid].svEnd << ',' << svs[svid].svt << ',' << svid << " SV" << std::endl;
 			//for(typename TSequences::iterator it = seqStore[svid].begin(); it != seqStore[svid].end(); ++it) std::cerr << *it << std::endl;
 			msa(c, seqStore[svid], svs[svid].consensus);
+			//outputConsensus(hdr, svs[svid], svs[svid].consensus);
+			if ((svs[svid].svt == 1) || (svs[svid].svt == 5)) reverseComplement(svs[svid].consensus);
 			//std::cerr << svs[svid].consensus << std::endl;
 			if (alignConsensus(c, hdr, seq, NULL, svs[svid])) msaSuccess = true;
 			//std::cerr << msaSuccess << std::endl;
@@ -128,14 +133,30 @@ namespace torali
 	bam_destroy1(rec);
 	hts_itr_destroy(iter);
       }
-      // Handle left-overs
-      for(uint32_t svid = 0; svid < svcons.size(); ++svid) {
-	if (!svcons[svid]) {
-	  if ((!_translocation(svs[svid].svt)) && (svs[svid].chr == refIndex)) {
+      // Handle left-overs and translocations
+      for(int32_t refIndex2 = 0; refIndex2 <= refIndex; ++refIndex2) {
+	char* sndSeq = NULL;
+	for(uint32_t svid = 0; svid < svcons.size(); ++svid) {
+	  if (!svcons[svid]) {
+	    if ((svs[svid].chr != refIndex) || (svs[svid].chr2 != refIndex2)) continue;
 	    bool msaSuccess = false;
 	    if (seqStore[svid].size() > 1) {
+	      // Lazy loading of references
+	      if (refIndex != refIndex2) {
+		if (sndSeq == NULL) {
+		  int32_t seqlen = -1;
+		  std::string tname(hdr->target_name[refIndex2]);
+		  sndSeq = faidx_fetch_seq(fai, tname.c_str(), 0, hdr->target_len[refIndex2], &seqlen);
+		}
+	      }
+	      //std::cerr << svs[svid].svStart << ',' << svs[svid].svEnd << ',' << svs[svid].svt << ',' << svid << " SV" << std::endl;
+	      //for(typename TSequences::iterator it = seqStore[svid].begin(); it != seqStore[svid].end(); ++it) std::cerr << *it << std::endl;
 	      msa(c, seqStore[svid], svs[svid].consensus);
-	      if (alignConsensus(c, hdr, seq, NULL, svs[svid])) msaSuccess = true;
+	      //outputConsensus(hdr, svs[svid], svs[svid].consensus);
+	      if ((svs[svid].svt == 1) || (svs[svid].svt == 5)) reverseComplement(svs[svid].consensus);
+	      //std::cerr << "Consensus: " << svs[svid].consensus << std::endl;
+	      if (alignConsensus(c, hdr, seq, sndSeq, svs[svid])) msaSuccess = true;
+	      //std::cerr << msaSuccess << std::endl;
 	    }
 	    if (!msaSuccess) {
 	      svs[svid].consensus = "";
@@ -146,6 +167,7 @@ namespace torali
 	    svcons[svid] = true;
 	  }
 	}
+	if (sndSeq != NULL) free(sndSeq);
       }
       
       // Clean-up


=====================================
src/bolog.h
=====================================
@@ -2,6 +2,7 @@
 #define BOLOG_H
 
 #include <boost/math/special_functions/round.hpp>
+#include <boost/math/distributions/normal.hpp>
 
 namespace torali {
 
@@ -82,7 +83,60 @@ struct BoLog {
    gls[file_c * 3 + 1] = (float) gl[1];
    gls[file_c * 3] = (float) gl[2];
  }
- 
+
+
+  template<typename TConfig>
+  inline int32_t
+  _computeCNLs(TConfig const& c, double const mean, double const sd, float* gl, int32_t* gqval, int32_t const file_c) {
+    // Compute copy-number likelihoods
+    boost::math::normal s(mean, sd);
+    for(uint32_t geno=0; geno< MAX_CN; ++geno) {
+      double prob = boost::math::pdf(s, geno);
+      gl[file_c * MAX_CN + geno] = std::log10(prob);
+      gl[file_c * MAX_CN + geno] = (gl[file_c * MAX_CN + geno] > SMALLEST_GL) ? gl[file_c * MAX_CN + geno] : SMALLEST_GL;
+    }
+    uint32_t glBest=file_c * MAX_CN + 0;
+    uint32_t glBest2nd=file_c * MAX_CN + 1;
+    if (gl[glBest] < gl[glBest2nd]) {
+      glBest = file_c * MAX_CN + 1;
+      glBest2nd = file_c * MAX_CN + 0;
+    }
+    for(uint32_t geno=2; geno < MAX_CN; ++geno) {
+      if (gl[file_c * MAX_CN + geno] > gl[glBest2nd]) {
+	if (gl[file_c * MAX_CN + geno] > gl[glBest]) {
+	  glBest2nd = glBest;
+	  glBest = file_c * MAX_CN + geno;
+	} else {
+	  glBest2nd = file_c * MAX_CN + geno;
+	}
+      }
+    }
+    
+    // Variant quality
+    double glObs = std::log10(boost::math::pdf(s, mean));
+    glObs = (glObs > SMALLEST_GL) ? glObs : SMALLEST_GL;
+    uint32_t plVariant = (uint32_t) boost::math::round(-10 * glObs);
+    uint32_t plPloidy = (uint32_t) boost::math::round(-10 * gl[file_c * MAX_CN + c.ploidy]);
+    int32_t varqual = plPloidy - plVariant;
+    
+    // GQ
+    uint32_t plBest = (uint32_t) boost::math::round(-10 * gl[glBest]);
+    uint32_t plBest2nd = (uint32_t) boost::math::round(-10 * gl[glBest2nd]);
+    gqval[file_c] = plBest2nd - plBest;
+    
+    // Rescale by best genotype
+    double glBestVal = gl[glBest];
+    for(uint32_t geno=0; geno< MAX_CN; ++geno) gl[file_c * MAX_CN + geno] -= glBestVal;
+    
+    // Variant quality
+    return varqual;
+  }
+
+  template<typename TConfig>
+  inline int32_t
+  _computeCNLs(TConfig const& c, double const mean, double const sd, float* gl, int32_t* gqval) {
+    return _computeCNLs(c, mean, sd, gl, gqval, 0);
+  }
 
 }
 


=====================================
src/classify.h
=====================================
@@ -0,0 +1,477 @@
+#ifndef CLASSIFY_H
+#define CLASSIFY_H
+
+#include <iostream>
+#include <fstream>
+#include <boost/unordered_map.hpp>
+#include <boost/graph/adjacency_list.hpp>
+#include <boost/graph/connected_components.hpp>
+#include <boost/program_options/cmdline.hpp>
+#include <boost/program_options/options_description.hpp>
+#include <boost/program_options/parsers.hpp>
+#include <boost/program_options/variables_map.hpp>
+#include <boost/date_time/posix_time/posix_time.hpp>
+#include <boost/date_time/gregorian/gregorian.hpp>
+#include <boost/iostreams/stream.hpp>
+#include <boost/iostreams/stream_buffer.hpp>
+#include <boost/iostreams/device/file.hpp>
+#include <boost/iostreams/filtering_stream.hpp>
+#include <boost/iostreams/filter/zlib.hpp>
+#include <boost/iostreams/filter/gzip.hpp>
+#include <boost/icl/interval_map.hpp>
+#include <boost/filesystem.hpp>
+#include <boost/tokenizer.hpp>
+#include <boost/progress.hpp>
+#include <boost/accumulators/accumulators.hpp>
+#include <boost/accumulators/statistics.hpp>
+
+#include <htslib/sam.h>
+#include <htslib/vcf.h>
+#include <htslib/tbx.h>
+
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <zlib.h>
+#include <stdio.h>
+
+#include "tags.h"
+#include "version.h"
+#include "util.h"
+#include "modvcf.h"
+
+namespace torali
+{
+
+
+struct ClassifyConfig {
+  bool filterForPass;
+  bool hasSampleFile;
+  int32_t minsize;
+  int32_t maxsize;
+  int32_t qual;
+  uint16_t ploidy;
+  float pgerm;
+  float maxsd;
+  float cn_offset;
+  std::string filter;
+  std::set<std::string> tumorSet;
+  std::set<std::string> controlSet;
+  boost::filesystem::path outfile;
+  boost::filesystem::path samplefile;
+  boost::filesystem::path vcffile;
+};
+
+
+template<typename TClassifyConfig>
+inline int
+classifyRun(TClassifyConfig const& c) {
+
+  // Load bcf file
+  htsFile* ifile = hts_open(c.vcffile.string().c_str(), "r");
+  bcf_hdr_t* hdr = bcf_hdr_read(ifile);
+
+  // Open output VCF file
+  htsFile *ofile = hts_open(c.outfile.string().c_str(), "wb");
+  bcf_hdr_t *hdr_out = bcf_hdr_dup(hdr);
+  if (c.filter == "somatic") {
+    bcf_hdr_remove(hdr_out, BCF_HL_INFO, "SOMATIC");
+    bcf_hdr_append(hdr_out, "##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description=\"Somatic copy-number variant.\">");
+    bcf_hdr_remove(hdr_out, BCF_HL_INFO, "PGERM");
+    bcf_hdr_append(hdr_out, "##INFO=<ID=PGERM,Number=1,Type=Float,Description=\"Probability of being germline.\">");
+    bcf_hdr_remove(hdr_out, BCF_HL_INFO, "CNDIFF");
+    bcf_hdr_append(hdr_out, "##INFO=<ID=CNDIFF,Number=1,Type=Float,Description=\"Absolute tumor-normal CN difference.\">");
+  } else {
+    bcf_hdr_remove(hdr_out, BCF_HL_INFO, "CNSHIFT");
+    bcf_hdr_append(hdr_out, "##INFO=<ID=CNSHIFT,Number=1,Type=Float,Description=\"Estimated CN shift.\">");
+    bcf_hdr_remove(hdr_out, BCF_HL_INFO, "CNSD");
+    bcf_hdr_append(hdr_out, "##INFO=<ID=CNSD,Number=1,Type=Float,Description=\"Estimated CN standard deviation.\">");
+  }
+  if (bcf_hdr_write(ofile, hdr_out) != 0) std::cerr << "Error: Failed to write BCF header!" << std::endl;
+
+  // VCF fields
+  int32_t nsvend = 0;
+  int32_t* svend = NULL;
+  int32_t nsvt = 0;
+  char* svt = NULL;
+  int ngqval = 0;
+  int32_t* gqval = NULL;
+  int ncnval = 0;
+  int32_t* cnval = NULL;
+  int ncnl = 0;
+  float* cnl = NULL;
+  int nrdcn = 0;
+  float* rdcn = NULL;
+  int nrdsd = 0;
+  float* rdsd = NULL;
+  bool germline = false;
+  if (c.filter == "germline") germline = true;
+
+  // Parse BCF
+  boost::posix_time::ptime now = boost::posix_time::second_clock::local_time();
+  std::cout << '[' << boost::posix_time::to_simple_string(now) << "] " << "Filtering VCF/BCF file" << std::endl;
+  bcf1_t* rec = bcf_init1();
+  while (bcf_read(ifile, hdr, rec) == 0) {
+    bcf_unpack(rec, BCF_UN_INFO);
+
+    // Check SV type
+    bcf_get_info_string(hdr, rec, "SVTYPE", &svt, &nsvt);
+    if (std::string(svt) != "CNV") continue;
+
+    // Check PASS
+    bool pass = true;
+    if (c.filterForPass) pass = (bcf_has_filter(hdr, rec, const_cast<char*>("PASS"))==1);
+    if (!pass) continue;
+
+    // Check size
+    int32_t svStart= rec->pos - 1;
+    bcf_get_info_int32(hdr, rec, "END", &svend, &nsvend);
+    int32_t svEnd = *svend;
+    if (svStart > svEnd) continue;
+    int32_t svlen = svEnd - svStart;
+    if ((svlen < c.minsize) || (svlen > c.maxsize)) continue;
+
+    // Check copy-number
+    bcf_unpack(rec, BCF_UN_ALL);
+    bcf_get_format_int32(hdr, rec, "GQ", &gqval, &ngqval);
+    bcf_get_format_int32(hdr, rec, "CN", &cnval, &ncnval);
+    bcf_get_format_float(hdr, rec, "CNL", &cnl, &ncnl);
+    bcf_get_format_float(hdr, rec, "RDCN", &rdcn, &nrdcn);
+    bcf_get_format_float(hdr, rec, "RDSD", &rdsd, &nrdsd);
+
+    typedef std::pair<float, float> TCnSd;
+    typedef std::vector<TCnSd> TSampleDist;
+    TSampleDist control;
+    TSampleDist tumor;
+    for (int i = 0; i < bcf_hdr_nsamples(hdr); ++i) {
+      if ((germline) || (c.controlSet.find(hdr->samples[i]) != c.controlSet.end())) {
+	// Control or population genomics
+	control.push_back(std::make_pair(rdcn[i], rdsd[i]));
+      } else if ((!germline) && (c.tumorSet.find(hdr->samples[i]) != c.tumorSet.end())) {
+	// Tumor
+	tumor.push_back(std::make_pair(rdcn[i], rdsd[i]));
+      }
+    }
+
+    // Classify
+    if (!germline) {
+      // Somatic mode
+      double bestCnOffset = 0;
+      bool somaticcnv = false;
+      double lowestp = 1;
+      for(uint32_t i = 0; i < tumor.size(); ++i) {
+	bool germcnv = false;
+	double highestprob = 0;
+	double tcnoffset = -1;
+	for(uint32_t k = 0; k < control.size(); ++k) {
+	  boost::math::normal s1(control[k].first, control[k].second);
+	  double prob1 = boost::math::pdf(s1, tumor[i].first);
+	  boost::math::normal s2(tumor[i].first, tumor[i].second);
+	  double prob2 = boost::math::pdf(s2, control[k].first);
+	  double prob = std::max(prob1, prob2);
+	  if (prob > c.pgerm) germcnv = true;
+	  else {
+	    // Among all controls, take highest p-value (most likely germline CNV)
+	    if (prob > highestprob) highestprob = prob;
+	  }
+	  double cndiff = std::abs(tumor[i].first - control[k].first);
+	  if (cndiff < c.cn_offset) germcnv = true;
+	  else {
+	    // Among all controls, take smallest CN difference
+	    if ((tcnoffset == -1) || (cndiff < tcnoffset)) tcnoffset = cndiff;
+	  }
+	}
+	// Among all tumors take best CN difference and lowest p-value
+	if (!germcnv) {
+	  somaticcnv = true;
+	  if ((highestprob < lowestp) && (tcnoffset > bestCnOffset)) {
+	    lowestp = highestprob;
+	    bestCnOffset = tcnoffset;
+	  }
+	}
+      }
+      if (!somaticcnv) continue;
+      _remove_info_tag(hdr_out, rec, "SOMATIC");
+      bcf_update_info_flag(hdr_out, rec, "SOMATIC", NULL, 1);
+      float pgerm = (float) lowestp;
+      _remove_info_tag(hdr_out, rec, "PGERM");
+      bcf_update_info_float(hdr_out, rec, "PGERM", &pgerm, 1);
+      float cndiv = (float) bestCnOffset;
+      _remove_info_tag(hdr_out, rec, "CNDIFF");
+      bcf_update_info_float(hdr_out, rec, "CNDIFF", &cndiv, 1);
+    } else {
+      // Correct CN shift
+      int32_t cnmain = 0;
+      {
+	std::vector<int32_t> cncount(MAX_CN, 0);
+	{
+	  bool validsite = true;
+	  boost::accumulators::accumulator_set<double, boost::accumulators::features<boost::accumulators::tag::mean, boost::accumulators::tag::variance> > acc;
+	  for(uint32_t k = 0; k < control.size(); ++k) {
+	    if ((boost::math::isinf(control[k].first)) || (boost::math::isnan(control[k].first))) validsite = false;
+	    else acc(boost::math::round(control[k].first) - control[k].first);
+	  }
+	  if (!validsite) continue;
+	  double cnshift = boost::accumulators::mean(acc);
+	  float cnshiftval = cnshift;
+	  _remove_info_tag(hdr_out, rec, "CNSHIFT");
+	  bcf_update_info_float(hdr_out, rec, "CNSHIFT", &cnshiftval, 1);
+	  for (int i = 0; i < bcf_hdr_nsamples(hdr); ++i) {
+	    rdcn[i] += cnshift;
+	    cnval[i] = boost::math::round(rdcn[i]);
+	    if (cnval[i] < MAX_CN) ++cncount[cnval[i]];
+	  }
+	}
+	
+	// Find max CN
+	for(uint32_t i = 1; i < MAX_CN; ++i) {
+	  if (cncount[i] > cncount[cnmain]) cnmain = i;
+	}
+      }
+
+      // Calculate SD
+      boost::accumulators::accumulator_set<double, boost::accumulators::features<boost::accumulators::tag::mean, boost::accumulators::tag::variance> > accLocal;
+      for (int i = 0; i < bcf_hdr_nsamples(hdr); ++i) {
+	if (cnval[i] == cnmain) accLocal(rdcn[i]);
+      }
+      double sd = sqrt(boost::accumulators::variance(accLocal));
+      if (sd < 0.025) sd = 0.025;
+      float cnsdval = sd;
+      _remove_info_tag(hdr_out, rec, "CNSD");
+      bcf_update_info_float(hdr_out, rec, "CNSD", &cnsdval, 1);
+      if (cnsdval > c.maxsd) continue;
+      
+      // Re-compute CNLs
+      std::vector<std::string> ftarr(bcf_hdr_nsamples(hdr));
+      int32_t altqual = 0;
+      for (int i = 0; i < bcf_hdr_nsamples(hdr); ++i) {
+	int32_t qval = _computeCNLs(c, rdcn[i], sd, cnl, gqval, i);
+	if (cnval[i] != c.ploidy) altqual += qval;
+	if (gqval[i] < 15) ftarr[i] = "LowQual";
+	else ftarr[i] = "PASS";
+      }
+      if (altqual < c.qual) continue;
+      if (altqual > 10000) altqual = 10000;
+      
+      // Update QUAL and FILTER
+      rec->qual = altqual;
+      int32_t tmpi = bcf_hdr_id2int(hdr_out, BCF_DT_ID, "PASS");
+      if (rec->qual < 15) tmpi = bcf_hdr_id2int(hdr_out, BCF_DT_ID, "LowQual");
+      bcf_update_filter(hdr_out, rec, &tmpi, 1);
+
+      // Update GT fields
+      std::vector<const char*> strp(bcf_hdr_nsamples(hdr));
+      std::transform(ftarr.begin(), ftarr.end(), strp.begin(), cstyle_str());
+      bcf_update_format_int32(hdr_out, rec, "CN", cnval, bcf_hdr_nsamples(hdr));
+      bcf_update_format_float(hdr_out, rec, "CNL",  cnl, bcf_hdr_nsamples(hdr) * MAX_CN);
+      bcf_update_format_int32(hdr_out, rec, "GQ", gqval, bcf_hdr_nsamples(hdr));
+      bcf_update_format_string(hdr_out, rec, "FT", &strp[0], bcf_hdr_nsamples(hdr));
+      bcf_update_format_float(hdr_out, rec, "RDCN",  rdcn, bcf_hdr_nsamples(hdr));
+    }
+    bcf_write1(ofile, hdr_out, rec);
+  }
+  bcf_destroy(rec);
+
+  // Clean-up
+  if (svend != NULL) free(svend);
+  if (svt != NULL) free(svt);
+  if (gqval != NULL) free(gqval);
+  if (cnval != NULL) free(cnval);
+  if (cnl != NULL) free(cnl);
+  if (rdcn != NULL) free(rdcn);
+  if (rdsd != NULL) free(rdsd);
+
+  // Close output VCF
+  bcf_hdr_destroy(hdr_out);
+  hts_close(ofile);
+
+  // Build index
+  bcf_index_build(c.outfile.string().c_str(), 14);
+
+  // Close VCF
+  bcf_hdr_destroy(hdr);
+  bcf_close(ifile);
+
+  // End
+  now = boost::posix_time::second_clock::local_time();
+  std::cout << '[' << boost::posix_time::to_simple_string(now) << "] Done." << std::endl;
+
+  return 0;
+}
+
+
+int classify(int argc, char **argv) {
+  ClassifyConfig c;
+
+  // Define generic options
+  boost::program_options::options_description generic("Generic options");
+  generic.add_options()
+    ("help,?", "show help message")
+    ("filter,f", boost::program_options::value<std::string>(&c.filter)->default_value("somatic"), "Filter mode (somatic, germline)")
+    ("outfile,o", boost::program_options::value<boost::filesystem::path>(&c.outfile)->default_value("cnv.bcf"), "Filtered CNV BCF output file")
+    ("minsize,m", boost::program_options::value<int32_t>(&c.minsize)->default_value(1000), "min. CNV size")
+    ("maxsize,n", boost::program_options::value<int32_t>(&c.maxsize)->default_value(500000000), "max. CNV size")
+    ("pass,p", "Filter sites for PASS")
+    ;
+
+  // Define somatic options
+  boost::program_options::options_description somatic("Somatic options");
+  somatic.add_options()
+    ("samples,s", boost::program_options::value<boost::filesystem::path>(&c.samplefile), "Two-column sample file listing sample name and tumor or control")
+    ("pgerm,e", boost::program_options::value<float>(&c.pgerm)->default_value(0.001), "probability germline")
+    ("cn-offset,t", boost::program_options::value<float>(&c.cn_offset)->default_value(0.2), "min. CN offset")
+    ;
+
+  // Define germline options
+  boost::program_options::options_description germline("Germline options");
+  germline.add_options()
+    ("ploidy,y", boost::program_options::value<uint16_t>(&c.ploidy)->default_value(2), "baseline ploidy")
+    ("qual,q", boost::program_options::value<int32_t>(&c.qual)->default_value(50), "min. site quality")
+    ("maxsd,x", boost::program_options::value<float>(&c.maxsd)->default_value(0.15), "max. population SD")
+    ;
+
+  // Define hidden options
+  boost::program_options::options_description hidden("Hidden options");
+  hidden.add_options()
+    ("input-file", boost::program_options::value<boost::filesystem::path>(&c.vcffile), "input file")
+    ;
+  boost::program_options::positional_options_description pos_args;
+  pos_args.add("input-file", -1);
+
+  // Set the visibility
+  boost::program_options::options_description cmdline_options;
+  cmdline_options.add(generic).add(somatic).add(germline).add(hidden);
+  boost::program_options::options_description visible_options;
+  visible_options.add(generic).add(somatic).add(germline);
+  boost::program_options::variables_map vm;
+  boost::program_options::store(boost::program_options::command_line_parser(argc, argv).options(cmdline_options).positional(pos_args).run(), vm);
+  boost::program_options::notify(vm);
+
+
+  // Check command line arguments
+  if ((vm.count("help")) || (!vm.count("input-file"))) {
+    std::cout << std::endl;
+    std::cout << "Usage: delly " << argv[0] << " [OPTIONS] <input.bcf>" << std::endl;
+    std::cout << visible_options << "\n";
+    return 0;
+  }
+
+  // Filter for PASS
+  if (vm.count("pass")) c.filterForPass = true;
+  else c.filterForPass = false;
+
+  // Check sample file
+  std::set<std::string> tSet;
+  std::set<std::string> cSet;
+  if (c.filter == "somatic") {
+    c.hasSampleFile = true;
+    if (!(boost::filesystem::exists(c.samplefile) && boost::filesystem::is_regular_file(c.samplefile) && boost::filesystem::file_size(c.samplefile))) {
+      std::cerr << "Sample file is missing " << c.samplefile.string() << std::endl;
+      return 1;
+    } else {
+      // Get samples
+      std::ifstream sampleFile(c.samplefile.string().c_str(), std::ifstream::in);
+      if (sampleFile.is_open()) {
+	while (sampleFile.good()) {
+	  std::string sampleFromFile;
+	  getline(sampleFile, sampleFromFile);
+	  typedef boost::tokenizer< boost::char_separator<char> > Tokenizer;
+	  boost::char_separator<char> sep(",\t ");
+	  Tokenizer tokens(sampleFromFile, sep);
+	  Tokenizer::iterator tokIter = tokens.begin();
+	  if (tokIter != tokens.end()) {
+	    std::string sample = *tokIter++;
+	    if (tokIter != tokens.end()) {
+	      std::string type = *tokIter;
+	      if (type == "control") cSet.insert(sample);
+	      else if (type == "tumor") tSet.insert(sample);
+	      else {
+		std::cerr << "Sample type for " << sample << " is neither tumor nor control" << std::endl;
+		return 1;
+	      }
+	    }
+	  }
+	}
+	sampleFile.close();
+      }
+      if (tSet.empty()) {
+	std::cerr << "No tumor samples specified." << std::endl;
+	return 1;
+      }
+      if (cSet.empty()) {
+	std::cerr << "No control samples specified." << std::endl;
+	return 1;
+      }
+      std::vector<std::string> intersection;
+      std::set_intersection(cSet.begin(), cSet.end(), tSet.begin(), tSet.end(), std::back_inserter(intersection));
+      if (!intersection.empty()) {
+	std::cerr << "Sample " << intersection[0] << " is both a tumor and control sample." << std::endl;
+	return 1;
+      }
+    }
+  } else c.hasSampleFile = false;
+
+  // Check input VCF file
+  if (vm.count("input-file")) {
+    if (!(boost::filesystem::exists(c.vcffile) && boost::filesystem::is_regular_file(c.vcffile) && boost::filesystem::file_size(c.vcffile))) {
+      std::cerr << "Input VCF/BCF file is missing: " << c.vcffile.string() << std::endl;
+      return 1;
+    }
+    htsFile* ifile = bcf_open(c.vcffile.string().c_str(), "r");
+    if (ifile == NULL) {
+      std::cerr << "Fail to open file " << c.vcffile.string() << std::endl;
+      return 1;
+    }
+    hts_idx_t* bcfidx = NULL;
+    tbx_t* tbx = NULL;
+    if (hts_get_format(ifile)->format==vcf) tbx = tbx_index_load(c.vcffile.string().c_str());
+    else bcfidx = bcf_index_load(c.vcffile.string().c_str());
+    if ((bcfidx == NULL) && (tbx == NULL)) {
+      std::cerr << "Fail to open index file for " << c.vcffile.string() << std::endl;
+      return 1;
+    }
+    bcf_hdr_t* hdr = bcf_hdr_read(ifile);
+    if (hdr == NULL) {
+      std::cerr << "Fail to header for " << c.vcffile.string() << std::endl;
+      return 1;
+    }
+    if (!(bcf_hdr_nsamples(hdr)>0)) {
+      std::cerr << "BCF/VCF file has no sample genotypes!" << std::endl;
+      return 1;
+    }
+    // Check sample names
+    if (c.filter == "somatic") {
+      for (int i = 0; i < bcf_hdr_nsamples(hdr); ++i) {
+	if (tSet.find(hdr->samples[i]) != tSet.end()) c.tumorSet.insert(hdr->samples[i]);
+	else if (cSet.find(hdr->samples[i]) != cSet.end()) c.controlSet.insert(hdr->samples[i]);
+	else std::cerr << "Warning: Sample " << hdr->samples[i] << " is missing in sample file." << std::endl;
+      }
+      if (c.tumorSet.empty()) {
+	std::cerr << "No tumor samples specified." << std::endl;
+	return 1;
+      }
+      if (c.controlSet.empty()) {
+	std::cerr << "No control samples specified." << std::endl;
+	return 1;
+      }
+    }
+    bcf_hdr_destroy(hdr);
+    if (bcfidx) hts_idx_destroy(bcfidx);
+    if (tbx) tbx_destroy(tbx);
+    bcf_close(ifile);
+  }
+
+  // Show cmd
+  boost::posix_time::ptime now = boost::posix_time::second_clock::local_time();
+  std::cout << '[' << boost::posix_time::to_simple_string(now) << "] ";
+  std::cout << "delly ";
+  for(int i=0; i<argc; ++i) { std::cout << argv[i] << ' '; }
+  std::cout << std::endl;
+
+  return classifyRun(c);
+}
+
+}
+
+#endif


=====================================
src/cnv.h
=====================================
@@ -16,96 +16,714 @@
 #include <boost/iostreams/filter/gzip.hpp>
 #include <boost/iostreams/device/file.hpp>
 #include <boost/iostreams/filter/zlib.hpp>
+#include <boost/accumulators/accumulators.hpp>
+#include <boost/accumulators/statistics.hpp>
 
 #include <htslib/faidx.h>
 #include <htslib/vcf.h>
 #include <htslib/sam.h>
+#include "util.h"
 
 
 namespace torali
 {
 
-  struct CNV {
-    int32_t chr;
+  struct SVBreakpoint {
+    int32_t pos;
+    int32_t cilow;
+    int32_t cihigh;
+    int32_t qual;
+
+    explicit SVBreakpoint(int32_t const p) : pos(p), cilow(0), cihigh(0), qual(0) {}
+    SVBreakpoint(int32_t const p, int32_t const cil, int32_t const cih, int32_t q) : pos(p), cilow(cil), cihigh(cih), qual(q) {}
+  };
+
+
+  template<typename TSVBp>
+  struct SortSVBreakpoint : public std::binary_function<TSVBp, TSVBp, bool>
+  {
+    inline bool operator()(TSVBp const& sv1, TSVBp const& sv2) {
+      return ((sv1.pos<sv2.pos) || ((sv1.pos==sv2.pos) && (sv1.qual<sv2.qual)));
+    }
+  };
+
+  
+  struct BpCNV {
     int32_t start;
     int32_t end;
-    int32_t ciposlow;
-    int32_t ciposhigh;
-    int32_t ciendlow;
-    int32_t ciendhigh;
-    uint32_t srleft;
-    uint32_t srright;
-    uint32_t nsnps;
-    float cn;
-    float rdsupport;
-    float penalty;
-    float mappable;
-    double maf;
-    
-  CNV(int32_t const c, int32_t const s, int32_t const e, int32_t const cil, int32_t const cih, int32_t const cel, int32_t ceh, float const estcn, float const sp, float const pty, float const mp) : chr(c), start(s), end(e), ciposlow(cil), ciposhigh(cih), ciendlow(cel), ciendhigh(ceh), srleft(0), srright(0), nsnps(0), cn(estcn), rdsupport(sp), penalty(pty), mappable(mp), maf(0) {}
+    double zscore;
+
+    BpCNV(int32_t const s, int32_t const e, double const z) : start(s), end(e), zscore(z) {}
   };
 
-    template<typename TCNV>
-    struct SortCNVs : public std::binary_function<TCNV, TCNV, bool>
-      {
-	inline bool operator()(TCNV const& sv1, TCNV const& sv2) {
-	  return ((sv1.chr<sv2.chr) || ((sv1.chr==sv2.chr) && (sv1.start<sv2.start)) || ((sv1.chr==sv2.chr) && (sv1.start==sv2.start) && (sv1.end<sv2.end)) || ((sv1.chr==sv2.chr) && (sv1.start==sv2.start) && (sv1.end==sv2.end) && (sv1.penalty > sv2.penalty)));
+
+  template<typename TConfig>
+  inline void
+  mergeCNVs(TConfig const& c, std::vector<CNV>& chrcnv, std::vector<CNV>& cnvs) {
+    // Merge neighboring segments if too similar
+    bool merged = true;
+    std::vector<CNV> newcnv;
+    while(merged) {
+      int32_t k = -1;
+      for(int32_t i = 0; i < (int32_t) chrcnv.size(); ++i) {
+	if (i <= k) continue;
+	k = i;
+	for(int32_t j = i + 1; j < (int32_t) chrcnv.size(); ++j) {
+	  bool allValid = true;
+	  for(int32_t pre = i; pre < j; ++pre) {
+	    double diff = std::abs(chrcnv[pre].cn - chrcnv[j].cn);
+	    if (diff >= c.cn_offset) {
+	      allValid = false;
+	      break;
+	    }
+	  }
+	  if (allValid) k = j;
+	  else break;
 	}
-      };
-      
+	if (k > i) {
+	  // Merge
+	  double cn = (chrcnv[i].cn + chrcnv[k].cn) / 2.0;
+	  double mp = (chrcnv[i].mappable + chrcnv[k].mappable) / 2.0;
+	  newcnv.push_back(CNV(chrcnv[i].chr, chrcnv[i].start, chrcnv[k].end, chrcnv[i].ciposlow, chrcnv[i].ciposhigh, chrcnv[k].ciendlow, chrcnv[k].ciendhigh, cn, mp));	  
+	} else {
+	  newcnv.push_back(chrcnv[i]);
+	}
+      }
+      if (newcnv.size() == chrcnv.size()) merged = false;
+      else {
+	chrcnv = newcnv;
+	newcnv.clear();
+      }
+    }
+
+    // Insert into global CNV vector
+    for(uint32_t i = 0; i < chrcnv.size(); ++i) {
+      cnvs.push_back(chrcnv[i]);
+      //std::cerr << chrcnv[i].chr << '\t' << chrcnv[i].start << '\t' << chrcnv[i].end << "\tMerged" << std::endl;
+    }
+  }
+
+
+  template<typename TConfig, typename TGcBias, typename TCoverage, typename TGenomicBreakpoints>
+  inline void
+  breakpointRefinement(TConfig const& c, std::pair<uint32_t, uint32_t> const& gcbound, std::vector<uint16_t> const& gcContent, std::vector<uint16_t> const& uniqContent, TGcBias const& gcbias, TCoverage const& cov, bam_hdr_t const* hdr, int32_t const refIndex, TGenomicBreakpoints const& svbp, std::vector<CNV>& cnvs) {
+    typedef typename TGenomicBreakpoints::value_type TSVs;
+    
+    // Estimate CN shift
+    for(uint32_t n = 1; n < cnvs.size(); ++n) {
+      if ((cnvs[n-1].chr != refIndex) || (cnvs[n].chr != refIndex)) continue;
+      double precovsum = 0;
+      double preexpcov = 0;
+      double succovsum = 0;
+      double sucexpcov = 0;
+      int32_t pos = cnvs[n-1].start;
+      while((pos < cnvs[n].end) && (pos < (int32_t) hdr->target_len[refIndex])) {
+	if ((gcContent[pos] > gcbound.first) && (gcContent[pos] < gcbound.second) && (uniqContent[pos] >= c.fragmentUnique * c.meanisize)) {
+	  if (pos < cnvs[n-1].end) {
+	    precovsum += cov[pos];
+	    preexpcov += gcbias[gcContent[pos]].coverage;
+	  } else {
+	    succovsum += cov[pos];
+	    sucexpcov += gcbias[gcContent[pos]].coverage;
+	  }
+	}
+	++pos;
+      }
+      double precndiff = std::abs((c.ploidy * precovsum / preexpcov) - (c.ploidy * succovsum / sucexpcov));
+
+      // Intersect with delly SVs
+      int32_t searchWindow = std::max( std::max(1000, cnvs[n].start - cnvs[n].ciposlow), std::max(1000, cnvs[n].ciposhigh - cnvs[n].start));
+      typename TSVs::const_iterator itbest = svbp[refIndex].end();
+      // Current CNV start for this breakpoint
+      typename TSVs::const_iterator itsv = std::lower_bound(svbp[refIndex].begin(), svbp[refIndex].end(), SVBreakpoint(std::max(0, cnvs[n].start - searchWindow)), SortSVBreakpoint<SVBreakpoint>());
+      for(; itsv != svbp[refIndex].end(); ++itsv) {
+	if (itsv->pos - cnvs[n].start > searchWindow) break;
+	if (itsv->pos >= cnvs[n].end) continue;
+	if (itsv->pos <= cnvs[n-1].start) continue;
+	if ((itbest == svbp[refIndex].end()) || (itsv->qual > itbest->qual)) itbest = itsv;
+      }
+      if ((itbest != svbp[refIndex].end()) && (itbest->qual > 50)) {
+	// Check refined CNV
+	precovsum = 0;
+	preexpcov = 0;
+	succovsum = 0;
+	sucexpcov = 0;
+	pos = cnvs[n-1].start;
+	while((pos < cnvs[n].end) && (pos < (int32_t) hdr->target_len[refIndex])) {
+	  if ((gcContent[pos] > gcbound.first) && (gcContent[pos] < gcbound.second) && (uniqContent[pos] >= c.fragmentUnique * c.meanisize)) {
+	    if (pos < itbest->pos) {
+	      precovsum += cov[pos];
+	      preexpcov += gcbias[gcContent[pos]].coverage;
+	    } else {
+	      succovsum += cov[pos];
+	      sucexpcov += gcbias[gcContent[pos]].coverage;
+	    }
+	  }
+	  ++pos;
+	}
+	double postcndiff = std::abs((c.ploidy * precovsum / preexpcov) - (c.ploidy * succovsum / sucexpcov));
+	//std::cerr << cnvs[n-1].end << ',' << itbest->pos << ',' << precndiff << ',' << postcndiff << std::endl;
+	if ((precndiff < postcndiff + c.cn_offset) && (std::abs(cnvs[n].start - itbest->pos) < 50000)) {
+	  // Accept new breakpoint
+	  cnvs[n-1].end = itbest->pos;
+	  cnvs[n].start = itbest->pos;
+	  cnvs[n-1].ciendlow = itbest->pos + itbest->cilow;
+	  cnvs[n-1].ciendhigh = itbest->pos + itbest->cihigh;
+	  cnvs[n].ciposlow = itbest->pos + itbest->cilow;
+	  cnvs[n].ciposhigh = itbest->pos + itbest->cihigh;
+	}
+      }
+    }
+  }
   
+
   template<typename TConfig, typename TGcBias, typename TCoverage>
   inline void
-  callCNVs(TConfig const& c, std::pair<uint32_t, uint32_t> const& gcbound, std::vector<uint16_t> const& gcContent, std::vector<uint16_t> const& uniqContent, TGcBias const& gcbias, TCoverage const& cov, bam_hdr_t const* hdr, int32_t const refIndex) {
-    
-    // cnv tiling windows, window_offset smallest window, window_size largest window
-    uint32_t numTiling = 0;
-    {
-      uint32_t winbound = c.window_offset;
-      while (winbound < c.window_size) {
-	++numTiling;
-	winbound *= 2;
+  breakpointRefinement2(TConfig const& c, std::pair<uint32_t, uint32_t> const& gcbound, std::vector<uint16_t> const& gcContent, std::vector<uint16_t> const& uniqContent, TGcBias const& gcbias, TCoverage const& cov, bam_hdr_t const* hdr, int32_t const refIndex, std::vector<CNV>& cnvs) {
+
+    int32_t maxbpshift = 10000;
+	
+    // Breakpoint refinement
+    for(uint32_t n = 1; n < cnvs.size(); ++n) {
+      int32_t prehalf = (cnvs[n-1].start + cnvs[n-1].end) / 2;
+      prehalf = std::max(cnvs[n-1].end - maxbpshift, prehalf);
+      int32_t suchalf = (cnvs[n].start + cnvs[n].end) / 2;
+      suchalf = std::min(cnvs[n].start + maxbpshift, suchalf);
+      double precovsum = 0;
+      double preexpcov = 0;
+      double succovsum = 0;
+      double sucexpcov = 0;
+      int32_t pos = cnvs[n-1].start;
+      std::vector<int32_t> validpos;
+      while((pos < cnvs[n].end) && (pos < (int32_t) hdr->target_len[refIndex])) {
+	if ((gcContent[pos] > gcbound.first) && (gcContent[pos] < gcbound.second) && (uniqContent[pos] >= c.fragmentUnique * c.meanisize)) {
+	  if (pos < prehalf) {
+	    precovsum += cov[pos];
+	    preexpcov += gcbias[gcContent[pos]].coverage;
+	  } else {
+	    if (pos <= suchalf) validpos.push_back(pos);
+	    succovsum += cov[pos];
+	    sucexpcov += gcbias[gcContent[pos]].coverage;
+	  }
+	}
+	++pos;
+      }
+      double precn = c.ploidy * precovsum / preexpcov;
+      double succn = c.ploidy * succovsum / sucexpcov;
+      // Shift Bp
+      std::vector<double> diffcn(validpos.size(), 0);
+      for(uint32_t idx = 0; idx < validpos.size(); ++idx) {
+	if ((preexpcov > 0) && (sucexpcov > 0)) {
+	  precn = c.ploidy * precovsum / preexpcov;
+	  succn = c.ploidy * succovsum / sucexpcov;
+	  diffcn[idx] = std::abs(precn - succn);
+	  //if (validpos[idx] == cnvs[n-1].end) std::cerr << "-->";
+	  //std::cerr << validpos[idx] << ',' << precn << ',' << succn << ',' << diffcn[idx] << std::endl;
+	}
+	// Add to pre, remove from suc
+	precovsum += cov[validpos[idx]];
+	preexpcov += gcbias[gcContent[validpos[idx]]].coverage;
+	succovsum -= cov[validpos[idx]];
+	sucexpcov -= gcbias[gcContent[validpos[idx]]].coverage;
+      }
+      // Find best
+      int32_t bestIdx = -1;
+      for(uint32_t idx = 0; idx < validpos.size(); ++idx) {
+	if ((bestIdx == -1) || (diffcn[idx] > diffcn[bestIdx])) bestIdx = idx;
+      }
+      if (bestIdx != -1) {
+	// Update breakpoint
+	cnvs[n-1].end = validpos[bestIdx];
+	cnvs[n].start = validpos[bestIdx];
       }
     }
+    //for(uint32_t n = 0; n < cnvs.size(); ++n) std::cerr << hdr->target_name[cnvs[n].chr] << '\t' << cnvs[n].start << '\t' << cnvs[n].end << "\tRefinement" << std::endl;
+  }
+  
+
+  template<typename TConfig, typename TGcBias, typename TCoverage>
+  inline void
+  genotypeCNVs(TConfig const& c, std::pair<uint32_t, uint32_t> const& gcbound, std::vector<uint16_t> const& gcContent, std::vector<uint16_t> const& uniqContent, TGcBias const& gcbias, TCoverage const& cov, bam_hdr_t const* hdr, int32_t const refIndex, std::vector<CNV>& cnvs) {
+    for(uint32_t n = 0; n < cnvs.size(); ++n) {
+      if (cnvs[n].chr != refIndex) continue;
+      double covsum = 0;
+      double expcov = 0;
+      int32_t winlen = 0;
+      int32_t pos = cnvs[n].start;
+      while((pos < cnvs[n].end) && (pos < (int32_t) hdr->target_len[refIndex])) {
+	if ((gcContent[pos] > gcbound.first) && (gcContent[pos] < gcbound.second) && (uniqContent[pos] >= c.fragmentUnique * c.meanisize)) {
+	  covsum += cov[pos];
+	  expcov += gcbias[gcContent[pos]].coverage;
+	  ++winlen;
+	}
+	++pos;
+      }
+      double cn = c.ploidy;
+      if (expcov > 0) cn = c.ploidy * covsum / expcov;
+      double mp = (double) winlen / (double) (cnvs[n].end - cnvs[n].start);
+      cnvs[n].cn = cn;
+      cnvs[n].mappable = mp;
 
-    typedef std::vector<int16_t> TCNVec;
-    typedef std::vector<TCNVec> TTilingCN;
-    TTilingCN cnvec(numTiling, TCNVec());
-    for(uint32_t i = 0; i < numTiling; ++i) cnvec[i].resize(hdr->target_len[refIndex]/50, -1);
-    for(uint32_t start = 0; start < hdr->target_len[refIndex]; start = start + 50) {
-      if (start + c.window_offset < hdr->target_len[refIndex]) {
-	double covsum = 0;
-	double expcov = 0;
-	uint32_t winlen = 0;
-	uint32_t winbound = c.window_offset;
-	uint32_t tilingPos = 0;
-	for(uint32_t pos = start; ((pos < start + c.window_size) && (pos < hdr->target_len[refIndex])); ++pos) {
+      // Estimate SD
+      boost::accumulators::accumulator_set<double, boost::accumulators::features<boost::accumulators::tag::mean, boost::accumulators::tag::variance> > acc;
+      uint32_t wsz = winlen / 10;
+      if (wsz > 1) {
+	covsum = 0;
+	expcov = 0;
+	winlen = 0;
+	pos = cnvs[n].start;
+	while((pos < cnvs[n].end) && (pos < (int32_t) hdr->target_len[refIndex])) {
 	  if ((gcContent[pos] > gcbound.first) && (gcContent[pos] < gcbound.second) && (uniqContent[pos] >= c.fragmentUnique * c.meanisize)) {
 	    covsum += cov[pos];
 	    expcov += gcbias[gcContent[pos]].coverage;
 	    ++winlen;
+	    if (winlen % wsz == 0) {
+	      double cn = c.ploidy;
+	      if (expcov > 0) cn = c.ploidy * covsum / expcov;
+	      acc(cn);
+	      covsum = 0;
+	      expcov = 0;
+	    }
 	  }
-	  // Multiple of window size?
-	  if ((pos - start) == winbound) {
-	    if (winlen >= c.fracWindow * (pos - start)) {
-	      cnvec[tilingPos][start/hdr->target_len[refIndex]] = (int32_t) boost::math::round(c.ploidy * covsum / expcov * 100.0);
+	  ++pos;
+	}
+	cnvs[n].sd = sqrt(boost::accumulators::variance(acc));
+	if (cnvs[n].sd < 0.025) cnvs[n].sd = 0.025;
+      } else {
+	// Invalid
+	cnvs[n].cn = -1;
+	cnvs[n].sd = 0.025;
+      }
+    }
+  }
+  
+  template<typename TConfig, typename TGcBias, typename TCoverage>
+  inline void
+  callCNVs(TConfig const& c, std::pair<uint32_t, uint32_t> const& gcbound, std::vector<uint16_t> const& gcContent, std::vector<uint16_t> const& uniqContent, TGcBias const& gcbias, TCoverage const& cov, bam_hdr_t const* hdr, int32_t const refIndex, std::vector<CNV>& cnvs) {
+
+    // Parameters
+    int32_t smallestWin = c.minCnvSize / 10;
+    int32_t biggestWin = smallestWin * 200;
+    uint32_t chain = 10;
+
+    // Find breakpoints
+    std::vector<BpCNV> bpmax;
+    if (bpmax.empty()) {
+      // Scanning window sizes
+      std::vector<int32_t> winsize;
+      int32_t wsize = smallestWin;
+      while (wsize < biggestWin) {
+	winsize.push_back(wsize);
+	wsize *= 2;
+      }
+
+      // Iterate window sizes
+      typedef int32_t TCnVal;
+      typedef std::vector<TCnVal> TCN;
+      typedef std::vector<int32_t> TChrPos;
+      std::vector<BpCNV> bpvec;
+      for(uint32_t idx = 0; idx < winsize.size(); ++idx) {
+	uint32_t idxOffset = winsize[idx] / winsize[0];
+	//std::cerr << idx << ',' << winsize[idx] << ',' << idxOffset << ',' << bpvec.size() << ',' << hdr->target_len[refIndex] << std::endl;
+	TCN cnvec;
+	TChrPos wpos;
+	uint32_t wstart = 0;
+	while(wstart < hdr->target_len[refIndex]) {
+	  double covsum = 0;
+	  double expcov = 0;
+	  int32_t winlen = 0;
+	  uint32_t pos = wstart;
+	  while ((winlen < winsize[idx]) && (pos < hdr->target_len[refIndex])) {
+	    if ((gcContent[pos] > gcbound.first) && (gcContent[pos] < gcbound.second) && (uniqContent[pos] >= c.fragmentUnique * c.meanisize)) {
+	      covsum += cov[pos];
+	      expcov += gcbias[gcContent[pos]].coverage;
+	      ++winlen;
+	    }
+	    ++pos;
+	  }
+	  if (winlen == winsize[idx]) {
+	    // Full window
+	    if (expcov > 0) cnvec.push_back((int32_t) boost::math::round(c.ploidy * covsum / expcov * 100.0));
+	    else cnvec.push_back((int32_t) boost::math::round(c.ploidy * 100.0));
+	    wpos.push_back(wstart);
+	  }
+	  wstart = pos;
+	}
+	
+	// Identify breakpoints
+	TCN pre(chain, -1);
+	TCN suc(chain, -1);
+	TChrPos prep(chain, 0);
+	TChrPos sucp(chain, 0);
+	uint32_t idxbp = 0;
+	for(uint32_t k = 0; k < cnvec.size(); ++k) {
+	  if (k < chain) {
+	    pre[k % chain] = cnvec[k];
+	    prep[k % chain] = wpos[k];
+	    if (k + 1 < cnvec.size()) {
+	      if (idx == 0 ) bpvec.push_back(BpCNV(wpos[k], wpos[k+1], 0));
+	      else idxbp += idxOffset;
+	    }
+	  } else if (k < 2 * chain) {
+	    suc[k % chain] = cnvec[k];
+	    sucp[k % chain] = wpos[k];
+	  } else {
+	    // Midpoint
+	    TCnVal val = suc[k%chain];	  
+	    int32_t pos = sucp[k%chain];
+	    int32_t posNext = sucp[(k+1)%chain];
+	    suc[k%chain] = cnvec[k];
+	    sucp[k%chain] = wpos[k];
+	    
+	    // Debug
+	    //for(uint32_t m = 0; m < pre.size(); ++m) std::cerr << prep[m] << '\t' << pre[m] << std::endl;
+	    //std::cerr << "M:" << pos << '\t' << val << std::endl;
+	    //for(uint32_t m = 0; m < suc.size(); ++m) std::cerr << sucp[m] << '\t' << suc[m] << std::endl;
+	    
+	    // Any shift in CN?
+	    boost::accumulators::accumulator_set<TCnVal, boost::accumulators::features<boost::accumulators::tag::mean, boost::accumulators::tag::variance> > accpre;
+	    boost::accumulators::accumulator_set<TCnVal, boost::accumulators::features<boost::accumulators::tag::mean, boost::accumulators::tag::variance> > accsuc;
+	    for(uint32_t m = 0; m < pre.size(); ++m) accpre(pre[m]);
+	    for(uint32_t m = 0; m < suc.size(); ++m) accsuc(suc[m]);
+	    double diff = std::abs(boost::accumulators::mean(accsuc) - boost::accumulators::mean(accpre));
+	    // Breakpoint candidate
+	    double zscore = 0;
+	    if ((diff > c.stringency * sqrt(boost::accumulators::variance(accpre))) && (diff > c.stringency * sqrt(boost::accumulators::variance(accsuc)))) {
+	      zscore = diff / std::max(sqrt(boost::accumulators::variance(accpre)), sqrt(boost::accumulators::variance(accsuc)));
+	    }
+	    if (idx == 0) bpvec.push_back(BpCNV(pos, posNext, zscore));
+	    else {
+	      for(uint32_t sub = idxbp; sub < idxbp + idxOffset; ++sub) bpvec[sub].zscore += zscore;
+	      idxbp += idxOffset;
+	    }
+	    pre[k%chain] = val;
+	    prep[k%chain] = pos;
+	  }
+	}
+      }
+    
+      // Local maxima
+      if (bpvec.size()) {
+	int32_t pos = bpvec[0].start;
+	int32_t posNext = bpvec[0].end;
+	double bestDiff = bpvec[0].zscore;
+	for(uint32_t n = 1; n < bpvec.size(); ++n) {
+	  //std::cerr << "B:" << bpvec[n].start << '-' << bpvec[n].end << ':' << bpvec[n].zscore << std::endl;
+	  if (bpvec[n].zscore == 0) {
+	    if (bestDiff != 0) {
+	      //std::cerr << "M:" << pos << '-' << posNext << ':' << bestDiff << std::endl;
+	      bpmax.push_back(BpCNV(pos, posNext, bestDiff));
+	      pos = bpvec[n].start;
+	      posNext = bpvec[n].end;
+	      bestDiff = bpvec[n].zscore;
+	    }
+	  } else {
+	    if (bpvec[n].zscore > bestDiff) {
+	      // Replace local max
+	      pos = bpvec[n].start;
+	      posNext = bpvec[n].end;
+	      bestDiff = bpvec[n].zscore;
+	    } else if (bpvec[n].zscore == bestDiff) {
+	      // Extend local max
+	      posNext = bpvec[n].end;
 	    }
-	    winbound *= 2;
-	    ++tilingPos;
 	  }
 	}
       }
     }
 
-    // Iterate CNVs
-    for(int32_t i = numTiling - 1; i>=0; --i) {
-      for(uint32_t k = 0; k < cnvec[i].size(); ++k) {
-	std::cout << cnvec[i][k] << ',';
+    // Breakpoints
+    for(uint32_t n = 0; n <= bpmax.size(); ++n) {
+      int32_t cil = 0;
+      int32_t cih = 0;
+      if (n > 0) {
+	cil = bpmax[n-1].start;
+	cih = bpmax[n-1].end;
+      }
+      int32_t cel = hdr->target_len[refIndex] - 1;
+      int32_t ceh = hdr->target_len[refIndex] - 1;
+      if (n < bpmax.size()) {
+	cel = bpmax[n].start;
+	ceh = bpmax[n].end;
+      }
+      //std::cerr << (cih - cil) << ';' << (ceh - cel) << std::endl;
+      int32_t cnvstart = (int32_t) ((cil + cih)/2);
+      int32_t cnvend = (int32_t) ((cel + ceh)/2);
+      int32_t estcnvstart = -1;
+      int32_t estcnvend = -1;
+      double covsum = 0;
+      double expcov = 0;
+      int32_t winlen = 0;
+      int32_t pos = cnvstart;
+      while((pos < cnvend) && (pos < (int32_t) hdr->target_len[refIndex])) {
+	if ((gcContent[pos] > gcbound.first) && (gcContent[pos] < gcbound.second) && (uniqContent[pos] >= c.fragmentUnique * c.meanisize)) {
+	  if (estcnvstart == -1) estcnvstart = pos;
+	  estcnvend = pos;
+	  covsum += cov[pos];
+	  expcov += gcbias[gcContent[pos]].coverage;
+	  ++winlen;
+	}
+	++pos;
+      }
+      if ((estcnvstart != -1) && (estcnvend != -1) && (estcnvend - estcnvstart > 0)) {
+	double cn = c.ploidy;
+	if (expcov > 0) cn = c.ploidy * covsum / expcov;
+	double mp = (double) winlen / (double) (estcnvend - estcnvstart);
+	cnvs.push_back(CNV(refIndex, estcnvstart, estcnvend, cil, cih, cel, ceh, cn, mp));
+	//std::cerr << hdr->target_name[refIndex] << '\t' << estcnvstart << '\t' << estcnvend << '\t' << '(' << cil << ',' << cih << ')' << '\t' << '(' << cel << ',' << ceh << ')' << '\t' << cn << '\t' << mp << std::endl;
       }
-      std::cout << std::endl;
     }
   }
+
+
+  // Parse Delly CNV VCF file
+  template<typename TConfig>
+  inline void
+  parseVcfCNV(TConfig const& c, bam_hdr_t* hd, std::vector<CNV>& cnvs) {
+    // Load bcf file
+    htsFile* ifile = bcf_open(c.genofile.string().c_str(), "r");
+    bcf_hdr_t* hdr = bcf_hdr_read(ifile);
+    bcf1_t* rec = bcf_init();
+
+    // Parse bcf
+    int32_t nsvend = 0;
+    int32_t* svend = NULL;
+    int32_t ncipos = 0;
+    int32_t* cipos = NULL;
+    int32_t nmp = 0;
+    float* mp = NULL;
+    int32_t nsvt = 0;
+    char* svt = NULL;
+    int32_t nmethod = 0;
+    char* method = NULL;
+    uint16_t wimethod = 0; 
+    while (bcf_read(ifile, hdr, rec) == 0) {
+      bcf_unpack(rec, BCF_UN_INFO);
+
+      // Delly BCF file?
+      if (!wimethod) {
+	wimethod = 2;
+	if (bcf_get_info_string(hdr, rec, "SVMETHOD", &method, &nmethod) > 0) {
+	  std::string mstr = std::string(method);
+	  if ((mstr.size() >= 10) && (mstr.substr(0, 10)  == "EMBL.DELLY")) wimethod = 1;
+	}
+      }
+
+      // Delly
+      if (wimethod == 1) {
+	// Fill SV record
+	CNV cnv;
+	std::string chrName = bcf_hdr_id2name(hdr, rec->rid);
+	int32_t tid = bam_name2id(hd, chrName.c_str());
+	cnv.chr = tid;
+	cnv.start = rec->pos - 1;
+	cnv.qval = rec->qual;
+
+	// Parse CNV type
+	if (bcf_get_info_string(hdr, rec, "SVTYPE", &svt, &nsvt) > 0) {
+	  if (std::string(svt) != "CNV") continue;
+	} else continue;
+      
+	// Parse INFO
+	if (bcf_get_info_int32(hdr, rec, "END", &svend, &nsvend) > 0) cnv.end = *svend;
+	else continue;
+	if (bcf_get_info_int32(hdr, rec, "CIPOS", &cipos, &ncipos) > 0) {
+	  cnv.ciposlow = cnv.start + cipos[0];
+	  cnv.ciposhigh = cnv.start + cipos[1];
+	} else {
+	  cnv.ciposlow = cnv.start - 50;
+	  cnv.ciposhigh = cnv.start + 50;
+	}
+	if (bcf_get_info_int32(hdr, rec, "CIEND", &cipos, &ncipos) > 0) {
+	  cnv.ciendlow = cnv.end + cipos[0];
+	  cnv.ciendhigh = cnv.end + cipos[1];
+	} else {
+	  cnv.ciendlow = cnv.end - 50;
+	  cnv.ciendhigh = cnv.end + 50;
+	}
+	if (bcf_get_info_float(hdr, rec, "MP", &mp, &nmp) > 0) cnv.mappable = (double) *mp;
+	else cnv.mappable = 0;
+
+	cnvs.push_back(cnv);
+      }
+    }
+    // Clean-up
+    free(svend);
+    free(svt);
+    free(method);
+    free(cipos);
+    free(mp);
+    
+    // Close VCF
+    bcf_hdr_destroy(hdr);
+    bcf_close(ifile);
+    bcf_destroy(rec);
+  }
+
   
+  template<typename TConfig>
+  inline void
+  cnvVCF(TConfig const& c, std::vector<CNV> const& cnvs) {
+    // Open one bam file header
+    samFile* samfile = sam_open(c.bamFile.string().c_str(), "r");
+    hts_set_fai_filename(samfile, c.genome.string().c_str());
+    bam_hdr_t* bamhd = sam_hdr_read(samfile);
+
+    // Output all copy-number variants
+    htsFile *fp = hts_open(c.cnvfile.string().c_str(), "wb");
+    bcf_hdr_t *hdr = bcf_hdr_init("w");
+
+    // Print vcf header
+    boost::posix_time::ptime now = boost::posix_time::second_clock::local_time();
+    boost::gregorian::date today = now.date();
+    std::string datestr("##fileDate=");
+    datestr += boost::gregorian::to_iso_string(today);
+    bcf_hdr_append(hdr, datestr.c_str());
+    bcf_hdr_append(hdr, "##ALT=<ID=CNV,Description=\"copy-number variants\">");
+    bcf_hdr_append(hdr, "##FILTER=<ID=LowQual,Description=\"Poor quality copy-number variant\">");
+    bcf_hdr_append(hdr, "##INFO=<ID=CIEND,Number=2,Type=Integer,Description=\"Confidence interval around END\">");
+    bcf_hdr_append(hdr, "##INFO=<ID=CIPOS,Number=2,Type=Integer,Description=\"Confidence interval around POS\">");
+    bcf_hdr_append(hdr, "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the copy-number variant\">");
+    bcf_hdr_append(hdr, "##INFO=<ID=MP,Number=1,Type=Float,Description=\"Mappable fraction of CNV\">");
+    bcf_hdr_append(hdr, "##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description=\"Imprecise copy-number variant\">");
+    bcf_hdr_append(hdr, "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">");
+    bcf_hdr_append(hdr, "##INFO=<ID=SVMETHOD,Number=1,Type=String,Description=\"Type of approach used to detect CNV\">");
+    bcf_hdr_append(hdr, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">");
+    bcf_hdr_append(hdr, "##FORMAT=<ID=CN,Number=1,Type=Integer,Description=\"Integer copy-number\">");
+    bcf_hdr_append(hdr, "##FORMAT=<ID=CNL,Number=.,Type=Float,Description=\"Log10-scaled copy-number likelihoods\">");
+    bcf_hdr_append(hdr, "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">");
+    bcf_hdr_append(hdr, "##FORMAT=<ID=FT,Number=1,Type=String,Description=\"Per-sample genotype filter\">");
+
+    bcf_hdr_append(hdr, "##FORMAT=<ID=RDCN,Number=1,Type=Float,Description=\"Read-depth based copy-number estimate\">");
+    bcf_hdr_append(hdr, "##FORMAT=<ID=RDSD,Number=1,Type=Float,Description=\"Read-depth standard deviation\">");
+
+    // Add reference
+    std::string refloc("##reference=");
+    refloc += c.genome.string();
+    bcf_hdr_append(hdr, refloc.c_str());
+    for (int i = 0; i<bamhd->n_targets; ++i) {
+      std::string refname("##contig=<ID=");
+      refname += std::string(bamhd->target_name[i]) + ",length=" + boost::lexical_cast<std::string>(bamhd->target_len[i]) + ">";
+      bcf_hdr_append(hdr, refname.c_str());
+    }
+    // Add samples
+    bcf_hdr_add_sample(hdr, c.sampleName.c_str());
+    bcf_hdr_add_sample(hdr, NULL);
+    if (bcf_hdr_write(fp, hdr) != 0) std::cerr << "Error: Failed to write BCF header!" << std::endl;
+
+    uint32_t cnvid = 0;
+    if (!cnvs.empty()) {
+      // Genotype arrays
+      int32_t *gts = (int*) malloc(bcf_hdr_nsamples(hdr) * 2 * sizeof(int));
+      int32_t *gqval = (int*) malloc(bcf_hdr_nsamples(hdr) * sizeof(int));
+      int32_t *cnval = (int*) malloc(bcf_hdr_nsamples(hdr) * sizeof(int));
+      float *cnrdval = (float*) malloc(bcf_hdr_nsamples(hdr) * sizeof(float));
+      float *cnsdval = (float*) malloc(bcf_hdr_nsamples(hdr) * sizeof(float));
+      float *cnl = (float*) malloc(bcf_hdr_nsamples(hdr) * MAX_CN * sizeof(float));
+
+      std::vector<std::string> ftarr;
+      ftarr.resize(bcf_hdr_nsamples(hdr));
+    
+      // Iterate all structural variants
+      now = boost::posix_time::second_clock::local_time();
+      std::cout << '[' << boost::posix_time::to_simple_string(now) << "] " << "Genotyping" << std::endl;
+      boost::progress_display show_progress( cnvs.size() );
+      bcf1_t *rec = bcf_init();
+      for(uint32_t i = 0; i < cnvs.size(); ++i) {
+	++show_progress;
+
+	// Invalid CNV?
+	if (cnvs[i].cn == -1) continue;
+
+	// Integer copy-number
+	int32_t absCN = (int32_t) boost::math::round(cnvs[i].cn);
+	if ((!c.segmentation) && (absCN == c.ploidy)) continue;
+      
+	// Output main vcf fields
+	rec->rid = bcf_hdr_name2id(hdr, bamhd->target_name[cnvs[i].chr]);
+	int32_t svStartPos = cnvs[i].start + 1;
+	int32_t svEndPos = cnvs[i].end;
+	if (svEndPos >= (int32_t) bamhd->target_len[cnvs[i].chr]) svEndPos = bamhd->target_len[cnvs[i].chr] - 1;
+	rec->pos = svStartPos;
+	std::string id("CNV");
+	std::string padNumber = boost::lexical_cast<std::string>(++cnvid);
+	padNumber.insert(padNumber.begin(), 8 - padNumber.length(), '0');
+	id += padNumber;
+	bcf_update_id(hdr, rec, id.c_str());
+	std::string svtype = "CNV";
+	std::string alleles = "N,<" + svtype + ">";
+	bcf_update_alleles_str(hdr, rec, alleles.c_str());
+      
+	// Add INFO fields
+	bcf_update_info_flag(hdr, rec, "IMPRECISE", NULL, 1);
+
+	bcf_update_info_string(hdr, rec, "SVTYPE", svtype.c_str());
+	std::string dellyVersion("EMBL.DELLYv");
+	dellyVersion += dellyVersionNumber;
+	bcf_update_info_string(hdr,rec, "SVMETHOD", dellyVersion.c_str());
+	int32_t tmpi = svEndPos;
+	bcf_update_info_int32(hdr, rec, "END", &tmpi, 1);
+	int32_t ciend[2];
+	ciend[0] = cnvs[i].ciendlow - cnvs[i].end;
+	ciend[1] = cnvs[i].ciendhigh - cnvs[i].end;
+	int32_t cipos[2];
+	cipos[0] = cnvs[i].ciposlow - cnvs[i].start;
+	cipos[1] = cnvs[i].ciposhigh - cnvs[i].start;
+	bcf_update_info_int32(hdr, rec, "CIPOS", cipos, 2);
+	bcf_update_info_int32(hdr, rec, "CIEND", ciend, 2);
+	float tmpf = cnvs[i].mappable;
+	bcf_update_info_float(hdr, rec, "MP", &tmpf, 1);
+
+	// Genotyping
+	cnval[0] = absCN;
+	cnrdval[0] = cnvs[i].cn;
+	cnsdval[0] = cnvs[i].sd;
+	gts[0] = bcf_gt_missing;
+	gts[1] = bcf_gt_missing;
+	int32_t qval = _computeCNLs(c, cnvs[i].cn, cnvs[i].sd, cnl, gqval);
+	if (c.hasGenoFile) rec->qual = cnvs[i].qval;  // Leave site quality in genotyping mode
+	else rec->qual = qval;
+	tmpi = bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS");
+	if (rec->qual < 15) tmpi = bcf_hdr_id2int(hdr, BCF_DT_ID, "LowQual");
+	bcf_update_filter(hdr, rec, &tmpi, 1);
+	
+	if (gqval[0] < 15) ftarr[0] = "LowQual";
+	else ftarr[0] = "PASS";
+	std::vector<const char*> strp(bcf_hdr_nsamples(hdr));
+	std::transform(ftarr.begin(), ftarr.end(), strp.begin(), cstyle_str());	
+	bcf_update_genotypes(hdr, rec, gts, bcf_hdr_nsamples(hdr) * 2);
+	bcf_update_format_int32(hdr, rec, "CN", cnval, bcf_hdr_nsamples(hdr));
+	bcf_update_format_float(hdr, rec, "CNL",  cnl, bcf_hdr_nsamples(hdr) * MAX_CN);
+	bcf_update_format_int32(hdr, rec, "GQ", gqval, bcf_hdr_nsamples(hdr));
+	bcf_update_format_string(hdr, rec, "FT", &strp[0], bcf_hdr_nsamples(hdr));
+	bcf_update_format_float(hdr, rec, "RDCN",  cnrdval, bcf_hdr_nsamples(hdr));
+	bcf_update_format_float(hdr, rec, "RDSD",  cnsdval, bcf_hdr_nsamples(hdr));
+	bcf_write1(fp, hdr, rec);
+	bcf_clear1(rec);
+      }
+      bcf_destroy1(rec);
+    
+      // Clean-up
+      free(gts);
+      free(gqval);
+      free(cnval);
+      free(cnrdval);
+      free(cnsdval);
+      free(cnl);
+    }
+
+    // Close BAM file
+    bam_hdr_destroy(bamhd);
+    sam_close(samfile);
+    
+    // Close VCF file
+    bcf_hdr_destroy(hdr);
+    hts_close(fp);
+    
+    // Build index
+    bcf_index_build(c.cnvfile.string().c_str(), 14);
+  }
+ 
 
 }
 


=====================================
src/coral.h
=====================================
@@ -25,17 +25,20 @@ namespace torali
 
   struct CountDNAConfig {
     bool adaptive;
-    bool hasPanelFile;
     bool hasStatsFile;
     bool hasBedFile;
     bool hasScanFile;
     bool noScanWindowSelection;
+    bool segmentation;
+    bool hasGenoFile;
+    bool hasVcfFile;
     uint32_t nchr;
     uint32_t meanisize;
     uint32_t window_size;
     uint32_t window_offset;
     uint32_t scanWindow;
     uint32_t minChrLen;
+    uint32_t minCnvSize;
     uint16_t minQual;
     uint16_t mad;
     uint16_t ploidy;
@@ -44,9 +47,13 @@ namespace torali
     float fracWindow;
     float fragmentUnique;
     float controlMaf;
+    float stringency;
+    float cn_offset;
     std::string sampleName;
-    boost::filesystem::path outfile;
-    boost::filesystem::path panelfile;
+    boost::filesystem::path vcffile;
+    boost::filesystem::path genofile;
+    boost::filesystem::path cnvfile;
+    boost::filesystem::path covfile;
     boost::filesystem::path genome;
     boost::filesystem::path statsFile;
     boost::filesystem::path mapFile;
@@ -54,7 +61,7 @@ namespace torali
     boost::filesystem::path bedFile;
     boost::filesystem::path scanFile;
   };
-
+  
   struct CountDNAConfigLib {
     uint16_t madCutoff;
     uint16_t madNormalCutoff;
@@ -90,13 +97,25 @@ namespace torali
     // Open output files
     boost::iostreams::filtering_ostream dataOut;
     dataOut.push(boost::iostreams::gzip_compressor());
-    dataOut.push(boost::iostreams::file_sink(c.outfile.c_str(), std::ios_base::out | std::ios_base::binary));
+    dataOut.push(boost::iostreams::file_sink(c.covfile.c_str(), std::ios_base::out | std::ios_base::binary));
     dataOut << "chr\tstart\tend\t" << c.sampleName << "_mappable\t" << c.sampleName << "_counts\t" << c.sampleName << "_CN" << std::endl;
 
-    boost::iostreams::filtering_ostream panelOut;
-    if (c.hasPanelFile) {
-      panelOut.push(boost::iostreams::gzip_compressor());
-      panelOut.push(boost::iostreams::file_sink(c.panelfile.c_str(), std::ios_base::out | std::ios_base::binary));
+    // CNVs
+    std::vector<CNV> cnvs;
+    if (c.hasGenoFile) parseVcfCNV(c, hdr, cnvs);
+
+    // SVs for breakpoint refinement
+    typedef std::vector<SVBreakpoint> TChrBreakpoints;
+    typedef std::vector<TChrBreakpoints> TGenomicBreakpoints;
+    TGenomicBreakpoints svbp(c.nchr, TChrBreakpoints());
+    if (c.hasVcfFile) {
+      std::vector<StructuralVariantRecord> svs;
+      vcfParse(c, hdr, svs);
+      for(uint32_t i = 0; i < svs.size(); ++i) {
+	svbp[svs[i].chr].push_back(SVBreakpoint(svs[i].svStart, svs[i].ciposlow, svs[i].ciposhigh, svs[i].mapq));
+	svbp[svs[i].chr2].push_back(SVBreakpoint(svs[i].svEnd, svs[i].ciendlow, svs[i].ciendhigh, svs[i].mapq));
+      }
+      for (uint32_t i = 0; i < svbp.size(); ++i) sort(svbp[i].begin(), svbp[i].end(), SortSVBreakpoint<SVBreakpoint>());
     }
     
     // Iterate chromosomes
@@ -215,10 +234,21 @@ namespace torali
 	hts_itr_destroy(iter);
       }
 
+      // CNV discovery
+      if (!c.hasGenoFile) {
+	// Call CNVs
+	std::vector<CNV> chrcnv;
+	callCNVs(c, gcbound, gcContent, uniqContent, gcbias, cov, hdr, refIndex, chrcnv);
 
-      // Call CNVs
-      //callCNVs(c, gcbound, gcContent, uniqContent, gcbias, cov, hdr, refIndex);
+	// Merge adjacent CNVs lacking read-depth shift
+	mergeCNVs(c, chrcnv, cnvs);
 
+	// Refine breakpoints
+	if (c.hasVcfFile) breakpointRefinement(c, gcbound, gcContent, uniqContent, gcbias, cov, hdr, refIndex, svbp, cnvs);
+      }
+      
+      // CNV genotyping
+      genotypeCNVs(c, gcbound, gcContent, uniqContent, gcbias, cov, hdr, refIndex, cnvs);
 
       // BED File (target intervals)
       if (c.hasBedFile) {
@@ -259,7 +289,8 @@ namespace torali
 		    if (winlen == c.window_size) {
 		      obsexp /= (double) winlen;
 		      double count = ((double) covsum / obsexp ) * (double) c.window_size / (double) winlen;
-		      double cn = c.ploidy * covsum / expcov;
+		      double cn = c.ploidy;
+		      if (expcov > 0) cn = c.ploidy * covsum / expcov;
 		      dataOut << std::string(hdr->target_name[refIndex]) << "\t" << start << "\t" << (pos + 1) << "\t" << winlen << "\t" << count << "\t" << cn << std::endl;
 		      // reset
 		      covsum = 0;
@@ -314,7 +345,8 @@ namespace torali
 	      if (winlen >= c.fracWindow * (it->second - it->first)) {
 		obsexp /= (double) winlen;
 		double count = ((double) covsum / obsexp ) * (double) (it->second - it->first) / (double) winlen;
-		double cn = c.ploidy * covsum / expcov;
+		double cn = c.ploidy;
+		if (expcov > 0) cn = c.ploidy * covsum / expcov;
 		dataOut << std::string(hdr->target_name[refIndex]) << "\t" << it->first << "\t" << it->second << "\t" << winlen << "\t" << count << "\t" << cn << std::endl;
 	      } else {
 		dataOut << std::string(hdr->target_name[refIndex]) << "\t" << it->first << "\t" << it->second << "\tNA\tNA\tNA" << std::endl;
@@ -340,7 +372,8 @@ namespace torali
 	      if (winlen == c.window_size) {
 		obsexp /= (double) winlen;
 		double count = ((double) covsum / obsexp ) * (double) c.window_size / (double) winlen;
-		double cn = c.ploidy * covsum / expcov;
+		double cn = c.ploidy;
+		if (expcov > 0) cn = c.ploidy * covsum / expcov;
 		dataOut << std::string(hdr->target_name[refIndex]) << "\t" << start << "\t" << (pos + 1) << "\t" << winlen << "\t" << count << "\t" << cn << std::endl;
 		// reset
 		covsum = 0;
@@ -387,63 +420,22 @@ namespace torali
 	      if (winlen >= c.fracWindow * c.window_size) {
 		obsexp /= (double) winlen;
 		double count = ((double) covsum / obsexp ) * (double) c.window_size / (double) winlen;
-		double cn = c.ploidy * covsum / expcov;
+		double cn = c.ploidy;
+		if (expcov > 0) cn = c.ploidy * covsum / expcov;
 		dataOut << std::string(hdr->target_name[refIndex]) << "\t" << start << "\t" << (start + c.window_size) << "\t" << winlen << "\t" << count << "\t" << cn << std::endl;
 	      }
 	    }
 	  }
 	}
-
-	// Panel output?
-	if (c.hasPanelFile) {
-	  // cnv tiling windows
-	  uint32_t smallestWin = 300;
-	  uint32_t largestWin = 40000;
-	  uint32_t numTiling = 0;
-	  {
-	    uint32_t winbound = smallestWin;
-	    while (winbound < largestWin) {
-	      ++numTiling;
-	      winbound *= 2;
-	    }
-	  }
-	  std::vector<int32_t> cnvec(numTiling, -1);
-	  panelOut << "#" << std::string(hdr->target_name[refIndex]) << "," << hdr->target_len[refIndex] << "," << smallestWin << "," << largestWin << "," << c.sampleName << std::endl;
-	  for(uint32_t start = 0; start < hdr->target_len[refIndex]; start = start + 50) {
-	    if (start + smallestWin < hdr->target_len[refIndex]) {
-	      double covsum = 0;
-	      double expcov = 0;
-	      uint32_t winlen = 0;
-	      uint32_t winbound = smallestWin;
-	      uint32_t tilingPos = 0;
-	      std::fill(cnvec.begin(), cnvec.end(), -1);
-	      bool validCN = false;
-	      for(uint32_t pos = start; ((pos < start + largestWin) && (pos < hdr->target_len[refIndex])); ++pos) {
-		if ((gcContent[pos] > gcbound.first) && (gcContent[pos] < gcbound.second) && (uniqContent[pos] >= c.fragmentUnique * c.meanisize)) {
-		  covsum += cov[pos];
-		  expcov += gcbias[gcContent[pos]].coverage;
-		  ++winlen;
-		}
-		// Multiple of window size?
-		if ((pos - start) == winbound) {
-		  if (winlen >= c.fracWindow * (pos - start)) {
-		    cnvec[tilingPos] = (int32_t) boost::math::round(c.ploidy * covsum / expcov * 100.0);
-		    validCN = true;
-		  }
-		  winbound *= 2;
-		  ++tilingPos;
-		}
-	      }
-	      if (validCN) {
-		for(uint32_t i = 0; i < numTiling; ++i) panelOut << cnvec[i] << ',';
-		panelOut << std::endl;
-	      } else panelOut << "-2," << std::endl;
-	    }
-	  }
-	}
       }
     }
 
+    // Sort CNVs
+    sort(cnvs.begin(), cnvs.end(), SortCNVs<CNV>());
+
+    // Genotype CNVs
+    cnvVCF(c, cnvs);
+
     // clean-up
     fai_destroy(faiRef);
     fai_destroy(faiMap);
@@ -452,10 +444,6 @@ namespace torali
     sam_close(samfile);
     dataOut.pop();
     dataOut.pop();
-    if (c.hasPanelFile) {
-      panelOut.pop();
-      panelOut.pop();
-    }
     
     return 0;
   }
@@ -472,21 +460,30 @@ namespace torali
       ("quality,q", boost::program_options::value<uint16_t>(&c.minQual)->default_value(10), "min. mapping quality")
       ("mappability,m", boost::program_options::value<boost::filesystem::path>(&c.mapFile), "input mappability map")
       ("ploidy,y", boost::program_options::value<uint16_t>(&c.ploidy)->default_value(2), "baseline ploidy")
-      ("fragment,e", boost::program_options::value<float>(&c.fragmentUnique)->default_value(0.97), "min. fragment uniqueness [0,1]")
-      ("statsfile,s", boost::program_options::value<boost::filesystem::path>(&c.statsFile), "gzipped stats output file (optional)")
-      ("outfile,o", boost::program_options::value<boost::filesystem::path>(&c.outfile)->default_value("out.cov.gz"), "output file")
-      ("adaptive-windowing,a", "use mappable bases for window size")
+      ("outfile,o", boost::program_options::value<boost::filesystem::path>(&c.cnvfile)->default_value("cnv.bcf"), "output CNV file")
+      ("covfile,c", boost::program_options::value<boost::filesystem::path>(&c.covfile)->default_value("cov.gz"), "output coverage file")
       ;
 
-    boost::program_options::options_description window("Window options");
+    boost::program_options::options_description cnv("CNV calling");
+    cnv.add_options()
+      ("sdrd,x", boost::program_options::value<float>(&c.stringency)->default_value(2), "min. SD read-depth shift")
+      ("cn-offset,t", boost::program_options::value<float>(&c.cn_offset)->default_value(0.1), "min. CN offset")
+      ("cnv-size,z", boost::program_options::value<uint32_t>(&c.minCnvSize)->default_value(1000), "min. CNV size")
+      ("svfile,l", boost::program_options::value<boost::filesystem::path>(&c.vcffile), "delly SV file for breakpoint refinement")
+      ("vcffile,v", boost::program_options::value<boost::filesystem::path>(&c.genofile), "input VCF/BCF file for re-genotyping")
+      ("segmentation,u", "copy-number segmentation")
+      ;
+    
+    boost::program_options::options_description window("Read-depth windows");
     window.add_options()
       ("window-size,i", boost::program_options::value<uint32_t>(&c.window_size)->default_value(10000), "window size")
       ("window-offset,j", boost::program_options::value<uint32_t>(&c.window_offset)->default_value(10000), "window offset")
       ("bed-intervals,b", boost::program_options::value<boost::filesystem::path>(&c.bedFile), "input BED file")
       ("fraction-window,k", boost::program_options::value<float>(&c.fracWindow)->default_value(0.25), "min. callable window fraction [0,1]")
+      ("adaptive-windowing,a", "use mappable bases for window size")
       ;
 
-    boost::program_options::options_description gcopt("GC options");
+    boost::program_options::options_description gcopt("GC fragment normalization");
     gcopt.add_options()
       ("scan-window,w", boost::program_options::value<uint32_t>(&c.scanWindow)->default_value(10000), "scanning window size")
       ("fraction-unique,f", boost::program_options::value<float>(&c.uniqueToTotalCovRatio)->default_value(0.8), "uniqueness filter for scan windows [0,1]")
@@ -499,7 +496,8 @@ namespace torali
     boost::program_options::options_description hidden("Hidden options");
     hidden.add_options()
       ("input-file", boost::program_options::value<boost::filesystem::path>(&c.bamFile), "input bam file")
-      ("panelfile,z", boost::program_options::value<boost::filesystem::path>(&c.panelfile), "output panel file")
+      ("fragment,e", boost::program_options::value<float>(&c.fragmentUnique)->default_value(0.97), "min. fragment uniqueness [0,1]")
+      ("statsfile,s", boost::program_options::value<boost::filesystem::path>(&c.statsFile), "gzipped stats output file (optional)")
       ;
 
     boost::program_options::positional_options_description pos_args;
@@ -507,9 +505,9 @@ namespace torali
 
     // Set the visibility
     boost::program_options::options_description cmdline_options;
-    cmdline_options.add(generic).add(window).add(gcopt).add(hidden);
+    cmdline_options.add(generic).add(cnv).add(window).add(gcopt).add(hidden);
     boost::program_options::options_description visible_options;
-    visible_options.add(generic).add(window).add(gcopt);
+    visible_options.add(generic).add(cnv).add(window).add(gcopt);
 
     // Parse command-line
     boost::program_options::variables_map vm;
@@ -551,15 +549,57 @@ namespace torali
     if (vm.count("adaptive-windowing")) c.adaptive = true;
     else c.adaptive = false;
 
-    // CNV mode
-    if (vm.count("panelfile")) c.hasPanelFile = true;
-    else c.hasPanelFile = false;
-    
+    // Segmentation
+    if (vm.count("segmentation")) c.segmentation = true;
+    else c.segmentation = false;
+
     // Check window size
     if (c.window_offset > c.window_size) c.window_offset = c.window_size;
     if (c.window_size == 0) c.window_size = 1;
     if (c.window_offset == 0) c.window_offset = 1;
 
+    // Check input VCF file (CNV genotyping)
+    if (vm.count("vcffile")) {
+      if (!(boost::filesystem::exists(c.genofile) && boost::filesystem::is_regular_file(c.genofile) && boost::filesystem::file_size(c.genofile))) {
+	std::cerr << "Input VCF/BCF file is missing: " << c.genofile.string() << std::endl;
+	return 1;
+      }
+      htsFile* ifile = bcf_open(c.genofile.string().c_str(), "r");
+      if (ifile == NULL) {
+	std::cerr << "Fail to open file " << c.genofile.string() << std::endl;
+	return 1;
+      }
+      bcf_hdr_t* hdr = bcf_hdr_read(ifile);
+      if (hdr == NULL) {
+	std::cerr << "Fail to open index file " << c.genofile.string() << std::endl;
+	return 1;
+      }
+      bcf_hdr_destroy(hdr);
+      bcf_close(ifile);
+      c.hasGenoFile = true;
+    } else c.hasGenoFile = false;
+
+    // Check input VCF file (delly SV file)
+    if (vm.count("svfile")) {
+      if (!(boost::filesystem::exists(c.vcffile) && boost::filesystem::is_regular_file(c.vcffile) && boost::filesystem::file_size(c.vcffile))) {
+	std::cerr << "Input VCF/BCF file is missing: " << c.vcffile.string() << std::endl;
+	return 1;
+      }
+      htsFile* ifile = bcf_open(c.vcffile.string().c_str(), "r");
+      if (ifile == NULL) {
+	std::cerr << "Fail to open file " << c.vcffile.string() << std::endl;
+	return 1;
+      }
+      bcf_hdr_t* hdr = bcf_hdr_read(ifile);
+      if (hdr == NULL) {
+	std::cerr << "Fail to open index file " << c.vcffile.string() << std::endl;
+	return 1;
+      }
+      bcf_hdr_destroy(hdr);
+      bcf_close(ifile);
+      c.hasVcfFile = true;
+    } else c.hasVcfFile = false;
+    
     // Check bam file
     LibraryInfo li;
     if (!(boost::filesystem::exists(c.bamFile) && boost::filesystem::is_regular_file(c.bamFile) && boost::filesystem::file_size(c.bamFile))) {


=====================================
src/delly.cpp
=====================================
@@ -16,6 +16,7 @@
 #include "version.h"
 #include "delly.h"
 #include "filter.h"
+#include "classify.h"
 #include "merge.h"
 #include "tegua.h"
 #include "coral.h"
@@ -27,16 +28,17 @@ inline void
 displayUsage() {
   std::cout << "Usage: delly <command> <arguments>" << std::endl;
   std::cout << std::endl;
-  std::cout << "Short-read commands:" << std::endl;
+  std::cout << "Short-read SV calling:" << std::endl;
   std::cout << "    call         discover and genotype structural variants" << std::endl;
   std::cout << "    merge        merge structural variants across VCF/BCF files and within a single VCF/BCF file" << std::endl;
   std::cout << "    filter       filter somatic or germline structural variants" << std::endl;
   std::cout << std::endl;
-  std::cout << "Long-read commands:" << std::endl;
+  std::cout << "Long-read SV calling:" << std::endl;
   std::cout << "    lr           long-read SV discovery" << std::endl;
   std::cout << std::endl;
-  std::cout << "Read-depth commands:" << std::endl;
-  std::cout << "    rd           read-depth normalization" << std::endl;
+  std::cout << "Copy-number variant calling:" << std::endl;
+  std::cout << "    cnv          discover and genotype copy-number variants" << std::endl;
+  std::cout << "    classify     classify somatic or germline copy-number variants" << std::endl;
   std::cout << std::endl;
   std::cout << std::endl;
 }
@@ -73,9 +75,12 @@ int main(int argc, char **argv) {
     else if ((std::string(argv[1]) == "lr")) {
       return tegua(argc-1,argv+1);
     }
-    else if ((std::string(argv[1]) == "rd")) {
+    else if ((std::string(argv[1]) == "cnv")) {
       return coral(argc-1,argv+1);
     }
+    else if ((std::string(argv[1]) == "classify")) {
+      return classify(argc-1,argv+1);
+    }
     else if ((std::string(argv[1]) == "filter")) {
       return filter(argc-1,argv+1);
     }


=====================================
src/delly.h
=====================================
@@ -50,6 +50,7 @@ namespace torali
 
   // Config arguments
   struct Config {
+    bool islr;
     uint16_t minMapQual;
     uint16_t minTraQual;
     uint16_t minGenoQual;
@@ -200,7 +201,8 @@ namespace torali
     Config c;
     c.isHaplotagged = false;
     c.madNormalCutoff = 5;
-
+    c.islr = false;
+    
     // Define generic options
     std::string svtype;
     boost::program_options::options_description generic("Generic options");


=====================================
src/genotype.h
=====================================
@@ -31,11 +31,71 @@ namespace torali
 
     Geno() : svStartPrefix(-1), svStartSuffix(-1), svEndPrefix(-1), svEndSuffix(-1), svStart(-1), svEnd(-1), svt(-1) {}
   };
-  
 
-  template<typename TConfig, typename TSRStore, typename TJunctionMap, typename TReadCountMap>
+
+  inline float
+  percentIdentity(std::string const& s1, std::string const& s2, int32_t splitpos, int32_t window) {
+    // Get window boundaries
+    int32_t ws = std::max(splitpos - window, 0);
+    int32_t we = std::min(splitpos + window, (int32_t) s1.size());
+    // Find percent identity
+    bool varSeen = false;
+    bool refSeen = false;
+    int32_t refpos = 0;
+    uint32_t gapMM = 0;
+    uint32_t mm = 0;
+    uint32_t ma = 0;
+    float leftPerc = -1;
+    float rightPerc = -1;
+    bool inGap=false;
+    for(uint32_t j = 0; j < s1.size(); ++j) {
+      if (s2[j] != '-') varSeen = true;
+      if (s1[j] != '-') {
+	refSeen = true;
+	if ((refpos == splitpos) || (refpos == ws) || (refpos == we)) {
+	  if (refpos == splitpos) {
+	    leftPerc = 0;
+	    if (ma + mm > 0) leftPerc = (float) ma / (float) (ma + mm);
+	  }
+	  if (refpos == we) {
+	    rightPerc = 0;
+	    if (ma + mm > 0) rightPerc = (float) ma / (float) (ma + mm);
+	  }
+	  mm = 0;
+	  ma = 0;
+	  gapMM = 0;
+	}
+	++refpos;
+      }
+      if ((refSeen) && (varSeen)) {
+	// Internal gap?
+	if ((s2[j] == '-') || (s1[j] == '-')) {
+	  if (!inGap) {
+	    inGap = true;
+	    gapMM = 0;
+	  }
+	  gapMM += 1;
+	} else {
+	  if (inGap) {
+	    mm += gapMM;
+	    inGap=false;
+	  }
+	  if (s2[j] == s1[j]) ma += 1;
+	  else mm += 1;
+	}
+      }
+    }
+    if (rightPerc == -1) {
+      rightPerc = 0;
+      if (ma + mm > 0) rightPerc = (float) ma / (float) (ma + mm);
+    }
+    //std::cerr << ws << ',' << splitpos << ',' << we << ',' << leftPerc << ',' << rightPerc << std::endl;
+    return std::min(leftPerc, rightPerc); 
+  }
+
+  template<typename TConfig, typename TJunctionMap, typename TReadCountMap>
   inline void
-  trackRef(TConfig& c, std::vector<StructuralVariantRecord>& svs, TSRStore& srStore, TJunctionMap& jctMap, TReadCountMap& covMap) {
+  trackRef(TConfig& c, std::vector<StructuralVariantRecord>& svs, TJunctionMap& jctMap, TReadCountMap& covMap) {
     typedef std::vector<StructuralVariantRecord> TSVs;
     typedef std::vector<uint8_t> TQuality;
     typedef boost::multi_array<char, 2> TAlign;
@@ -113,7 +173,8 @@ namespace torali
       // Iterate all structural variants
       for(typename TSVs::iterator itSV = svs.begin(); itSV != svs.end(); ++itSV) {
 	if ((itSV->chr != refIndex) && (itSV->chr2 != refIndex)) continue;
-
+	if ((itSV->svt != 2) && (itSV->svt != 4)) continue;
+	
 	// Lazy loading of reference sequence
 	if (seq == NULL) {
 	  int32_t seqlen = -1;
@@ -173,14 +234,14 @@ namespace torali
 	  //std::cerr << "svid:" << itSV->id << ",consensus-to-reference-alignment" << std::endl;
 	  //for(uint32_t i = 0; i<align.shape()[0]; ++i) {
 	  //if (i == 0) {
-	  //  int32_t cpos = 0;
-	  //  for(uint32_t j = 0; j<align.shape()[1]; ++j) {
-	  //	if (align[i][j] != '-') ++cpos;
-	  //	if (cpos == ad.cStart) std::cerr << '|';
-	  //	else if (cpos == ad.cEnd) std::cerr << '|';
-	  //	else std::cerr << '#';
-	  //  }
-	  //  std::cerr << std::endl;
+	  //int32_t cpos = 0;
+	  //for(uint32_t j = 0; j<align.shape()[1]; ++j) {
+	  //if (align[i][j] != '-') ++cpos;
+	  //if (cpos == ad.cStart) std::cerr << '|';
+	  //else if (cpos == ad.cEnd) std::cerr << '|';
+	  //else std::cerr << '#';
+	  //}
+	  //std::cerr << std::endl;
 	  //}
 	  //for(uint32_t j = 0; j<align.shape()[1]; ++j) std::cerr << align[i][j];
 	  //std::cerr << std::endl;
@@ -265,18 +326,6 @@ namespace torali
 	  typedef std::map<int32_t, TRefSeq> TSVSeqHit;
 	  TSVSeqHit genoMap;
 
-	  // Any direct SV support
-	  /*
-	  std::size_t seed = hash_lr(rec);
-	  if (srStore.find(seed) != srStore.end()) {
-	    for(uint32_t ri = 0; ri < srStore[seed].size(); ++ri) {
-	      int32_t svid = srStore[seed][ri].svid;
-	      if (gbp[svid].left) genoMap.insert(std::make_pair(srStore[seed][ri].svid, srStore[seed][ri].sstart));
-	      else genoMap.insert(std::make_pair(srStore[seed][ri].svid, srStore[seed][ri].sstart + srStore[seed][ri].inslen));
-	    }
-	  }
-	  */
-	  
 	  // Parse the CIGAR
 	  uint32_t* cigar = bam_get_cigar(rec);
 	  for (std::size_t i = 0; i < rec->core.n_cigar; ++i) {
@@ -541,6 +590,280 @@ namespace torali
       sam_close(samfile[file_c]);
     }
   }
+
+  template<typename TConfig, typename TSRStore, typename TJunctionMap, typename TReadCountMap>
+  inline void
+  genotypeLR(TConfig& c, std::vector<StructuralVariantRecord>& svs, TSRStore& srStore, TJunctionMap& jctMap, TReadCountMap& covMap) {
+    typedef std::vector<StructuralVariantRecord> TSVs;
+    if (svs.empty()) return;
+    
+    typedef uint16_t TMaxCoverage;
+    uint32_t maxCoverage = std::numeric_limits<TMaxCoverage>::max();
+    
+    // Open file handles
+    typedef std::vector<samFile*> TSamFile;
+    typedef std::vector<hts_idx_t*> TIndex;
+    typedef std::vector<bam_hdr_t*> THeader;
+    TSamFile samfile(c.files.size());
+    TIndex idx(c.files.size());
+    THeader hdr(c.files.size());
+    int32_t totalTarget = 0;
+    for(uint32_t file_c = 0; file_c < c.files.size(); ++file_c) {
+      samfile[file_c] = sam_open(c.files[file_c].string().c_str(), "r");
+      hts_set_fai_filename(samfile[file_c], c.genome.string().c_str());
+      idx[file_c] = sam_index_load(samfile[file_c], c.files[file_c].string().c_str());
+      hdr[file_c] = sam_hdr_read(samfile[file_c]);
+      totalTarget += hdr[file_c]->n_targets;
+    }
+
+    // Parse genome chr-by-chr
+    boost::posix_time::ptime now = boost::posix_time::second_clock::local_time();
+    std::cout << '[' << boost::posix_time::to_simple_string(now) << "] " << "SV annotation" << std::endl;
+    boost::progress_display show_progress( hdr[0]->n_targets );
+
+    // Ref aligned reads
+    typedef std::vector<uint32_t> TRefAlignCount;
+    typedef std::vector<TRefAlignCount> TFileRefAlignCount;
+    TFileRefAlignCount refAlignedReadCount(c.files.size(), TRefAlignCount());
+    for(unsigned int file_c = 0; file_c < c.files.size(); ++file_c) refAlignedReadCount[file_c].resize(svs.size(), 0);
+
+    // Dump file
+    boost::iostreams::filtering_ostream dumpOut;
+    if (c.hasDumpFile) {
+      dumpOut.push(boost::iostreams::gzip_compressor());
+      dumpOut.push(boost::iostreams::file_sink(c.dumpfile.string().c_str(), std::ios_base::out | std::ios_base::binary));
+      dumpOut << "#svid\tbam\tqname\tchr\tpos\tmatechr\tmatepos\tmapq\ttype" << std::endl;
+    }
+
+
+    faidx_t* fai = fai_load(c.genome.string().c_str());
+    // Iterate chromosomes
+    for(int32_t refIndex=0; refIndex < (int32_t) hdr[0]->n_targets; ++refIndex) {
+      ++show_progress;
+      char* seq = NULL;
+      
+      // Iterate samples
+      for(unsigned int file_c = 0; file_c < c.files.size(); ++file_c) {
+	// Check we have mapped reads on this chromosome
+	bool nodata = true;
+	std::string suffix("cram");
+	std::string str(c.files[file_c].string());
+	if ((str.size() >= suffix.size()) && (str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0)) nodata = false;
+	uint64_t mapped = 0;
+	uint64_t unmapped = 0;
+	hts_idx_get_stat(idx[file_c], refIndex, &mapped, &unmapped);
+	if (mapped) nodata = false;
+	if (nodata) continue;
+
+	// Flag breakpoints
+	typedef std::set<int32_t> TIdSet;
+	typedef std::map<uint32_t, TIdSet> TBpToIdMap;
+	TBpToIdMap bpid;
+	typedef boost::dynamic_bitset<> TBitSet;
+	TBitSet bpOccupied(hdr[file_c]->target_len[refIndex], false);
+	for(typename TSVs::iterator itSV = svs.begin(); itSV != svs.end(); ++itSV) {
+	  if (itSV->chr == refIndex) {
+	    bpOccupied[itSV->svStart] = 1;
+	    if (bpid.find(itSV->svStart) == bpid.end()) bpid.insert(std::make_pair(itSV->svStart, TIdSet()));
+	    bpid[itSV->svStart].insert(itSV->id);
+	  }
+	  if (itSV->chr2 == refIndex) {
+	    bpOccupied[itSV->svEnd] = 1;
+	    if (bpid.find(itSV->svEnd) == bpid.end()) bpid.insert(std::make_pair(itSV->svEnd, TIdSet()));
+	    bpid[itSV->svEnd].insert(itSV->id);
+	  }
+	}
+	if (bpid.empty()) continue;
+
+	// Lazy loading of reference sequence
+	if (seq == NULL) {
+	  int32_t seqlen = -1;
+	  std::string tname(hdr[0]->target_name[refIndex]);
+	  seq = faidx_fetch_seq(fai, tname.c_str(), 0, hdr[0]->target_len[refIndex], &seqlen);
+	}    
+	
+	// Coverage track
+	typedef std::vector<TMaxCoverage> TBpCoverage;
+	TBpCoverage covBases(hdr[file_c]->target_len[refIndex], 0);
+
+	// Count reads
+	hts_itr_t* iter = sam_itr_queryi(idx[file_c], refIndex, 0, hdr[file_c]->target_len[refIndex]);
+	bam1_t* rec = bam_init1();
+	while (sam_itr_next(samfile[file_c], iter, rec) >= 0) {
+	  // Genotyping only primary alignments
+	  if (rec->core.flag & (BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP | BAM_FSUPPLEMENTARY | BAM_FUNMAP)) continue;
+	  
+	  // Read hash
+	  std::size_t seed = hash_lr(rec);
+
+	  // Reference and sequence pointer
+	  uint32_t rp = rec->core.pos; // reference pointer
+	  uint32_t sp = 0; // sequence pointer
+
+	  // Get sequence
+	  std::string sequence;
+	  sequence.resize(rec->core.l_qseq);
+	  uint8_t* seqptr = bam_get_seq(rec);
+	  for (int i = 0; i < rec->core.l_qseq; ++i) sequence[i] = "=ACMGRSVTWYHKDBN"[bam_seqi(seqptr, i)];
+	  
+	  // Any REF support
+	  std::string refAlign = "";
+	  std::string altAlign = "";
+	  std::vector<uint32_t> hits;
+	  uint32_t* cigar = bam_get_cigar(rec);
+	  for (std::size_t i = 0; i < rec->core.n_cigar; ++i) {
+	    if ((bam_cigar_op(cigar[i]) == BAM_CMATCH) || (bam_cigar_op(cigar[i]) == BAM_CEQUAL) || (bam_cigar_op(cigar[i]) == BAM_CDIFF)) {
+	      // Fetch reference alignments
+	      for(uint32_t k = 0; k < bam_cigar_oplen(cigar[i]); ++k) {
+		if ((rp < hdr[file_c]->target_len[refIndex]) && (covBases[rp] < maxCoverage - 1)) ++covBases[rp];
+		refAlign += seq[rp];
+		altAlign += sequence[sp];
+		if (bpOccupied[rp]) hits.push_back(rp);
+		++sp;
+		++rp;
+	      }
+	    } else if ((bam_cigar_op(cigar[i]) == BAM_CDEL) || (bam_cigar_op(cigar[i]) == BAM_CREF_SKIP)) {
+	      for(uint32_t k = 0; k < bam_cigar_oplen(cigar[i]); ++k) {
+		refAlign += seq[rp];
+		altAlign += "-";
+		if (bpOccupied[rp]) hits.push_back(rp);
+		++rp;
+	      }
+	    } else if (bam_cigar_op(cigar[i]) == BAM_CINS) {
+	      for(uint32_t k = 0; k < bam_cigar_oplen(cigar[i]); ++k) {
+		refAlign += "-";
+		altAlign += sequence[sp];
+		++sp;
+	      }
+	    } else if (bam_cigar_op(cigar[i]) == BAM_CSOFT_CLIP) {
+	      sp += bam_cigar_oplen(cigar[i]);
+	    } else if (bam_cigar_op(cigar[i]) == BAM_CHARD_CLIP) {
+	      // Do nothing
+	    } else {
+	      std::cerr << "Unknown Cigar options" << std::endl;
+	    }
+	  }
+
+	  // Any ALT support?
+	  TIdSet altAssigned;
+	  if (srStore.find(seed) != srStore.end()) {
+	    for(uint32_t ri = 0; ri < srStore[seed].size(); ++ri) {
+	      int32_t svid = srStore[seed][ri].svid;
+	      if (svid == -1) continue;
+	      //if ((svs[svid].svt == 2) || (svs[svid].svt == 4)) continue;
+	      altAssigned.insert(svid);
+	      uint8_t* hpptr = bam_aux_get(rec, "HP");
+	      if (c.hasDumpFile) {
+		std::string svidStr(_addID(svs[svid].svt));
+		std::string padNumber = boost::lexical_cast<std::string>(svid);
+		padNumber.insert(padNumber.begin(), 8 - padNumber.length(), '0');
+		svidStr += padNumber;
+		dumpOut << svidStr << "\t" << c.files[file_c].string() << "\t" << bam_get_qname(rec) << "\t" << hdr[file_c]->target_name[rec->core.tid] << "\t" << rec->core.pos << "\t" << hdr[file_c]->target_name[rec->core.mtid] << "\t" << rec->core.mpos << "\t" << (int32_t) rec->core.qual << "\tSR" << std::endl;
+	      }
+
+
+	      // ToDo
+	      //jctMap[file_c][svid].alt.push_back((uint8_t) std::min((uint32_t) score, (uint32_t) rec->core.qual));
+	      jctMap[file_c][svid].alt.push_back((uint8_t) std::min((uint32_t) 20, (uint32_t) rec->core.qual));
+	      if (hpptr) {
+		c.isHaplotagged = true;
+		int hap = bam_aux2i(hpptr);
+		if (hap == 1) ++jctMap[file_c][svid].alth1;
+		else ++jctMap[file_c][svid].alth2;
+	      }
+	    }
+	  }
+
+	  // Any REF support
+	  if (hits.empty()) continue;
+
+	  // Sufficiently long flank mapping?
+	  if ((rp - rec->core.pos) < c.minimumFlankSize) continue;
+
+	  // Iterate all spanned SVs
+	  for(uint32_t idx = 0; idx < hits.size(); ++idx) {
+	    //std::cerr << hits[idx] - rec->core.pos << ',' << rp - hits[idx] << std::endl;
+	    
+	    // Long enough flanking sequence
+	    if (hits[idx] < rec->core.pos + c.minimumFlankSize) continue;
+	    if (rp < hits[idx] + c.minimumFlankSize) continue;
+
+	    // Confident mapping?
+	    float percid = percentIdentity(refAlign, altAlign, hits[idx] - rec->core.pos, c.minRefSep *  2);
+	    double score = percid * percid * percid * percid * percid * percid * percid * percid * 30;
+	    if (score < c.minGenoQual) continue;
+	    
+	    for(typename TIdSet::const_iterator its = bpid[hits[idx]].begin(); its != bpid[hits[idx]].end(); ++its) {
+	      int32_t svid = *its;
+	      //if ((svs[svid].svt == 2) || (svs[svid].svt == 4)) continue;
+	      if (altAssigned.find(svid) != altAssigned.end()) continue; 
+	      //std::cerr << svs[svid].chr << ',' << svs[svid].svStart << ',' << svs[svid].chr2 << ',' << svs[svid].svEnd << std::endl;
+	      if (++refAlignedReadCount[file_c][svid] % 2) {
+		uint8_t* hpptr = bam_aux_get(rec, "HP");
+		jctMap[file_c][svid].ref.push_back((uint8_t) std::min((uint32_t) score, (uint32_t) rec->core.qual));
+		if (hpptr) {
+		  c.isHaplotagged = true;
+		  int hap = bam_aux2i(hpptr);
+		  if (hap == 1) ++jctMap[file_c][svid].refh1;
+		  else ++jctMap[file_c][svid].refh2;
+		}
+	      }
+	    }
+	  }
+	}
+	// Clean-up
+	bam_destroy1(rec);
+	hts_itr_destroy(iter);
+      
+	// Assign SV support
+	for(uint32_t i = 0; i < svs.size(); ++i) {
+	  if (svs[i].chr == refIndex) {
+	    int32_t halfSize = (svs[i].svEnd - svs[i].svStart)/2;
+	    if ((_translocation(svs[i].svt)) || (svs[i].svt == 4)) halfSize = 500;
+
+	    // Left region
+	    int32_t lstart = std::max(svs[i].svStart - halfSize, 0);
+	    int32_t lend = svs[i].svStart;
+	    int32_t covbase = 0;
+	    for(uint32_t k = lstart; ((k < (uint32_t) lend) && (k < hdr[file_c]->target_len[refIndex])); ++k) covbase += covBases[k];
+	    covMap[file_c][svs[i].id].leftRC = covbase;
+
+	    // Actual SV
+	    covbase = 0;
+	    int32_t mstart = svs[i].svStart;
+	    int32_t mend = svs[i].svEnd;
+	    if ((_translocation(svs[i].svt)) || (svs[i].svt == 4)) {
+	      mstart = std::max(svs[i].svStart - halfSize, 0);
+	      mend = std::min(svs[i].svStart + halfSize, (int32_t) hdr[file_c]->target_len[refIndex]);
+	    }
+	    for(uint32_t k = mstart; ((k < (uint32_t) mend) && (k < hdr[file_c]->target_len[refIndex])); ++k) covbase += covBases[k];
+	    covMap[file_c][svs[i].id].rc = covbase;
+
+	    // Right region
+	    covbase = 0;
+	    int32_t rstart = svs[i].svEnd;
+	    int32_t rend = std::min(svs[i].svEnd + halfSize, (int32_t) hdr[file_c]->target_len[refIndex]);
+	    if ((_translocation(svs[i].svt)) || (svs[i].svt == 4)) {
+	      rstart = svs[i].svStart;
+	      rend = std::min(svs[i].svStart + halfSize, (int32_t) hdr[file_c]->target_len[refIndex]);
+	    }
+	    for(uint32_t k = rstart; ((k < (uint32_t) rend) && (k < hdr[file_c]->target_len[refIndex])); ++k) covbase += covBases[k];
+	    covMap[file_c][svs[i].id].rightRC = covbase;
+	  }
+	}
+      }
+      if (seq != NULL) free(seq);
+    }
+    // Clean-up
+    fai_destroy(fai);
+
+    // Clean-up
+    for(unsigned int file_c = 0; file_c < c.files.size(); ++file_c) {
+      bam_hdr_destroy(hdr[file_c]);	  
+      hts_idx_destroy(idx[file_c]);
+      sam_close(samfile[file_c]);
+    }
+  }
      
   
 }


=====================================
src/junction.h
=====================================
@@ -248,10 +248,10 @@ namespace torali
 		// Same direction, opposing soft-clips
 		if (it->second[chr1ev].scleft != it->second[chr2ev].scleft) {
 		  if (it->second[chr1ev].scleft) {
-		    // 5to3
+		    // 3to5
 		    br[DELLY_SVT_TRANS + 2].push_back(SRBamRecord(it->second[chr2ev].refidx, it->second[chr2ev].refpos, it->second[chr1ev].refidx, it->second[chr1ev].refpos, rst, std::min(it->second[j].seqpos, it->second[i].seqpos), qval, std::abs(it->second[j].seqpos - it->second[i].seqpos), it->first));
 		  } else {
-		    // 3to5
+		    // 5to3
 		    br[DELLY_SVT_TRANS + 3].push_back(SRBamRecord(it->second[chr2ev].refidx, it->second[chr2ev].refpos, it->second[chr1ev].refidx, it->second[chr1ev].refpos, rst, std::min(it->second[j].seqpos, it->second[i].seqpos), qval, std::abs(it->second[j].seqpos - it->second[i].seqpos), it->first));
 		  }
 		}
@@ -259,10 +259,10 @@ namespace torali
 		// Opposing direction, same soft-clips
 		if (it->second[chr1ev].scleft == it->second[chr2ev].scleft) {
 		  if (it->second[chr1ev].scleft) {
-		    // 3to3
+		    // 5to5
 		    br[DELLY_SVT_TRANS + 1].push_back(SRBamRecord(it->second[chr2ev].refidx, it->second[chr2ev].refpos, it->second[chr1ev].refidx, it->second[chr1ev].refpos, rst, std::min(it->second[j].seqpos, it->second[i].seqpos), qval, std::abs(it->second[j].seqpos - it->second[i].seqpos), it->first));
 		  } else {
-		    // 5to5
+		    // 3to3
 		    br[DELLY_SVT_TRANS + 0].push_back(SRBamRecord(it->second[chr2ev].refidx, it->second[chr2ev].refpos, it->second[chr1ev].refidx, it->second[chr1ev].refpos, rst, std::min(it->second[j].seqpos, it->second[i].seqpos), qval, std::abs(it->second[j].seqpos - it->second[i].seqpos), it->first));
 		  }
 		}
@@ -427,10 +427,10 @@ namespace torali
   fetchSVs(TConfig const& c, TReadBp& readBp, std::vector<std::vector<SRBamRecord> >& br) {
     // Extract BAM records
     if ((!c.svtcmd) || (c.svtset.find(2) != c.svtset.end())) selectDeletions(c, readBp, br);
-    //if ((!c.svtcmd) || (c.svtset.find(3) != c.svtset.end())) selectDuplications(c, readBp, br);
-    //if ((!c.svtcmd) || (c.svtset.find(0) != c.svtset.end()) || (c.svtset.find(1) != c.svtset.end())) selectInversions(c, readBp, br);
+    if ((!c.svtcmd) || (c.svtset.find(3) != c.svtset.end())) selectDuplications(c, readBp, br);
+    if ((!c.svtcmd) || (c.svtset.find(0) != c.svtset.end()) || (c.svtset.find(1) != c.svtset.end())) selectInversions(c, readBp, br);
     if ((!c.svtcmd) || (c.svtset.find(4) != c.svtset.end())) selectInsertions(c, readBp, br);
-    //if ((!c.svtcmd) || (c.svtset.find(5) != c.svtset.end()) || (c.svtset.find(6) != c.svtset.end()) || (c.svtset.find(7) != c.svtset.end()) || (c.svtset.find(8) != c.svtset.end())) selectTranslocations(c, readBp, br);
+    if ((!c.svtcmd) || (c.svtset.find(5) != c.svtset.end()) || (c.svtset.find(6) != c.svtset.end()) || (c.svtset.find(7) != c.svtset.end()) || (c.svtset.find(8) != c.svtset.end())) selectTranslocations(c, readBp, br);
   }
 
   template<typename TConfig, typename TValidRegions, typename TSvtSRBamRecord>
@@ -457,7 +457,7 @@ namespace torali
 	 	 
     // Debug
     //outputSRBamRecords(c, srBR);
-    
+
     // Cluster BAM records
     for(uint32_t svt = 0; svt < srBR.size(); ++svt) {
       if (srBR[svt].empty()) continue;
@@ -491,12 +491,12 @@ namespace torali
     bam_hdr_t* hdr = sam_hdr_read(samfile);
 
     // Header
-    std::cerr << "chr1\tpos1\tchr2\tpos2\tsvtype\tct\tinslen" << std::endl;
+    std::cerr << "id\tchr1\tpos1\tchr2\tpos2\tsvtype\tct\tinslen" << std::endl;
 
     // SVs
     for(uint32_t svt = 0; svt < br.size(); ++svt) {
       for(uint32_t i = 0; i < br[svt].size(); ++i) {
-	std::cerr << hdr->target_name[br[svt][i].chr] << '\t' << br[svt][i].pos << '\t' << hdr->target_name[br[svt][i].chr2] << '\t' << br[svt][i].pos2 << '\t' << _addID(svt) << '\t' << _addOrientation(svt) << '\t' << br[svt][i].inslen << std::endl;
+	std::cerr << br[svt][i].id << '\t' << hdr->target_name[br[svt][i].chr] << '\t' << br[svt][i].pos << '\t' << hdr->target_name[br[svt][i].chr2] << '\t' << br[svt][i].pos2 << '\t' << _addID(svt) << '\t' << _addOrientation(svt) << '\t' << br[svt][i].inslen << std::endl;
       }
     }
     // Clean-up
@@ -528,7 +528,7 @@ namespace torali
 	hts_itr_t* iter = sam_itr_queryi(idx[file_c], refIndex, 0, hdr->target_len[refIndex]);
 	bam1_t* rec = bam_init1();
 	while (sam_itr_next(samfile[file_c], iter, rec) >= 0) {
-	  if (rec->core.flag & (BAM_FQCFAIL | BAM_FDUP | BAM_FUNMAP | BAM_FSECONDARY | BAM_FSUPPLEMENTARY)) continue;
+	  if (rec->core.flag & (BAM_FQCFAIL | BAM_FDUP | BAM_FUNMAP)) continue;
 	  std::size_t seed = hash_lr(rec);
 	  std::string qname = bam_get_qname(rec);
 	  if (hm.find(seed) == hm.end()) hm.insert(std::make_pair(seed, qname));
@@ -578,18 +578,24 @@ namespace torali
     bam_hdr_t* hdr = sam_hdr_read(samfile);
 
     // Header
-    std::cerr << "chr1\tpos1\tchr2\tpos2\tsvtype\tct\tpeSupport\tsrSupport" << std::endl;
+    std::cerr << "chr1\tpos1\tchr2\tpos2\tsvtype\tct\tpeSupport\tsrSupport\tconsensus" << std::endl;
     
     // SVs
     for(uint32_t i = 0; i < svs.size(); ++i) {
       if (svs[i].svt != svt) continue;
-      std::cerr << hdr->target_name[svs[i].chr] << '\t' << svs[i].svStart << '\t' << hdr->target_name[svs[i].chr2] << '\t' << svs[i].svEnd << '\t' << _addID(svs[i].svt) << '\t' << _addOrientation(svs[i].svt) << '\t' << svs[i].peSupport << '\t' << svs[i].srSupport << std::endl;
+      std::cerr << hdr->target_name[svs[i].chr] << '\t' << svs[i].svStart << '\t' << hdr->target_name[svs[i].chr2] << '\t' << svs[i].svEnd << '\t' << _addID(svs[i].svt) << '\t' << _addOrientation(svs[i].svt) << '\t' << svs[i].peSupport << '\t' << svs[i].srSupport << '\t' << svs[i].consensus << std::endl;
     }
     
     // Clean-up
     bam_hdr_destroy(hdr);
     sam_close(samfile);
   }
+
+  template<typename TConfig>
+  inline void
+  outputStructuralVariants(TConfig const& c, std::vector<StructuralVariantRecord> const& svs) {
+    for(uint32_t i = 0; i <= 8; ++i) outputStructuralVariants(c, svs, i);
+  }
   
 
 }


=====================================
src/merge.h
=====================================
@@ -41,6 +41,7 @@ namespace torali
 struct MergeConfig {
   bool filterForPass;
   bool filterForPrecise;
+  bool cnvMode;
   uint32_t chunksize;
   uint32_t svcounter;
   uint32_t bpoffset;
@@ -117,7 +118,10 @@ void _fillIntervalMap(MergeConfig const& c, TGenomeIntervals& iScore, TContigMap
 
       // Correct SV type
       int32_t recsvt = -1;
-      if ((bcf_get_info_string(hdr, rec, "SVTYPE", &svt, &nsvt) > 0) && (bcf_get_info_string(hdr, rec, "CT", &ct, &nct) > 0)) recsvt = _decodeOrientation(std::string(ct), std::string(svt));
+      if (bcf_get_info_string(hdr, rec, "SVTYPE", &svt, &nsvt) > 0) {
+	if (bcf_get_info_string(hdr, rec, "CT", &ct, &nct) > 0) recsvt = _decodeOrientation(std::string(ct), std::string(svt));
+	else recsvt = _decodeOrientation(std::string("NA"), std::string(svt));
+      }
       if (recsvt != svtin) continue;
 
       // Correct size?
@@ -183,9 +187,12 @@ void _fillIntervalMap(MergeConfig const& c, TGenomeIntervals& iScore, TContigMap
 	if (rv != NULL) free(rv);
 	if (rr != NULL) free(rr);
 	if (gt != NULL) free(gt);
-	if ((maxvaf < c.vaf) || (maxcov < c.coverage)) continue;
+	if (recsvt != 9) {
+	  if ((maxvaf < c.vaf) || (maxcov < c.coverage)) continue;
+	}
       }
       // Store the interval
+      //std::cerr << tid << ',' << svStart << ',' << svEnd << ',' << rec->qual << std::endl;
       iScore[tid].push_back(IntervalScore(svStart, svEnd, rec->qual));
     }
     if (svend != NULL) free(svend);
@@ -548,6 +555,216 @@ void _outputSelectedIntervals(MergeConfig& c, TGenomeIntervals const& iSelected,
   bcf_index_build(c.outfile.string().c_str(), 14);
 }
 
+
+
+  template<typename TGenomeIntervals, typename TContigMap>
+  void _outputSelectedIntervalsCNVs(MergeConfig& c, TGenomeIntervals const& iSelected, TContigMap& cMap) {
+    typedef typename TGenomeIntervals::value_type TIntervalScores;
+    typedef typename TIntervalScores::value_type IntervalScore;
+
+    boost::posix_time::ptime now = boost::posix_time::second_clock::local_time();
+    std::cout << '[' << boost::posix_time::to_simple_string(now) << "] " << "Filtering SVs" << std::endl;
+
+    // Open output VCF file
+    htsFile *fp = hts_open(c.outfile.string().c_str(), "wb");
+    bcf_hdr_t *hdr_out = bcf_hdr_init("w");
+
+    // Write VCF header
+    boost::gregorian::date today = now.date();
+    std::string datestr("##fileDate=");
+    datestr += boost::gregorian::to_iso_string(today);
+    bcf_hdr_append(hdr_out, datestr.c_str());
+    bcf_hdr_append(hdr_out, "##ALT=<ID=CNV,Description=\"copy-number variants\">");
+    bcf_hdr_append(hdr_out, "##FILTER=<ID=LowQual,Description=\"Poor quality copy-number variant\">");
+    bcf_hdr_append(hdr_out, "##INFO=<ID=CIEND,Number=2,Type=Integer,Description=\"Confidence interval around END\">");
+    bcf_hdr_append(hdr_out, "##INFO=<ID=CIPOS,Number=2,Type=Integer,Description=\"Confidence interval around POS\">");
+    bcf_hdr_append(hdr_out, "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the copy-number variant\">");
+    bcf_hdr_append(hdr_out, "##INFO=<ID=MP,Number=1,Type=Float,Description=\"Mappable fraction of CNV\">");
+    bcf_hdr_append(hdr_out, "##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description=\"Imprecise copy-number variant\">");
+    bcf_hdr_append(hdr_out, "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">");
+    bcf_hdr_append(hdr_out, "##INFO=<ID=SVMETHOD,Number=1,Type=String,Description=\"Type of approach used to detect CNV\">");
+  
+    // Add reference contigs
+    uint32_t numseq = 0;
+    typedef std::map<uint32_t, std::string> TReverseMap;
+    TReverseMap rMap;
+    for(typename TContigMap::iterator cIt = cMap.begin(); cIt != cMap.end(); ++cIt, ++numseq) rMap[cIt->second] = cIt->first;
+    for(typename TReverseMap::iterator rIt = rMap.begin(); rIt != rMap.end(); ++rIt) {
+      std::string refname("##contig=<ID=");
+      refname += rIt->second + ">";
+      bcf_hdr_append(hdr_out, refname.c_str());
+    }
+    bcf_hdr_add_sample(hdr_out, NULL);
+    if (bcf_hdr_write(fp, hdr_out) != 0) std::cerr << "Error: Failed to write BCF header!" << std::endl;
+
+    // Duplicate filter (identical start, end, score values)
+    typedef std::pair<uint32_t, uint32_t> TStartEnd;
+    typedef std::set<TStartEnd> TIntervalSet;
+    typedef std::vector<TIntervalSet> TGenomicIntervalSet;
+    TGenomicIntervalSet gis(numseq);
+
+    // Parse input VCF files
+    bcf1_t *rout = bcf_init();
+    typedef std::vector<htsFile*> THtsFile;
+    typedef std::vector<bcf_hdr_t*> TBcfHeader;
+    typedef std::vector<bcf1_t*> TBcfRecord;
+    typedef std::vector<bool> TEof;
+    THtsFile ifile(c.files.size());
+    TBcfHeader hdr(c.files.size());
+    TBcfRecord rec(c.files.size());
+    TEof eof(c.files.size());
+    uint32_t allEOF = 0;
+    for(unsigned int file_c = 0; file_c < c.files.size(); ++file_c) {
+      ifile[file_c] = bcf_open(c.files[file_c].string().c_str(), "r");
+      hdr[file_c] = bcf_hdr_read(ifile[file_c]);
+      if (bcf_hdr_set_samples(hdr[file_c], NULL, false) != 0) std::cerr << "Error: Failed to set sample information!" << std::endl;
+      rec[file_c] = bcf_init();
+      if (bcf_read(ifile[file_c], hdr[file_c], rec[file_c]) == 0) {
+	bcf_unpack(rec[file_c], BCF_UN_INFO);
+	eof[file_c] = false;
+      } else {
+	++allEOF;
+	eof[file_c] = true;
+      }
+    }
+
+    int32_t nsvend = 0;
+    int32_t* svend = NULL;
+    int32_t nmp = 0;
+    float* mp = NULL;
+    int32_t nsvt = 0;
+    char* svt = NULL;
+    int32_t ncipos = 0;
+    int32_t* cipos = NULL;
+    int32_t nciend = 0;
+    int32_t* ciend = NULL;
+    while (allEOF < c.files.size()) {
+      // Find next sorted record
+      int32_t idx = -1;
+      for(unsigned int file_c = 0; file_c < c.files.size(); ++file_c) {
+	if (!eof[file_c]) {
+	  if ((idx < 0) || (rec[idx]->rid > rec[file_c]->rid) || ((rec[idx]->rid == rec[file_c]->rid) && (rec[idx]->pos > rec[file_c]->pos))) idx = file_c;
+	}
+      }
+
+      // Correct SV type
+      int32_t recsvt = -1;
+      if (bcf_get_info_string(hdr[idx], rec[idx], "SVTYPE", &svt, &nsvt) > 0) recsvt = _decodeOrientation(std::string("NA"), std::string(svt));
+      // CNV ?
+      if (recsvt == 9) {
+	// Check PASS
+	bool pass = true;
+	if (c.filterForPass) pass = (bcf_has_filter(hdr[idx], rec[idx], const_cast<char*>("PASS"))==1);
+
+	// Check PRECISE
+	bool precise = false;
+	bool passPrecise = true;
+	if (bcf_get_info_flag(hdr[idx], rec[idx], "PRECISE", 0, 0) > 0) precise=true;
+	if ((c.filterForPrecise) && (!precise)) passPrecise = false;
+
+	// Check PASS and precise
+	if ((passPrecise) && (pass)) {
+	  // Correct size
+	  std::string chrName(bcf_hdr_id2name(hdr[idx], rec[idx]->rid));
+	  uint32_t tid = cMap[chrName];
+	  uint32_t svStart = rec[idx]->pos;
+	  uint32_t svEnd = svStart + 1;
+	  if (bcf_get_info_int32(hdr[idx], rec[idx], "END", &svend, &nsvend) > 0) svEnd = *svend;
+	  
+	  // Parse INFO fields
+	  if ((svEnd - svStart >= c.minsize) && (svEnd - svStart <= c.maxsize)) {
+	    int32_t score = rec[idx]->qual;
+	  
+	    // Is this a selected interval
+	    typename TIntervalScores::const_iterator iter = std::lower_bound(iSelected[tid].begin(), iSelected[tid].end(), IntervalScore(svStart, svEnd, score), SortIScores<IntervalScore>());
+	    bool foundInterval = false;
+	    for(; (iter != iSelected[tid].end()) && (iter->start == svStart); ++iter) {
+	      if ((iter->start == svStart) && (iter->end == svEnd) && (iter->score == score)) {
+		// Duplicate?
+		if (gis[tid].find(std::make_pair(svStart, svEnd)) == gis[tid].end()) {
+		  foundInterval = true;
+		  gis[tid].insert(std::make_pair(svStart, svEnd));
+		}
+		break;
+	      }
+	    }
+	    if (foundInterval) {
+	      // Fetch missing INFO fields
+	      bcf_get_info_int32(hdr[idx], rec[idx], "CIPOS", &cipos, &ncipos);
+	      bcf_get_info_int32(hdr[idx], rec[idx], "CIEND", &ciend, &nciend);
+	      float mpval = 0;
+	      if (bcf_get_info_float(hdr[idx], rec[idx], "MP", &mp, &nmp) > 0) mpval = *mp;
+	      
+	      // Create new record
+	      rout->rid = bcf_hdr_name2id(hdr_out, chrName.c_str());
+	      rout->pos = rec[idx]->pos;
+	      rout->qual = rec[idx]->qual;
+	      std::string id;
+	      if (c.files.size() == 1) id = std::string(rec[idx]->d.id); // Within one VCF file IDs are unique
+	      else {
+		id += _addID(recsvt);
+		std::string padNumber = boost::lexical_cast<std::string>(c.svcounter++);
+		padNumber.insert(padNumber.begin(), 8 - padNumber.length(), '0');
+		id += padNumber;
+	      }
+	      bcf_update_id(hdr_out, rout, id.c_str());
+	      std::string refAllele = rec[idx]->d.allele[0];
+	      std::string altAllele = rec[idx]->d.allele[1];
+	      std::string alleles = refAllele + "," + altAllele;
+	      bcf_update_alleles_str(hdr_out, rout, alleles.c_str());
+	      int32_t tmppass = bcf_hdr_id2int(hdr_out, BCF_DT_ID, "PASS");
+	      bcf_update_filter(hdr_out, rout, &tmppass, 1);
+	    
+	      // Add INFO fields
+	      if (precise) bcf_update_info_flag(hdr_out, rout, "PRECISE", NULL, 1);
+	      else bcf_update_info_flag(hdr_out, rout, "IMPRECISE", NULL, 1);
+	      bcf_update_info_string(hdr_out, rout, "SVTYPE", _addID(recsvt).c_str());
+	      std::string dellyVersion("EMBL.DELLYv");
+	      dellyVersion += dellyVersionNumber;
+	      bcf_update_info_string(hdr_out,rout, "SVMETHOD", dellyVersion.c_str());
+	      bcf_update_info_int32(hdr_out, rout, "END", &svEnd, 1);
+	      bcf_update_info_int32(hdr_out, rout, "CIPOS", cipos, 2);
+	      bcf_update_info_int32(hdr_out, rout, "CIEND", ciend, 2);
+	      bcf_update_info_float(hdr_out, rout, "MP", &mpval, 1);
+
+	      // Write record
+	      bcf_write1(fp, hdr_out, rout);
+	      bcf_clear1(rout);	  
+	      //std::cerr << bcf_hdr_id2name(hdr[idx], tid) << '\t' << svStart << '\t' << svEnd << std::endl;
+	    }
+	  }
+	}
+      }
+
+      // Fetch next record
+      if (bcf_read(ifile[idx], hdr[idx], rec[idx]) == 0) bcf_unpack(rec[idx], BCF_UN_INFO);
+      else {
+	++allEOF;
+	eof[idx] = true;
+      }
+    }
+    if (svend != NULL) free(svend);
+    if (mp != NULL) free(mp);
+    if (svt != NULL) free(svt);
+    if (cipos != NULL) free(cipos);
+    if (ciend != NULL) free(ciend);
+
+    // Clean-up
+    for(unsigned int file_c = 0; file_c < c.files.size(); ++file_c) {
+      bcf_hdr_destroy(hdr[file_c]);
+      bcf_close(ifile[file_c]);
+      bcf_destroy(rec[file_c]);
+    }
+
+    // Close VCF file
+    bcf_destroy(rout);
+    bcf_hdr_destroy(hdr_out);
+    hts_close(fp);
+
+    // Build index
+    bcf_index_build(c.outfile.string().c_str(), 14);
+  }
+
 inline void
 mergeBCFs(MergeConfig& c, std::vector<boost::filesystem::path> const& cts) {
   boost::posix_time::ptime now = boost::posix_time::second_clock::local_time();
@@ -660,7 +877,8 @@ mergeRun(MergeConfig& c, int32_t const svt) {
   for(uint32_t i = 0; i<numseq; ++i) std::sort(iSelected[i].begin(), iSelected[i].end(), SortIScores<IntervalScore>());
 
   // Output best intervals
-  _outputSelectedIntervals(c, iSelected, contigMap, svt);
+  if (svt == 9) _outputSelectedIntervalsCNVs(c, iSelected, contigMap);
+  else _outputSelectedIntervals(c, iSelected, contigMap, svt);
 
   // End
   boost::posix_time::ptime now = boost::posix_time::second_clock::local_time();
@@ -683,6 +901,7 @@ int merge(int argc, char **argv) {
     ("coverage,v", boost::program_options::value<uint32_t>(&c.coverage)->default_value(10), "min. coverage")
     ("minsize,m", boost::program_options::value<uint32_t>(&c.minsize)->default_value(0), "min. SV size")
     ("maxsize,n", boost::program_options::value<uint32_t>(&c.maxsize)->default_value(1000000), "max. SV size")
+    ("cnvmode,e", "Merge delly CNV files")
     ("precise,c", "Filter sites for PRECISE")
     ("pass,p", "Filter sites for PASS")
     ;
@@ -728,6 +947,14 @@ int merge(int argc, char **argv) {
   if (vm.count("precise")) c.filterForPrecise = true;
   else c.filterForPrecise = false;
 
+  // Merge CNVs
+  if (vm.count("cnvmode")) c.cnvMode = true;
+  else c.cnvMode = false;
+
+  // Check output files
+  if (!_outfileValid(c.outfile)) return 1;
+  if (!_outfileValid(boost::filesystem::path(c.outfile.string() + ".csi"))) return 1;
+
   // Show cmd
   boost::posix_time::ptime now = boost::posix_time::second_clock::local_time();
   std::cout << '[' << boost::posix_time::to_simple_string(now) << "] ";
@@ -797,9 +1024,15 @@ int merge(int argc, char **argv) {
   }
   
   // Run merging
+  int32_t minSVT = 0;
+  int32_t maxSVT = 9;
+  if (c.cnvMode) {
+    minSVT = 9;
+    maxSVT = 10;
+  }
   boost::filesystem::path oldPath = c.outfile;
-  std::vector<boost::filesystem::path> svtCollect(9);
-  for(int32_t svt = 0; svt < 9; ++svt) {
+  std::vector<boost::filesystem::path> svtCollect(maxSVT);
+  for(int32_t svt = minSVT; svt < maxSVT; ++svt) {
     boost::uuids::uuid uuid = boost::uuids::random_generator()();
     std::string filename = "svt" + boost::lexical_cast<std::string>(svt) + "_" + boost::lexical_cast<std::string>(uuid) + ".bcf";
     svtCollect[svt] = filename;
@@ -843,10 +1076,19 @@ int merge(int argc, char **argv) {
   
   // Merge temporary files
   c.outfile = oldPath;
-  mergeBCFs(c, svtCollect);
-  for(int32_t svt = 0; svt < 9; ++svt) {
-    boost::filesystem::remove(svtCollect[svt]);
-    boost::filesystem::remove(boost::filesystem::path(svtCollect[svt].string() + ".csi"));
+  if (c.cnvMode) {
+    // Copy
+    boost::filesystem::copy_file(svtCollect[9], c.outfile);
+    boost::filesystem::copy_file(boost::filesystem::path(svtCollect[9].string() + ".csi"), boost::filesystem::path(c.outfile.string() + ".csi"));
+    // Delete
+    boost::filesystem::remove(svtCollect[9]);
+    boost::filesystem::remove(boost::filesystem::path(svtCollect[9].string() + ".csi"));
+  } else {
+    mergeBCFs(c, svtCollect);
+    for(int32_t svt = minSVT; svt < maxSVT; ++svt) {
+      boost::filesystem::remove(svtCollect[svt]);
+      boost::filesystem::remove(boost::filesystem::path(svtCollect[svt].string() + ".csi"));
+    }
   }
   return 0;
 }


=====================================
src/modvcf.h
=====================================
@@ -453,7 +453,7 @@ vcfOutput(TConfig const& c, std::vector<TStructuralVariantRecord> const& svs, TJ
   bcf_hdr_append(hdr, "##FORMAT=<ID=RC,Number=1,Type=Integer,Description=\"Raw high-quality read counts or base counts for the SV\">");
   bcf_hdr_append(hdr, "##FORMAT=<ID=RCL,Number=1,Type=Integer,Description=\"Raw high-quality read counts or base counts for the left control region\">");
   bcf_hdr_append(hdr, "##FORMAT=<ID=RCR,Number=1,Type=Integer,Description=\"Raw high-quality read counts or base counts for the right control region\">");
-  bcf_hdr_append(hdr, "##FORMAT=<ID=CN,Number=1,Type=Integer,Description=\"Read-depth based copy-number estimate for autosomal sites\">");
+  bcf_hdr_append(hdr, "##FORMAT=<ID=RDCN,Number=1,Type=Integer,Description=\"Read-depth based copy-number estimate for autosomal sites\">");
   bcf_hdr_append(hdr, "##FORMAT=<ID=DR,Number=1,Type=Integer,Description=\"# high-quality reference pairs\">");
   bcf_hdr_append(hdr, "##FORMAT=<ID=DV,Number=1,Type=Integer,Description=\"# high-quality variant pairs\">");
   if (c.isHaplotagged) {
@@ -665,7 +665,7 @@ vcfOutput(TConfig const& c, std::vector<TStructuralVariantRecord> const& svs, TJ
       bcf_update_format_int32(hdr, rec, "RCL", rcl, bcf_hdr_nsamples(hdr));
       bcf_update_format_int32(hdr, rec, "RC", rc, bcf_hdr_nsamples(hdr));
       bcf_update_format_int32(hdr, rec, "RCR", rcr, bcf_hdr_nsamples(hdr));
-      bcf_update_format_int32(hdr, rec, "CN", cnest, bcf_hdr_nsamples(hdr));
+      bcf_update_format_int32(hdr, rec, "RDCN", cnest, bcf_hdr_nsamples(hdr));
       bcf_update_format_int32(hdr, rec, "DR", drcount, bcf_hdr_nsamples(hdr));
       bcf_update_format_int32(hdr, rec, "DV", dvcount, bcf_hdr_nsamples(hdr));
       if (c.isHaplotagged) {


=====================================
src/msa.h
=====================================
@@ -288,6 +288,13 @@ namespace torali {
     return align.shape()[0];
   }
 
+  template<typename TStructuralVariant>
+  inline void
+  outputConsensus(bam_hdr_t* hdr, TStructuralVariant const& sv, std::string const& cons) {
+    std::cerr << ">" << hdr->target_name[sv.chr] << ':' << sv.svStart << ',' << hdr->target_name[sv.chr2] << ':' << sv.svEnd << " SVT:" << sv.svt << " SR:" << sv.srSupport << " PE:" << sv.peSupport << std::endl;
+    std::cerr << cons << std::endl;
+  }
+
 }
 
 #endif


=====================================
src/split.h
=====================================
@@ -463,7 +463,7 @@ namespace torali
     unsigned int finalGapStart = 0;
     unsigned int finalGapEnd = 0;
     if (!_coordTransform(svRefStr, bp, ad, finalGapStart, finalGapEnd, sv.svt)) return false;
-	
+    
     sv.precise=true;
     sv.svStart=finalGapStart;
     sv.svEnd=finalGapEnd;
@@ -475,6 +475,32 @@ namespace torali
     sv.ciposhigh = ci_wiggle;
     sv.ciendlow = -ci_wiggle;
     sv.ciendhigh = ci_wiggle;
+
+    if (c.islr) {
+      // Set alleles
+      sv.alleles = _addAlleles(boost::to_upper_copy(std::string(seq + sv.svStart - 1, seq + sv.svStart)), std::string(hdr->target_name[sv.chr2]), sv, sv.svt);
+    
+      // Get exact alleles for INS and DEL                                                                         
+      if ((sv.svt == 2) || (sv.svt == 4)) {
+	std::string refVCF;
+	std::string altVCF;
+	int32_t cpos = 0;
+	bool inSV = false;
+	for(uint32_t j = 0; j<align.shape()[1]; ++j) {
+	  if (align[0][j] != '-') {
+	    ++cpos;
+	    if (cpos == ad.cStart) inSV = true;
+	    else if (cpos == ad.cEnd) inSV = false;
+	  }
+	  if (inSV) {
+	    if (align[0][j] != '-') altVCF += align[0][j];
+	    if (align[1][j] != '-') refVCF += align[1][j];
+	  }
+	}
+	sv.alleles = _addAlleles(refVCF, altVCF);
+      }
+    }
+
     return true;
   }
 


=====================================
src/tags.h
=====================================
@@ -17,7 +17,7 @@ namespace torali {
 
   inline bool
   _translocation(int32_t const svt) {
-    return (DELLY_SVT_TRANS <= svt);
+    return (DELLY_SVT_TRANS <= svt) && (svt < 9);
   }
 
   inline bool
@@ -198,8 +198,21 @@ namespace torali {
 
   inline int32_t
   _isizeMappingPos(bam1_t* rec, int32_t isize) {
-    if (_translocation(rec)) return DELLY_SVT_TRANS + getSVType(rec->core);
-    else {
+    if (_translocation(rec)) {
+      uint8_t orient = getSVType(rec->core);
+      if (orient == 0) return DELLY_SVT_TRANS + 0;
+      else if (orient == 1) return DELLY_SVT_TRANS + 1;
+      else {
+	// 3to5 or 5to3?
+	if (rec->core.tid > rec->core.mtid) {
+	  if (!(rec->core.flag & BAM_FREVERSE)) return DELLY_SVT_TRANS + 2;
+	  else return DELLY_SVT_TRANS + 3;
+	} else {
+	  if (!(rec->core.flag & BAM_FREVERSE)) return DELLY_SVT_TRANS + 3;
+	  else return DELLY_SVT_TRANS + 2;
+	}
+      }
+    } else {
       if (rec->core.pos == rec->core.mpos) return -1; // No SV
       uint8_t orient = getSVType(rec->core);
       if (orient == 0) return 0;
@@ -211,7 +224,7 @@ namespace torali {
 	if (std::abs(rec->core.pos - rec->core.mpos) < 100) return -1; // Too small
 	return 3;
       }
-    } 
+    }
   }
 
   inline unsigned hash_string(const char *s) {


=====================================
src/tegua.h
=====================================
@@ -34,8 +34,8 @@ namespace torali {
 
 
   struct TeguaConfig {
+    bool islr;
     bool hasDumpFile;
-    bool hasVcfFile;
     bool hasExcludeFile;
     bool isHaplotagged;
     bool svtcmd;
@@ -46,6 +46,7 @@ namespace torali {
     uint32_t maxReadSep;
     uint32_t graphPruning;
     uint32_t minCliqueSize;
+    uint32_t maxReadPerSV;
     int32_t nchr;
     int32_t minimumFlankSize;
     float indelExtension;
@@ -54,7 +55,6 @@ namespace torali {
     DnaScore<int> aliscore;
     boost::filesystem::path dumpfile;
     boost::filesystem::path outfile;
-    boost::filesystem::path vcffile;
     std::vector<boost::filesystem::path> files;
     boost::filesystem::path genome;
     boost::filesystem::path exclude;
@@ -106,20 +106,24 @@ namespace torali {
      return 1;
    }
      
-   // Split-read assembly?
-   if (!c.hasVcfFile) {
+   // SR Store
+   typedef std::vector<SeqSlice> TSvPosVector;
+   typedef boost::unordered_map<std::size_t, TSvPosVector> TReadSV;
+   TReadSV srStore;
+
+   // Identify SVs
+   if (srStore.empty()) {
+       
      // Structural Variant Candidates
      typedef std::vector<StructuralVariantRecord> TVariants;
      TVariants svc;
 
-     // SR Store
-     typedef std::vector<SeqSlice> TSvPosVector;
-     typedef boost::unordered_map<std::size_t, TSvPosVector> TReadSV;
+     // Temporary split-read store
      TReadSV tmpStore;
 
      // SV Discovery
      _clusterSRReads(c, validRegions, svc, tmpStore);
-     
+
      // Assemble
      assemble(c, validRegions, svc, tmpStore);
 
@@ -137,54 +141,34 @@ namespace torali {
        lastSV = *svIter;
        svs.push_back(*svIter);
      }
-   } else vcfParse(c, hdr, svs);   // Re-genotyping
-   // Clean-up
-   bam_hdr_destroy(hdr);
-   sam_close(samfile);
 
-   // Re-number SVs
-   sort(svs.begin(), svs.end(), SortSVs<StructuralVariantRecord>());
-   uint32_t cliqueCount = 0;
-   for(typename TVariants::iterator svIt = svs.begin(); svIt != svs.end(); ++svIt, ++cliqueCount) svIt->id = cliqueCount;
-
-   // Genotyping
-   typedef std::vector<SeqSlice> TSvPosVector;
-   typedef boost::unordered_map<std::size_t, TSvPosVector> TReadSV;
-   TReadSV srStore;
-   if (srStore.empty()) {
-     // Structural Variant Candidates
-     typedef std::vector<StructuralVariantRecord> TVariants;
-     TVariants svc;
-
-     // Tmp SR Store
-     TReadSV tmpStore;
-     //_clusterSRReads(c, validRegions, svc, tmpStore);
-
-     // Update srStore
-     typedef std::vector<SeqSlice> TSvPosVector;
-     typedef boost::unordered_map<std::size_t, TSvPosVector> TReadSV;
-     for(typename TReadSV::iterator readit = tmpStore.begin(); readit != tmpStore.end(); ++readit) {
-       std::set<int32_t> svIdSet;
-       for(uint32_t i = 0; i < readit->second.size(); ++i) {
-	 int32_t svid = readit->second[i].svid;
-	 typename TVariants::iterator itGeno = std::lower_bound(svs.begin(), svs.end(), StructuralVariantRecord(svc[svid].chr, std::max(0, svc[svid].svStart - (int32_t) c.minRefSep), svc[svid].svEnd), SortSVs<StructuralVariantRecord>());
-	 for(;((itGeno != svs.end()) && (std::abs(svc[svid].svStart - itGeno->svStart) <= c.minRefSep));++itGeno) {
-	   if ((svc[svid].chr != itGeno->chr) || (svc[svid].chr2 != itGeno->chr2) || (svc[svid].svt != itGeno->svt)) continue;
-	   if (std::abs(svc[svid].svEnd - itGeno->svEnd) > c.minRefSep) continue;
-	   if (srStore.find(readit->first) == srStore.end()) srStore.insert(std::make_pair(readit->first, TSvPosVector()));
-	   // Assign read only once to a given SV
-	   if (svIdSet.find(itGeno->id) == svIdSet.end()) {
-	     // Debug
-	     //std::cerr << readit->first << ':' << svc[svid].chr << ',' << svc[svid].svStart << ',' << svc[svid].chr2 << ',' << svc[svid].svEnd << ',' << svc[svid].svt << ';' << itGeno->chr << ',' << itGeno->svStart << ',' << itGeno->chr2 << ',' << itGeno->svEnd << ',' << itGeno->svt << std::endl;
-
-	     svIdSet.insert(itGeno->id);
-	     readit->second[i].svid = itGeno->id;
-	     srStore[readit->first].push_back(readit->second[i]);
-	   }
+     // Sort
+     sort(svs.begin(), svs.end(), SortSVs<StructuralVariantRecord>());
+     
+     // Re-number SVs and update SR Store
+     typedef std::map<uint32_t, uint32_t> TIdMap;
+     TIdMap idmap;
+     uint32_t cliqueCount = 0;
+     for(typename TVariants::iterator svIt = svs.begin(); svIt != svs.end(); ++svIt, ++cliqueCount) {
+       idmap.insert(std::make_pair(svIt->id, cliqueCount));
+       svIt->id = cliqueCount;
+     }
+     for(typename TReadSV::iterator ts = tmpStore.begin(); ts != tmpStore.end(); ++ts) {
+       bool keep = false;
+       for(uint32_t idx = 0; idx < ts->second.size(); ++idx) {
+	 if (idmap.find(ts->second[idx].svid) == idmap.end()) ts->second[idx].svid = -1;
+	 else {
+	   ts->second[idx].svid = idmap.find(ts->second[idx].svid)->second;
+	   keep = true;
 	 }
        }
+       if (keep) srStore.insert(*ts);
      }
+     //outputStructuralVariants(c, svs);
    }
+   // Clean-up
+   bam_hdr_destroy(hdr);
+   sam_close(samfile);
    
    // Annotate junction reads
    typedef std::vector<JunctionCount> TSVJunctionMap;
@@ -209,7 +193,8 @@ namespace torali {
    }
       
    // Reference SV Genotyping
-   trackRef(c, svs, srStore, jctMap, rcMap);
+   //trackRef(c, svs, jctMap, rcMap);
+   genotypeLR(c, svs, srStore, jctMap, rcMap);
 
    // VCF Output
    vcfOutput(c, svs, jctMap, rcMap, spanMap);
@@ -228,6 +213,7 @@ namespace torali {
  int tegua(int argc, char **argv) {
    TeguaConfig c;
    c.isHaplotagged = false;
+   c.islr = true;
    
    // Parameter
    std::string svtype;
@@ -252,9 +238,15 @@ namespace torali {
      ("maxreadsep,n", boost::program_options::value<uint32_t>(&c.maxReadSep)->default_value(75), "max. read separation")
      ;
 
+   boost::program_options::options_description cons("Consensus options");
+   cons.add_options()
+     ("max-reads,p", boost::program_options::value<uint32_t>(&c.maxReadPerSV)->default_value(5), "max. reads for consensus computation")
+     ("flank-size,f", boost::program_options::value<int32_t>(&c.minimumFlankSize)->default_value(400), "min. flank size")
+     ("flank-quality,a", boost::program_options::value<float>(&c.flankQuality)->default_value(0.9), "min. flank quality")
+     ;     
+   
    boost::program_options::options_description geno("Genotyping options");
    geno.add_options()
-     ("vcffile,v", boost::program_options::value<boost::filesystem::path>(&c.vcffile), "input VCF/BCF file for genotyping")
      ("geno-qual,u", boost::program_options::value<uint16_t>(&c.minGenoQual)->default_value(5), "min. mapping quality for genotyping")
      ("dump,d", boost::program_options::value<boost::filesystem::path>(&c.dumpfile), "gzipped output file for SV-reads")
      ;
@@ -264,8 +256,6 @@ namespace torali {
      ("input-file", boost::program_options::value< std::vector<boost::filesystem::path> >(&c.files), "input file")
      ("pruning,j", boost::program_options::value<uint32_t>(&c.graphPruning)->default_value(1000), "graph pruning cutoff")
      ("extension,e", boost::program_options::value<float>(&c.indelExtension)->default_value(0.5), "enforce indel extension")
-     ("flank-size,f", boost::program_options::value<int32_t>(&c.minimumFlankSize)->default_value(400), "min. flank size")
-     ("flank-quality,a", boost::program_options::value<float>(&c.flankQuality)->default_value(0.9), "min. flank quality")
      ("scoring,s", boost::program_options::value<std::string>(&scoring)->default_value("3,-2,-3,-1"), "alignment scoring")
      ;
    
@@ -273,9 +263,9 @@ namespace torali {
    pos_args.add("input-file", -1);
    
    boost::program_options::options_description cmdline_options;
-   cmdline_options.add(generic).add(disc).add(geno).add(hidden);
+   cmdline_options.add(generic).add(disc).add(cons).add(geno).add(hidden);
    boost::program_options::options_description visible_options;
-   visible_options.add(generic).add(disc).add(geno);
+   visible_options.add(generic).add(disc).add(cons).add(geno);
    boost::program_options::variables_map vm;
    boost::program_options::store(boost::program_options::command_line_parser(argc, argv).options(cmdline_options).positional(pos_args).run(), vm);
    boost::program_options::notify(vm);
@@ -372,28 +362,6 @@ namespace torali {
      c.hasExcludeFile = true;
    } else c.hasExcludeFile = false;
    
-   // Check input VCF file
-   if (vm.count("vcffile")) {
-     if (!(boost::filesystem::exists(c.vcffile) && boost::filesystem::is_regular_file(c.vcffile) && boost::filesystem::file_size(c.vcffile))) {
-       std::cerr << "Input VCF/BCF file is missing: " << c.vcffile.string() << std::endl;
-       return 1;
-     }
-     htsFile* ifile = bcf_open(c.vcffile.string().c_str(), "r");
-     if (ifile == NULL) {
-       std::cerr << "Fail to open file " << c.vcffile.string() << std::endl;
-       return 1;
-     }
-     bcf_hdr_t* hdr = bcf_hdr_read(ifile);
-     if (hdr == NULL) {
-       std::cerr << "Fail to open index file " << c.vcffile.string() << std::endl;
-       return 1;
-     }
-     bcf_hdr_destroy(hdr);
-     bcf_close(ifile);
-     c.hasVcfFile = true;
-   } else c.hasVcfFile = false;
-
-
    // Check output directory
    if (!_outfileValid(c.outfile)) return 1;
 
@@ -405,13 +373,8 @@ namespace torali {
    std::cout << std::endl;
    
    // Run Tegua
-   if (mode == "pb") {
-     c.indelExtension = 0.7;
-     c.flankQuality = 0.85;
-   } else if (mode == "ont") {
-     c.indelExtension = 0.5;
-     c.flankQuality = 0.9;
-   }
+   if (mode == "pb") c.indelExtension = 0.7;
+   else if (mode == "ont") c.indelExtension = 0.5;
    return runTegua(c);
  }
 


=====================================
src/util.h
=====================================
@@ -19,6 +19,10 @@ namespace torali
   #ifndef LAST_BIN
   #define LAST_BIN 65535
   #endif
+
+  #ifndef MAX_CN
+  #define MAX_CN 10
+  #endif
   
   struct LibraryInfo {
     int32_t rs;
@@ -33,6 +37,34 @@ namespace torali
     LibraryInfo() : rs(0), median(0), mad(0), minNormalISize(0), minISizeCutoff(0), maxNormalISize(0), maxISizeCutoff(0), abnormal_pairs(0) {}
   };
 
+  struct CNV {
+    int32_t chr;
+    int32_t start;
+    int32_t end;
+    int32_t ciposlow;
+    int32_t ciposhigh;
+    int32_t ciendlow;
+    int32_t ciendhigh;
+    int32_t qval;
+    double cn;
+    double mappable;
+    double sd;
+
+
+    CNV() : chr(0), start(0), end(0), ciposlow(0), ciposhigh(0), ciendlow(0), ciendhigh(0), qval(0), cn(-1), mappable(0), sd(1) {}
+    CNV(int32_t const c, int32_t const s, int32_t const e, int32_t const cil, int32_t const cih, int32_t const cel, int32_t ceh, double const estcn, double const mp) : chr(c), start(s), end(e), ciposlow(cil), ciposhigh(cih), ciendlow(cel), ciendhigh(ceh), qval(0), cn(estcn), mappable(mp), sd(1) {}
+  };
+
+  template<typename TCNV>
+  struct SortCNVs : public std::binary_function<TCNV, TCNV, bool>
+  {
+    inline bool operator()(TCNV const& sv1, TCNV const& sv2) {
+      return ((sv1.chr<sv2.chr) || ((sv1.chr==sv2.chr) && (sv1.start<sv2.start)) || ((sv1.chr==sv2.chr) && (sv1.start==sv2.start) && (sv1.end<sv2.end)) || ((sv1.chr==sv2.chr) && (sv1.start==sv2.start) && (sv1.end==sv2.end) && (sv1.cn < sv2.cn)));
+    }
+  };
+
+
+
 
   // Read count struct
   struct ReadCount {
@@ -72,6 +104,8 @@ namespace torali
       else if (value=="3to5") return DELLY_SVT_TRANS + 2;
       else if (value=="5to3") return DELLY_SVT_TRANS + 3;
       else return -1;
+    } else if (svt == "CNV") {
+      return 9;
     } else {
       if (value=="3to3") return 0;
       else if (value=="5to5") return 1;
@@ -90,6 +124,7 @@ namespace torali
     else if (svt == 2) return "DEL";
     else if (svt == 3) return "DUP";
     else if (svt == 4) return "INS";
+    else if (svt == 9) return "CNV";
     else return "BND";
   }
 
@@ -130,7 +165,7 @@ namespace torali
 
   // Output directory/file checks
   inline bool
-  _outfileValid(boost::filesystem::path& outfile) {
+  _outfileValid(boost::filesystem::path const& outfile) {
     try {
       boost::filesystem::path outdir;
       if (outfile.has_parent_path()) outdir = outfile.parent_path();


=====================================
src/version.h
=====================================
@@ -5,7 +5,7 @@ namespace torali
 {
 
 
-  std::string dellyVersionNumber = "0.8.5";
+  std::string dellyVersionNumber = "0.8.7";
 
   inline 
     void printTitle(std::string const& title) 



View it on GitLab: https://salsa.debian.org/med-team/delly/-/commit/faeb27c17a5e3c2645c14c6778f2897982887c26

-- 
View it on GitLab: https://salsa.debian.org/med-team/delly/-/commit/faeb27c17a5e3c2645c14c6778f2897982887c26
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210117/c7db25b3/attachment-0001.html>


More information about the debian-med-commit mailing list