[med-svn] [qtltools] 01/06: New upstream version 1.1+dfsg

Andreas Tille tille at debian.org
Mon Jan 16 15:47:14 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository qtltools.

commit f241cdb35d16d35b6e763d93378ffba0b6ceca4b
Author: Andreas Tille <tille at debian.org>
Date:   Mon Jan 16 10:04:57 2017 +0100

    New upstream version 1.1+dfsg
---
 Makefile                                     | 81 +++++----------------------
 README                                       |  2 +-
 lib/OTools/otools.h                          |  6 +-
 src/QTLtools.cpp                             |  8 ++-
 src/common/data.h                            |  2 +-
 src/mode_cis/cis_chunking.cpp                |  4 ++
 src/mode_cis/cis_data.h                      |  2 +
 src/mode_cis/cis_initilization.cpp           |  1 +
 src/mode_cis/cis_main.cpp                    | 21 ++++++-
 src/mode_cis/cis_permutation_pass.cpp        |  4 +-
 src/mode_cis/cis_read_phenotypes.cpp         |  9 ++-
 src/mode_correct/correct_data.h              |  8 ++-
 src/mode_correct/correct_main.cpp            |  9 ++-
 src/mode_correct/correct_management.cpp      |  8 ---
 src/mode_correct/correct_processing.cpp      | 34 +++++++++++-
 src/mode_correct/correct_read_covariates.cpp | 81 +++++++++++++++++++++++++++
 src/mode_fdensity/fdensity_process.cpp       | 36 ++++++++++--
 src/mode_fenrich/fenrich_management.cpp      | 11 +++-
 src/mode_match/match_process.cpp             |  7 ++-
 src/mode_quan/quan_chunking.cpp              | 19 ++++++-
 src/mode_quan/quan_data.h                    |  8 +--
 src/mode_quan/quan_main.cpp                  | 22 +++++++-
 src/mode_quan/quan_printResults.cpp          |  2 +
 src/mode_quan/quan_readBAM.cpp               | 83 +++++++++++++++++-----------
 src/mode_rtc/rtc_common.cpp                  |  2 +-
 src/mode_trans/trans_chunking.cpp            | 18 +++++-
 26 files changed, 339 insertions(+), 149 deletions(-)

diff --git a/Makefile b/Makefile
index 28851fe..33b2436 100644
--- a/Makefile
+++ b/Makefile
@@ -9,17 +9,13 @@ HTSLD_INC=
 HTSLD_LIB=
 
 #COMPILER MODE C++11
-CXX=g++ -std=c++11
+CXX=g++ -std=c++0x
 
 #COMPILER FLAGS
 CXXFLAG_REL=-O2
 CXXFLAG_DBG=-g
 CXXFLAG_WRN=-Wall -Wextra -Wno-sign-compare -Wno-unused-local-typedefs -Wno-deprecated -Wno-unused-parameter
 
-#LINKER FLAGS
-LDFLAG_REL=-O2
-LDFLAG_DBG=-g
-
 #BASE LIBRARIES
 LIB_FLAGS=-Wl,-Bstatic -lz -lgsl -lblas -lbz2 -Wl,-Bdynamic -lm -lpthread 
 
@@ -32,66 +28,25 @@ OFILE=$(shell for file in `find src -name *.cpp`; do echo obj/$$(basename $$file
 VPATH=$(shell for file in `find src -name *.cpp`; do echo $$(dirname $$file); done)
 
 #DEFAULT VERSION (I.E. UNIGE DESKTOP RELEASE VERSION)
-all: default
-
-#DEFAULT RELEASE VERSION
-default: CXXFLAG=$(CXXFLAG_REL) $(CXXFLAG_WRN)
-default: IFLAG=-Ilib/OTools -Ilib -I$(RMATH_INC) -I$(HTSLD_INC) -I$(BOOST_INC)
-default: LIB_FILES=$(RMATH_LIB)/libRmath.a $(HTSLD_LIB)/libhts.a $(BOOST_LIB)/libboost_iostreams.a $(BOOST_LIB)/libboost_program_options.a
-default: LDFLAG=$(LDFLAG_REL)
-default: $(BFILE)
+all: desktop
 
 #UNIGE DESKTOP RELEASE VERSION
 desktop: RMATH_INC=$(HOME)/Tools/R-3.2.2/src/include
 desktop: RMATH_LIB=$(HOME)/Tools/R-3.2.2/src/nmath/standalone
-desktop: HTSLD_INC=$(HOME)/Tools/htslib-1.3
-desktop: HTSLD_LIB=$(HOME)/Tools/htslib-1.3
+desktop: HTSLD_INC=$(HOME)/Tools/htslib-1.3.1
+desktop: HTSLD_LIB=$(HOME)/Tools/htslib-1.3.1
 desktop: BOOST_INC=/usr/include
-desktop: BOOST_LIB=/usr/lib/x86_64-linux-gnu
+desktop: BOOST_LIB=/usr/lib
 desktop: CXXFLAG=$(CXXFLAG_REL) $(CXXFLAG_WRN)
 desktop: IFLAG=-Ilib/OTools -Ilib -I$(RMATH_INC) -I$(HTSLD_INC) -I$(BOOST_INC)
 desktop: LIB_FILES=$(RMATH_LIB)/libRmath.a $(HTSLD_LIB)/libhts.a $(BOOST_LIB)/libboost_iostreams.a $(BOOST_LIB)/libboost_program_options.a
-desktop: LDFLAG=$(LDFLAG_REL)
+desktop: LDFLAG=$(CXXFLAG_REL)
 desktop: $(BFILE)
 
 #UNIGE DESKTOP DEBUG VERSION
-desktop-dbg: RMATH_INC=$(HOME)/Tools/R-3.2.2/src/include
-desktop-dbg: RMATH_LIB=$(HOME)/Tools/R-3.2.2/src/nmath/standalone
-desktop-dbg: HTSLD_INC=$(HOME)/Tools/htslib-1.3
-desktop-dbg: HTSLD_LIB=$(HOME)/Tools/htslib-1.3
-desktop-dbg: BOOST_INC=/usr/include
-desktop-dbg: BOOST_LIB=/usr/lib/x86_64-linux-gnu
+desktop-dbg: desktop
 desktop-dbg: CXXFLAG=$(CXXFLAG_DBG) $(CXXFLAG_WRN)
-desktop-dbg: IFLAG=-Ilib/OTools -Ilib -I$(RMATH_INC) -I$(HTSLD_INC) -I$(BOOST_INC)
-desktop-dbg: LIB_FILES=$(RMATH_LIB)/libRmath.a $(HTSLD_LIB)/libhts.a $(BOOST_LIB)/libboost_iostreams.a $(BOOST_LIB)/libboost_program_options.a
-desktop-dbg: LDFLAG=$(LDFLAG_DBG)
-desktop-dbg: $(BFILE)
-
-#DELL LAPTOP RELEASE VERSION
-laptop: RMATH_INC=$(HOME)/Libraries/R-3.2.2/src/include
-laptop: RMATH_LIB=$(HOME)/Libraries/R-3.2.2/src/nmath/standalone
-laptop: HTSLD_INC=$(HOME)/Libraries/htslib-1.2.1
-laptop: HTSLD_LIB=$(HOME)/Libraries/htslib-1.2.1
-laptop: BOOST_INC=/usr/include
-laptop: BOOST_LIB=/usr/lib/x86_64-linux-gnu
-laptop: CXXFLAG=$(CXXFLAG_REL) $(CXXFLAG_WRN)
-laptop: IFLAG=-Ilib/OTools -Ilib -I$(RMATH_INC) -I$(HTSLD_INC) -I$(BOOST_INC)
-laptop: LIB_FILES=$(RMATH_LIB)/libRmath.a $(HTSLD_LIB)/libhts.a $(BOOST_LIB)/libboost_iostreams.a $(BOOST_LIB)/libboost_program_options.a
-laptop: LDFLAG=$(LDFLAG_REL)
-laptop: $(BFILE)
-
-#DELL LAPTOP DEBUG VERSION
-laptop-dbg: RMATH_INC=$(HOME)/Libraries/R-3.2.2/src/include
-laptop-dbg: RMATH_LIB=$(HOME)/Libraries/R-3.2.2/src/nmath/standalone
-laptop-dbg: HTSLD_INC=$(HOME)/Libraries/htslib-1.2.1
-laptop-dbg: HTSLD_LIB=$(HOME)/Libraries/htslib-1.2.1
-laptop-dbg: BOOST_INC=/usr/include
-laptop-dbg: BOOST_LIB=/usr/lib/x86_64-linux-gnu
-laptop-dbg: CXXFLAG=$(CXXFLAG_DBG) $(CXXFLAG_WRN)
-laptop-dbg: IFLAG=-Ilib/OTools -Ilib -I$(RMATH_INC) -I$(HTSLD_INC) -I$(BOOST_INC)
-laptop-dbg: LIB_FILES=$(RMATH_LIB)/libRmath.a $(HTSLD_LIB)/libhts.a $(BOOST_LIB)/libboost_iostreams.a $(BOOST_LIB)/libboost_program_options.a
-laptop-dbg: LDFLAG=$(LDFLAG_DBG)
-laptop-dbg: $(BFILE)
+desktop-dbg: LDFLAG=$(CXXFLAG_DBG)
 
 #VITAL-IT RELEASE VERSION
 cluster: RMATH_INC=/software/R/3.1.1/include
@@ -103,21 +58,13 @@ cluster: BOOST_LIB=/software/lib64
 cluster: CXXFLAG=$(CXXFLAG_REL) $(CXXFLAG_WRN)
 cluster: IFLAG=-Ilib/OTools -Ilib -I$(RMATH_INC) -I$(HTSLD_INC) -I$(BOOST_INC)
 cluster: LIB_FILES=$(RMATH_LIB)/libRmath.a $(HTSLD_LIB)/libhts.a $(BOOST_LIB)/libboost_iostreams.a $(BOOST_LIB)/libboost_program_options.a
-cluster: LDFLAG=$(LDFLAG_REL)
+cluster: LDFLAG=$(CXXFLAG_REL)
 cluster: $(BFILE)
 
 #VITAL-IT DEBUG VERSION
-cluster-dbg: RMATH_INC=/software/R/3.1.1/include
-cluster-dbg: RMATH_LIB=/software/R/3.1.1/lib64
-cluster-dbg: HTSLD_INC=/software/UHTS/Analysis/samtools/1.2/include
-cluster-dbg: HTSLD_LIB=/software/UHTS/Analysis/samtools/1.2/lib64
-cluster-dbg: BOOST_INC=/software/include
-cluster-dbg: BOOST_LIB=/software/lib64
+cluster-dbg: cluster
 cluster-dbg: CXXFLAG=$(CXXFLAG_DBG) $(CXXFLAG_WRN)
-cluster-dbg: IFLAG=-Ilib/OTools -Ilib -I$(RMATH_INC) -I$(HTSLD_INC) -I$(BOOST_INC)
-cluster-dbg: LIB_FILES=$(RMATH_LIB)/libRmath.a $(HTSLD_LIB)/libhts.a $(BOOST_LIB)/libboost_iostreams.a $(BOOST_LIB)/libboost_program_options.a
-cluster-dbg: LDFLAG=$(LDFLAG_DBG)
-cluster-dbg: $(BFILE)
+cluster-dbg: LDFLAG=$(CXXFLAG_DBG)
 
 #MAC RELEASE VERSION
 mac: RMATH_INC=$(HOME)/Libraries/R-3.2.2/src/include
@@ -129,7 +76,7 @@ mac: BOOST_LIB=/opt/local/lib
 mac: CXXFLAG=$(CXXFLAG_REL) $(CXXFLAG_WRN)
 mac: IFLAG=-Ilib/OTools -Ilib/ -I$(RMATH_INC) -I$(HTSLD_INC) -I$(BOOST_INC)
 mac: LIB_FILES=$(RMATH_LIB)/libRmath.a $(HTSLD_LIB)/libhts.a $(BOOST_LIB)/libboost_iostreams-mt.a $(BOOST_LIB)/libboost_program_options-mt.a
-mac: LDFLAG=$(LDFLAG_REL) -L /opt/local/lib
+mac: LDFLAG=$(CXXFLAG_REL) -L /opt/local/lib
 mac: $(BFILE)
 
 #MAC DEBUG VERSION
@@ -142,7 +89,7 @@ mac-dbg: BOOST_LIB=/opt/local/lib
 mac-dbg: CXXFLAG=$(CXXFLAG_DBG) $(CXXFLAG_WRN)
 mac-dbg: IFLAG=-Ilib/OTools -Ilib/ -I$(RMATH_INC) -I$(HTSLD_INC) -I$(BOOST_INC)
 mac-dbg: LIB_FILES=$(RMATH_LIB)/libRmath.a $(HTSLD_LIB)/libhts.a $(BOOST_LIB)/libboost_iostreams-mt.a $(BOOST_LIB)/libboost_program_options-mt.a
-mac-dbg: LDFLAG=$(LDFLAG_DBG) -L /opt/local/lib
+mac-dbg: LDFLAG=$(CXXFLAG_DBG) -L /opt/local/lib
 mac-dbg: $(BFILE)
 
 #COMPILATION RULES
@@ -242,4 +189,4 @@ clean-bamstat:
 clean-fdensity:
 	rm -f obj/fdensity_*.o $(BFILE)
 		
-		
\ No newline at end of file
+		
diff --git a/README b/README
index 2c64161..c61c13f 100644
--- a/README
+++ b/README
@@ -1,6 +1,6 @@
 QTLtools: a tool set for molecular QTL discovery and analysis
 
-ver: 1.0
+ver: 1.1
 git: https://github.com/qtltools/qtltools
 web:https://qtltools.github.io/qtltools/
 
diff --git a/lib/OTools/otools.h b/lib/OTools/otools.h
index e375fb1..d544f66 100644
--- a/lib/OTools/otools.h
+++ b/lib/OTools/otools.h
@@ -67,9 +67,9 @@ extern "C" {
 
 //MAKE SOME TOOL FULLY ACCESSIBLE THROUGHOUT THE SOFTWARE
 #ifdef _DECLARE_TOOLBOX_HERE
-	random_number_generator rng;
-	string_utils stb;
-	verbose vrb;
+	random_number_generator rng;	//Random number generator
+	string_utils stb;				//Utils for string manipulation
+	verbose vrb;					//Verbose
 #else
 	extern random_number_generator rng;
 	extern string_utils stb;
diff --git a/src/QTLtools.cpp b/src/QTLtools.cpp
index d7f04b6..3598b9a 100644
--- a/src/QTLtools.cpp
+++ b/src/QTLtools.cpp
@@ -34,7 +34,7 @@ void printModes(){
     vrb.print("  eg: QTLtools cis --help");
     vrb.ctitle("Available modes:");
     vrb.print("  bamstat   Calculate basic QC metrics for BAM/SAM");
-    vrb.print("  match     Match VCF genotypes to BAM/SAM file");
+    vrb.print("  mbv       Match BAM to VCF ");
     vrb.print("  pca       Calculate principal components for a BED/VCF/BCF file");
     vrb.print("  correct   Covariate correction of a BED file");
     vrb.print("  cis       cis QTL analysis");
@@ -55,9 +55,11 @@ int main(int argc, char ** argv) {
 	timer running_timer;
 
 	//2. Open LOG file if necessary
+	bool match_mode = false;
 	for (int a = 1 ; a < argc - 1 ; a ++) {
 		if ((strcmp(argv[a], "--log") == 0) && !vrb.open_log(string(argv[a+1]))) vrb.error("Impossible to open log file!");
 		if (strcmp(argv[a], "--silent") == 0) vrb.set_silent();
+		if (strcmp(argv[a], "mbv") == 0) match_mode = true;
 	}
 
 	//3. Print header on screen
@@ -67,6 +69,8 @@ int main(int argc, char ** argv) {
 	vrb.bullet("Webpage : https://qtltools.github.io/qtltools/");
 	vrb.bullet("Version : " + string(QTLTOOLS_VERSION));
 	vrb.bullet("Date    : " + running_timer.date());
+	if (!match_mode) vrb.bullet("Citation: A complete tool set for molecular QTL discovery and analysis, https://doi.org/10.1101/068635");
+	else vrb.bullet("Citation: MBV; a method to solve sample mislabeling and detect technical bias in large combined genotype and sequencing assay data sets");
 
 	//4. Switch mode
 	vector < string > args;
@@ -83,7 +87,7 @@ int main(int argc, char ** argv) {
 	else if (strcmp(argv[1], "trans") == 0) trans_main(args);
 
 	//5.3. MATCH mode
-	else if (strcmp(argv[1], "match") == 0) match_main(args);
+	else if (strcmp(argv[1], "mbv") == 0) match_main(args);
 
 	//5.4. FENRICH mode
 	else if (strcmp(argv[1], "fenrich") == 0) fenrich_main(args);
diff --git a/src/common/data.h b/src/common/data.h
index 79d5e48..1a0ebba 100644
--- a/src/common/data.h
+++ b/src/common/data.h
@@ -19,7 +19,7 @@
 #include "otools.h"
 #include "filter.h"
 
-#define QTLTOOLS_VERSION "1.0"
+#define QTLTOOLS_VERSION "1.1"
 
 class data {
 public:
diff --git a/src/mode_cis/cis_chunking.cpp b/src/mode_cis/cis_chunking.cpp
index 6c88ec0..878f880 100644
--- a/src/mode_cis/cis_chunking.cpp
+++ b/src/mode_cis/cis_chunking.cpp
@@ -59,6 +59,10 @@ bool cis_data::setPhenotypeRegion(string reg) {
 	return regionPhenotype.parse(reg);
 }
 
+bool cis_data::setGenotypeRegion(string reg) {
+	return regionGenotype.parse(reg);
+}
+
 class pgroup {
 public:
 	int start, end;
diff --git a/src/mode_cis/cis_data.h b/src/mode_cis/cis_data.h
index 001ac3c..cf47bad 100644
--- a/src/mode_cis/cis_data.h
+++ b/src/mode_cis/cis_data.h
@@ -42,6 +42,7 @@ public:
 	//REGIONS
 	genomic_region regionPhenotype;
 	genomic_region regionGenotype;
+	bool full_test;
 
 	//GENOTYPES
 	int genotype_count;									//variant site number
@@ -79,6 +80,7 @@ public:
 
 	//DATA REGION
 	bool setPhenotypeRegion(string);
+	bool setGenotypeRegion(string);
 	void setPhenotypeRegion(int, int);
 
 	//READ DATA
diff --git a/src/mode_cis/cis_initilization.cpp b/src/mode_cis/cis_initilization.cpp
index ce9377d..a2bfa0a 100644
--- a/src/mode_cis/cis_initilization.cpp
+++ b/src/mode_cis/cis_initilization.cpp
@@ -24,6 +24,7 @@ cis_data::cis_data() {
 	genotype_count = 0;
 	phenotype_count = 0;
 	covariate_count = 0;
+	full_test = false;
 }
 
 void cis_data::clear() {
diff --git a/src/mode_cis/cis_main.cpp b/src/mode_cis/cis_main.cpp
index 42dc335..b9c28f9 100644
--- a/src/mode_cis/cis_main.cpp
+++ b/src/mode_cis/cis_main.cpp
@@ -51,7 +51,8 @@ void cis_main(vector < string > & argv) {
 	boost::program_options::options_description opt_parallel ("\x1B[32mParallelization\33[0m");
 	opt_parallel.add_options()
 		("chunk", boost::program_options::value< vector < int > >()->multitoken(), "Specify which chunk needs to be processed")
-		("region", boost::program_options::value< string >(), "Region of interest.");
+		("region", boost::program_options::value< string >(), "Region of interest.")
+		("region-pair", boost::program_options::value< vector < string > >()->multitoken(), "Pairs of interest.");
 
 	D.option_descriptions.add(opt_files).add(opt_parameters).add(opt_modes).add(opt_aggr).add(opt_parallel);
 
@@ -81,8 +82,8 @@ void cis_main(vector < string > & argv) {
 	if (!D.options.count("vcf")) vrb.error("Genotype data needs to be specified with --vcf [file.vcf]");
 	if (!D.options.count("bed")) vrb.error("Phenotype data needs to be specified with --bed [file.bed]");
 	if (!D.options.count("out")) vrb.error("Output needs to be specified with --out [file.out]");
-	int nParallel = D.options.count("chunk") + D.options.count("region");
-	if (nParallel > 1) vrb.error("Please, specify one of these options [--region, --chunk]");
+	int nParallel = D.options.count("chunk") + D.options.count("region") + D.options.count("region-pair");
+	if (nParallel > 1) vrb.error("Please, specify one of these options [--region, --chunk, --region-pair]");
 	int nMode = D.options.count("permute") + D.options.count("mapping") + D.options.count("nominal");
 	if (nMode != 1) vrb.error("Please, specify only one of these options [--permute, --nominal, --mapping]");
 	string outFile = D.options["out"].as < string > ();
@@ -124,6 +125,12 @@ void cis_main(vector < string > & argv) {
 		if (nChunk.size() != 2 || nChunk[0] > nChunk[1] || nChunk[0] < 0) vrb.error("Incorrect --chunk arguments!");
 		vrb.bullet("Chunk = [" + stb.str(nChunk[0]) + "/" + stb.str(nChunk[1]) + "]");
 	} else if(D.options.count("region")) vrb.bullet("Region = [" + D.options["region"].as < string > () +"]");
+	else if(D.options.count("region-pair")) {
+		vector < string > regions = D.options["region-pair"].as < vector < string > > ();
+		vrb.bullet("Region for genotypes = [" + regions[0] +"]");
+		vrb.bullet("Region for phenotypes = [" + regions[1] +"]");
+		D.full_test = true;
+	}
 
 	//---------------------------
 	// 7. SET AGGREGATION METHODS
@@ -160,6 +167,14 @@ void cis_main(vector < string > & argv) {
     } else if (D.options.count("region")){
         if (!D.setPhenotypeRegion(D.options["region"].as < string > ())) vrb.error("Impossible to interpret region [" + D.options["region"].as < string > () + "]");
         D.regionGenotype = genomic_region(D.regionPhenotype, D.options["window"].as < unsigned int > ());
+    } else if (D.options.count("region-pair")) {
+    	vector < string > regions = D.options["region-pair"].as < vector < string > > ();
+    	if (!D.setPhenotypeRegion(regions[0])) vrb.error("Impossible to interpret region [" + regions[0] + "]");
+    	if (!D.setGenotypeRegion(regions[1])) vrb.error("Impossible to interpret region [" + regions[1] + "]");
+    	D.regionGenotype = genomic_region(D.regionGenotype, D.options["window"].as < unsigned int > ());
+    	D.regionPhenotype = genomic_region(D.regionPhenotype, D.options["window"].as < unsigned int > ());
+    	D.full_test = true;
+    	D.grp_mode = GRP_BEST;
     }
 	
 
diff --git a/src/mode_cis/cis_permutation_pass.cpp b/src/mode_cis/cis_permutation_pass.cpp
index 52cd961..200d58f 100644
--- a/src/mode_cis/cis_permutation_pass.cpp
+++ b/src/mode_cis/cis_permutation_pass.cpp
@@ -50,11 +50,11 @@ void cis_data::runPermutationPass(string fout) {
 		vector < unsigned int > variant_indexes;
 		vector < int > variant_distances;
 		for (unsigned int v = 0 ; v < genotype_count ; v ++) {
-			if (phenotype_chr[group_idx[i_group][0]] != genotype_chr[v]) continue;
+			if (!full_test && phenotype_chr[group_idx[i_group][0]] != genotype_chr[v]) continue;
 			int ps = (phenotype_start[group_idx[i_group][0]]>cis_window)?(phenotype_start[group_idx[i_group][0]]-cis_window):0;
 			int pe = phenotype_end[group_idx[i_group][0]] + cis_window;
 
-			if (genotype_start[v] <= pe && ps <= genotype_end[v]) {
+			if (full_test || (genotype_start[v] <= pe && ps <= genotype_end[v])) {
 				int cisdistance = 0;
 				if (genotype_start[v] <= phenotype_end[group_idx[i_group][0]] && phenotype_start[group_idx[i_group][0]] <= genotype_end[v]) cisdistance = 0;
 				else if (genotype_end[v] < phenotype_start[group_idx[i_group][0]]) cisdistance = genotype_end[v] - phenotype_start[group_idx[i_group][0]];
diff --git a/src/mode_cis/cis_read_phenotypes.cpp b/src/mode_cis/cis_read_phenotypes.cpp
index c26a757..c2440a5 100644
--- a/src/mode_cis/cis_read_phenotypes.cpp
+++ b/src/mode_cis/cis_read_phenotypes.cpp
@@ -59,7 +59,8 @@ void cis_data::readPhenotypes(string fbed) {
                 phenotype_chr.push_back(tokens[0]);
                 phenotype_start.push_back(atoi(tokens[1].c_str()) + 1);
                 phenotype_end.push_back(atoi(tokens[2].c_str()));
-                if (grp_mode > 0) phenotype_grp.push_back(tokens[4]);
+				if (grp_mode > 0 && full_test) phenotype_grp.push_back("ALL_GENES");
+				if (grp_mode > 0 && !full_test) phenotype_grp.push_back(tokens[4]);
                 phenotype_neg.push_back(tokens[5] == "-");
                 if (phenotype_neg.back()) n_negativeStrd ++;
                 phenotype_val.push_back(vector < float > (sample_count, 0.0));
@@ -85,7 +86,8 @@ void cis_data::readPhenotypes(string fbed) {
                     phenotype_chr.push_back(tokens[0]);
                     phenotype_start.push_back(atoi(tokens[1].c_str()) + 1);
                     phenotype_end.push_back(atoi(tokens[2].c_str()));
-                    if (grp_mode > 0) phenotype_grp.push_back(tokens[4]);
+    				if (grp_mode > 0 && full_test) phenotype_grp.push_back("ALL_GENES");
+    				if (grp_mode > 0 && !full_test) phenotype_grp.push_back(tokens[4]);
                     phenotype_neg.push_back(tokens[5] == "-");
                     if (phenotype_neg.back()) n_negativeStrd ++;
                     phenotype_val.push_back(vector < float > (sample_count, 0.0));
@@ -138,7 +140,8 @@ void cis_data::scanPhenotypes(string fbed) {
 				phenotype_chr.push_back(tokens[0]);
 				phenotype_start.push_back(atoi(tokens[1].c_str()) + 1);
 				phenotype_end.push_back(atoi(tokens[2].c_str()));
-				if (grp_mode > 0) phenotype_grp.push_back(tokens[4]);
+				if (grp_mode > 0 && full_test) phenotype_grp.push_back("ALL_GENES");
+				if (grp_mode > 0 && !full_test) phenotype_grp.push_back(tokens[4]);
                 phenotype_neg.push_back(tokens[5] == "-");
                 if (phenotype_neg.back()) n_negativeStrd ++;
 				n_includedP++;
diff --git a/src/mode_correct/correct_data.h b/src/mode_correct/correct_data.h
index 63fa01f..278d88c 100644
--- a/src/mode_correct/correct_data.h
+++ b/src/mode_correct/correct_data.h
@@ -34,18 +34,20 @@ public:
 	int covariate_count;								//covariate number
 	vector < vector < string > > covariate_val;			//covariate values
 	vector < string > covariate_id;						//covariate ids
+	vector < string > covariate_target;
 	residualizer * covariate_engine;					//covariate engine machinery
 
+	//QTL COVARIATES
+
+
 	//CONSTRUCTOR / DESTRUCTOR
 	correct_data();
 	~correct_data();
 	void clear();
 
-	//INITIALIZE
-	void initializeResidualizer();
-
 	//READ DATA
 	void readCovariates(string);
+	void readQTLCovariates(string, string);
 
 	//GENOTYPE & PHENOTYPE MANAGEMENT
 	void imputeMissing(vector < float > &);
diff --git a/src/mode_correct/correct_main.cpp b/src/mode_correct/correct_main.cpp
index ab9184a..f3f9fe2 100644
--- a/src/mode_correct/correct_main.cpp
+++ b/src/mode_correct/correct_main.cpp
@@ -28,6 +28,7 @@ void correct_main(vector < string > & argv) {
 		("vcf", boost::program_options::value< string >(), "Genotypes in VCF/BCF/BED format.")
 		("bed", boost::program_options::value< string >(), "Phenotypes in BED format.")
 		("cov", boost::program_options::value< string >(), "Covariates in TXT format.")
+		("qtl", boost::program_options::value< vector < string > >()->multitoken(), "QTL covariates.")
 		("out", boost::program_options::value< string >(), "Output file.");
 
 	boost::program_options::options_description opt_param ("\x1B[32mParameters\33[0m");
@@ -87,9 +88,11 @@ void correct_main(vector < string > & argv) {
 	if (D.options.count("vcf")) D.readSampleFromVCF(D.options["vcf"].as < string > ());
 	if (D.options.count("cov")) D.readSampleFromCOV(D.options["cov"].as < string > ());
 	D.mergeSampleLists();
-	if (D.options.count("cov")) {
-		D.readCovariates(D.options["cov"].as < string > ());
-		D.initializeResidualizer();
+	if (D.options.count("cov")) D.readCovariates(D.options["cov"].as < string > ());
+	if (D.options.count("qtl")) {
+		vector < string > files = D.options["qtl"].as < vector < string > > ();
+		assert(files .size() == 2);
+		D.readQTLCovariates(files[0], files[1]);
 	}
 
 	//----------------
diff --git a/src/mode_correct/correct_management.cpp b/src/mode_correct/correct_management.cpp
index 4504fe2..fa2f379 100644
--- a/src/mode_correct/correct_management.cpp
+++ b/src/mode_correct/correct_management.cpp
@@ -81,11 +81,3 @@ void correct_data::normalTransform(float * V) {
 		V[s] = qnorm(R[s], 0.0, 1.0, 1, 0);
 	}
 }
-
-void correct_data::initializeResidualizer() {
-	vrb.title("Initialize residualizer");
-	covariate_engine = new residualizer (sample_count);
-	for (int c = 0 ; c < covariate_count ; c ++) covariate_engine->push(covariate_val[c]);
-	covariate_engine->build();
-	vrb.bullet("#covariates = " + stb.str(covariate_count));
-}
diff --git a/src/mode_correct/correct_processing.cpp b/src/mode_correct/correct_processing.cpp
index 77534bb..dd3e6f9 100644
--- a/src/mode_correct/correct_processing.cpp
+++ b/src/mode_correct/correct_processing.cpp
@@ -18,6 +18,14 @@
 void correct_data::processBED(string fin, string fout) {
 	int n_includedP = 0, n_excludedP_user = 0;
 
+	vrb.title("Initialize residualizer");
+	if (covariate_target.size() == 0) {
+		covariate_engine = new residualizer (sample_count);
+		for (int c = 0 ; c < covariate_count ; c ++) covariate_engine->push(covariate_val[c]);
+		covariate_engine->build();
+		vrb.bullet("#covariates = " + stb.str(covariate_count));
+	}
+
 	//Open BED file
 	vrb.title("Reading phenotype data in [" + fin + "] and writing [" + fout + "]");
 	output_file fdo (fout.c_str());
@@ -27,7 +35,7 @@ void correct_data::processBED(string fin, string fout) {
 	if (!fp) vrb.error("Cannot open file for reading!");
 	kstring_t str = {0,0,0};
 	if (hts_getline(fp, KS_SEP_LINE, &str) <= 0 || !str.l || str.s[0] != '#' ) vrb.error("Cannot read header!");
-	fdo << "#chr\tstart\tend\tid";
+	fdo << "#chr\tstart\tend\tid\tgid\tstrd";
 
 	//Read and map sample names
 	vector < int > mappingS;
@@ -48,7 +56,21 @@ void correct_data::processBED(string fin, string fout) {
 			fdo << tokens[0] << "\t" << tokens[1] << "\t" << tokens[2] << "\t" << tokens[3] << "\t" << tokens[4] << "\t" << tokens[5];
 			for (int t = 6 ; t < tokens.size() ; t ++) if (mappingS[t-6] >= 0) values[mappingS[t-6]] = ((tokens[t] != "NA")?stof(tokens[t]):bcf_float_missing);
 			imputeMissing(values);
-			if (residualize) covariate_engine->residualize(values);
+			if (residualize) {
+				if (covariate_target.size() > 0) {
+					covariate_engine = new residualizer (sample_count);
+					for (int c = 0 ; c < covariate_count ; c ++) {
+						if (covariate_target[c] == "ALL") covariate_engine->push(covariate_val[c]);
+						if (covariate_target[c] == tokens[4]) covariate_engine->push(covariate_val[c]);
+					}
+					covariate_engine->build();
+				}
+				covariate_engine->residualize(values);
+				if (covariate_target.size() > 0) {
+					delete covariate_engine;
+					covariate_engine = NULL;
+				}
+			}
 			if (normalize) normalTransform(values);
 			for (int s = 0 ; s < sample_count ; s++) fdo << "\t" << values[s];
 			n_includedP ++ ;
@@ -66,8 +88,14 @@ void correct_data::processBED(string fin, string fout) {
 void correct_data::processVCF(string fin, string fout) {
 	int n_includedG = 0, n_excludedG_user = 0, n_excludedG_miss = 0, n_excludedG_mult = 0;
 
-	vrb.title("Reading genotype data in [" + fin + "] and writing [" + fout + "]");
+	if (covariate_target.size() == 0) {
+		covariate_engine = new residualizer (sample_count);
+		for (int c = 0 ; c < covariate_count ; c ++) covariate_engine->push(covariate_val[c]);
+		covariate_engine->build();
+		vrb.bullet("#covariates = " + stb.str(covariate_count));
+	}
 
+	vrb.title("Reading genotype data in [" + fin + "] and writing [" + fout + "]");
 	bcf_sweep_t * sw = bcf_sweep_init(fin.c_str());
 	if (!sw) vrb.error("Cannot open file for reading [" + fin + "]");
 	bcf_hdr_t * hdr_old  = bcf_sweep_hdr(sw);
diff --git a/src/mode_correct/correct_read_covariates.cpp b/src/mode_correct/correct_read_covariates.cpp
index aff6c16..d71f1d3 100644
--- a/src/mode_correct/correct_read_covariates.cpp
+++ b/src/mode_correct/correct_read_covariates.cpp
@@ -50,3 +50,84 @@ void correct_data::readCovariates(string fcov) {
 	if (n_excludedC > 0) vrb.bullet(stb.str(n_excludedC) + " covariate(s) excluded by user");
 	fd.close();
 }
+
+
+void correct_data::readQTLCovariates(string fqtl, string fvcf) {
+	string buffer; vector < string > tokens;
+
+	//File qtl
+	map < string, string > map_qtl;
+	vrb.title("Reading QTLs in [" + fqtl + "]");
+	input_file fd (fqtl);
+	if (fd.fail()) vrb.error("Cannot open file!");
+	while (getline(fd, buffer)) {
+		stb.split(buffer, tokens);
+		if (tokens.size() != 2) vrb.error("Wrong Incorrect number of columns, expected 2!");
+		map_qtl.insert(pair < string, string > (tokens[1], tokens[0]));
+	}
+	vrb.bullet(stb.str(map_qtl.size()) + " QTL(s) read");
+	fd.close();
+
+	//
+	for (int c = 0 ; c < covariate_count ; c ++) covariate_target.push_back("ALL");
+
+	//File VCF
+	vector < int > mappingS;
+	int n_includedG = 0, n_excludedG = 0, n_includedS = 0;
+	bcf_srs_t * sr =  bcf_sr_init();
+	if(!(bcf_sr_add_reader (sr, fvcf.c_str()))) {
+		switch (sr->errnum) {
+		case not_bgzf: vrb.error("File not compressed with bgzip!"); break;
+		case idx_load_failed: vrb.error("Impossible to load index file!"); break;
+		case file_type_error: vrb.error("File format not detected by htslib!"); break;
+		default : vrb.error("Unknown error!");
+		}
+	}
+	int n_samples = bcf_hdr_nsamples(sr->readers[0].header);
+	for (int i0 = 0 ; i0 < n_samples ; i0 ++) {
+		mappingS.push_back(findSample(string(sr->readers[0].header->samples[i0])));
+		if (mappingS.back() >= 0) n_includedS++;
+	}
+    unsigned int linecount=0;
+	int ngt, ngt_arr = 0, nds, nds_arr = 0, * gt_arr = NULL, nsl, nsl_arr = 0, * sl_arr = NULL;
+	float * ds_arr = NULL;
+	bcf1_t * line;
+	while(bcf_sr_next_line (sr)) {
+        linecount ++;
+        if (linecount % 100000 == 0) vrb.bullet("Read " + stb.str(linecount) + " lines");
+		line =  bcf_sr_get_line(sr, 0);
+		if (line->n_allele == 2) {
+			ngt = bcf_get_genotypes(sr->readers[0].header, line, &gt_arr, &ngt_arr);
+			nds = bcf_get_format_float(sr->readers[0].header, line,"DS", &ds_arr, &nds_arr);
+			if (nds == n_samples || ngt == 2*n_samples) {
+				bcf_unpack(line, BCF_UN_STR);
+				string sid = string(line->d.id);
+
+				map < string, string > :: iterator itM = map_qtl.find(sid);
+
+				if (itM != map_qtl.end() && filter_covariate.check(sid)) {
+					covariate_val.push_back(vector < string > (sample_count, "0"));
+					for(int i = 0 ; i < n_samples ; i ++) {
+						if (mappingS[i] >= 0) {
+							if (nds > 0) covariate_val.back()[mappingS[i]] = stb.str(ds_arr[i]);
+							else {
+								if (gt_arr[2*i+0] == bcf_gt_missing || gt_arr[2*i+1] == bcf_gt_missing) covariate_val.back()[mappingS[i]] ="NA";
+								else covariate_val.back()[mappingS[i]] = stb.str(bcf_gt_allele(gt_arr[2*i+0]) + bcf_gt_allele(gt_arr[2*i+1]));
+							}
+						}
+					}
+					covariate_target.push_back(itM->second);
+					covariate_count++;
+					n_includedG++;
+				} else n_excludedG ++;
+			}
+		}
+	}
+
+	//Finalize
+	free(gt_arr);
+	free(ds_arr);
+	bcf_sr_destroy(sr);
+	vrb.bullet(stb.str(n_includedG) + " variants included");
+	if (n_excludedG > 0) vrb.bullet(stb.str(n_excludedG) + " variants excluded");
+}
diff --git a/src/mode_fdensity/fdensity_process.cpp b/src/mode_fdensity/fdensity_process.cpp
index b34edc6..0efba73 100644
--- a/src/mode_fdensity/fdensity_process.cpp
+++ b/src/mode_fdensity/fdensity_process.cpp
@@ -13,6 +13,7 @@
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see <http://www.gnu.org/licenses/>.*/
 
+
 #include "fdensity_data.h"
 
 void fdensity_data::buildIntervalTrees() {
@@ -45,6 +46,10 @@ void fdensity_data::buildIntervalTrees() {
 	for (int c = 0 ; c < n_chr ; c ++) Ctree[c] = IntervalTree < bool > (Ivec[c]);
 
 	//4. Build functional neighborhoods
+
+	int n_neg = 0, n_pos = 0, n_mid = 0;
+	int s_neg = 0, s_pos = 0, s_mid = 0;
+
 	basic_stats Rstat;
 	Itree = vector <  IntervalTree < bool > > (tss_count);
 	for (int t = 0 ; t < tss_count ; t ++) {
@@ -52,17 +57,38 @@ void fdensity_data::buildIntervalTrees() {
 		assert(itC != chr2idx.end());
 		vector < Interval < bool > > ann_in_cis;
 		Ctree[itC->second].findOverlapping(tss_pos[t] - window, tss_pos[t] + window, ann_in_cis);
-		Rstat.push(ann_in_cis.size() * 1.0);
+
+		IntervalStartSorter < bool > intervalStartSorter;
+		sort(ann_in_cis.begin(), ann_in_cis.end(), intervalStartSorter);
+
+        Rstat.push(ann_in_cis.size() * 1.0);
 		vector < Interval < bool > > Rvec;
-		if (!tss_neg[t]) for (int a = 0 ; a < ann_in_cis.size() ; a ++) Rvec.push_back(Interval < bool > (ann_in_cis[a].start - tss_pos[t], ann_in_cis[a].stop - tss_pos[t], true));
-		else for (int a = 0 ; a < ann_in_cis.size() ; a ++) Rvec.push_back(Interval < bool > (-1 * (ann_in_cis[a].stop - tss_pos[t]), -1 * (ann_in_cis[a].start - tss_pos[t]), true));
+
+		for (int a = 0 ; a < ann_in_cis.size() ; a ++) {
+
+			if (!tss_neg[t]) Rvec.push_back(Interval < bool > (ann_in_cis[a].start - tss_pos[t], ann_in_cis[a].stop - tss_pos[t], true));
+			else Rvec.push_back(Interval < bool > (-1 * (ann_in_cis[a].stop - tss_pos[t]), -1 * (ann_in_cis[a].start - tss_pos[t]), true));
+			assert(Rvec.back().start <= Rvec.back().stop);
+
+			if (Rvec.back().start < 0 && Rvec.back().stop < 0) {
+				s_neg += Rvec.back().stop - Rvec.back().start;
+				n_neg ++;
+			} else if (Rvec.back().start > 0 && Rvec.back().stop > 0) {
+				s_pos += Rvec.back().stop - Rvec.back().start;
+				n_pos ++;
+			} else {
+				s_mid += Rvec.back().stop - Rvec.back().start;
+				n_mid ++;
+			}
+		}
+		sort(Rvec.begin(), Rvec.end(), intervalStartSorter);
 		Itree[t] = IntervalTree < bool > (Rvec);
 	}
+
 	vrb.bullet("#annotated cis-windows = " + stb.str(Rstat.size()));
 	vrb.bullet("#annotations per cis-window = " + stb.str(Rstat.mean(), 2) + " +/- " + stb.str(Rstat.sd(), 2));
 }
 
-
 void fdensity_data::runDensityCalculation(string fout) {
 
 	vrb.title("Density analysis");
@@ -72,6 +98,7 @@ void fdensity_data::runDensityCalculation(string fout) {
 		int wto = w + bin - 1;
 		int n_annotation = 0;
 
+
 		for (int t = 0 ; t < tss_count ; t ++) {
 			vector < Interval < bool > > ann_in_bin;
 			Itree[t].findOverlapping(wfrom, wto, ann_in_bin);
@@ -80,5 +107,6 @@ void fdensity_data::runDensityCalculation(string fout) {
 
 		fdo << wfrom << " " << wto << " " << n_annotation << endl;
 	}
+
 	fdo.close();
 }
diff --git a/src/mode_fenrich/fenrich_management.cpp b/src/mode_fenrich/fenrich_management.cpp
index b8abb48..edf8ab7 100644
--- a/src/mode_fenrich/fenrich_management.cpp
+++ b/src/mode_fenrich/fenrich_management.cpp
@@ -61,10 +61,17 @@ void fenrich_data::mapAnnotation2QTL() {
 		assert(itC != chr2idx.end());
 		vector < Interval < bool > > ann_in_cis;
 		Ttree[itC->second].findOverlapping(tss_pos[t] - max_distance - 10000, tss_pos[t] + max_distance + 10000, ann_in_cis);
+		IntervalStartSorter < bool > intervalStartSorter;
+		sort(ann_in_cis.begin(), ann_in_cis.end(), intervalStartSorter);
 		Rstat.push(ann_in_cis.size() * 1.0);
 		vector < Interval < bool > > Rvec;
-		if (!tss_neg[t]) for (int a = 0 ; a < ann_in_cis.size() ; a ++) Rvec.push_back(Interval < bool > (ann_in_cis[a].start - tss_pos[t], ann_in_cis[a].stop - tss_pos[t], true));
-		else for (int a = 0 ; a < ann_in_cis.size() ; a ++) Rvec.push_back(Interval < bool > (-1 * (ann_in_cis[a].start - tss_pos[t]), -1 * (ann_in_cis[a].stop - tss_pos[t]), true));
+
+		for (int a = 0 ; a < ann_in_cis.size() ; a ++) {
+			if (!tss_neg[t]) Rvec.push_back(Interval < bool > (ann_in_cis[a].start - tss_pos[t], ann_in_cis[a].stop - tss_pos[t], true));
+			else Rvec.push_back(Interval < bool > (-1 * (ann_in_cis[a].stop - tss_pos[t]), -1 * (ann_in_cis[a].start - tss_pos[t]), true));
+			assert(Rvec.back().start <= Rvec.back().stop);
+		}
+		sort(Rvec.begin(), Rvec.end(), intervalStartSorter);
 		R[t] = IntervalTree < bool > (Rvec);
 	}
 	vrb.bullet("#annotated cis-windows = " + stb.str(Rstat.size()));
diff --git a/src/mode_match/match_process.cpp b/src/mode_match/match_process.cpp
index 309db27..e7c397a 100644
--- a/src/mode_match/match_process.cpp
+++ b/src/mode_match/match_process.cpp
@@ -20,6 +20,8 @@ void match_data::writeOutput(string filename) {
 	vrb.title("Write summary report in ["  + filename + "]");
 	output_file fd (filename);
 
+	fd << "SampleID n_geno_missing n_het_total n_hom_total n_het_covered n_hom_covered n_het_consistent n_hom_consistent perc_het_consistent perc_hom_consistent n_het_in_ase" << endl;
+
 	for (int i = 0 ; i < sample_count ; i ++) {
 		unsigned int n_mis_tot = 0, n_het_tot = 0, n_hom_tot = 0, n_het_cov = 0, n_hom_cov = 0, n_het_fit = 0, n_hom_fit = 0, n_het_ase = 0;
 		for (int r = 0; r < regions.size() ; r ++) {
@@ -61,7 +63,10 @@ void match_data::writeOutput(string filename) {
 		fd << " " << n_hom_cov;
 		fd << " " << n_het_fit;
 		fd << " " << n_hom_fit;
-		fd << " " << n_het_ase << endl;
+		fd << " " << n_het_fit/n_het_cov;
+		fd << " " << n_hom_fit/n_hom_cov;
+		fd << " " << n_het_ase;
+		fd << endl;
 	}
 	fd.close();
 }
diff --git a/src/mode_quan/quan_chunking.cpp b/src/mode_quan/quan_chunking.cpp
index e125e56..f26d981 100644
--- a/src/mode_quan/quan_chunking.cpp
+++ b/src/mode_quan/quan_chunking.cpp
@@ -34,9 +34,22 @@ void quan_data::setChunk(int k, int K){
     unsigned long int max_length =0 ;
     if (gene_grps.size() % K == 0) max_length = gene_grps.size() / K;
     else for ( unsigned long int l = 1 ; l * (K-1) < gene_grps.size(); l++ ) max_length = l;
-    unsigned long int start_idx = (k-1) * max_length;
-    unsigned long int end_idx = k * max_length;
+    unsigned long int start_idx, end_idx;
+    if (K * max_length < gene_grps.size()){
+		int diff = gene_grps.size() - (K * max_length);
+		if (k <= diff){
+			start_idx = (k-1) * (max_length + 1) + 1;
+			end_idx = k * (max_length + 1);
+		}else{
+			int prev = diff * (max_length + 1);
+			start_idx = (k-diff-1) * max_length + 1 + prev;
+			end_idx = (k-diff) * max_length + prev;
+		}
+	}else{
+		start_idx = (k-1) * max_length + 1;
+		end_idx = k * max_length;
+	}
     if (end_idx > gene_grps.size()) end_idx = gene_grps.size();
     gene_grps = vector < quan_gene_grp > (gene_grps.begin()+start_idx, gene_grps.begin()+end_idx);
     vrb.bullet("Number of gene groups in chunk [" + stb.str(k) + " / " + stb.str(K) +"] = " + stb.str(gene_grps.size()));
-}
\ No newline at end of file
+}
diff --git a/src/mode_quan/quan_data.h b/src/mode_quan/quan_data.h
index 314e980..b7e0914 100644
--- a/src/mode_quan/quan_data.h
+++ b/src/mode_quan/quan_data.h
@@ -197,7 +197,7 @@ typedef struct {     			// auxiliary data structure
     hts_itr_t * iter;			// NULL if a region not specified
     hts_idx_t * idx;
     int min_mapQ;				// mapQ filter
-    bool dup_remove;			// remove duplicates
+    bool dup_remove,fail_qc;	// remove duplicates, failed QC reads
     int max_intron_length;
     double max_mismatch_count;
     double max_mismatch_count_total;
@@ -253,9 +253,9 @@ public:
     //FILTERS
     int max_intron_length;
     double max_mismatch_count,max_mismatch_count_total;
-    unsigned int min_mapQ,max_read_length;
-    bool dup_remove,proper_pair,check_consistency,debug,merge,fraction_mm,fraction_mmt;
-    quan_data(){max_intron_length=0;max_mismatch_count=0.0;max_mismatch_count_total=0.0;min_mapQ=0;max_read_length=0;dup_remove=false;proper_pair=false;check_consistency=false;debug=false;merge=true;fraction_mm=false;fraction_mmt=false;}
+    unsigned int min_mapQ,max_read_length,min_exon;
+    bool dup_remove,proper_pair,check_consistency,debug,merge,fraction_mm,fraction_mmt,fail_qc,old_wrong_split;
+    quan_data(){max_intron_length=0;max_mismatch_count=0.0;max_mismatch_count_total=0.0;min_mapQ=0;max_read_length=0;dup_remove=false;proper_pair=false;check_consistency=false;debug=false;merge=true;fraction_mm=false;fraction_mmt=false;fail_qc=false;old_wrong_split=false;}
     
     void setChunk(int,int);
     void setRegion(string);
diff --git a/src/mode_quan/quan_main.cpp b/src/mode_quan/quan_main.cpp
index 61d23f0..dd0549d 100644
--- a/src/mode_quan/quan_main.cpp
+++ b/src/mode_quan/quan_main.cpp
@@ -37,12 +37,15 @@ void quan_main(vector < string > & argv) {
 
     boost::program_options::options_description opt_filters ("\x1B[32mFilters\33[0m");
     opt_filters.add_options()
-    	("filter-mapping-quality", boost::program_options::value< unsigned int >()->default_value(10), "Minimal phred mapping quality for a read to be considered.")
+    	("filter-mapping-quality", boost::program_options::value< unsigned int >()->default_value(10), "Minimal mapping quality for a read to be considered.")
 		("filter-mismatch", boost::program_options::value< double >()->default_value(-1.0,"OFF"), "Maximum mismatches allowed in a read. If between 0 and 1 taken as the fraction of read length. (Requires NM attribute)")
 		("filter-mismatch-total", boost::program_options::value< double >()->default_value(-1.0,"OFF"), "Maximum total mismatches allowed in paired reads. If between 0 and 1 taken as the fraction of combined read length. (Requires NM attribute)")
 		("check-proper-pairing", "If provided only properly paired reads according to the aligner that are in correct orientation will be considered. Otherwise all pairs in correct orientation will be considered.")
         ("check-consistency", "If provided checks the consistency of split reads with annotation, rather than pure overlap of one of the blocks of the split read.")
         ("no-merge", "If provided overlapping mate pairs will not be merged.")
+		("legacy-options", "Exactly replicate Dermitzakis lab original quantification script. (DO NOT USE UNLESS YOU HAVE A GOOD REASON). Sets --no-merge as well.")
+		("filter-failed-qc", "Remove fastq reads that fail sequencing QC (as indicated by the sequencer)")
+		("filter-min-exon", boost::program_options::value< unsigned int >()->default_value(0), "Minimal exon length to consider. Exons smaller than this will not be printed out in the exon quantifications, but will still count towards gene quantifications.")
 		("filter-remove-duplicates", "Remove duplicate sequencing reads in the process.");
     
     boost::program_options::options_description opt_parallel ("\x1B[32mParallelization\33[0m");
@@ -119,11 +122,28 @@ void quan_main(vector < string > & argv) {
         D.dup_remove = true;
     }
     
+    if (D.options.count("filter-failed-qc")){
+        vrb.bullet("Filtering reads flagged as failing QC");
+        D.fail_qc = true;
+    }
+
     if (D.options.count("no-merge")){
         vrb.bullet("Not merging overlapping mate pairs");
         D.merge = false;
     }
     
+    if (D.options.count("legacy-options")){
+    	if (!D.options.count("no-merge")) vrb.bullet("Not merging overlapping mate pairs");
+    	D.min_exon = 2;
+    	vrb.bullet("Excluding exons smaller than " + stb.str(D.min_exon) );
+        vrb.warning("You are using --legacy-options, do you know what you are doing?");
+        D.old_wrong_split = true;
+        D.merge = false;
+    }else{
+    	D.min_exon = D.options["filter-min-exon"].as < unsigned int > ();
+    	vrb.bullet("Excluding exons smaller than " + stb.str(D.min_exon) );
+    }
+
     if (D.options.count("gene-types")){
     	vector < string > t = D.options["gene-types"].as < vector < string > > ();
     	D.gene_types = set < string > (t.begin(),t.end());
diff --git a/src/mode_quan/quan_printResults.cpp b/src/mode_quan/quan_printResults.cpp
index 1e75b88..84554de 100644
--- a/src/mode_quan/quan_printResults.cpp
+++ b/src/mode_quan/quan_printResults.cpp
@@ -52,6 +52,7 @@ void quan_data::printBEDcount(string fout){
             for (int i = 0 ; i < bams.size(); i++) fdo << "\t" << gene_grps[gr].genes[g].read_count[i];
             fdo << endl;
             for (int e = 0 ; e < gene_grps[gr].genes[g].exons.size(); e++){
+            	if (gene_grps[gr].genes[g].exons[e].length < min_exon) continue;
                 fdoe << chr;
                 fdoe << "\t" << gene_grps[gr].genes[g].tss-1;
                 fdoe << "\t" << gene_grps[gr].genes[g].tss;
@@ -104,6 +105,7 @@ void quan_data::printBEDrpkm(string fout){
             for (int i = 0 ; i < bams.size(); i++) fdo << "\t" << ((gene_grps[gr].genes[g].read_count[i] * 1000.0) / (double) gene_grps[gr].genes[g].length) * (1000000.0 / (double)stats[i].exonic);
             fdo << endl;
             for (int e = 0 ; e < gene_grps[gr].genes[g].exons.size(); e++){
+            	if (gene_grps[gr].genes[g].exons[e].length < min_exon) continue;
                 fdoe << chr;
                 fdoe << "\t" << gene_grps[gr].genes[g].tss-1;
                 fdoe << "\t" << gene_grps[gr].genes[g].tss;
diff --git a/src/mode_quan/quan_readBAM.cpp b/src/mode_quan/quan_readBAM.cpp
index 54a2e07..6d850f1 100644
--- a/src/mode_quan/quan_readBAM.cpp
+++ b/src/mode_quan/quan_readBAM.cpp
@@ -36,7 +36,14 @@ int quan_data::read_bam(void *data, bam1_t *b, quan_stats &f, unsigned int &mmc)
 			continue;
 		}
         
-		if (b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL)) {
+		if (b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY)) {
+			f.failed[name] = 'u';
+            if (debug) cerr << f.failed[name] << "\t" << name << endl;
+			f.unmapped++;
+			continue;
+		}
+
+		if(aux->fail_qc && b->core.flag & BAM_FQCFAIL){
 			f.failed[name] = 'u';
             if (debug) cerr << f.failed[name] << "\t" << name << endl;
 			f.unmapped++;
@@ -172,6 +179,7 @@ void quan_data::readBams(){
         data->max_mismatch_count = max_mismatch_count;
         data->max_mismatch_count_total = max_mismatch_count_total;
         data->max_intron_length = max_intron_length;
+        data->fail_qc = fail_qc;
         data->fp = sam_open(fbam.c_str(), "r");
         if (data->fp == 0) vrb.error("Cannot open file! [" + fbam + "]");
         data->hdr = sam_hdr_read(data->fp);
@@ -204,8 +212,8 @@ void quan_data::readBams(){
                     for (int i = 0; i < c->n_cigar; ++i) {
                         int l = bam_cigar_oplen(cigar[i]);
                         char c = bam_cigar_opchr(cigar[i]);
-                        if(c=='S' || c=='D' || c =='H' || c=='P') continue;
-                        else if (c=='N' && l){
+                        if(c=='S' || c =='H' || c =='I' || c=='P') continue;
+                        else if ( (c=='N' || c == 'D' ) && l){
                             B.starts.push_back(bS);
                             B.ends.push_back(bS+bL-1);
                             B.lengths.push_back(bL);
@@ -233,12 +241,13 @@ void quan_data::readBams(){
                             	stat.mismatch+=2;
                             	continue;
                             }
-                            if(A.core.mtid != B.core.tid || (A.core.flag & BAM_FREVERSE) || !(B.core.flag & BAM_FREVERSE) || (B.core.pos < A.core.mpos) ){
+                            if(A.core.mtid != B.core.tid || (A.core.flag & BAM_FREVERSE) || !(B.core.flag & BAM_FREVERSE) || ( old_wrong_split && B.core.pos <= A.core.pos) || ( !old_wrong_split && B.core.pos < A.core.pos)){
                                 stat.failed[name] = 'p';
                                 if (debug) cerr << stat.failed[name] << "\t" << name << endl;
                                 stat.unpaired++;
                                 continue;
                             }
+
                             if (merge) A.merge(B);
                             stat.good +=2;
                             bool both_found = false;
@@ -254,7 +263,10 @@ void quan_data::readBams(){
                                                 idx = e;
                                                 any_found1 = true;
                                                 exon_overlap1.push_back(idx);
-                                                exon_overlap1_length.push_back(min(gene_grps[gr].genes[g].exons[idx].end, A.ends[i]) - max(gene_grps[gr].genes[g].exons[idx].start, A.starts[i]) + 1);
+                                                if ( old_wrong_split){
+                                                	int mul = ( (int) A.starts[i] / 100000 != (int) A.ends[i] / 100000 && (int) gene_grps[gr].genes[g].exons[idx].start / 100000  != (int) gene_grps[gr].genes[g].exons[idx].end / 100000 ) ?  2 : 1;
+                                                	exon_overlap1_length.push_back(gene_grps[gr].genes[g].exons[idx].end - A.starts[i] < A.ends[i] - A.starts[i] ? (gene_grps[gr].genes[g].exons[idx].end - A.starts[i] + 1) * mul : A.lengths[i] * mul);
+                                                }else exon_overlap1_length.push_back(min(gene_grps[gr].genes[g].exons[idx].end, A.ends[i]) - max(gene_grps[gr].genes[g].exons[idx].start, A.starts[i]) + 1);
                                                 exon_overlap1_length_total += exon_overlap1_length.back();
                                                 exon_map1.push_back(i);
                                             }
@@ -262,26 +274,8 @@ void quan_data::readBams(){
                                     }
                                     if (idx == -1) all_found1 = false;
                                 }
-                                if (!all_found1 && check_consistency){
-                                    if (debug){
-                                        cerr << "NCONS\t" << name<<endl;
-                                        cerr << gene_grps[gr].genes[g];
-                                        cerr << A;
-                                        cerr << "NCONS\t" << name<<endl;
-                                    }
-                                    continue;
-                                }
-                                if (!any_found1){
-                                    if (debug){
-                                        cerr << "NCONS\t" << name<<endl;
-                                        cerr << gene_grps[gr].genes[g];
-                                        cerr << A;
-                                        cerr << "NCONS\t" << name<<endl;
-                                    }
-                                    continue;
-                                }
-                                
-                                
+
+
                                 for (int i = 0 ; i < B.starts.size(); i++){
                                     int idx = -1;
                                     if (gene_grps[gr].genes[g].overlap(B.starts[i],B.ends[i])){
@@ -290,7 +284,10 @@ void quan_data::readBams(){
                                                 idx = e;
                                                 any_found2 = true;
                                                 exon_overlap2.push_back(idx);
-                                                exon_overlap2_length.push_back(min(gene_grps[gr].genes[g].exons[idx].end, B.ends[i]) - max(gene_grps[gr].genes[g].exons[idx].start, B.starts[i]) + 1);
+                                                if ( old_wrong_split) {
+                                                	int mul = ( (int) B.starts[i] / 100000 != (int) B.ends[i] / 100000 && (int) gene_grps[gr].genes[g].exons[idx].start / 100000  != (int) gene_grps[gr].genes[g].exons[idx].end / 100000 ) ?  2 : 1;
+                                                	exon_overlap2_length.push_back(gene_grps[gr].genes[g].exons[idx].end - B.starts[i] < B.ends[i] - B.starts[i] ? (gene_grps[gr].genes[g].exons[idx].end - B.starts[i] + 1) * mul: B.lengths[i] * mul);
+                                                }else exon_overlap2_length.push_back(min(gene_grps[gr].genes[g].exons[idx].end, B.ends[i]) - max(gene_grps[gr].genes[g].exons[idx].start, B.starts[i]) + 1);
                                                 exon_overlap2_length_total += exon_overlap2_length.back();
                                                 exon_map2.push_back(i);
                                             }
@@ -298,20 +295,41 @@ void quan_data::readBams(){
                                     }
                                     if (idx == -1) all_found2 = false;
                                 }
+
+                                if (!all_found1 && check_consistency){
+                                    if (debug){
+                                        cerr << "NCONSALL1\t" << name<<endl;
+                                        cerr << gene_grps[gr].genes[g];
+                                        cerr << A;
+                                        cerr << B;
+                                    }
+                                    continue;
+                                }
+                                //for debugging
+                                if (!any_found1){
+                                    if (debug){
+                                        cerr << "NCONSANY1\t" << name<<endl;
+                                        cerr << gene_grps[gr].genes[g];
+                                        cerr << A;
+                                        cerr << B;
+                                    }
+                                    continue;
+                                }
+                                
                                 if (!all_found2 && check_consistency){
                                     if (debug){
-                                        cerr << "NCONS\t" << name<<endl;
-                                        cerr << "NCONS\t" << name<<endl;
+                                        cerr << "NCONSALL2\t" << name<<endl;
                                         cerr << gene_grps[gr].genes[g];
+                                        cerr << A;
                                         cerr << B;
                                     }
                                     continue;
                                 }
                                 if (!any_found2){
                                     if(debug){
-                                        cerr << "NCONS\t" << name<<endl;
-                                        cerr << "NCONS\t" << name<<endl;
+                                        cerr << "NCONSANY2\t" << name<<endl;
                                         cerr << gene_grps[gr].genes[g];
+                                        cerr << A;
                                         cerr << B;
                                     }
                                     continue;
@@ -356,7 +374,10 @@ void quan_data::readBams(){
                                             idx = e;
                                             any_found2 = true;
                                             exon_overlap2.push_back(idx);
-                                            exon_overlap2_length.push_back(min(gene_grps[gr].genes[g].exons[idx].end, B.ends[i]) - max(gene_grps[gr].genes[g].exons[idx].start, B.starts[i]) + 1);
+                                            if ( old_wrong_split) {
+                                            	int mul = ( (int) B.starts[i] / 100000 != (int) B.ends[i] / 100000 && (int) gene_grps[gr].genes[g].exons[idx].start / 100000  != (int) gene_grps[gr].genes[g].exons[idx].end / 100000 ) ?  2 : 1;
+                                            	exon_overlap2_length.push_back(gene_grps[gr].genes[g].exons[idx].end - B.starts[i] < B.ends[i] - B.starts[i] ? (gene_grps[gr].genes[g].exons[idx].end - B.starts[i] + 1) * mul: B.lengths[i] * mul);
+                                            } else exon_overlap2_length.push_back(min(gene_grps[gr].genes[g].exons[idx].end, B.ends[i]) - max(gene_grps[gr].genes[g].exons[idx].start, B.starts[i]) + 1);
                                             exon_overlap2_length_total += exon_overlap2_length[i];
                                             exon_map2.push_back(i);
                                         }
diff --git a/src/mode_rtc/rtc_common.cpp b/src/mode_rtc/rtc_common.cpp
index 10dc2a6..154379e 100644
--- a/src/mode_rtc/rtc_common.cpp
+++ b/src/mode_rtc/rtc_common.cpp
@@ -214,7 +214,7 @@ void rtc_data::calculateRTC(string fout){
     if (fdo.fail()) vrb.error("Cannot open file [" + fout + "]");
     if (options.count("header") || (!options.count("chunk") && !options.count("region"))){
         fdo <<"other_variant our_variant phenotype phenotype_group other_variant_chr other_variant_start other_variant_rank our_variant_chr our_variant_start our_variant_rank phenotype_chr phenotype_start distance_between_variants distance_between_other_variant_and_pheno other_variant_region_index our_variant_region_index region_start region_end variant_count_in_region RTC D' r^2";
-        if (options.count("sample")) fdo << " p_value unique_picks_H0 unique_picks_H1 rtc_bin_start rtc_bin_end rtc_bin_H0_proportion rtc_bin_H1_proportion median_r^2 median_H0 median_H1 H0 H1";
+        if (options["sample"].as <unsigned int> ()) fdo << " p_value unique_picks_H0 unique_picks_H1 rtc_bin_start rtc_bin_end rtc_bin_H0_proportion rtc_bin_H1_proportion median_r^2 median_H0 median_H1 H0 H1";
         fdo << endl;
     }
     map < string ,vector < pairsToTestForRTC > >::iterator it;
diff --git a/src/mode_trans/trans_chunking.cpp b/src/mode_trans/trans_chunking.cpp
index b7319cb..efa52e0 100644
--- a/src/mode_trans/trans_chunking.cpp
+++ b/src/mode_trans/trans_chunking.cpp
@@ -25,7 +25,19 @@ void trans_data::setPhenotypeLines(int k, int K) {
     unsigned long int max_length =0 ;
     if (phenotype_count % K == 0) max_length = phenotype_count / K;
     else for ( unsigned long int l = 1 ; l * (K-1) < phenotype_count; l++ ) max_length = l;
-    start_line = (k-1) * max_length + 1;
-    end_line = k * max_length;
+    if (K * max_length < phenotype_count){
+    	int diff = phenotype_count - (K * max_length);
+    	if (k <= diff){
+        	start_line = (k-1) * (max_length + 1) + 1;
+        	end_line = k * (max_length + 1);
+    	}else{
+    		int prev = diff * (max_length + 1);
+        	start_line = (k-diff-1) * max_length + 1 + prev;
+        	end_line = (k-diff) * max_length + prev;
+    	}
+    }else{
+    	start_line = (k-1) * max_length + 1;
+    	end_line = k * max_length;
+    }
     if (end_line > phenotype_count) end_line = phenotype_count;
-}
\ No newline at end of file
+}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/qtltools.git



More information about the debian-med-commit mailing list