[med-svn] [eagle] 01/02: Imported Upstream version 2.3.1

Dylan Aïssi bob.dybian-guest at moszumanska.debian.org
Sun Mar 5 21:40:55 UTC 2017


This is an automated email from the git hooks/post-receive script.

bob.dybian-guest pushed a commit to branch master
in repository eagle.

commit e12c01b64a8d5f688fe4531f8783a384cfbca506
Author: Dylan Aïssi <bob.dybian at gmail.com>
Date:   Sun Mar 5 22:40:34 2017 +0100

    Imported Upstream version 2.3.1
---
 example/example.log          |  70 ++++++++++++++++++++---------------------
 example/example_ref.log      |  21 +++++++------
 example/example_vcf.log      |  72 +++++++++++++++++++++----------------------
 example/phased.vcf.gz        | Bin 200389 -> 200385 bytes
 example/target.phased.vcf.gz | Bin 21049 -> 21050 bytes
 src/Eagle.cpp                |   8 +++++
 src/Eagle.hpp                |   4 +--
 src/EagleImpMiss.cpp         |   1 +
 src/EagleMain.cpp            |  16 +++++-----
 src/EaglePBWT.cpp            |  11 ++++---
 src/EagleParams.cpp          |  16 +++++++---
 src/FileUtils.cpp            |   4 +--
 src/GenoData.cpp             |   6 +++-
 src/HapHedge.cpp             |   2 +-
 src/MapInterpolater.cpp      |   8 ++++-
 src/SyncedVcfData.cpp        |  44 +++++++++++++++++++-------
 src/Version.hpp              |   4 +--
 17 files changed, 170 insertions(+), 117 deletions(-)

diff --git a/example/example.log b/example/example.log
index 394a0ed..bd2d755 100644
--- a/example/example.log
+++ b/example/example.log
@@ -1,13 +1,13 @@
                       +-----------------------------+
                       |                             |
-                      |   Eagle v2.3                |
-                      |   July 22, 2016             |
+                      |   Eagle v2.3.1              |
+                      |   March 3, 2017             |
                       |   Po-Ru Loh                 |
                       |                             |
                       +-----------------------------+
 
 Copyright (C) 2015-2016 Harvard University.
-Distributed under the GNU GPLv3 open source license.
+Distributed under the GNU GPLv3+ open source license.
 
 Command line options:
 
@@ -56,25 +56,25 @@ Setting --histFactor=1.00
 
 BEGINNING STEP 1
 
-Time for step 1: 0.373152
-Time for step 1 MN^2: 0.0242926
+Time for step 1: 0.380579
+Time for step 1 MN^2: 0.024638
 
-Making hard calls (time: 0.0283859)
+Making hard calls (time: 0.0283101)
 
 
 BEGINNING STEP 2
 
 BATCH 1 OF 1
 Building hash tables
-.................................................................. (time: 0.0523882)
+.................................................................. (time: 0.0659139)
 
 Phasing samples 1-379
-Time for phasing batch: 0.449098
+Time for phasing batch: 0.466222
 
-Making hard calls (time: 0.029567)
+Making hard calls (time: 0.029881)
 
-Time for step 2: 0.531062
-Time for step 2 MN^2: 0.0591508
+Time for step 2: 0.562044
+Time for step 2 MN^2: 0.0596148
 
 
 BEGINNING STEP 3 (PBWT ITERS)
@@ -87,108 +87,108 @@ BEGINNING PBWT ITER 1
 BATCH 1 OF 10
 
 Phasing samples 1-37
-Time for phasing batch: 1.75458
+Time for phasing batch: 1.84304
 
 BATCH 2 OF 10
 
 Phasing samples 38-75
-Time for phasing batch: 1.59901
+Time for phasing batch: 1.67975
 
 BATCH 3 OF 10
 
 Phasing samples 76-113
-Time for phasing batch: 1.69437
+Time for phasing batch: 1.79137
 
 BATCH 4 OF 10
 
 Phasing samples 114-151
-Time for phasing batch: 1.62289
+Time for phasing batch: 1.71214
 
 BATCH 5 OF 10
 
 Phasing samples 152-189
-Time for phasing batch: 1.59901
+Time for phasing batch: 1.62954
 
 BATCH 6 OF 10
 
 Phasing samples 190-227
-Time for phasing batch: 1.63321
+Time for phasing batch: 1.67016
 
 BATCH 7 OF 10
 
 Phasing samples 228-265
-Time for phasing batch: 1.63894
+Time for phasing batch: 1.66139
 
 BATCH 8 OF 10
 
 Phasing samples 266-303
-Time for phasing batch: 1.57722
+Time for phasing batch: 1.6077
 
 BATCH 9 OF 10
 
 Phasing samples 304-341
-Time for phasing batch: 1.56258
+Time for phasing batch: 1.63602
 
 BATCH 10 OF 10
 
 Phasing samples 342-379
-Time for phasing batch: 1.63269
+Time for phasing batch: 1.69574
 
-Time for PBWT iter 1: 16.3145
+Time for PBWT iter 1: 16.9269
 
 BEGINNING PBWT ITER 2
 
 BATCH 1 OF 10
 
 Phasing samples 1-37
-Time for phasing batch: 2.73515
+Time for phasing batch: 2.81822
 
 BATCH 2 OF 10
 
 Phasing samples 38-75
-Time for phasing batch: 2.58695
+Time for phasing batch: 2.59811
 
 BATCH 3 OF 10
 
 Phasing samples 76-113
-Time for phasing batch: 2.70714
+Time for phasing batch: 2.79582
 
 BATCH 4 OF 10
 
 Phasing samples 114-151
-Time for phasing batch: 2.54992
+Time for phasing batch: 2.55964
 
 BATCH 5 OF 10
 
 Phasing samples 152-189
-Time for phasing batch: 2.51631
+Time for phasing batch: 2.70656
 
 BATCH 6 OF 10
 
 Phasing samples 190-227
-Time for phasing batch: 2.62562
+Time for phasing batch: 2.66884
 
 BATCH 7 OF 10
 
 Phasing samples 228-265
-Time for phasing batch: 2.60083
+Time for phasing batch: 2.66954
 
 BATCH 8 OF 10
 
 Phasing samples 266-303
-Time for phasing batch: 2.4503
+Time for phasing batch: 2.51312
 
 BATCH 9 OF 10
 
 Phasing samples 304-341
-Time for phasing batch: 2.54366
+Time for phasing batch: 2.58987
 
 BATCH 10 OF 10
 
 Phasing samples 342-379
-Time for phasing batch: 2.56976
+Time for phasing batch: 2.608
 
-Time for PBWT iter 2: 25.8857
+Time for PBWT iter 2: 26.5278
 Writing .haps.gz and .sample output
-Time for writing output: 0.370972
-Total elapsed time for analysis = 43.6733 sec
+Time for writing output: 0.354415
+Total elapsed time for analysis = 44.9045 sec
diff --git a/example/example_ref.log b/example/example_ref.log
index e8fb1b7..f53b54d 100644
--- a/example/example_ref.log
+++ b/example/example_ref.log
@@ -1,13 +1,13 @@
                       +-----------------------------+
                       |                             |
-                      |   Eagle v2.3                |
-                      |   July 22, 2016             |
+                      |   Eagle v2.3.1              |
+                      |   March 3, 2017             |
                       |   Po-Ru Loh                 |
                       |                             |
                       +-----------------------------+
 
 Copyright (C) 2015-2016 Harvard University.
-Distributed under the GNU GPLv3 open source license.
+Distributed under the GNU GPLv3+ open source license.
 
 Command line options:
 
@@ -29,10 +29,11 @@ Target samples: Ntarget = 8
 [W::vcf_parse] INFO 'EX_TARGET' is not defined in the header, assuming Type=String
 SNPs to analyze: M = 430 SNPs in both target and reference
 
-SNPs ignored: 0 SNPs in target but not reference
+SNPs ignored: 215 SNPs in target but not reference
+              --> WARNING: Check REF/ALT agreement between target and ref <--
               215 SNPs in reference but not target
-              0 multi-allelic SNPs
-              0 monomorphic SNPs
+              0 multi-allelic SNPs in target
+              0 monomorphic SNPs in target
 
 Missing rate in target genotypes: 0.00116279
 
@@ -57,15 +58,15 @@ PHASING ITER 1 OF 1
 
 Phasing target samples
 ................................................................................
-Time for phasing iter 1: 0.23367
-Writing vcf.gz output to target.phased.vcf.gz
+Time for phasing iter 1: 0.236171
+Writing vcf output to target.phased.vcf
 [W::vcf_parse] INFO 'AC' is not defined in the header, assuming Type=String
 [W::vcf_parse] INFO 'AN' is not defined in the header, assuming Type=String
 [W::vcf_parse] INFO 'DP' is not defined in the header, assuming Type=String
 [W::vcf_parse] INFO 'AFR_AF' is not defined in the header, assuming Type=String
 [W::vcf_parse] INFO 'EX_TARGET' is not defined in the header, assuming Type=String
-Time for writing output: 0.028584
-Total elapsed time for analysis = 9.43616 sec
+Time for writing output: 0.051106
+Total elapsed time for analysis = 9.22206 sec
 
 Mean phase confidence of each target individual:
 ID	PHASE_CONFIDENCE
diff --git a/example/example_vcf.log b/example/example_vcf.log
index 23decc9..6932e15 100644
--- a/example/example_vcf.log
+++ b/example/example_vcf.log
@@ -1,13 +1,13 @@
                       +-----------------------------+
                       |                             |
-                      |   Eagle v2.3                |
-                      |   July 22, 2016             |
+                      |   Eagle v2.3.1              |
+                      |   March 3, 2017             |
                       |   Po-Ru Loh                 |
                       |                             |
                       +-----------------------------+
 
 Copyright (C) 2015-2016 Harvard University.
-Distributed under the GNU GPLv3 open source license.
+Distributed under the GNU GPLv3+ open source license.
 
 Command line options:
 
@@ -39,25 +39,25 @@ Setting --histFactor=1.00
 
 BEGINNING STEP 1
 
-Time for step 1: 0.375713
-Time for step 1 MN^2: 0.0244864
+Time for step 1: 0.383094
+Time for step 1 MN^2: 0.024787
 
-Making hard calls (time: 0.028863)
+Making hard calls (time: 0.0283229)
 
 
 BEGINNING STEP 2
 
 BATCH 1 OF 1
 Building hash tables
-.................................................................. (time: 0.0508139)
+.................................................................. (time: 0.0526478)
 
 Phasing samples 1-379
-Time for phasing batch: 0.437996
+Time for phasing batch: 0.469969
 
-Making hard calls (time: 0.029588)
+Making hard calls (time: 0.029753)
 
-Time for step 2: 0.51841
-Time for step 2 MN^2: 0.0594165
+Time for step 2: 0.552398
+Time for step 2 MN^2: 0.0597794
 
 
 BEGINNING STEP 3 (PBWT ITERS)
@@ -70,108 +70,108 @@ BEGINNING PBWT ITER 1
 BATCH 1 OF 10
 
 Phasing samples 1-37
-Time for phasing batch: 1.77567
+Time for phasing batch: 1.77488
 
 BATCH 2 OF 10
 
 Phasing samples 38-75
-Time for phasing batch: 1.64262
+Time for phasing batch: 1.69304
 
 BATCH 3 OF 10
 
 Phasing samples 76-113
-Time for phasing batch: 1.71332
+Time for phasing batch: 1.74526
 
 BATCH 4 OF 10
 
 Phasing samples 114-151
-Time for phasing batch: 1.61786
+Time for phasing batch: 1.64103
 
 BATCH 5 OF 10
 
 Phasing samples 152-189
-Time for phasing batch: 1.6091
+Time for phasing batch: 1.66061
 
 BATCH 6 OF 10
 
 Phasing samples 190-227
-Time for phasing batch: 1.6461
+Time for phasing batch: 1.66104
 
 BATCH 7 OF 10
 
 Phasing samples 228-265
-Time for phasing batch: 1.651
+Time for phasing batch: 1.6719
 
 BATCH 8 OF 10
 
 Phasing samples 266-303
-Time for phasing batch: 1.58484
+Time for phasing batch: 1.58273
 
 BATCH 9 OF 10
 
 Phasing samples 304-341
-Time for phasing batch: 1.59481
+Time for phasing batch: 1.63833
 
 BATCH 10 OF 10
 
 Phasing samples 342-379
-Time for phasing batch: 1.63671
+Time for phasing batch: 1.6582
 
-Time for PBWT iter 1: 16.4721
+Time for PBWT iter 1: 16.727
 
 BEGINNING PBWT ITER 2
 
 BATCH 1 OF 10
 
 Phasing samples 1-37
-Time for phasing batch: 2.72875
+Time for phasing batch: 2.75391
 
 BATCH 2 OF 10
 
 Phasing samples 38-75
-Time for phasing batch: 2.59217
+Time for phasing batch: 2.66628
 
 BATCH 3 OF 10
 
 Phasing samples 76-113
-Time for phasing batch: 2.72183
+Time for phasing batch: 2.74627
 
 BATCH 4 OF 10
 
 Phasing samples 114-151
-Time for phasing batch: 2.57471
+Time for phasing batch: 2.55569
 
 BATCH 5 OF 10
 
 Phasing samples 152-189
-Time for phasing batch: 2.5283
+Time for phasing batch: 2.56301
 
 BATCH 6 OF 10
 
 Phasing samples 190-227
-Time for phasing batch: 2.64632
+Time for phasing batch: 2.63885
 
 BATCH 7 OF 10
 
 Phasing samples 228-265
-Time for phasing batch: 2.63781
+Time for phasing batch: 2.61097
 
 BATCH 8 OF 10
 
 Phasing samples 266-303
-Time for phasing batch: 2.52006
+Time for phasing batch: 2.51258
 
 BATCH 9 OF 10
 
 Phasing samples 304-341
-Time for phasing batch: 2.53771
+Time for phasing batch: 2.57989
 
 BATCH 10 OF 10
 
 Phasing samples 342-379
-Time for phasing batch: 2.51691
+Time for phasing batch: 2.57872
 
-Time for PBWT iter 2: 26.0046
-Writing vcf.gz output to phased.vcf.gz
-Time for writing output: 0.331217
-Total elapsed time for analysis = 52.9965 sec
+Time for PBWT iter 2: 26.2062
+Writing vcf output to phased.vcf
+Time for writing output: 0.168121
+Total elapsed time for analysis = 53.6103 sec
diff --git a/example/phased.vcf.gz b/example/phased.vcf.gz
index 33f7f98..962c54b 100644
Binary files a/example/phased.vcf.gz and b/example/phased.vcf.gz differ
diff --git a/example/target.phased.vcf.gz b/example/target.phased.vcf.gz
index 270221f..f74a9f3 100644
Binary files a/example/target.phased.vcf.gz and b/example/target.phased.vcf.gz differ
diff --git a/src/Eagle.cpp b/src/Eagle.cpp
index 45bb95c..4860b60 100644
--- a/src/Eagle.cpp
+++ b/src/Eagle.cpp
@@ -3221,7 +3221,15 @@ namespace EAGLE {
 		       char **argv) const {
 
     htsFile *htsTmp = hts_open(tmpFile.c_str(), "r");
+    if (htsTmp == NULL) {
+      cerr << "ERROR: Could not open temporary file " << tmpFile << endl;
+      exit(1);
+    }
     htsFile *out = hts_open(outFile.c_str(), writeMode.c_str());
+    if (out == NULL) {
+      cerr << "ERROR: Could not write to file " << outFile << endl;
+      exit(1);
+    }
     
     bcf_hdr_t *hdr = bcf_hdr_read(htsTmp);
     bcf_hdr_append_eagle_version(hdr, argc, argv);
diff --git a/src/Eagle.hpp b/src/Eagle.hpp
index 23c463a..dfe7bb8 100644
--- a/src/Eagle.hpp
+++ b/src/Eagle.hpp
@@ -173,10 +173,10 @@ namespace EAGLE {
     double runHMM(uint64 n0, uint64 nF1, uint64 nF2, int iter, uint beamWidth, uint maxHapStates);
     std::vector <bool> computeRefIsMono(const std::vector <uint> &bestHaps) const;
     float runPBWT(uint64 n0, uint64 nF1, uint64 nF2, int Kpbwt, double cMexpect, double histFactor,
-		  bool runReverse, bool useTargetHaps, bool impMissing);
+		  bool runReverse, bool useTargetHaps, bool impMissing, bool isChrX);
     float runPBWT(uint64 n0, uint64 nF1, uint64 nF2, int Kpbwt, double cMexpect, double histFactor,
 		  bool runReverse, bool useTargetHaps, bool impMissing, int usePS,
-		  const std::vector < std::pair <int, int> > &conPS);
+		  const std::vector < std::pair <int, int> > &conPS, bool isChrX);
     void imputeMissing(const HapHedge &hapHedge, uint64 n0);
     void writePhaseConfs(const std::string &tmpPhaseFile) const;
     void readPhaseConfs(const std::string &tmpPhaseFile);
diff --git a/src/EagleImpMiss.cpp b/src/EagleImpMiss.cpp
index f2d42bf..69601fe 100644
--- a/src/EagleImpMiss.cpp
+++ b/src/EagleImpMiss.cpp
@@ -18,6 +18,7 @@
 
 #include <vector>
 #include <iostream>
+#include <cmath>
 
 #include "HapHedge.hpp"
 #include "MemoryUtils.hpp"
diff --git a/src/EagleMain.cpp b/src/EagleMain.cpp
index 124ec1d..d2c28b1 100644
--- a/src/EagleMain.cpp
+++ b/src/EagleMain.cpp
@@ -136,7 +136,8 @@ void phaseWithRef(EagleParams &params, Timer &timer, double t0, int argc, char *
 	}
 	confs[i-Nref] = eagle.runPBWT
 	  (i, nF1, nF2, params.Kpbwt/(iter<iters?2:1), params.expectIBDcM, params.histFactor,
-	   iter==iters, iter>1, !params.noImpMissing, params.usePS, conPSall[i-Nref]);
+	   iter==iters, iter>1, !params.noImpMissing, params.usePS, conPSall[i-Nref],
+	   params.chrom==params.chromX);
 #ifdef OLD_IMP_MISSING
 	if (!params.noImpMissing) {
 	  Timer tim;
@@ -214,7 +215,7 @@ int main(int argc, char *argv[]) {
   cout << endl;
 
   cout << "Copyright (C) 2015-2016 Harvard University." << endl;
-  cout << "Distributed under the GNU GPLv3 open source license." << endl << endl;
+  cout << "Distributed under the GNU GPLv3+ open source license." << endl << endl;
 
   //cout << "Boost version: " << BOOST_LIB_VERSION << endl;
   //cout << endl;
@@ -274,8 +275,6 @@ int main(int argc, char *argv[]) {
 		     params.maxMissingPerSnp, params.maxMissingPerIndiv, params.noMapCheck,
 		     params.cMmax);
 
-  vector <double> invLD64j = genoData.computeInvLD64j(1000);
-  
   if (!params.usePBWT) { // Eagle v1 algorithm
     params.pbwtOnly = false; // should already be false
     params.runStep2 = 1;
@@ -292,6 +291,9 @@ int main(int argc, char *argv[]) {
       params.runStep2 = (genoData.getMseg64() >= 3U); // can't run Step 2 with < 3 SNP segments
   }
 
+  vector <double> invLD64j;
+  if (!params.pbwtOnly) invLD64j = genoData.computeInvLD64j(1000);
+  
   Eagle eagle(genoData.getN(), genoData.getMseg64(), genoData.getGenoBits(),
 	      genoData.getSeg64cMvecs(), genoData.getSeg64logPs(), invLD64j, genoData.getIndivs(),
 	      genoData.getSnps(), params.maskFile, genoData.getIsFlipped64j(), params.pErr,
@@ -462,14 +464,14 @@ int main(int argc, char *argv[]) {
 	  if (b == 1)
 	    for (uint att = 0; att < min(9U, (uint) children.size()); att++) // run on trios
 	      eagle.runPBWT(children[att], nF1s[att], nF2s[att], Kpbwt, params.expectIBDcM,
-			    params.histFactor, runReverse, true, false);
+			    params.histFactor, runReverse, true, false, params.chrom==params.chromX);
 
 	  uint iStart = (b-1)*N/numBatches, iEnd = b*N/numBatches;
 	  cout << endl << "Phasing samples " << (iStart+1) << "-" << iEnd << endl;
 #pragma omp parallel for reduction(+:timeImpMissing) schedule(dynamic, 4)
 	  for (uint i = iStart; i < iEnd; i++) {	
 	    eagle.runPBWT(i, -1, -1, Kpbwt, params.expectIBDcM, params.histFactor, runReverse,
-			  true, true);
+			  true, true, params.chrom==params.chromX);
 #ifdef OLD_IMP_MISSING
 	    Timer tim;
 	    eagle.imputeMissing(hapHedge, i);
@@ -586,7 +588,7 @@ int main(int argc, char *argv[]) {
       eagle.checkTrioErrorRate(i, nF1, nF2);
 #ifdef USE_PBWT
       eagle.runPBWT(i, nF1, nF2, params.Kpbwt, params.expectIBDcM, params.histFactor, true, false,
-		    !params.noImpMissing);
+		    !params.noImpMissing, params.chrom==params.chromX);
 #else
       timeMN2 += params.iter == 2 ? eagle.findLongHapMatches(i, nF1, nF2, params.iter)
 	: eagle.runHMM(i, nF1, nF2, params.iter,
diff --git a/src/EaglePBWT.cpp b/src/EaglePBWT.cpp
index 94ffeb6..138027f 100644
--- a/src/EaglePBWT.cpp
+++ b/src/EaglePBWT.cpp
@@ -105,15 +105,16 @@ namespace EAGLE {
   }
 
   float Eagle::runPBWT(uint64 n0, uint64 nF1, uint64 nF2, int Kpbwt, double cMexpect,
-		       double histFactor, bool runReverse, bool useTargetHaps, bool impMissing) {
+		       double histFactor, bool runReverse, bool useTargetHaps, bool impMissing,
+		       bool isChrX) {
     vector < pair <int, int> > noConPS;
     return runPBWT(n0, nF1, nF2, Kpbwt, cMexpect, histFactor, runReverse, useTargetHaps,
-		   impMissing, 0, noConPS);
+		   impMissing, 0, noConPS, isChrX);
   }
 
   float Eagle::runPBWT(uint64 n0, uint64 nF1, uint64 nF2, int Kpbwt, double cMexpect,
 		       double histFactor, bool runReverse, bool useTargetHaps, bool impMissing,
-		       int usePS, const vector < pair <int, int> > &conPS) {
+		       int usePS, const vector < pair <int, int> > &conPS, bool isChrX) {
     Timer timer;
     
     vector <uint> m64jInds(Mseg64*64+1);
@@ -212,11 +213,11 @@ namespace EAGLE {
     splitGenos.push_back(0); // pad on right with 0 (to match hapBitsT)
 
     // check for 0 or 1 het (warn)
-    if (hets64j.size() <= 1) {
+    if (!isChrX && hets64j.size() <= 1) {
       cerr << "WARNING: Sample " << n0-Nref+1 << " (1-indexed) has a het count of "
 	   << hets64j.size() << endl;
     }
-
+    
     // compute recombination probabilities
     vector <double> cMcoords(splits64j.size()+2);
     for (uint64 s = 0; s <= splits64j.size(); s++) {
diff --git a/src/EagleParams.cpp b/src/EagleParams.cpp
index 548b9cb..7eb084a 100644
--- a/src/EagleParams.cpp
+++ b/src/EagleParams.cpp
@@ -81,7 +81,7 @@ namespace EAGLE {
        "tabix-indexed [compressed] VCF/BCF file for reference haplotypes")
       ("vcfTarget", po::value<string>(&vcfTarget),
        "tabix-indexed [compressed] VCF/BCF file for target genotypes")
-      ("vcfOutFormat", po::value<string>(&vcfOutFormat)->default_value("z"),
+      ("vcfOutFormat", po::value<string>(&vcfOutFormat)->default_value("."),
        "b|u|z|v: compressed BCF (b), uncomp BCF (u), compressed VCF (z), uncomp VCF (v)")
       ("noImpMissing", "disable imputation of missing ./. target genotypes")
       ("allowRefAltSwap", "allow swapping of REF/ALT in target vs. ref VCF")
@@ -196,6 +196,12 @@ namespace EAGLE {
 	return false;
       }
 
+      if (vm.count("vcf") + vm.count("vcfRef") + vm.count("vcfTarget") == 0 &&
+	  vcfOutFormat != ".") {
+	cerr << "ERROR: --vcfOutFormat can only be used with vcf input" << endl;
+	return false;
+      }
+
       if (vm.count("bfile")) {
 	string bfile = vm["bfile"].as<string>();
 	famFile = bfile + ".fam";
@@ -263,7 +269,9 @@ namespace EAGLE {
 	if (vcfOutFormat == "b") { vcfOutSuffix = "bcf"; vcfWriteMode = "wb"; }
 	else if (vcfOutFormat == "u") { vcfOutSuffix = "bcf"; vcfWriteMode = "wbu"; }
 	else if (vcfOutFormat == "z") { vcfOutSuffix = "vcf.gz"; vcfWriteMode = "wz"; }
-	else if (vcfOutFormat == "v") { vcfOutSuffix = "vcf"; vcfWriteMode = "w"; }
+	else if (vcfOutFormat == "v" || vcfOutFormat == ".") {
+	  vcfOutSuffix = "vcf"; vcfWriteMode = "w";
+	}
 	else {
 	  cerr << "ERROR: --vcfOutFormat must be one of {b,u,z,v}" << endl;
 	  return false;
@@ -312,8 +320,8 @@ namespace EAGLE {
       }
       chrom = StringUtils::bcfNameToChrom(chromStr.c_str(), 0, chromX); // checks for range
 
-      if (pbwtIters < 0 || pbwtIters > 3) {
-	cerr << "ERROR: --pbwtIters must be either 0=auto, 1, 2, or 3" << endl;
+      if (pbwtIters < 0 || pbwtIters > 10) {
+	cerr << "ERROR: --pbwtIters must be either 0=auto or <=10" << endl;
 	return false;
       }
 
diff --git a/src/FileUtils.cpp b/src/FileUtils.cpp
index 805a41e..b3bc289 100644
--- a/src/FileUtils.cpp
+++ b/src/FileUtils.cpp
@@ -151,7 +151,7 @@ namespace FileUtils {
   }
 
   AutoGzIfstream::operator bool() const {
-    return boost_in;
+    return !boost_in.fail();
   }
 
   AutoGzIfstream& AutoGzIfstream::read(char *s, std::streamsize n) {
@@ -209,7 +209,7 @@ namespace FileUtils {
   }
 
   AutoGzOfstream::operator bool() const {
-    return boost_out;
+    return !boost_out.fail();
   }
 
 }
diff --git a/src/GenoData.cpp b/src/GenoData.cpp
index 15f27ff..6d16d4e 100644
--- a/src/GenoData.cpp
+++ b/src/GenoData.cpp
@@ -594,7 +594,7 @@ namespace EAGLE {
 	}
 	else {
 	  if (numSnpsFailedQC < 5)
-	    cout << "Filtering snp " << snpsPreQC[mbed].ID << ": "
+	    cout << "Filtering snp " << snpsPreQC[m].ID << ": "
 		 << snpsPreQC[m].miss << " missing" << endl;
 	  numSnpsFailedQC++;
 	}
@@ -680,6 +680,10 @@ namespace EAGLE {
 			 bool noMapCheck, double cMmax) {
     
     htsFile *fin = hts_open(vcfFile.c_str(), "r");
+    if (fin == NULL) {
+      cerr << "ERROR: Could not open " << vcfFile << " for reading" << endl;
+      exit(1);
+    }
     bcf_hdr_t *hdr = bcf_hdr_read(fin);
     bcf1_t *rec = bcf_init1();
     int mgt = 0, *gt = NULL;
diff --git a/src/HapHedge.cpp b/src/HapHedge.cpp
index bbeb0ed..f98bf49 100644
--- a/src/HapHedge.cpp
+++ b/src/HapHedge.cpp
@@ -143,7 +143,7 @@ namespace EAGLE {
 	for (uint64 h = 0; h <= splits64j.size(); h++) {
 	  // set hom
 	  uint64 homStart = (h == 0 ? 0 : splits64j[h-1]+1);
-	  uint64 homStop = (h == splits64j.size() ? Mseg64*64 : splits64j[h]) - 1;
+	  uint64 homStop = (h == splits64j.size() ? Mseg64*64 : std::max(splits64j[h], 1ULL)) - 1;
 	  int numErrs = 0;
 	  for (uint64 m64 = (homStart>>6); m64 <= (homStop>>6); m64++) {
 	    uint64 mask = -1ULL;
diff --git a/src/MapInterpolater.cpp b/src/MapInterpolater.cpp
index e7c35a5..b3d1f93 100644
--- a/src/MapInterpolater.cpp
+++ b/src/MapInterpolater.cpp
@@ -56,8 +56,14 @@ namespace Genetics {
     int chr0 = 0, bp0 = 0; double gen0 = 0;
     int chr, bp; double rate, gen;
     while (fin >> chr >> bp >> rate >> gen) {
-      if (chr == chr0)
+      if (chr == chr0) {
+	if (gen<gen0 || bp<bp0) {
+	  cerr << "ERROR: Genetic map contains out-of-order row:" << endl;
+	  cerr << "       " << chr << " " << bp << " " << rate << " " << gen << endl;
+	  exit(1);
+	}
 	chrBpToRateGen[make_pair(chr, bp)] = make_pair((gen-gen0)/(1e-6*(bp-bp0)), gen);
+      }
       chr0 = chr; bp0 = bp; gen0 = gen;
     }
     chrBpToRateGen[make_pair(chr, bp+1e9)] = make_pair(1.0, gen+1e3); // sentinel at end
diff --git a/src/SyncedVcfData.cpp b/src/SyncedVcfData.cpp
index 765a37e..4e72e3d 100644
--- a/src/SyncedVcfData.cpp
+++ b/src/SyncedVcfData.cpp
@@ -45,7 +45,7 @@ namespace EAGLE {
   void process_ref_genotypes(int nsmpl, int ngt, int32_t *gt, bool allowHaploid, bool refAltSwap,
 			     vector <bool> &hapsRef, int &numMissing, int &numUnphased, uint &w) {
     numMissing = numUnphased = 0;
-    if (ngt != 2*nsmpl) {
+    if (ngt != 2*nsmpl && !(ngt==nsmpl && allowHaploid)) {
       cerr << "ERROR: ref ploidy != 2 (ngt != 2*nsmpl): ngt="
 	   << ngt << ", nsmpl=" << nsmpl << endl;
       exit(1);
@@ -85,6 +85,7 @@ namespace EAGLE {
 	      }
 	    }
 	  }
+	if (ploidy==1) haps[1] = haps[0];
 	if (missing) {
 	  haps[0] = haps[1] = 0; // set both alleles to REF allele
 	  numMissing++;
@@ -106,7 +107,7 @@ namespace EAGLE {
   void process_target_genotypes(int nsmpl, int ngt, int32_t *gt, bool allowHaploid,
 				vector <uchar> &genosTarget, int &numMissing) {
     numMissing = 0;
-    if (ngt != 2*nsmpl) {
+    if (ngt != 2*nsmpl && !(ngt==nsmpl && allowHaploid)) {
       cerr << "ERROR: target ploidy != 2 (ngt != 2*nsmpl): ngt="
 	   << ngt << ", nsmpl=" << nsmpl << endl;
       exit(1);
@@ -153,6 +154,7 @@ namespace EAGLE {
 	  g = 9;
 	  numMissing++;
 	}
+    else if (ploidy==1) g *= 2;   // encode as diploid homozygote
 	genosTarget.push_back(g);
       }
   }
@@ -171,7 +173,13 @@ namespace EAGLE {
     if ( chrom!=0 )
     {
         kstring_t str = {0,0,0};
-        ksprintf(&str,"%d:%d-%d",chrom,(uint32_t)bpStart,(uint32_t)bpEnd);
+        if ( chrom==chromX )
+        {
+            // this is not perfect, better would be to have the chr name passed to us
+            ksprintf(&str,"X:%d-%d,chrX:%d-%d",(uint32_t)bpStart,(uint32_t)bpEnd,(uint32_t)bpStart,(uint32_t)bpEnd);
+        }
+        else
+            ksprintf(&str,"%d:%d-%d",chrom,(uint32_t)bpStart,(uint32_t)bpEnd);
         if ( bcf_sr_set_regions(sr, str.s, 0)!=0 )
         {
             cerr << "ERROR: failed to initialize the region:" << str.s;
@@ -230,7 +238,8 @@ namespace EAGLE {
     cout << "Target samples: Ntarget = " << Ntarget << endl;
 
     M = 0;
-    uint MtargetOnly = 0, MrefOnly = 0, MmultiAllelic = 0, Mmonomorphic = 0;
+    uint MtargetOnly = 0, MrefOnly = 0, MmultiAllelicTgt = 0, MmonomorphicTgt = 0,
+      MmultiAllelicRef = 0, MmonomorphicRef = 0;
     uint MwithMissingRef = 0, MwithUnphasedRef = 0, MnotInRegion = 0, MnotOnChrom = 0;
     uint MrefAltError = 0, numRefAltSwaps = 0;
     uint64 GmissingRef = 0, GunphasedRef = 0, GmissingTarget = 0;
@@ -246,11 +255,16 @@ namespace EAGLE {
 	bcf1_t *tgt = bcf_sr_get_line(sr, 1);
 	if ( !ref ) {
 	  //fprintf(stderr, "onlyT .. %s:%d\n", bcf_seqname(tgt_hdr, tgt), tgt->pos+1);
-	  MtargetOnly++;
+	  bcf_unpack(tgt, BCF_UN_STR); // unpack thru ALT
+	  if (strcmp(tgt->d.allele[1], ".") != 0) // report if polymorphic in target
+	    MtargetOnly++;
 	  continue;
 	}
 	if ( !tgt ) {
 	  //fprintf(stderr, "onlyR .. %s:%d\n", bcf_seqname(ref_hdr, ref), ref->pos+1);
+	  bcf_unpack(ref, BCF_UN_STR); // unpack thru ALT
+	  if (strcmp(ref->d.allele[1], ".") != 0) // report if polymorphic in ref
+	    MtargetOnly++;
 	  MrefOnly++;
 	  continue;
 	}
@@ -259,19 +273,23 @@ namespace EAGLE {
 	// filter out multi-allelic and monomorphic markers
 	int ntgt_gt = bcf_get_genotypes(tgt_hdr, tgt, &tgt_gt, &mtgt_gt);
 	if (tgt->n_allele > 2) {
-	  MmultiAllelic++;
+	  MmultiAllelicTgt++;
 	  continue;
 	}
 	if (tgt->n_allele < 2) {
-	  Mmonomorphic++;
+	  MmonomorphicTgt++;
 	  continue;
 	}
 
 	bool refAltSwap = false;
 
         if (allowRefAltSwap) { // perform further error-checking
-	  if (tgt->n_allele != 2 || ref->n_allele != 2) {
-	    MrefAltError++;
+	  if (ref->n_allele > 2) {
+	    MmultiAllelicRef++;
+	    continue;
+	  }
+	  if (ref->n_allele < 2) {
+	    MmonomorphicRef++;
 	    continue;
 	  }
 	  bcf_unpack(tgt, BCF_UN_STR); // unpack thru ALT
@@ -375,8 +393,12 @@ namespace EAGLE {
     if (MnotInRegion)
       cout << "              " << MnotInRegion << " SNPs not in selected region (+ flanks)"
 	   << endl;
-    cout << "              " << MmultiAllelic << " multi-allelic SNPs" << endl;
-    cout << "              " << Mmonomorphic << " monomorphic SNPs" << endl;
+    cout << "              " << MmultiAllelicTgt << " multi-allelic SNPs in target" << endl;
+    cout << "              " << MmonomorphicTgt << " monomorphic SNPs in target" << endl;
+    if (MmultiAllelicRef)
+      cout << "              " << MmultiAllelicRef << " SNPs biallelic in target but multi-allelic in ref" << endl;
+    if (MmonomorphicRef)
+      cout << "              " << MmonomorphicRef << " SNPs biallelic in target but monomorphic in ref" << endl;
     if (MrefAltError)
       cout << "              " << MrefAltError << " SNPs with REF/ALT matching errors" << endl;
     cout << endl;
diff --git a/src/Version.hpp b/src/Version.hpp
index d527e6f..f0d41d8 100644
--- a/src/Version.hpp
+++ b/src/Version.hpp
@@ -19,7 +19,7 @@
 #ifndef VERSION_HPP
 #define VERSION_HPP
 
-const char EAGLE_VERSION[] = "2.3";
-const char EAGLE_VERSION_DATE[] = "July 22, 2016";
+const char EAGLE_VERSION[] = "2.3.1";
+const char EAGLE_VERSION_DATE[] = "March 3, 2017";
 
 #endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/eagle.git



More information about the debian-med-commit mailing list