[med-svn] [eagle] 01/02: Imported Upstream version 2.3.1
Dylan Aïssi
bob.dybian-guest at moszumanska.debian.org
Sun Mar 5 21:40:55 UTC 2017
This is an automated email from the git hooks/post-receive script.
bob.dybian-guest pushed a commit to branch master
in repository eagle.
commit e12c01b64a8d5f688fe4531f8783a384cfbca506
Author: Dylan Aïssi <bob.dybian at gmail.com>
Date: Sun Mar 5 22:40:34 2017 +0100
Imported Upstream version 2.3.1
---
example/example.log | 70 ++++++++++++++++++++---------------------
example/example_ref.log | 21 +++++++------
example/example_vcf.log | 72 +++++++++++++++++++++----------------------
example/phased.vcf.gz | Bin 200389 -> 200385 bytes
example/target.phased.vcf.gz | Bin 21049 -> 21050 bytes
src/Eagle.cpp | 8 +++++
src/Eagle.hpp | 4 +--
src/EagleImpMiss.cpp | 1 +
src/EagleMain.cpp | 16 +++++-----
src/EaglePBWT.cpp | 11 ++++---
src/EagleParams.cpp | 16 +++++++---
src/FileUtils.cpp | 4 +--
src/GenoData.cpp | 6 +++-
src/HapHedge.cpp | 2 +-
src/MapInterpolater.cpp | 8 ++++-
src/SyncedVcfData.cpp | 44 +++++++++++++++++++-------
src/Version.hpp | 4 +--
17 files changed, 170 insertions(+), 117 deletions(-)
diff --git a/example/example.log b/example/example.log
index 394a0ed..bd2d755 100644
--- a/example/example.log
+++ b/example/example.log
@@ -1,13 +1,13 @@
+-----------------------------+
| |
- | Eagle v2.3 |
- | July 22, 2016 |
+ | Eagle v2.3.1 |
+ | March 3, 2017 |
| Po-Ru Loh |
| |
+-----------------------------+
Copyright (C) 2015-2016 Harvard University.
-Distributed under the GNU GPLv3 open source license.
+Distributed under the GNU GPLv3+ open source license.
Command line options:
@@ -56,25 +56,25 @@ Setting --histFactor=1.00
BEGINNING STEP 1
-Time for step 1: 0.373152
-Time for step 1 MN^2: 0.0242926
+Time for step 1: 0.380579
+Time for step 1 MN^2: 0.024638
-Making hard calls (time: 0.0283859)
+Making hard calls (time: 0.0283101)
BEGINNING STEP 2
BATCH 1 OF 1
Building hash tables
-.................................................................. (time: 0.0523882)
+.................................................................. (time: 0.0659139)
Phasing samples 1-379
-Time for phasing batch: 0.449098
+Time for phasing batch: 0.466222
-Making hard calls (time: 0.029567)
+Making hard calls (time: 0.029881)
-Time for step 2: 0.531062
-Time for step 2 MN^2: 0.0591508
+Time for step 2: 0.562044
+Time for step 2 MN^2: 0.0596148
BEGINNING STEP 3 (PBWT ITERS)
@@ -87,108 +87,108 @@ BEGINNING PBWT ITER 1
BATCH 1 OF 10
Phasing samples 1-37
-Time for phasing batch: 1.75458
+Time for phasing batch: 1.84304
BATCH 2 OF 10
Phasing samples 38-75
-Time for phasing batch: 1.59901
+Time for phasing batch: 1.67975
BATCH 3 OF 10
Phasing samples 76-113
-Time for phasing batch: 1.69437
+Time for phasing batch: 1.79137
BATCH 4 OF 10
Phasing samples 114-151
-Time for phasing batch: 1.62289
+Time for phasing batch: 1.71214
BATCH 5 OF 10
Phasing samples 152-189
-Time for phasing batch: 1.59901
+Time for phasing batch: 1.62954
BATCH 6 OF 10
Phasing samples 190-227
-Time for phasing batch: 1.63321
+Time for phasing batch: 1.67016
BATCH 7 OF 10
Phasing samples 228-265
-Time for phasing batch: 1.63894
+Time for phasing batch: 1.66139
BATCH 8 OF 10
Phasing samples 266-303
-Time for phasing batch: 1.57722
+Time for phasing batch: 1.6077
BATCH 9 OF 10
Phasing samples 304-341
-Time for phasing batch: 1.56258
+Time for phasing batch: 1.63602
BATCH 10 OF 10
Phasing samples 342-379
-Time for phasing batch: 1.63269
+Time for phasing batch: 1.69574
-Time for PBWT iter 1: 16.3145
+Time for PBWT iter 1: 16.9269
BEGINNING PBWT ITER 2
BATCH 1 OF 10
Phasing samples 1-37
-Time for phasing batch: 2.73515
+Time for phasing batch: 2.81822
BATCH 2 OF 10
Phasing samples 38-75
-Time for phasing batch: 2.58695
+Time for phasing batch: 2.59811
BATCH 3 OF 10
Phasing samples 76-113
-Time for phasing batch: 2.70714
+Time for phasing batch: 2.79582
BATCH 4 OF 10
Phasing samples 114-151
-Time for phasing batch: 2.54992
+Time for phasing batch: 2.55964
BATCH 5 OF 10
Phasing samples 152-189
-Time for phasing batch: 2.51631
+Time for phasing batch: 2.70656
BATCH 6 OF 10
Phasing samples 190-227
-Time for phasing batch: 2.62562
+Time for phasing batch: 2.66884
BATCH 7 OF 10
Phasing samples 228-265
-Time for phasing batch: 2.60083
+Time for phasing batch: 2.66954
BATCH 8 OF 10
Phasing samples 266-303
-Time for phasing batch: 2.4503
+Time for phasing batch: 2.51312
BATCH 9 OF 10
Phasing samples 304-341
-Time for phasing batch: 2.54366
+Time for phasing batch: 2.58987
BATCH 10 OF 10
Phasing samples 342-379
-Time for phasing batch: 2.56976
+Time for phasing batch: 2.608
-Time for PBWT iter 2: 25.8857
+Time for PBWT iter 2: 26.5278
Writing .haps.gz and .sample output
-Time for writing output: 0.370972
-Total elapsed time for analysis = 43.6733 sec
+Time for writing output: 0.354415
+Total elapsed time for analysis = 44.9045 sec
diff --git a/example/example_ref.log b/example/example_ref.log
index e8fb1b7..f53b54d 100644
--- a/example/example_ref.log
+++ b/example/example_ref.log
@@ -1,13 +1,13 @@
+-----------------------------+
| |
- | Eagle v2.3 |
- | July 22, 2016 |
+ | Eagle v2.3.1 |
+ | March 3, 2017 |
| Po-Ru Loh |
| |
+-----------------------------+
Copyright (C) 2015-2016 Harvard University.
-Distributed under the GNU GPLv3 open source license.
+Distributed under the GNU GPLv3+ open source license.
Command line options:
@@ -29,10 +29,11 @@ Target samples: Ntarget = 8
[W::vcf_parse] INFO 'EX_TARGET' is not defined in the header, assuming Type=String
SNPs to analyze: M = 430 SNPs in both target and reference
-SNPs ignored: 0 SNPs in target but not reference
+SNPs ignored: 215 SNPs in target but not reference
+ --> WARNING: Check REF/ALT agreement between target and ref <--
215 SNPs in reference but not target
- 0 multi-allelic SNPs
- 0 monomorphic SNPs
+ 0 multi-allelic SNPs in target
+ 0 monomorphic SNPs in target
Missing rate in target genotypes: 0.00116279
@@ -57,15 +58,15 @@ PHASING ITER 1 OF 1
Phasing target samples
................................................................................
-Time for phasing iter 1: 0.23367
-Writing vcf.gz output to target.phased.vcf.gz
+Time for phasing iter 1: 0.236171
+Writing vcf output to target.phased.vcf
[W::vcf_parse] INFO 'AC' is not defined in the header, assuming Type=String
[W::vcf_parse] INFO 'AN' is not defined in the header, assuming Type=String
[W::vcf_parse] INFO 'DP' is not defined in the header, assuming Type=String
[W::vcf_parse] INFO 'AFR_AF' is not defined in the header, assuming Type=String
[W::vcf_parse] INFO 'EX_TARGET' is not defined in the header, assuming Type=String
-Time for writing output: 0.028584
-Total elapsed time for analysis = 9.43616 sec
+Time for writing output: 0.051106
+Total elapsed time for analysis = 9.22206 sec
Mean phase confidence of each target individual:
ID PHASE_CONFIDENCE
diff --git a/example/example_vcf.log b/example/example_vcf.log
index 23decc9..6932e15 100644
--- a/example/example_vcf.log
+++ b/example/example_vcf.log
@@ -1,13 +1,13 @@
+-----------------------------+
| |
- | Eagle v2.3 |
- | July 22, 2016 |
+ | Eagle v2.3.1 |
+ | March 3, 2017 |
| Po-Ru Loh |
| |
+-----------------------------+
Copyright (C) 2015-2016 Harvard University.
-Distributed under the GNU GPLv3 open source license.
+Distributed under the GNU GPLv3+ open source license.
Command line options:
@@ -39,25 +39,25 @@ Setting --histFactor=1.00
BEGINNING STEP 1
-Time for step 1: 0.375713
-Time for step 1 MN^2: 0.0244864
+Time for step 1: 0.383094
+Time for step 1 MN^2: 0.024787
-Making hard calls (time: 0.028863)
+Making hard calls (time: 0.0283229)
BEGINNING STEP 2
BATCH 1 OF 1
Building hash tables
-.................................................................. (time: 0.0508139)
+.................................................................. (time: 0.0526478)
Phasing samples 1-379
-Time for phasing batch: 0.437996
+Time for phasing batch: 0.469969
-Making hard calls (time: 0.029588)
+Making hard calls (time: 0.029753)
-Time for step 2: 0.51841
-Time for step 2 MN^2: 0.0594165
+Time for step 2: 0.552398
+Time for step 2 MN^2: 0.0597794
BEGINNING STEP 3 (PBWT ITERS)
@@ -70,108 +70,108 @@ BEGINNING PBWT ITER 1
BATCH 1 OF 10
Phasing samples 1-37
-Time for phasing batch: 1.77567
+Time for phasing batch: 1.77488
BATCH 2 OF 10
Phasing samples 38-75
-Time for phasing batch: 1.64262
+Time for phasing batch: 1.69304
BATCH 3 OF 10
Phasing samples 76-113
-Time for phasing batch: 1.71332
+Time for phasing batch: 1.74526
BATCH 4 OF 10
Phasing samples 114-151
-Time for phasing batch: 1.61786
+Time for phasing batch: 1.64103
BATCH 5 OF 10
Phasing samples 152-189
-Time for phasing batch: 1.6091
+Time for phasing batch: 1.66061
BATCH 6 OF 10
Phasing samples 190-227
-Time for phasing batch: 1.6461
+Time for phasing batch: 1.66104
BATCH 7 OF 10
Phasing samples 228-265
-Time for phasing batch: 1.651
+Time for phasing batch: 1.6719
BATCH 8 OF 10
Phasing samples 266-303
-Time for phasing batch: 1.58484
+Time for phasing batch: 1.58273
BATCH 9 OF 10
Phasing samples 304-341
-Time for phasing batch: 1.59481
+Time for phasing batch: 1.63833
BATCH 10 OF 10
Phasing samples 342-379
-Time for phasing batch: 1.63671
+Time for phasing batch: 1.6582
-Time for PBWT iter 1: 16.4721
+Time for PBWT iter 1: 16.727
BEGINNING PBWT ITER 2
BATCH 1 OF 10
Phasing samples 1-37
-Time for phasing batch: 2.72875
+Time for phasing batch: 2.75391
BATCH 2 OF 10
Phasing samples 38-75
-Time for phasing batch: 2.59217
+Time for phasing batch: 2.66628
BATCH 3 OF 10
Phasing samples 76-113
-Time for phasing batch: 2.72183
+Time for phasing batch: 2.74627
BATCH 4 OF 10
Phasing samples 114-151
-Time for phasing batch: 2.57471
+Time for phasing batch: 2.55569
BATCH 5 OF 10
Phasing samples 152-189
-Time for phasing batch: 2.5283
+Time for phasing batch: 2.56301
BATCH 6 OF 10
Phasing samples 190-227
-Time for phasing batch: 2.64632
+Time for phasing batch: 2.63885
BATCH 7 OF 10
Phasing samples 228-265
-Time for phasing batch: 2.63781
+Time for phasing batch: 2.61097
BATCH 8 OF 10
Phasing samples 266-303
-Time for phasing batch: 2.52006
+Time for phasing batch: 2.51258
BATCH 9 OF 10
Phasing samples 304-341
-Time for phasing batch: 2.53771
+Time for phasing batch: 2.57989
BATCH 10 OF 10
Phasing samples 342-379
-Time for phasing batch: 2.51691
+Time for phasing batch: 2.57872
-Time for PBWT iter 2: 26.0046
-Writing vcf.gz output to phased.vcf.gz
-Time for writing output: 0.331217
-Total elapsed time for analysis = 52.9965 sec
+Time for PBWT iter 2: 26.2062
+Writing vcf output to phased.vcf
+Time for writing output: 0.168121
+Total elapsed time for analysis = 53.6103 sec
diff --git a/example/phased.vcf.gz b/example/phased.vcf.gz
index 33f7f98..962c54b 100644
Binary files a/example/phased.vcf.gz and b/example/phased.vcf.gz differ
diff --git a/example/target.phased.vcf.gz b/example/target.phased.vcf.gz
index 270221f..f74a9f3 100644
Binary files a/example/target.phased.vcf.gz and b/example/target.phased.vcf.gz differ
diff --git a/src/Eagle.cpp b/src/Eagle.cpp
index 45bb95c..4860b60 100644
--- a/src/Eagle.cpp
+++ b/src/Eagle.cpp
@@ -3221,7 +3221,15 @@ namespace EAGLE {
char **argv) const {
htsFile *htsTmp = hts_open(tmpFile.c_str(), "r");
+ if (htsTmp == NULL) {
+ cerr << "ERROR: Could not open temporary file " << tmpFile << endl;
+ exit(1);
+ }
htsFile *out = hts_open(outFile.c_str(), writeMode.c_str());
+ if (out == NULL) {
+ cerr << "ERROR: Could not write to file " << outFile << endl;
+ exit(1);
+ }
bcf_hdr_t *hdr = bcf_hdr_read(htsTmp);
bcf_hdr_append_eagle_version(hdr, argc, argv);
diff --git a/src/Eagle.hpp b/src/Eagle.hpp
index 23c463a..dfe7bb8 100644
--- a/src/Eagle.hpp
+++ b/src/Eagle.hpp
@@ -173,10 +173,10 @@ namespace EAGLE {
double runHMM(uint64 n0, uint64 nF1, uint64 nF2, int iter, uint beamWidth, uint maxHapStates);
std::vector <bool> computeRefIsMono(const std::vector <uint> &bestHaps) const;
float runPBWT(uint64 n0, uint64 nF1, uint64 nF2, int Kpbwt, double cMexpect, double histFactor,
- bool runReverse, bool useTargetHaps, bool impMissing);
+ bool runReverse, bool useTargetHaps, bool impMissing, bool isChrX);
float runPBWT(uint64 n0, uint64 nF1, uint64 nF2, int Kpbwt, double cMexpect, double histFactor,
bool runReverse, bool useTargetHaps, bool impMissing, int usePS,
- const std::vector < std::pair <int, int> > &conPS);
+ const std::vector < std::pair <int, int> > &conPS, bool isChrX);
void imputeMissing(const HapHedge &hapHedge, uint64 n0);
void writePhaseConfs(const std::string &tmpPhaseFile) const;
void readPhaseConfs(const std::string &tmpPhaseFile);
diff --git a/src/EagleImpMiss.cpp b/src/EagleImpMiss.cpp
index f2d42bf..69601fe 100644
--- a/src/EagleImpMiss.cpp
+++ b/src/EagleImpMiss.cpp
@@ -18,6 +18,7 @@
#include <vector>
#include <iostream>
+#include <cmath>
#include "HapHedge.hpp"
#include "MemoryUtils.hpp"
diff --git a/src/EagleMain.cpp b/src/EagleMain.cpp
index 124ec1d..d2c28b1 100644
--- a/src/EagleMain.cpp
+++ b/src/EagleMain.cpp
@@ -136,7 +136,8 @@ void phaseWithRef(EagleParams ¶ms, Timer &timer, double t0, int argc, char *
}
confs[i-Nref] = eagle.runPBWT
(i, nF1, nF2, params.Kpbwt/(iter<iters?2:1), params.expectIBDcM, params.histFactor,
- iter==iters, iter>1, !params.noImpMissing, params.usePS, conPSall[i-Nref]);
+ iter==iters, iter>1, !params.noImpMissing, params.usePS, conPSall[i-Nref],
+ params.chrom==params.chromX);
#ifdef OLD_IMP_MISSING
if (!params.noImpMissing) {
Timer tim;
@@ -214,7 +215,7 @@ int main(int argc, char *argv[]) {
cout << endl;
cout << "Copyright (C) 2015-2016 Harvard University." << endl;
- cout << "Distributed under the GNU GPLv3 open source license." << endl << endl;
+ cout << "Distributed under the GNU GPLv3+ open source license." << endl << endl;
//cout << "Boost version: " << BOOST_LIB_VERSION << endl;
//cout << endl;
@@ -274,8 +275,6 @@ int main(int argc, char *argv[]) {
params.maxMissingPerSnp, params.maxMissingPerIndiv, params.noMapCheck,
params.cMmax);
- vector <double> invLD64j = genoData.computeInvLD64j(1000);
-
if (!params.usePBWT) { // Eagle v1 algorithm
params.pbwtOnly = false; // should already be false
params.runStep2 = 1;
@@ -292,6 +291,9 @@ int main(int argc, char *argv[]) {
params.runStep2 = (genoData.getMseg64() >= 3U); // can't run Step 2 with < 3 SNP segments
}
+ vector <double> invLD64j;
+ if (!params.pbwtOnly) invLD64j = genoData.computeInvLD64j(1000);
+
Eagle eagle(genoData.getN(), genoData.getMseg64(), genoData.getGenoBits(),
genoData.getSeg64cMvecs(), genoData.getSeg64logPs(), invLD64j, genoData.getIndivs(),
genoData.getSnps(), params.maskFile, genoData.getIsFlipped64j(), params.pErr,
@@ -462,14 +464,14 @@ int main(int argc, char *argv[]) {
if (b == 1)
for (uint att = 0; att < min(9U, (uint) children.size()); att++) // run on trios
eagle.runPBWT(children[att], nF1s[att], nF2s[att], Kpbwt, params.expectIBDcM,
- params.histFactor, runReverse, true, false);
+ params.histFactor, runReverse, true, false, params.chrom==params.chromX);
uint iStart = (b-1)*N/numBatches, iEnd = b*N/numBatches;
cout << endl << "Phasing samples " << (iStart+1) << "-" << iEnd << endl;
#pragma omp parallel for reduction(+:timeImpMissing) schedule(dynamic, 4)
for (uint i = iStart; i < iEnd; i++) {
eagle.runPBWT(i, -1, -1, Kpbwt, params.expectIBDcM, params.histFactor, runReverse,
- true, true);
+ true, true, params.chrom==params.chromX);
#ifdef OLD_IMP_MISSING
Timer tim;
eagle.imputeMissing(hapHedge, i);
@@ -586,7 +588,7 @@ int main(int argc, char *argv[]) {
eagle.checkTrioErrorRate(i, nF1, nF2);
#ifdef USE_PBWT
eagle.runPBWT(i, nF1, nF2, params.Kpbwt, params.expectIBDcM, params.histFactor, true, false,
- !params.noImpMissing);
+ !params.noImpMissing, params.chrom==params.chromX);
#else
timeMN2 += params.iter == 2 ? eagle.findLongHapMatches(i, nF1, nF2, params.iter)
: eagle.runHMM(i, nF1, nF2, params.iter,
diff --git a/src/EaglePBWT.cpp b/src/EaglePBWT.cpp
index 94ffeb6..138027f 100644
--- a/src/EaglePBWT.cpp
+++ b/src/EaglePBWT.cpp
@@ -105,15 +105,16 @@ namespace EAGLE {
}
float Eagle::runPBWT(uint64 n0, uint64 nF1, uint64 nF2, int Kpbwt, double cMexpect,
- double histFactor, bool runReverse, bool useTargetHaps, bool impMissing) {
+ double histFactor, bool runReverse, bool useTargetHaps, bool impMissing,
+ bool isChrX) {
vector < pair <int, int> > noConPS;
return runPBWT(n0, nF1, nF2, Kpbwt, cMexpect, histFactor, runReverse, useTargetHaps,
- impMissing, 0, noConPS);
+ impMissing, 0, noConPS, isChrX);
}
float Eagle::runPBWT(uint64 n0, uint64 nF1, uint64 nF2, int Kpbwt, double cMexpect,
double histFactor, bool runReverse, bool useTargetHaps, bool impMissing,
- int usePS, const vector < pair <int, int> > &conPS) {
+ int usePS, const vector < pair <int, int> > &conPS, bool isChrX) {
Timer timer;
vector <uint> m64jInds(Mseg64*64+1);
@@ -212,11 +213,11 @@ namespace EAGLE {
splitGenos.push_back(0); // pad on right with 0 (to match hapBitsT)
// check for 0 or 1 het (warn)
- if (hets64j.size() <= 1) {
+ if (!isChrX && hets64j.size() <= 1) {
cerr << "WARNING: Sample " << n0-Nref+1 << " (1-indexed) has a het count of "
<< hets64j.size() << endl;
}
-
+
// compute recombination probabilities
vector <double> cMcoords(splits64j.size()+2);
for (uint64 s = 0; s <= splits64j.size(); s++) {
diff --git a/src/EagleParams.cpp b/src/EagleParams.cpp
index 548b9cb..7eb084a 100644
--- a/src/EagleParams.cpp
+++ b/src/EagleParams.cpp
@@ -81,7 +81,7 @@ namespace EAGLE {
"tabix-indexed [compressed] VCF/BCF file for reference haplotypes")
("vcfTarget", po::value<string>(&vcfTarget),
"tabix-indexed [compressed] VCF/BCF file for target genotypes")
- ("vcfOutFormat", po::value<string>(&vcfOutFormat)->default_value("z"),
+ ("vcfOutFormat", po::value<string>(&vcfOutFormat)->default_value("."),
"b|u|z|v: compressed BCF (b), uncomp BCF (u), compressed VCF (z), uncomp VCF (v)")
("noImpMissing", "disable imputation of missing ./. target genotypes")
("allowRefAltSwap", "allow swapping of REF/ALT in target vs. ref VCF")
@@ -196,6 +196,12 @@ namespace EAGLE {
return false;
}
+ if (vm.count("vcf") + vm.count("vcfRef") + vm.count("vcfTarget") == 0 &&
+ vcfOutFormat != ".") {
+ cerr << "ERROR: --vcfOutFormat can only be used with vcf input" << endl;
+ return false;
+ }
+
if (vm.count("bfile")) {
string bfile = vm["bfile"].as<string>();
famFile = bfile + ".fam";
@@ -263,7 +269,9 @@ namespace EAGLE {
if (vcfOutFormat == "b") { vcfOutSuffix = "bcf"; vcfWriteMode = "wb"; }
else if (vcfOutFormat == "u") { vcfOutSuffix = "bcf"; vcfWriteMode = "wbu"; }
else if (vcfOutFormat == "z") { vcfOutSuffix = "vcf.gz"; vcfWriteMode = "wz"; }
- else if (vcfOutFormat == "v") { vcfOutSuffix = "vcf"; vcfWriteMode = "w"; }
+ else if (vcfOutFormat == "v" || vcfOutFormat == ".") {
+ vcfOutSuffix = "vcf"; vcfWriteMode = "w";
+ }
else {
cerr << "ERROR: --vcfOutFormat must be one of {b,u,z,v}" << endl;
return false;
@@ -312,8 +320,8 @@ namespace EAGLE {
}
chrom = StringUtils::bcfNameToChrom(chromStr.c_str(), 0, chromX); // checks for range
- if (pbwtIters < 0 || pbwtIters > 3) {
- cerr << "ERROR: --pbwtIters must be either 0=auto, 1, 2, or 3" << endl;
+ if (pbwtIters < 0 || pbwtIters > 10) {
+ cerr << "ERROR: --pbwtIters must be either 0=auto or <=10" << endl;
return false;
}
diff --git a/src/FileUtils.cpp b/src/FileUtils.cpp
index 805a41e..b3bc289 100644
--- a/src/FileUtils.cpp
+++ b/src/FileUtils.cpp
@@ -151,7 +151,7 @@ namespace FileUtils {
}
AutoGzIfstream::operator bool() const {
- return boost_in;
+ return !boost_in.fail();
}
AutoGzIfstream& AutoGzIfstream::read(char *s, std::streamsize n) {
@@ -209,7 +209,7 @@ namespace FileUtils {
}
AutoGzOfstream::operator bool() const {
- return boost_out;
+ return !boost_out.fail();
}
}
diff --git a/src/GenoData.cpp b/src/GenoData.cpp
index 15f27ff..6d16d4e 100644
--- a/src/GenoData.cpp
+++ b/src/GenoData.cpp
@@ -594,7 +594,7 @@ namespace EAGLE {
}
else {
if (numSnpsFailedQC < 5)
- cout << "Filtering snp " << snpsPreQC[mbed].ID << ": "
+ cout << "Filtering snp " << snpsPreQC[m].ID << ": "
<< snpsPreQC[m].miss << " missing" << endl;
numSnpsFailedQC++;
}
@@ -680,6 +680,10 @@ namespace EAGLE {
bool noMapCheck, double cMmax) {
htsFile *fin = hts_open(vcfFile.c_str(), "r");
+ if (fin == NULL) {
+ cerr << "ERROR: Could not open " << vcfFile << " for reading" << endl;
+ exit(1);
+ }
bcf_hdr_t *hdr = bcf_hdr_read(fin);
bcf1_t *rec = bcf_init1();
int mgt = 0, *gt = NULL;
diff --git a/src/HapHedge.cpp b/src/HapHedge.cpp
index bbeb0ed..f98bf49 100644
--- a/src/HapHedge.cpp
+++ b/src/HapHedge.cpp
@@ -143,7 +143,7 @@ namespace EAGLE {
for (uint64 h = 0; h <= splits64j.size(); h++) {
// set hom
uint64 homStart = (h == 0 ? 0 : splits64j[h-1]+1);
- uint64 homStop = (h == splits64j.size() ? Mseg64*64 : splits64j[h]) - 1;
+ uint64 homStop = (h == splits64j.size() ? Mseg64*64 : std::max(splits64j[h], 1ULL)) - 1;
int numErrs = 0;
for (uint64 m64 = (homStart>>6); m64 <= (homStop>>6); m64++) {
uint64 mask = -1ULL;
diff --git a/src/MapInterpolater.cpp b/src/MapInterpolater.cpp
index e7c35a5..b3d1f93 100644
--- a/src/MapInterpolater.cpp
+++ b/src/MapInterpolater.cpp
@@ -56,8 +56,14 @@ namespace Genetics {
int chr0 = 0, bp0 = 0; double gen0 = 0;
int chr, bp; double rate, gen;
while (fin >> chr >> bp >> rate >> gen) {
- if (chr == chr0)
+ if (chr == chr0) {
+ if (gen<gen0 || bp<bp0) {
+ cerr << "ERROR: Genetic map contains out-of-order row:" << endl;
+ cerr << " " << chr << " " << bp << " " << rate << " " << gen << endl;
+ exit(1);
+ }
chrBpToRateGen[make_pair(chr, bp)] = make_pair((gen-gen0)/(1e-6*(bp-bp0)), gen);
+ }
chr0 = chr; bp0 = bp; gen0 = gen;
}
chrBpToRateGen[make_pair(chr, bp+1e9)] = make_pair(1.0, gen+1e3); // sentinel at end
diff --git a/src/SyncedVcfData.cpp b/src/SyncedVcfData.cpp
index 765a37e..4e72e3d 100644
--- a/src/SyncedVcfData.cpp
+++ b/src/SyncedVcfData.cpp
@@ -45,7 +45,7 @@ namespace EAGLE {
void process_ref_genotypes(int nsmpl, int ngt, int32_t *gt, bool allowHaploid, bool refAltSwap,
vector <bool> &hapsRef, int &numMissing, int &numUnphased, uint &w) {
numMissing = numUnphased = 0;
- if (ngt != 2*nsmpl) {
+ if (ngt != 2*nsmpl && !(ngt==nsmpl && allowHaploid)) {
cerr << "ERROR: ref ploidy != 2 (ngt != 2*nsmpl): ngt="
<< ngt << ", nsmpl=" << nsmpl << endl;
exit(1);
@@ -85,6 +85,7 @@ namespace EAGLE {
}
}
}
+ if (ploidy==1) haps[1] = haps[0];
if (missing) {
haps[0] = haps[1] = 0; // set both alleles to REF allele
numMissing++;
@@ -106,7 +107,7 @@ namespace EAGLE {
void process_target_genotypes(int nsmpl, int ngt, int32_t *gt, bool allowHaploid,
vector <uchar> &genosTarget, int &numMissing) {
numMissing = 0;
- if (ngt != 2*nsmpl) {
+ if (ngt != 2*nsmpl && !(ngt==nsmpl && allowHaploid)) {
cerr << "ERROR: target ploidy != 2 (ngt != 2*nsmpl): ngt="
<< ngt << ", nsmpl=" << nsmpl << endl;
exit(1);
@@ -153,6 +154,7 @@ namespace EAGLE {
g = 9;
numMissing++;
}
+ else if (ploidy==1) g *= 2; // encode as diploid homozygote
genosTarget.push_back(g);
}
}
@@ -171,7 +173,13 @@ namespace EAGLE {
if ( chrom!=0 )
{
kstring_t str = {0,0,0};
- ksprintf(&str,"%d:%d-%d",chrom,(uint32_t)bpStart,(uint32_t)bpEnd);
+ if ( chrom==chromX )
+ {
+ // this is not perfect, better would be to have the chr name passed to us
+ ksprintf(&str,"X:%d-%d,chrX:%d-%d",(uint32_t)bpStart,(uint32_t)bpEnd,(uint32_t)bpStart,(uint32_t)bpEnd);
+ }
+ else
+ ksprintf(&str,"%d:%d-%d",chrom,(uint32_t)bpStart,(uint32_t)bpEnd);
if ( bcf_sr_set_regions(sr, str.s, 0)!=0 )
{
cerr << "ERROR: failed to initialize the region:" << str.s;
@@ -230,7 +238,8 @@ namespace EAGLE {
cout << "Target samples: Ntarget = " << Ntarget << endl;
M = 0;
- uint MtargetOnly = 0, MrefOnly = 0, MmultiAllelic = 0, Mmonomorphic = 0;
+ uint MtargetOnly = 0, MrefOnly = 0, MmultiAllelicTgt = 0, MmonomorphicTgt = 0,
+ MmultiAllelicRef = 0, MmonomorphicRef = 0;
uint MwithMissingRef = 0, MwithUnphasedRef = 0, MnotInRegion = 0, MnotOnChrom = 0;
uint MrefAltError = 0, numRefAltSwaps = 0;
uint64 GmissingRef = 0, GunphasedRef = 0, GmissingTarget = 0;
@@ -246,11 +255,16 @@ namespace EAGLE {
bcf1_t *tgt = bcf_sr_get_line(sr, 1);
if ( !ref ) {
//fprintf(stderr, "onlyT .. %s:%d\n", bcf_seqname(tgt_hdr, tgt), tgt->pos+1);
- MtargetOnly++;
+ bcf_unpack(tgt, BCF_UN_STR); // unpack thru ALT
+ if (strcmp(tgt->d.allele[1], ".") != 0) // report if polymorphic in target
+ MtargetOnly++;
continue;
}
if ( !tgt ) {
//fprintf(stderr, "onlyR .. %s:%d\n", bcf_seqname(ref_hdr, ref), ref->pos+1);
+ bcf_unpack(ref, BCF_UN_STR); // unpack thru ALT
+ if (strcmp(ref->d.allele[1], ".") != 0) // report if polymorphic in ref
+ MtargetOnly++;
MrefOnly++;
continue;
}
@@ -259,19 +273,23 @@ namespace EAGLE {
// filter out multi-allelic and monomorphic markers
int ntgt_gt = bcf_get_genotypes(tgt_hdr, tgt, &tgt_gt, &mtgt_gt);
if (tgt->n_allele > 2) {
- MmultiAllelic++;
+ MmultiAllelicTgt++;
continue;
}
if (tgt->n_allele < 2) {
- Mmonomorphic++;
+ MmonomorphicTgt++;
continue;
}
bool refAltSwap = false;
if (allowRefAltSwap) { // perform further error-checking
- if (tgt->n_allele != 2 || ref->n_allele != 2) {
- MrefAltError++;
+ if (ref->n_allele > 2) {
+ MmultiAllelicRef++;
+ continue;
+ }
+ if (ref->n_allele < 2) {
+ MmonomorphicRef++;
continue;
}
bcf_unpack(tgt, BCF_UN_STR); // unpack thru ALT
@@ -375,8 +393,12 @@ namespace EAGLE {
if (MnotInRegion)
cout << " " << MnotInRegion << " SNPs not in selected region (+ flanks)"
<< endl;
- cout << " " << MmultiAllelic << " multi-allelic SNPs" << endl;
- cout << " " << Mmonomorphic << " monomorphic SNPs" << endl;
+ cout << " " << MmultiAllelicTgt << " multi-allelic SNPs in target" << endl;
+ cout << " " << MmonomorphicTgt << " monomorphic SNPs in target" << endl;
+ if (MmultiAllelicRef)
+ cout << " " << MmultiAllelicRef << " SNPs biallelic in target but multi-allelic in ref" << endl;
+ if (MmonomorphicRef)
+ cout << " " << MmonomorphicRef << " SNPs biallelic in target but monomorphic in ref" << endl;
if (MrefAltError)
cout << " " << MrefAltError << " SNPs with REF/ALT matching errors" << endl;
cout << endl;
diff --git a/src/Version.hpp b/src/Version.hpp
index d527e6f..f0d41d8 100644
--- a/src/Version.hpp
+++ b/src/Version.hpp
@@ -19,7 +19,7 @@
#ifndef VERSION_HPP
#define VERSION_HPP
-const char EAGLE_VERSION[] = "2.3";
-const char EAGLE_VERSION_DATE[] = "July 22, 2016";
+const char EAGLE_VERSION[] = "2.3.1";
+const char EAGLE_VERSION_DATE[] = "March 3, 2017";
#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/eagle.git
More information about the debian-med-commit
mailing list