[med-svn] [eagle] 01/02: Imported Upstream version 2.3.2
Dylan Aïssi
bob.dybian-guest at moszumanska.debian.org
Thu Apr 13 21:48:02 UTC 2017
This is an automated email from the git hooks/post-receive script.
bob.dybian-guest pushed a commit to branch master
in repository eagle.
commit 4625e661ec9b9feef169bc1d2f04f659f2149e18
Author: Dylan Aïssi <bob.dybian at gmail.com>
Date: Thu Apr 13 23:47:05 2017 +0200
Imported Upstream version 2.3.2
---
example/example.log | 68 ++++++++++++++++++++---------------------
example/example_ref.log | 16 +++++-----
example/example_vcf.log | 70 +++++++++++++++++++++----------------------
example/phased.vcf.gz | Bin 200385 -> 200384 bytes
example/target.phased.vcf.gz | Bin 21050 -> 21053 bytes
src/Eagle.cpp | 22 ++++++++++++--
src/Eagle.hpp | 6 ++--
src/EagleMain.cpp | 5 ++--
src/EagleParams.cpp | 8 +++--
src/EagleParams.hpp | 1 +
src/Makefile | 4 +--
src/SyncedVcfData.cpp | 63 ++++++++++++++++++++------------------
src/SyncedVcfData.hpp | 6 ++--
src/Version.hpp | 4 +--
14 files changed, 149 insertions(+), 124 deletions(-)
diff --git a/example/example.log b/example/example.log
index bd2d755..65ea5a4 100644
--- a/example/example.log
+++ b/example/example.log
@@ -1,7 +1,7 @@
+-----------------------------+
| |
- | Eagle v2.3.1 |
- | March 3, 2017 |
+ | Eagle v2.3.2 |
+ | March 23, 2017 |
| Po-Ru Loh |
| |
+-----------------------------+
@@ -56,25 +56,25 @@ Setting --histFactor=1.00
BEGINNING STEP 1
-Time for step 1: 0.380579
-Time for step 1 MN^2: 0.024638
+Time for step 1: 0.325358
+Time for step 1 MN^2: 0.0202137
-Making hard calls (time: 0.0283101)
+Making hard calls (time: 0.0190668)
BEGINNING STEP 2
BATCH 1 OF 1
Building hash tables
-.................................................................. (time: 0.0659139)
+.................................................................. (time: 0.161656)
Phasing samples 1-379
-Time for phasing batch: 0.466222
+Time for phasing batch: 0.384582
-Making hard calls (time: 0.029881)
+Making hard calls (time: 0.0192499)
-Time for step 2: 0.562044
-Time for step 2 MN^2: 0.0596148
+Time for step 2: 0.5655
+Time for step 2 MN^2: 0.050123
BEGINNING STEP 3 (PBWT ITERS)
@@ -87,108 +87,108 @@ BEGINNING PBWT ITER 1
BATCH 1 OF 10
Phasing samples 1-37
-Time for phasing batch: 1.84304
+Time for phasing batch: 2.14832
BATCH 2 OF 10
Phasing samples 38-75
-Time for phasing batch: 1.67975
+Time for phasing batch: 1.47815
BATCH 3 OF 10
Phasing samples 76-113
-Time for phasing batch: 1.79137
+Time for phasing batch: 1.60704
BATCH 4 OF 10
Phasing samples 114-151
-Time for phasing batch: 1.71214
+Time for phasing batch: 1.69579
BATCH 5 OF 10
Phasing samples 152-189
-Time for phasing batch: 1.62954
+Time for phasing batch: 1.6183
BATCH 6 OF 10
Phasing samples 190-227
-Time for phasing batch: 1.67016
+Time for phasing batch: 1.5888
BATCH 7 OF 10
Phasing samples 228-265
-Time for phasing batch: 1.66139
+Time for phasing batch: 1.69022
BATCH 8 OF 10
Phasing samples 266-303
-Time for phasing batch: 1.6077
+Time for phasing batch: 1.39507
BATCH 9 OF 10
Phasing samples 304-341
-Time for phasing batch: 1.63602
+Time for phasing batch: 1.72671
BATCH 10 OF 10
Phasing samples 342-379
-Time for phasing batch: 1.69574
+Time for phasing batch: 1.57987
-Time for PBWT iter 1: 16.9269
+Time for PBWT iter 1: 16.5283
BEGINNING PBWT ITER 2
BATCH 1 OF 10
Phasing samples 1-37
-Time for phasing batch: 2.81822
+Time for phasing batch: 2.41118
BATCH 2 OF 10
Phasing samples 38-75
-Time for phasing batch: 2.59811
+Time for phasing batch: 2.39643
BATCH 3 OF 10
Phasing samples 76-113
-Time for phasing batch: 2.79582
+Time for phasing batch: 2.82551
BATCH 4 OF 10
Phasing samples 114-151
-Time for phasing batch: 2.55964
+Time for phasing batch: 2.8835
BATCH 5 OF 10
Phasing samples 152-189
-Time for phasing batch: 2.70656
+Time for phasing batch: 3.0855
BATCH 6 OF 10
Phasing samples 190-227
-Time for phasing batch: 2.66884
+Time for phasing batch: 2.37932
BATCH 7 OF 10
Phasing samples 228-265
-Time for phasing batch: 2.66954
+Time for phasing batch: 2.45946
BATCH 8 OF 10
Phasing samples 266-303
-Time for phasing batch: 2.51312
+Time for phasing batch: 2.54506
BATCH 9 OF 10
Phasing samples 304-341
-Time for phasing batch: 2.58987
+Time for phasing batch: 2.71921
BATCH 10 OF 10
Phasing samples 342-379
-Time for phasing batch: 2.608
+Time for phasing batch: 2.58614
-Time for PBWT iter 2: 26.5278
+Time for PBWT iter 2: 26.2913
Writing .haps.gz and .sample output
-Time for writing output: 0.354415
-Total elapsed time for analysis = 44.9045 sec
+Time for writing output: 0.359584
+Total elapsed time for analysis = 44.334 sec
diff --git a/example/example_ref.log b/example/example_ref.log
index f53b54d..ec27b0e 100644
--- a/example/example_ref.log
+++ b/example/example_ref.log
@@ -1,7 +1,7 @@
+-----------------------------+
| |
- | Eagle v2.3.1 |
- | March 3, 2017 |
+ | Eagle v2.3.2 |
+ | March 23, 2017 |
| Po-Ru Loh |
| |
+-----------------------------+
@@ -29,11 +29,9 @@ Target samples: Ntarget = 8
[W::vcf_parse] INFO 'EX_TARGET' is not defined in the header, assuming Type=String
SNPs to analyze: M = 430 SNPs in both target and reference
-SNPs ignored: 215 SNPs in target but not reference
- --> WARNING: Check REF/ALT agreement between target and ref <--
+SNPs ignored: 0 SNPs in target but not reference
215 SNPs in reference but not target
0 multi-allelic SNPs in target
- 0 monomorphic SNPs in target
Missing rate in target genotypes: 0.00116279
@@ -58,15 +56,15 @@ PHASING ITER 1 OF 1
Phasing target samples
................................................................................
-Time for phasing iter 1: 0.236171
-Writing vcf output to target.phased.vcf
+Time for phasing iter 1: 0.181584
+Writing vcf.gz output to target.phased.vcf.gz
[W::vcf_parse] INFO 'AC' is not defined in the header, assuming Type=String
[W::vcf_parse] INFO 'AN' is not defined in the header, assuming Type=String
[W::vcf_parse] INFO 'DP' is not defined in the header, assuming Type=String
[W::vcf_parse] INFO 'AFR_AF' is not defined in the header, assuming Type=String
[W::vcf_parse] INFO 'EX_TARGET' is not defined in the header, assuming Type=String
-Time for writing output: 0.051106
-Total elapsed time for analysis = 9.22206 sec
+Time for writing output: 0.0341809
+Total elapsed time for analysis = 6.69493 sec
Mean phase confidence of each target individual:
ID PHASE_CONFIDENCE
diff --git a/example/example_vcf.log b/example/example_vcf.log
index 6932e15..4affef4 100644
--- a/example/example_vcf.log
+++ b/example/example_vcf.log
@@ -1,7 +1,7 @@
+-----------------------------+
| |
- | Eagle v2.3.1 |
- | March 3, 2017 |
+ | Eagle v2.3.2 |
+ | March 23, 2017 |
| Po-Ru Loh |
| |
+-----------------------------+
@@ -39,25 +39,25 @@ Setting --histFactor=1.00
BEGINNING STEP 1
-Time for step 1: 0.383094
-Time for step 1 MN^2: 0.024787
+Time for step 1: 0.252695
+Time for step 1 MN^2: 0.0173362
-Making hard calls (time: 0.0283229)
+Making hard calls (time: 0.018297)
BEGINNING STEP 2
BATCH 1 OF 1
Building hash tables
-.................................................................. (time: 0.0526478)
+.................................................................. (time: 0.0494721)
Phasing samples 1-379
-Time for phasing batch: 0.469969
+Time for phasing batch: 0.308583
-Making hard calls (time: 0.029753)
+Making hard calls (time: 0.019398)
-Time for step 2: 0.552398
-Time for step 2 MN^2: 0.0597794
+Time for step 2: 0.377476
+Time for step 2 MN^2: 0.0407339
BEGINNING STEP 3 (PBWT ITERS)
@@ -70,108 +70,108 @@ BEGINNING PBWT ITER 1
BATCH 1 OF 10
Phasing samples 1-37
-Time for phasing batch: 1.77488
+Time for phasing batch: 1.59837
BATCH 2 OF 10
Phasing samples 38-75
-Time for phasing batch: 1.69304
+Time for phasing batch: 1.56651
BATCH 3 OF 10
Phasing samples 76-113
-Time for phasing batch: 1.74526
+Time for phasing batch: 1.463
BATCH 4 OF 10
Phasing samples 114-151
-Time for phasing batch: 1.64103
+Time for phasing batch: 1.61652
BATCH 5 OF 10
Phasing samples 152-189
-Time for phasing batch: 1.66061
+Time for phasing batch: 1.54799
BATCH 6 OF 10
Phasing samples 190-227
-Time for phasing batch: 1.66104
+Time for phasing batch: 1.63484
BATCH 7 OF 10
Phasing samples 228-265
-Time for phasing batch: 1.6719
+Time for phasing batch: 1.78759
BATCH 8 OF 10
Phasing samples 266-303
-Time for phasing batch: 1.58273
+Time for phasing batch: 1.4086
BATCH 9 OF 10
Phasing samples 304-341
-Time for phasing batch: 1.63833
+Time for phasing batch: 1.4037
BATCH 10 OF 10
Phasing samples 342-379
-Time for phasing batch: 1.6582
+Time for phasing batch: 1.66127
-Time for PBWT iter 1: 16.727
+Time for PBWT iter 1: 15.6884
BEGINNING PBWT ITER 2
BATCH 1 OF 10
Phasing samples 1-37
-Time for phasing batch: 2.75391
+Time for phasing batch: 2.77417
BATCH 2 OF 10
Phasing samples 38-75
-Time for phasing batch: 2.66628
+Time for phasing batch: 2.84674
BATCH 3 OF 10
Phasing samples 76-113
-Time for phasing batch: 2.74627
+Time for phasing batch: 3.50577
BATCH 4 OF 10
Phasing samples 114-151
-Time for phasing batch: 2.55569
+Time for phasing batch: 2.62343
BATCH 5 OF 10
Phasing samples 152-189
-Time for phasing batch: 2.56301
+Time for phasing batch: 2.34182
BATCH 6 OF 10
Phasing samples 190-227
-Time for phasing batch: 2.63885
+Time for phasing batch: 2.38923
BATCH 7 OF 10
Phasing samples 228-265
-Time for phasing batch: 2.61097
+Time for phasing batch: 2.38542
BATCH 8 OF 10
Phasing samples 266-303
-Time for phasing batch: 2.51258
+Time for phasing batch: 2.40897
BATCH 9 OF 10
Phasing samples 304-341
-Time for phasing batch: 2.57989
+Time for phasing batch: 2.41107
BATCH 10 OF 10
Phasing samples 342-379
-Time for phasing batch: 2.57872
+Time for phasing batch: 2.17693
-Time for PBWT iter 2: 26.2062
-Writing vcf output to phased.vcf
-Time for writing output: 0.168121
-Total elapsed time for analysis = 53.6103 sec
+Time for PBWT iter 2: 25.8636
+Writing vcf.gz output to phased.vcf.gz
+Time for writing output: 0.245132
+Total elapsed time for analysis = 49.358 sec
diff --git a/example/phased.vcf.gz b/example/phased.vcf.gz
index 962c54b..76de287 100644
Binary files a/example/phased.vcf.gz and b/example/phased.vcf.gz differ
diff --git a/example/target.phased.vcf.gz b/example/target.phased.vcf.gz
index f74a9f3..2ad75cb 100644
Binary files a/example/target.phased.vcf.gz and b/example/target.phased.vcf.gz differ
diff --git a/src/Eagle.cpp b/src/Eagle.cpp
index 4860b60..3fe0fef 100644
--- a/src/Eagle.cpp
+++ b/src/Eagle.cpp
@@ -3216,9 +3216,9 @@ namespace EAGLE {
bcf_hdr_sync(hdr);
}
- void Eagle::writeVcf(const string &tmpFile, const string &outFile, int chromX, double bpStart,
- double bpEnd, const string &writeMode, bool noImpMissing, int argc,
- char **argv) const {
+ void Eagle::writeVcf(const string &tmpFile, const vector <bool> &isTmpPhased,
+ const string &outFile, int chromX, double bpStart, double bpEnd,
+ const string &writeMode, bool noImpMissing, int argc, char **argv) const {
htsFile *htsTmp = hts_open(tmpFile.c_str(), "r");
if (htsTmp == NULL) {
@@ -3242,7 +3242,23 @@ namespace EAGLE {
vector <int> mostRecentPloidy(N-Nref, 2);
+ int tmpLineNum = -1; // index in tmp file (corresponding to isTmpPhased)
while (bcf_read(htsTmp, hdr, rec) >= 0) {
+ tmpLineNum++;
+ if (!isTmpPhased[tmpLineNum]) { // site was not phased; remove phase information and output
+ int bp = rec->pos+1;
+ if (bpStart <= bp && bp <= bpEnd) { // check if within output region
+ int ntgt_gt = bcf_get_genotypes(hdr, rec, &tgt_gt, &mtgt_gt);
+ for (int k = 0; k < ntgt_gt; k++)
+ if (tgt_gt[k] != bcf_int32_vector_end && !bcf_gt_is_missing(tgt_gt[k])) {
+ int idx = bcf_gt_allele(tgt_gt[k]); // allele index
+ tgt_gt[k] = bcf_gt_unphased(idx); // convert allele index to bcf value (unphased)
+ }
+ bcf_update_genotypes(hdr, rec, tgt_gt, ntgt_gt);
+ bcf_write(out, hdr, rec);
+ }
+ continue;
+ }
int chrom = StringUtils::bcfNameToChrom(bcf_hdr_id2name(hdr, rec->rid), 1, chromX);
diff --git a/src/Eagle.hpp b/src/Eagle.hpp
index dfe7bb8..5c43e0f 100644
--- a/src/Eagle.hpp
+++ b/src/Eagle.hpp
@@ -185,9 +185,9 @@ namespace EAGLE {
void outputSE(const std::vector <uint> &children, const std::vector <uint> &nF1s,
const std::vector <uint> &nF2s, int step) const;
void writeHapsGzSample(const std::string &prefix) const;
- void writeVcf(const std::string &tmpFile, const std::string &outFile, int chromX,
- double bpStart, double bpEnd, const std::string &writeMode, bool noImpMissing,
- int argc, char**argv) const;
+ void writeVcf(const std::string &tmpFile, const std::vector <bool> &isTmpPhased,
+ const std::string &outFile, int chromX, double bpStart, double bpEnd,
+ const std::string &writeMode, bool noImpMissing, int argc, char**argv) const;
void writeVcfNonRef(const std::string &vcfFile, const std::string &outFile, int inputChrom,
int chromX, double bpStart, double bpEnd, const std::string &writeMode,
int argc, char **argv) const;
diff --git a/src/EagleMain.cpp b/src/EagleMain.cpp
index d2c28b1..c8a2617 100644
--- a/src/EagleMain.cpp
+++ b/src/EagleMain.cpp
@@ -66,11 +66,12 @@ void phaseWithRef(EagleParams ¶ms, Timer &timer, double t0, int argc, char *
string tmpFile = params.outPrefix + ".unphased." + params.vcfOutSuffix;
string outFile = params.outPrefix + "." + params.vcfOutSuffix;
vector < vector < pair <int, int> > > conPSall; double snpRate;
+ vector <bool> isTmpPhased;
SyncedVcfData vcfData(params.vcfRef, params.vcfTarget, params.allowRefAltSwap, params.chrom,
params.chromX, params.bpStart-params.bpFlanking,
params.bpEnd+params.bpFlanking, params.geneticMapFile,
params.cMmax==0 ? 1 : params.cMmax, tmpFile, params.vcfWriteMode,
- params.usePS, conPSall, snpRate);
+ params.usePS, conPSall, snpRate, params.outputUnphased, isTmpPhased);
Eagle eagle(vcfData.getNref(), vcfData.getNtarget(), vcfData.getMseg64(),
vcfData.getGenoBits(), vcfData.getSeg64cMvecs(), params.pErr);
@@ -185,7 +186,7 @@ void phaseWithRef(EagleParams ¶ms, Timer &timer, double t0, int argc, char *
timer.update_time();
cout << "Writing " << params.vcfOutSuffix << " output to " << outFile << endl;
- eagle.writeVcf(tmpFile, outFile, params.chromX, params.bpStart, params.bpEnd,
+ eagle.writeVcf(tmpFile, isTmpPhased, outFile, params.chromX, params.bpStart, params.bpEnd,
params.vcfWriteMode, params.noImpMissing, argc, argv);
cout << "Time for writing output: " << timer.update_time() << endl;
diff --git a/src/EagleParams.cpp b/src/EagleParams.cpp
index 7eb084a..2509c23 100644
--- a/src/EagleParams.cpp
+++ b/src/EagleParams.cpp
@@ -85,6 +85,7 @@ namespace EAGLE {
"b|u|z|v: compressed BCF (b), uncomp BCF (u), compressed VCF (z), uncomp VCF (v)")
("noImpMissing", "disable imputation of missing ./. target genotypes")
("allowRefAltSwap", "allow swapping of REF/ALT in target vs. ref VCF")
+ ("outputUnphased", "output unphased sites (target-only, multi-allelic, etc.)")
;
po::options_description bothModes("Region selection options");
@@ -227,6 +228,7 @@ namespace EAGLE {
noMapCheck = vm.count("noMapCheck");
noImpMissing = vm.count("noImpMissing");
allowRefAltSwap = vm.count("allowRefAltSwap");
+ outputUnphased = vm.count("outputUnphased");
if (vm.count("vcfRef") || vm.count("vcfTarget") || vm.count("vcf")) { // VCF mode
if (vm.count("vcf")) { // non-ref mode
@@ -268,10 +270,10 @@ namespace EAGLE {
}
if (vcfOutFormat == "b") { vcfOutSuffix = "bcf"; vcfWriteMode = "wb"; }
else if (vcfOutFormat == "u") { vcfOutSuffix = "bcf"; vcfWriteMode = "wbu"; }
- else if (vcfOutFormat == "z") { vcfOutSuffix = "vcf.gz"; vcfWriteMode = "wz"; }
- else if (vcfOutFormat == "v" || vcfOutFormat == ".") {
- vcfOutSuffix = "vcf"; vcfWriteMode = "w";
+ else if (vcfOutFormat == "z" || vcfOutFormat == ".") {
+ vcfOutSuffix = "vcf.gz"; vcfWriteMode = "wz";
}
+ else if (vcfOutFormat == "v") { vcfOutSuffix = "vcf"; vcfWriteMode = "w"; }
else {
cerr << "ERROR: --vcfOutFormat must be one of {b,u,z,v}" << endl;
return false;
diff --git a/src/EagleParams.hpp b/src/EagleParams.hpp
index a0b6eb2..9470418 100644
--- a/src/EagleParams.hpp
+++ b/src/EagleParams.hpp
@@ -44,6 +44,7 @@ namespace EAGLE {
std::string vcfOutFormat, vcfOutSuffix, vcfWriteMode;
// outFormat b|u|z|v -> outSuffix bcf|bcf|vcf.gz|vcf, writeMode wb|wbu|wz|w
bool noImpMissing;
+ bool outputUnphased; // output unphased sites (target-only, multi-allelic, etc.)
int usePS; // use FORMAT:PS phase constraints: 1=soft, 2=harder
bool allowRefAltSwap; // in reference-based phasing mode
diff --git a/src/Makefile b/src/Makefile
index e17235d..d7731c0 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -7,8 +7,8 @@
LBLAS = -lopenblas # alternatively -llapack (just need sgemm_)
BLAS_DIR = /opt/openblas/0.2.14/lib
BOOST_INSTALL_DIR = /home/pl88/boost_1_58_0/install
-HTSLIB_DIR = /groups/price/poru/external_software/htslib/htslib-1.3
-
+HTSLIB_DIR = /groups/price/poru/external_software/htslib/htslib-1.4
+### NOTE: to reduce dependencies from htslib, configure htslib with: ./configure --disable-bz2 --disable-lzma --disable-plugins --disable-libcurl --disable-gcs --disable-s3
### these paths are used only for static linking
ZLIB_STATIC_DIR = /opt/zlib-1.2.8/lib # probably unnecessary on most systems
diff --git a/src/SyncedVcfData.cpp b/src/SyncedVcfData.cpp
index 4e72e3d..0acc785 100644
--- a/src/SyncedVcfData.cpp
+++ b/src/SyncedVcfData.cpp
@@ -163,12 +163,14 @@ namespace EAGLE {
(const string &vcfRef, const string &vcfTarget, bool allowRefAltSwap, int chrom, int chromX,
double bpStart, double bpEnd, vector <bool> &hapsRef, vector <uchar> &genosTarget,
const string &tmpFile, const string &writeMode, int usePS,
- vector < vector < pair <int, int> > > &conPSall) {
+ vector < vector < pair <int, int> > > &conPSall, bool outputUnphased,
+ vector <bool> &isTmpPhased) {
vector < pair <int, int> > chrBps;
bcf_srs_t *sr = bcf_sr_init();
- sr->require_index = 1;
+ bcf_sr_set_opt(sr, BCF_SR_PAIR_LOGIC, BCF_SR_PAIR_BOTH_REF);
+ bcf_sr_set_opt(sr, BCF_SR_REQUIRE_IDX);
if ( chrom!=0 )
{
@@ -188,17 +190,6 @@ namespace EAGLE {
free(str.s);
}
- // By default, the synced reader requires that CHR, POS and ALT are the same
- // in both files. If this is too strict and SNP/indel/all lines with the same
- // position should be considered as matching, uncomment:
- //
- // sr->collapse = COLLAPSE_SNPS|COLLAPSE_INDELS;
- //
- // See also examples in bcftools/vcfisec etc.
-
- if (allowRefAltSwap)
- sr->collapse = COLLAPSE_SNPS;
-
if (!bcf_sr_add_reader(sr, vcfRef.c_str())) {
cerr << "ERROR: Could not open " << vcfRef << " for reading: " << bcf_sr_strerror(sr->errnum)
<< endl;
@@ -238,7 +229,7 @@ namespace EAGLE {
cout << "Target samples: Ntarget = " << Ntarget << endl;
M = 0;
- uint MtargetOnly = 0, MrefOnly = 0, MmultiAllelicTgt = 0, MmonomorphicTgt = 0,
+ uint MtargetOnly = 0, MrefOnly = 0, MmultiAllelicTgt = 0,
MmultiAllelicRef = 0, MmonomorphicRef = 0;
uint MwithMissingRef = 0, MwithUnphasedRef = 0, MnotInRegion = 0, MnotOnChrom = 0;
uint MrefAltError = 0, numRefAltSwaps = 0;
@@ -258,14 +249,14 @@ namespace EAGLE {
bcf_unpack(tgt, BCF_UN_STR); // unpack thru ALT
if (strcmp(tgt->d.allele[1], ".") != 0) // report if polymorphic in target
MtargetOnly++;
+ if (outputUnphased) { bcf_write(out, tgt_hdr, tgt); isTmpPhased.push_back(false); }
continue;
}
if ( !tgt ) {
//fprintf(stderr, "onlyR .. %s:%d\n", bcf_seqname(ref_hdr, ref), ref->pos+1);
bcf_unpack(ref, BCF_UN_STR); // unpack thru ALT
if (strcmp(ref->d.allele[1], ".") != 0) // report if polymorphic in ref
- MtargetOnly++;
- MrefOnly++;
+ MrefOnly++;
continue;
}
//fprintf(stderr, "match .. %s:%d\n", bcf_seqname(ref_hdr, ref), ref->pos+1);
@@ -274,26 +265,31 @@ namespace EAGLE {
int ntgt_gt = bcf_get_genotypes(tgt_hdr, tgt, &tgt_gt, &mtgt_gt);
if (tgt->n_allele > 2) {
MmultiAllelicTgt++;
+ if (outputUnphased) { bcf_write(out, tgt_hdr, tgt); isTmpPhased.push_back(false); }
continue;
}
- if (tgt->n_allele < 2) {
- MmonomorphicTgt++;
- continue;
- }
+ if (ref->n_allele==1)
+ {
+ // drop monomorphic reference markers
+ MmonomorphicRef++;
+ if (outputUnphased) { bcf_write(out, tgt_hdr, tgt); isTmpPhased.push_back(false); }
+ continue;
+ }
+
+ // preserve monomorphic markers if biallelic in the reference panel
+ if (tgt->n_allele < 2)
+ {
+ bcf_update_alleles(tgt_hdr, tgt, (const char**)ref->d.allele,ref->n_allele);
+ }
bool refAltSwap = false;
if (allowRefAltSwap) { // perform further error-checking
if (ref->n_allele > 2) {
MmultiAllelicRef++;
+ if (outputUnphased) { bcf_write(out, tgt_hdr, tgt); isTmpPhased.push_back(false); }
continue;
}
- if (ref->n_allele < 2) {
- MmonomorphicRef++;
- continue;
- }
- bcf_unpack(tgt, BCF_UN_STR); // unpack thru ALT
- bcf_unpack(ref, BCF_UN_STR); // unpack thru ALT
/*
printf("tgt REF=%s, ALT=%s ref REF=%s, ALT=%s\n", tgt->d.allele[0], tgt->d.allele[1],
ref->d.allele[0], ref->d.allele[1]);
@@ -309,6 +305,7 @@ namespace EAGLE {
}
else {
MrefAltError++;
+ if (outputUnphased) { bcf_write(out, tgt_hdr, tgt); isTmpPhased.push_back(false); }
continue;
}
}
@@ -366,6 +363,7 @@ namespace EAGLE {
// print the record
bcf_write(out, tgt_hdr, tgt);
+ isTmpPhased.push_back(true);
}
bcf_sr_destroy(sr);
@@ -394,7 +392,6 @@ namespace EAGLE {
cout << " " << MnotInRegion << " SNPs not in selected region (+ flanks)"
<< endl;
cout << " " << MmultiAllelicTgt << " multi-allelic SNPs in target" << endl;
- cout << " " << MmonomorphicTgt << " monomorphic SNPs in target" << endl;
if (MmultiAllelicRef)
cout << " " << MmultiAllelicRef << " SNPs biallelic in target but multi-allelic in ref" << endl;
if (MmonomorphicRef)
@@ -420,6 +417,13 @@ namespace EAGLE {
cout << "Missing rate in target genotypes: " << GmissingTarget / (double) M / Ntarget << endl;
cout << endl;
+ int numTmpUnphased = 0;
+ for (uint j = 0; j < isTmpPhased.size(); j++)
+ numTmpUnphased += !isTmpPhased[j];
+ if (numTmpUnphased)
+ cout << "SNPs excluded from phasing that will be kept in output (unphased): "
+ << numTmpUnphased << endl << endl;
+
if (M <= 1U) {
cerr << endl << "ERROR: Target and ref have too few matching SNPs (M = " << M << ")" << endl;
exit(1);
@@ -493,14 +497,15 @@ namespace EAGLE {
int chrom, int chromX, double bpStart, double bpEnd,
const string &geneticMapFile, double cMmax, const string &tmpFile,
const string &writeMode, int usePS,
- vector < vector < pair <int, int> > > &conPSall, double &snpRate) {
+ vector < vector < pair <int, int> > > &conPSall, double &snpRate,
+ bool outputUnphased, vector <bool> &isTmpPhased) {
// perform synced read
vector <bool> hapsRef; // M*2*Nref
vector <uchar> genosTarget; // M*Ntarget
vector < pair <int, int> > chrBps =
processVcfs(vcfRef, vcfTarget, allowRefAltSwap, chrom, chromX, bpStart, bpEnd, hapsRef,
- genosTarget, tmpFile, writeMode, usePS, conPSall);
+ genosTarget, tmpFile, writeMode, usePS, conPSall, outputUnphased, isTmpPhased);
// interpolate genetic coordinates
vector <double> cMs = processMap(chrBps, geneticMapFile);
diff --git a/src/SyncedVcfData.hpp b/src/SyncedVcfData.hpp
index 99dcd73..9d4543a 100644
--- a/src/SyncedVcfData.hpp
+++ b/src/SyncedVcfData.hpp
@@ -43,7 +43,8 @@ namespace EAGLE {
(const std::string &vcfRef, const std::string &vcfTarget, bool allowRefAltSwap, int chrom,
int chromX, double bpStart, double bpEnd, std::vector <bool> &hapsRef,
std::vector <uchar> &genosTarget, const std::string &tmpFile, const std::string &writeMode,
- int usePS, std::vector < std::vector < std::pair <int, int> > > &conPSall);
+ int usePS, std::vector < std::vector < std::pair <int, int> > > &conPSall,
+ bool outputUnphased, std::vector <bool> &isTmpPhased);
std::vector <double> processMap(std::vector < std::pair <int, int> > &chrBps,
const std::string &geneticMapFile);
void buildGenoBits(const std::vector <bool> &hapsRef, const std::vector <uchar> &genosTarget,
@@ -59,7 +60,8 @@ namespace EAGLE {
int chrom, int chromX, double bpStart, double bpEnd,
const std::string &geneticMapFile, double cMmax, const std::string &tmpFile,
const std::string &writeMode, int usePS,
- std::vector < std::vector < std::pair <int, int> > > &conPSall, double &snpRate);
+ std::vector < std::vector < std::pair <int, int> > > &conPSall, double &snpRate,
+ bool outputUnphased, std::vector <bool> &isTmpPhased);
~SyncedVcfData();
diff --git a/src/Version.hpp b/src/Version.hpp
index f0d41d8..050a7fe 100644
--- a/src/Version.hpp
+++ b/src/Version.hpp
@@ -19,7 +19,7 @@
#ifndef VERSION_HPP
#define VERSION_HPP
-const char EAGLE_VERSION[] = "2.3.1";
-const char EAGLE_VERSION_DATE[] = "March 3, 2017";
+const char EAGLE_VERSION[] = "2.3.2";
+const char EAGLE_VERSION_DATE[] = "March 23, 2017";
#endif
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/eagle.git
More information about the debian-med-commit
mailing list