[med-svn] [eagle] 01/04: Imported Upstream version 2.3.4

Dylan Aïssi bob.dybian-guest at moszumanska.debian.org
Tue Jul 18 22:13:33 UTC 2017


This is an automated email from the git hooks/post-receive script.

bob.dybian-guest pushed a commit to branch master
in repository eagle.

commit 323e0f67145f52ea6f22c91f30fbe36a06cff3d2
Author: Dylan Aïssi <bob.dybian at gmail.com>
Date:   Tue Jul 18 23:56:59 2017 +0200

    Imported Upstream version 2.3.4
---
 example/example.log          |  66 +++++++++++++--------------
 example/example_ref.log      |  10 ++---
 example/example_vcf.log      |  68 ++++++++++++++--------------
 example/phased.vcf.gz        | Bin 200384 -> 200384 bytes
 example/target.phased.vcf.gz | Bin 21053 -> 21053 bytes
 src/Eagle.cpp                | 103 +++++++++++++++++++++++--------------------
 src/EaglePBWT.cpp            |   2 +-
 src/SyncedVcfData.cpp        |   4 +-
 src/Version.hpp              |   4 +-
 9 files changed, 131 insertions(+), 126 deletions(-)

diff --git a/example/example.log b/example/example.log
index 65ea5a4..a85bbc2 100644
--- a/example/example.log
+++ b/example/example.log
@@ -1,7 +1,7 @@
                       +-----------------------------+
                       |                             |
-                      |   Eagle v2.3.2              |
-                      |   March 23, 2017            |
+                      |   Eagle v2.3.4              |
+                      |   June 5, 2017              |
                       |   Po-Ru Loh                 |
                       |                             |
                       +-----------------------------+
@@ -56,25 +56,25 @@ Setting --histFactor=1.00
 
 BEGINNING STEP 1
 
-Time for step 1: 0.325358
-Time for step 1 MN^2: 0.0202137
+Time for step 1: 0.372298
+Time for step 1 MN^2: 0.0234743
 
-Making hard calls (time: 0.0190668)
+Making hard calls (time: 0.0283618)
 
 
 BEGINNING STEP 2
 
 BATCH 1 OF 1
 Building hash tables
-.................................................................. (time: 0.161656)
+.................................................................. (time: 0.0564198)
 
 Phasing samples 1-379
-Time for phasing batch: 0.384582
+Time for phasing batch: 0.455953
 
-Making hard calls (time: 0.0192499)
+Making hard calls (time: 0.0308828)
 
-Time for step 2: 0.5655
-Time for step 2 MN^2: 0.050123
+Time for step 2: 0.543273
+Time for step 2 MN^2: 0.060238
 
 
 BEGINNING STEP 3 (PBWT ITERS)
@@ -87,22 +87,22 @@ BEGINNING PBWT ITER 1
 BATCH 1 OF 10
 
 Phasing samples 1-37
-Time for phasing batch: 2.14832
+Time for phasing batch: 1.79289
 
 BATCH 2 OF 10
 
 Phasing samples 38-75
-Time for phasing batch: 1.47815
+Time for phasing batch: 1.63213
 
 BATCH 3 OF 10
 
 Phasing samples 76-113
-Time for phasing batch: 1.60704
+Time for phasing batch: 1.72728
 
 BATCH 4 OF 10
 
 Phasing samples 114-151
-Time for phasing batch: 1.69579
+Time for phasing batch: 1.67067
 
 BATCH 5 OF 10
 
@@ -112,83 +112,83 @@ Time for phasing batch: 1.6183
 BATCH 6 OF 10
 
 Phasing samples 190-227
-Time for phasing batch: 1.5888
+Time for phasing batch: 1.64558
 
 BATCH 7 OF 10
 
 Phasing samples 228-265
-Time for phasing batch: 1.69022
+Time for phasing batch: 1.66745
 
 BATCH 8 OF 10
 
 Phasing samples 266-303
-Time for phasing batch: 1.39507
+Time for phasing batch: 1.5854
 
 BATCH 9 OF 10
 
 Phasing samples 304-341
-Time for phasing batch: 1.72671
+Time for phasing batch: 1.65665
 
 BATCH 10 OF 10
 
 Phasing samples 342-379
-Time for phasing batch: 1.57987
+Time for phasing batch: 1.65564
 
-Time for PBWT iter 1: 16.5283
+Time for PBWT iter 1: 16.652
 
 BEGINNING PBWT ITER 2
 
 BATCH 1 OF 10
 
 Phasing samples 1-37
-Time for phasing batch: 2.41118
+Time for phasing batch: 2.78407
 
 BATCH 2 OF 10
 
 Phasing samples 38-75
-Time for phasing batch: 2.39643
+Time for phasing batch: 2.63331
 
 BATCH 3 OF 10
 
 Phasing samples 76-113
-Time for phasing batch: 2.82551
+Time for phasing batch: 2.79219
 
 BATCH 4 OF 10
 
 Phasing samples 114-151
-Time for phasing batch: 2.8835
+Time for phasing batch: 2.63535
 
 BATCH 5 OF 10
 
 Phasing samples 152-189
-Time for phasing batch: 3.0855
+Time for phasing batch: 2.63604
 
 BATCH 6 OF 10
 
 Phasing samples 190-227
-Time for phasing batch: 2.37932
+Time for phasing batch: 2.73293
 
 BATCH 7 OF 10
 
 Phasing samples 228-265
-Time for phasing batch: 2.45946
+Time for phasing batch: 2.68814
 
 BATCH 8 OF 10
 
 Phasing samples 266-303
-Time for phasing batch: 2.54506
+Time for phasing batch: 2.52878
 
 BATCH 9 OF 10
 
 Phasing samples 304-341
-Time for phasing batch: 2.71921
+Time for phasing batch: 2.63756
 
 BATCH 10 OF 10
 
 Phasing samples 342-379
-Time for phasing batch: 2.58614
+Time for phasing batch: 2.67104
 
-Time for PBWT iter 2: 26.2913
+Time for PBWT iter 2: 26.7394
 Writing .haps.gz and .sample output
-Time for writing output: 0.359584
-Total elapsed time for analysis = 44.334 sec
+Time for writing output: 0.40393
+Total elapsed time for analysis = 44.9436 sec
diff --git a/example/example_ref.log b/example/example_ref.log
index ec27b0e..6eb6a7f 100644
--- a/example/example_ref.log
+++ b/example/example_ref.log
@@ -1,7 +1,7 @@
                       +-----------------------------+
                       |                             |
-                      |   Eagle v2.3.2              |
-                      |   March 23, 2017            |
+                      |   Eagle v2.3.4              |
+                      |   June 5, 2017              |
                       |   Po-Ru Loh                 |
                       |                             |
                       +-----------------------------+
@@ -56,15 +56,15 @@ PHASING ITER 1 OF 1
 
 Phasing target samples
 ................................................................................
-Time for phasing iter 1: 0.181584
+Time for phasing iter 1: 0.259136
 Writing vcf.gz output to target.phased.vcf.gz
 [W::vcf_parse] INFO 'AC' is not defined in the header, assuming Type=String
 [W::vcf_parse] INFO 'AN' is not defined in the header, assuming Type=String
 [W::vcf_parse] INFO 'DP' is not defined in the header, assuming Type=String
 [W::vcf_parse] INFO 'AFR_AF' is not defined in the header, assuming Type=String
 [W::vcf_parse] INFO 'EX_TARGET' is not defined in the header, assuming Type=String
-Time for writing output: 0.0341809
-Total elapsed time for analysis = 6.69493 sec
+Time for writing output: 0.0443978
+Total elapsed time for analysis = 10.1217 sec
 
 Mean phase confidence of each target individual:
 ID	PHASE_CONFIDENCE
diff --git a/example/example_vcf.log b/example/example_vcf.log
index 4affef4..a8177cf 100644
--- a/example/example_vcf.log
+++ b/example/example_vcf.log
@@ -1,7 +1,7 @@
                       +-----------------------------+
                       |                             |
-                      |   Eagle v2.3.2              |
-                      |   March 23, 2017            |
+                      |   Eagle v2.3.4              |
+                      |   June 5, 2017              |
                       |   Po-Ru Loh                 |
                       |                             |
                       +-----------------------------+
@@ -39,25 +39,25 @@ Setting --histFactor=1.00
 
 BEGINNING STEP 1
 
-Time for step 1: 0.252695
-Time for step 1 MN^2: 0.0173362
+Time for step 1: 0.382344
+Time for step 1 MN^2: 0.024531
 
-Making hard calls (time: 0.018297)
+Making hard calls (time: 0.0279789)
 
 
 BEGINNING STEP 2
 
 BATCH 1 OF 1
 Building hash tables
-.................................................................. (time: 0.0494721)
+.................................................................. (time: 0.0512731)
 
 Phasing samples 1-379
-Time for phasing batch: 0.308583
+Time for phasing batch: 0.44787
 
-Making hard calls (time: 0.019398)
+Making hard calls (time: 0.0295429)
 
-Time for step 2: 0.377476
-Time for step 2 MN^2: 0.0407339
+Time for step 2: 0.528698
+Time for step 2 MN^2: 0.0594785
 
 
 BEGINNING STEP 3 (PBWT ITERS)
@@ -70,108 +70,108 @@ BEGINNING PBWT ITER 1
 BATCH 1 OF 10
 
 Phasing samples 1-37
-Time for phasing batch: 1.59837
+Time for phasing batch: 1.78389
 
 BATCH 2 OF 10
 
 Phasing samples 38-75
-Time for phasing batch: 1.56651
+Time for phasing batch: 1.66108
 
 BATCH 3 OF 10
 
 Phasing samples 76-113
-Time for phasing batch: 1.463
+Time for phasing batch: 1.72659
 
 BATCH 4 OF 10
 
 Phasing samples 114-151
-Time for phasing batch: 1.61652
+Time for phasing batch: 1.6426
 
 BATCH 5 OF 10
 
 Phasing samples 152-189
-Time for phasing batch: 1.54799
+Time for phasing batch: 1.629
 
 BATCH 6 OF 10
 
 Phasing samples 190-227
-Time for phasing batch: 1.63484
+Time for phasing batch: 1.63717
 
 BATCH 7 OF 10
 
 Phasing samples 228-265
-Time for phasing batch: 1.78759
+Time for phasing batch: 1.66667
 
 BATCH 8 OF 10
 
 Phasing samples 266-303
-Time for phasing batch: 1.4086
+Time for phasing batch: 1.5801
 
 BATCH 9 OF 10
 
 Phasing samples 304-341
-Time for phasing batch: 1.4037
+Time for phasing batch: 1.60071
 
 BATCH 10 OF 10
 
 Phasing samples 342-379
-Time for phasing batch: 1.66127
+Time for phasing batch: 1.60565
 
-Time for PBWT iter 1: 15.6884
+Time for PBWT iter 1: 16.5335
 
 BEGINNING PBWT ITER 2
 
 BATCH 1 OF 10
 
 Phasing samples 1-37
-Time for phasing batch: 2.77417
+Time for phasing batch: 2.80026
 
 BATCH 2 OF 10
 
 Phasing samples 38-75
-Time for phasing batch: 2.84674
+Time for phasing batch: 2.68277
 
 BATCH 3 OF 10
 
 Phasing samples 76-113
-Time for phasing batch: 3.50577
+Time for phasing batch: 2.77932
 
 BATCH 4 OF 10
 
 Phasing samples 114-151
-Time for phasing batch: 2.62343
+Time for phasing batch: 2.58917
 
 BATCH 5 OF 10
 
 Phasing samples 152-189
-Time for phasing batch: 2.34182
+Time for phasing batch: 2.58681
 
 BATCH 6 OF 10
 
 Phasing samples 190-227
-Time for phasing batch: 2.38923
+Time for phasing batch: 2.64623
 
 BATCH 7 OF 10
 
 Phasing samples 228-265
-Time for phasing batch: 2.38542
+Time for phasing batch: 2.67757
 
 BATCH 8 OF 10
 
 Phasing samples 266-303
-Time for phasing batch: 2.40897
+Time for phasing batch: 2.53453
 
 BATCH 9 OF 10
 
 Phasing samples 304-341
-Time for phasing batch: 2.41107
+Time for phasing batch: 2.59236
 
 BATCH 10 OF 10
 
 Phasing samples 342-379
-Time for phasing batch: 2.17693
+Time for phasing batch: 2.606
 
-Time for PBWT iter 2: 25.8636
+Time for PBWT iter 2: 26.4951
 Writing vcf.gz output to phased.vcf.gz
-Time for writing output: 0.245132
-Total elapsed time for analysis = 49.358 sec
+Time for writing output: 0.362386
+Total elapsed time for analysis = 54.0173 sec
diff --git a/example/phased.vcf.gz b/example/phased.vcf.gz
index 76de287..6e80a7c 100644
Binary files a/example/phased.vcf.gz and b/example/phased.vcf.gz differ
diff --git a/example/target.phased.vcf.gz b/example/target.phased.vcf.gz
index 2ad75cb..8120c71 100644
Binary files a/example/target.phased.vcf.gz and b/example/target.phased.vcf.gz differ
diff --git a/src/Eagle.cpp b/src/Eagle.cpp
index 3fe0fef..dfe1b80 100644
--- a/src/Eagle.cpp
+++ b/src/Eagle.cpp
@@ -3263,68 +3263,73 @@ namespace EAGLE {
       int chrom = StringUtils::bcfNameToChrom(bcf_hdr_id2name(hdr, rec->rid), 1, chromX);
 
       int ntgt_gt = bcf_get_genotypes(hdr, rec, &tgt_gt, &mtgt_gt);
-      
+
       int bp = rec->pos+1;
       if (bpStart <= bp && bp <= bpEnd) { // check if within output region
-	for (int i = 0; i < (int) (N-Nref); i++) {
-	  int ploidy = 2;
-	  int *ptr = tgt_gt + i*ploidy;
-
-	  if (chrom != chromX || (bcf_gt_is_missing(ptr[0]) && mostRecentPloidy[i] == 2)
-	      || ptr[1] != bcf_int32_vector_end) { // diploid... be careful about missing '.'
-	    mostRecentPloidy[i] = 2;
-	    bool missing = false;
-	    int minIdx = 1000, maxIdx = 0;
-	    for (int j = 0; j < ploidy; j++) {
-	      if ( bcf_gt_is_missing(ptr[j]) ) { // missing allele
-		missing = true;
-	      }
-	      else {
-		int idx = bcf_gt_allele(ptr[j]); // allele index
-		minIdx = std::min(minIdx, idx);
-		maxIdx = std::max(maxIdx, idx);
+	if (bcf_hdr_nsamples(hdr) == ntgt_gt) { // site is all-haploid
+	  bcf_write(out, hdr, rec);
+	}
+	else {
+	  for (int i = 0; i < (int) (N-Nref); i++) {
+	    int ploidy = 2;
+	    int *ptr = tgt_gt + i*ploidy;
+
+	    if (chrom != chromX || (bcf_gt_is_missing(ptr[0]) && mostRecentPloidy[i] == 2)
+		|| ptr[1] != bcf_int32_vector_end) { // diploid... be careful about missing '.'
+	      mostRecentPloidy[i] = 2;
+	      bool missing = false;
+	      int minIdx = 1000, maxIdx = 0;
+	      for (int j = 0; j < ploidy; j++) {
+		if ( bcf_gt_is_missing(ptr[j]) ) { // missing allele
+		  missing = true;
+		}
+		else {
+		  int idx = bcf_gt_allele(ptr[j]); // allele index
+		  minIdx = std::min(minIdx, idx);
+		  maxIdx = std::max(maxIdx, idx);
+		}
 	      }
-	    }
 
-	    if (!missing && minIdx == maxIdx) { // hom => same allele
-	      ptr[0] = ptr[1] = bcf_gt_phased(minIdx);
-	    }
-	    else if (!missing && minIdx > 0) { // ALT1/ALT2 het => don't phase
-	      ptr[0] = ptr[1] = bcf_gt_missing;
-	    }
-	    else { // REF/ALT* het => phase as called by Eagle
-	      if (missing && noImpMissing) { // don't call alleles
+	      if (!missing && minIdx == maxIdx) { // hom => same allele
+		ptr[0] = ptr[1] = bcf_gt_phased(minIdx);
+	      }
+	      else if (!missing && minIdx > 0) { // ALT1/ALT2 het => don't phase
 		ptr[0] = ptr[1] = bcf_gt_missing;
 	      }
-	      else {
-		for (int j = 0; j < ploidy; j++) {
-		  uint64 nTargetHap = 2*i + j;
-		  int altIdx = missing ? 1 : maxIdx;
-		  int hapBit = (tmpHaploBitsT[nTargetHap*Mseg64+(m64j/64)]>>(m64j&63))&1;
-		  if (isFlipped64j[m64j]) hapBit = !hapBit;
-		  int idx = hapBit ? altIdx : 0;
-		  ptr[j] = bcf_gt_phased(idx); // convert allele index to bcf value (phased)
+	      else { // REF/ALT* het => phase as called by Eagle
+		if (missing && noImpMissing) { // don't call alleles
+		  ptr[0] = ptr[1] = bcf_gt_missing;
+		}
+		else {
+		  for (int j = 0; j < ploidy; j++) {
+		    uint64 nTargetHap = 2*i + j;
+		    int altIdx = missing ? 1 : maxIdx;
+		    int hapBit = (tmpHaploBitsT[nTargetHap*Mseg64+(m64j/64)]>>(m64j&63))&1;
+		    if (isFlipped64j[m64j]) hapBit = !hapBit;
+		    int idx = hapBit ? altIdx : 0;
+		    ptr[j] = bcf_gt_phased(idx); // convert allele index to bcf value (phased)
+		  }
 		}
 	      }
 	    }
-	  }
-	  else { // haploid
-	    mostRecentPloidy[i] = 1;
-	    if ( bcf_gt_is_missing(ptr[0]) && !noImpMissing ) { // missing allele
-	      int j = 0;
-	      uint64 nTargetHap = 2*i + j;
-	      int altIdx = 1;
-	      int hapBit = (tmpHaploBitsT[nTargetHap*Mseg64+(m64j/64)]>>(m64j&63))&1;
-	      if (isFlipped64j[m64j]) hapBit = !hapBit;
-	      int idx = hapBit ? altIdx : 0;
-	      ptr[j] = bcf_gt_phased(idx); // convert allele index to bcf value (phased)
+	    else { // haploid
+	      mostRecentPloidy[i] = 1;
+	      if ( bcf_gt_is_missing(ptr[0]) && !noImpMissing ) { // missing allele
+		int j = 0;
+		uint64 nTargetHap = 2*i + j;
+		int altIdx = 1;
+		int hapBit = (tmpHaploBitsT[nTargetHap*Mseg64+(m64j/64)]>>(m64j&63))&1;
+		if (isFlipped64j[m64j]) hapBit = !hapBit;
+		int idx = hapBit ? altIdx : 0;
+		ptr[j] = bcf_gt_phased(idx); // convert allele index to bcf value (phased)
+	      }
 	    }
 	  }
-	}
 
-	bcf_update_genotypes(hdr, rec, tgt_gt, ntgt_gt);
+	  bcf_update_genotypes(hdr, rec, tgt_gt, ntgt_gt);
 
-	bcf_write(out, hdr, rec);
+	  bcf_write(out, hdr, rec);
+	}
       }
 
       m64j++;
diff --git a/src/EaglePBWT.cpp b/src/EaglePBWT.cpp
index 138027f..9d20c8a 100644
--- a/src/EaglePBWT.cpp
+++ b/src/EaglePBWT.cpp
@@ -619,7 +619,7 @@ namespace EAGLE {
 	    int minErr = 1<<30;
 	    vector <double> cMdiffs; vector <uint64> revStarts;
 
-	    for (uint64 m64j = m64jPrev; m64j < m64jNext; m64j++) {
+	    for (uint64 m64j = m64jPrev; m64j == m64jPrev || m64j < m64jNext; m64j++) {
 	      if (m64j != m64jPrev) { // update err counts
 		if ((genos64j[m64j] == 0 || genos64j[m64j] == 2) &&
 		    (((haploBitsT[refSeqFwd*Mseg64+m64j/64]>>(m64j&63))&1) != genos64j[m64j]/2))
diff --git a/src/SyncedVcfData.cpp b/src/SyncedVcfData.cpp
index 0acc785..d8b0b2b 100644
--- a/src/SyncedVcfData.cpp
+++ b/src/SyncedVcfData.cpp
@@ -247,7 +247,7 @@ namespace EAGLE {
 	if ( !ref ) {
 	  //fprintf(stderr, "onlyT .. %s:%d\n", bcf_seqname(tgt_hdr, tgt), tgt->pos+1);
 	  bcf_unpack(tgt, BCF_UN_STR); // unpack thru ALT
-	  if (strcmp(tgt->d.allele[1], ".") != 0) // report if polymorphic in target
+	  if ( tgt->n_allele>1 && strcmp(tgt->d.allele[1], ".") != 0) // report if polymorphic in target
 	    MtargetOnly++;
 	  if (outputUnphased) { bcf_write(out, tgt_hdr, tgt); isTmpPhased.push_back(false); }
 	  continue;
@@ -255,7 +255,7 @@ namespace EAGLE {
 	if ( !tgt ) {
 	  //fprintf(stderr, "onlyR .. %s:%d\n", bcf_seqname(ref_hdr, ref), ref->pos+1);
 	  bcf_unpack(ref, BCF_UN_STR); // unpack thru ALT
-	  if (strcmp(ref->d.allele[1], ".") != 0) // report if polymorphic in ref
+	  if ( ref->n_allele>1 && strcmp(ref->d.allele[1], ".") != 0) // report if polymorphic in ref
 	    MrefOnly++;
 	  continue;
 	}
diff --git a/src/Version.hpp b/src/Version.hpp
index 050a7fe..4518d27 100644
--- a/src/Version.hpp
+++ b/src/Version.hpp
@@ -19,7 +19,7 @@
 #ifndef VERSION_HPP
 #define VERSION_HPP
 
-const char EAGLE_VERSION[] = "2.3.2";
-const char EAGLE_VERSION_DATE[] = "March 23, 2017";
+const char EAGLE_VERSION[] = "2.3.4";
+const char EAGLE_VERSION_DATE[] = "June 5, 2017";
 
 #endif

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/eagle.git



More information about the debian-med-commit mailing list