[med-svn] samtools 08/26: mpileup: Allow up to five alleles (N, A, C, G, T)

Charles Plessy plessy at moszumanska.debian.org
Tue Dec 10 10:06:25 UTC 2013


This is an automated email from the git hooks/post-receive script.

plessy pushed a commit to branch debian/unstable
in repository samtools.

commit bb313d1aa2309cd734a805817453f655ff6abe35
Author: Petr Danecek <pd3 at sanger.ac.uk>
Date:   Tue Nov 26 13:38:43 2013 +0000

    mpileup: Allow up to five alleles (N,A,C,G,T)
---
 bam2bcf.c | 44 ++++++++++++++++++++++----------------------
 bam2bcf.h |  2 +-
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/bam2bcf.c b/bam2bcf.c
index 4b1ed29..70f7311 100644
--- a/bam2bcf.c
+++ b/bam2bcf.c
@@ -138,7 +138,7 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t
 		mapQ = mapQ < bca->capQ? mapQ : bca->capQ;
 		if (q > mapQ) q = mapQ;
 		if (q > 63) q = 63;
-		if (q < 4) q = 4;
+		if (q < 4) q = 4;       // MQ=0 reads count as BQ=4
 		if (!is_indel) {
 			b = bam_seqi(bam_get_seq(p->b), p->qpos); // base
 			b = bam_nt16_nt4_table[b? b : ref_base]; // b is the 2-bit base
@@ -474,47 +474,47 @@ void calc_SegBias(const bcf_callret1_t *bcr, bcf_call_t *call)
  */
 int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int ref_base /*4-bit*/, bcf_call_t *call)
 {
-	int ref4, i, j, qsum[4];
+	int ref4, i, j;
+    unsigned int qsum[5] = {0,0,0,0,0};
 	int64_t tmp;
 	if (ref_base >= 0) {
 		call->ori_ref = ref4 = bam_nt16_nt4_table[ref_base];
 		if (ref4 > 4) ref4 = 4;
 	} else call->ori_ref = -1, ref4 = 0;
-	// calculate qsum
-	// this is done by calculating combined qsum across all samples
-	memset(qsum, 0, 4 * sizeof(int));
+	// calculate qsum, this is done by calculating combined qsum across all samples
 	for (i = 0; i < n; ++i)
 		for (j = 0; j < 4; ++j)
 			qsum[j] += calls[i].qsum[j];
 	// then encoding the base in the first two bits
     int qsum_tot=0;
     for (j=0; j<4; j++) { qsum_tot += qsum[j]; call->qsum[j] = 0; }
-	for (j = 0; j < 4; ++j) qsum[j] = qsum[j] << 2 | j;
+	for (j=0; j<4; j++) 
+    { 
+        assert( !(qsum[j]>>(sizeof(unsigned int)*8-2)) );   // if qsum is too big, insert sort will break
+        qsum[j] = qsum[j] << 2 | j; 
+    }
 	// find the top 2 alleles
-	for (i = 1; i < 4; ++i) // insertion sort
+	for (i = 1; i < 4; ++i) // insertion sort, ascending
 		for (j = i; j > 0 && qsum[j] < qsum[j-1]; --j)
 			tmp = qsum[j], qsum[j] = qsum[j-1], qsum[j-1] = tmp;
 
-
 	// Set the reference allele and alternative allele(s)
 
-	// Clear the allele list
+	// Set the alleles and QS values
 	for (i = 0; i < 5; ++i) call->a[i] = -1;
+	for (i = 0; i < 5; ++i) call->qsum[i] = 0;
 	call->unseen = -1;
 	call->a[0] = ref4;
-
-    // Set values for the QS tag
-	for (i = 3, j = 1; i >= 0; --i) {
-		if ((qsum[i]&3) != ref4) {
-			if (qsum[i]>>2 != 0) 
-            {
-                if ( j<4 ) call->qsum[j] = (float)(qsum[i]>>2)/qsum_tot; // ref N can make j>=4
-                call->a[j++]  = qsum[i]&3;
-            }
-			else break;
+	for (i = 3, j = 1; i >= 0; --i) // i: alleles sorted by QS; j, a[j]: output allele ordering
+    {
+		if ((qsum[i]&3) == ref4) 
+            call->qsum[0] = qsum_tot ? (float)(qsum[i]>>2)/qsum_tot : 0;    // REF's qsum
+        else
+        {
+			if ( !(qsum[i]>>2) ) break; // qsum is 0, this allele is not seen in the pileup
+            call->qsum[j] = (float)(qsum[i]>>2)/qsum_tot;
+            call->a[j++]  = qsum[i]&3;
 		}
-        else 
-            call->qsum[0] = qsum_tot ? (float)(qsum[i]>>2)/qsum_tot : 0;
 	}
 	if (ref_base >= 0) { // for SNPs, find the "unseen" base
 		if (((ref4 < 4 && j < 4) || (ref4 == 4 && j < 5)) && i >= 0)
@@ -665,7 +665,7 @@ int bcf_call2bcf(bcf_call_t *bc, bcf1_t *rec, bcf_callret1_t *bcr, int fmt_flag,
     for (i=0; i<16; i++) tmpf[i] = bc->anno[i];
     bcf_update_info_float(hdr, rec, "I16", tmpf, 16);
 
-    for (i=3; i>0; i--)
+    for (i=4; i>0; i--)
         if ( bc->qsum[i]!=0 ) break;
     bcf_update_info_float(hdr, rec, "QS", bc->qsum, i+1);
 
diff --git a/bam2bcf.h b/bam2bcf.h
index bd7d04e..c33970b 100644
--- a/bam2bcf.h
+++ b/bam2bcf.h
@@ -52,7 +52,7 @@ typedef struct {
     int tid, pos;
     bcf_hdr_t *bcf_hdr;
 	int a[5]; // alleles: ref, alt, alt2, alt3
-    float qsum[4];  // for the QS tag, scaled to 1
+    float qsum[5];  // for the QS tag, scaled to 1
 	int n, n_alleles, shift, ori_ref, unseen;
 	int n_supp; // number of supporting non-reference reads
 	double anno[16];

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/samtools.git



More information about the debian-med-commit mailing list