[med-svn] [bwa] 02/04: Imported Upstream version 0.7.7

Andreas Tille tille at debian.org
Tue Mar 18 12:46:58 UTC 2014


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository bwa.

commit b93d335740137413db21e8136ce17f6c1ec9c0bc
Author: Andreas Tille <tille at debian.org>
Date:   Tue Mar 18 13:47:03 2014 +0100

    Imported Upstream version 0.7.7
---
 NEWS      | 17 +++++++++++++++++
 README.md |  3 ++-
 bwa.1     |  2 +-
 bwa.c     | 19 ++++++++++++-------
 bwamem.c  |  4 ++++
 fastmap.c |  4 +++-
 main.c    | 22 +++++++++++++++++++++-
 7 files changed, 60 insertions(+), 11 deletions(-)

diff --git a/NEWS b/NEWS
index eb9c37a..a7c64ed 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,20 @@
+Release 0.7.7 (25 February, 2014)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This release fixes incorrect MD tags in the BWA-MEM output.
+
+A note about short-read mapping to GRCh38: the new human reference genome
+GRCh38 contains 60Mbp of program-generated alpha repeat arrays, some of which
+are hard-masked as they cannot be localized. These highly repetitive arrays make
+BWA-MEM ~50% slower. If you are concerned about the performance of BWA-MEM, you
+may consider using the options "-c2000 -m50". On simulated data, this setting
+improves performance at a very minor cost in accuracy. I may consider changing
+the default in future releases.
+
+(0.7.7: 25 February 2014, r441)
+
+
+
 Release 0.7.6 (31 Januaray, 2014)
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/README.md b/README.md
index 009a4ca..ac1e57e 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ different sub-commands: **aln/samse/sampe** for BWA-backtrack,
 ###Availability
 
 BWA is released under [GPLv3][1]. The latest souce code is [freely
-available][2] at github. Released packages can [be downloaded ][3] at
+available][2] at github. Released packages can [be downloaded][3] at
 SourceForge. After you acquire the source code, simply use `make` to compile
 and copy the single executable `bwa` to the destination you want. The only
 dependency of BWA is [zlib][14].
@@ -73,3 +73,4 @@ do not have plan to submit it to a peer-reviewed journal in the near future.
 [12]: http://arxiv.org/abs/1303.3997
 [13]: http://arxiv.org/
 [14]: http://zlib.net/
+[15]: https://github.com/lh3/bwa/tree/mem
diff --git a/bwa.1 b/bwa.1
index 5949a1b..601a529 100644
--- a/bwa.1
+++ b/bwa.1
@@ -1,4 +1,4 @@
-.TH bwa 1 "31 January 2014" "bwa-0.7.6" "Bioinformatics tools"
+.TH bwa 1 "25 February 2014" "bwa-0.7.7" "Bioinformatics tools"
 .SH NAME
 .PP
 bwa - Burrows-Wheeler Alignment Tool
diff --git a/bwa.c b/bwa.c
index aec04d8..140d57e 100644
--- a/bwa.c
+++ b/bwa.c
@@ -93,6 +93,7 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa
 	int i;
 	int64_t rlen;
 	kstring_t str;
+	const char *int2base;
 
 	*n_cigar = 0; *NM = -1;
 	if (l_query <= 0 || rb >= re || (rb < l_pac && re > l_pac)) return 0; // reject if negative length or bridging the forward and reverse strand
@@ -124,9 +125,10 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa
 		// NW alignment
 		*score = ksw_global(l_query, query, rlen, rseq, 5, mat, q, r, w, n_cigar, &cigar);
 	}
-	{// compute NM
+	{// compute NM and MD
 		int k, x, y, u, n_mm = 0, n_gap = 0;
 		str.l = str.m = *n_cigar * 4; str.s = (char*)cigar; // append MD to CIGAR
+		int2base = rb < l_pac? "ACGTN" : "TGCAN";
 		for (k = 0, x = y = u = 0; k < *n_cigar; ++k) {
 			int op, len;
 			cigar = (uint32_t*)str.s;
@@ -134,17 +136,20 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa
 			if (op == 0) { // match
 				for (i = 0; i < len; ++i) {
 					if (query[x + i] != rseq[y + i]) {
-						kputw(u, &str); kputc("ACGTN"[rseq[y+i]], &str);
+						kputw(u, &str);
+						kputc(int2base[rseq[y+i]], &str);
 						++n_mm; u = 0;
 					} else ++u;
 				}
 				x += len; y += len;
 			} else if (op == 2) { // deletion
-				kputw(u, &str); kputc('^', &str);
-				for (i = 0; i < len; ++i)
-					kputc("ACGTN"[rseq[y+i]], &str);
-				u = 0;
-				y += len, n_gap += len;
+				if (k > 0 && k < *n_cigar - 1) { // don't do the following if D is the first or the last CIGAR
+					kputw(u, &str); kputc('^', &str);
+					for (i = 0; i < len; ++i)
+						kputc(int2base[rseq[y+i]], &str);
+					u = 0; n_gap += len;
+				}
+				y += len;
 			} else if (op == 1) x += len, n_gap += len; // insertion
 		}
 		kputw(u, &str); kputc(0, &str);
diff --git a/bwamem.c b/bwamem.c
index 6f77064..19ca561 100644
--- a/bwamem.c
+++ b/bwamem.c
@@ -1024,7 +1024,9 @@ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn
 	worker_t w;
 	mem_alnreg_v *regs;
 	mem_pestat_t pes[4];
+	double ctime, rtime;
 
+	ctime = cputime(); rtime = realtime();
 	regs = malloc(n * sizeof(mem_alnreg_v));
 	w.opt = opt; w.bwt = bwt; w.bns = bns; w.pac = pac;
 	w.seqs = seqs; w.regs = regs; w.n_processed = n_processed;
@@ -1036,4 +1038,6 @@ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn
 	}
 	kt_for(opt->n_threads, worker2, &w, (opt->flag&MEM_F_PE)? n>>1 : n); // generate alignment
 	free(regs);
+	if (bwa_verbose >= 3)
+		fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime);
 }
diff --git a/fastmap.c b/fastmap.c
index 40cea8c..72d850c 100644
--- a/fastmap.c
+++ b/fastmap.c
@@ -30,7 +30,7 @@ int main_mem(int argc, char *argv[])
 	int64_t n_processed = 0;
 
 	opt = mem_opt_init();
-	while ((c = getopt(argc, argv, "paMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:")) >= 0) {
+	while ((c = getopt(argc, argv, "paMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:")) >= 0) {
 		if (c == 'k') opt->min_seed_len = atoi(optarg);
 		else if (c == 'w') opt->w = atoi(optarg);
 		else if (c == 'A') opt->a = atoi(optarg);
@@ -50,6 +50,7 @@ int main_mem(int argc, char *argv[])
 		else if (c == 'v') bwa_verbose = atoi(optarg);
 		else if (c == 'r') opt->split_factor = atof(optarg);
 		else if (c == 'D') opt->chain_drop_ratio = atof(optarg);
+		else if (c == 'm') opt->max_matesw = atoi(optarg);
 		else if (c == 'C') copy_comment = 1;
 		else if (c == 'Q') {
 			opt->mapQ_coef_len = atoi(optarg);
@@ -77,6 +78,7 @@ int main_mem(int argc, char *argv[])
 //		fprintf(stderr, "       -s INT     look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width);
 		fprintf(stderr, "       -c INT     skip seeds with more than INT occurrences [%d]\n", opt->max_occ);
 		fprintf(stderr, "       -D FLOAT   drop chains shorter than FLOAT fraction of the longest overlapping chain [%.2f]\n", opt->chain_drop_ratio);
+		fprintf(stderr, "       -m INT     perform at most INT rounds of mate rescues for each read [%d]\n", opt->max_matesw);
 		fprintf(stderr, "       -S         skip mate rescue\n");
 		fprintf(stderr, "       -P         skip pairing; mate rescue performed unless -S also in use\n");
 		fprintf(stderr, "       -A INT     score for a sequence match [%d]\n", opt->a);
diff --git a/main.c b/main.c
index f872917..a8df9c0 100644
--- a/main.c
+++ b/main.c
@@ -4,7 +4,7 @@
 #include "utils.h"
 
 #ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.7.6a-r433"
+#define PACKAGE_VERSION "0.7.7-r441"
 #endif
 
 int bwa_fa2pac(int argc, char *argv[]);
@@ -14,9 +14,17 @@ int bwa_bwt2sa(int argc, char *argv[]);
 int bwa_index(int argc, char *argv[]);
 int bwt_bwtgen_main(int argc, char *argv[]);
 
+int bwa_aln(int argc, char *argv[]);
+int bwa_sai2sam_se(int argc, char *argv[]);
+int bwa_sai2sam_pe(int argc, char *argv[]);
+
+int bwa_bwtsw2(int argc, char *argv[]);
+
 int main_fastmap(int argc, char *argv[]);
 int main_mem(int argc, char *argv[]);
 
+int main_pemerge(int argc, char *argv[]);
+	
 char *bwa_pg;
 
 static int usage()
@@ -29,6 +37,11 @@ static int usage()
 	fprintf(stderr, "Command: index         index sequences in the FASTA format\n");
 	fprintf(stderr, "         mem           BWA-MEM algorithm\n");
 	fprintf(stderr, "         fastmap       identify super-maximal exact matches\n");
+	fprintf(stderr, "         pemerge       merge overlapping paired ends (EXPERIMENTAL)\n");
+	fprintf(stderr, "         aln           gapped/ungapped alignment\n");
+	fprintf(stderr, "         samse         generate alignment (single ended)\n");
+	fprintf(stderr, "         sampe         generate alignment (paired ended)\n");
+	fprintf(stderr, "         bwasw         BWA-SW for long queries\n");
 	fprintf(stderr, "\n");
 	fprintf(stderr, "         fa2pac        convert FASTA to PAC format\n");
 	fprintf(stderr, "         pac2bwt       generate BWT from PAC\n");
@@ -60,8 +73,15 @@ int main(int argc, char *argv[])
 	else if (strcmp(argv[1], "bwtupdate") == 0) ret = bwa_bwtupdate(argc-1, argv+1);
 	else if (strcmp(argv[1], "bwt2sa") == 0) ret = bwa_bwt2sa(argc-1, argv+1);
 	else if (strcmp(argv[1], "index") == 0) ret = bwa_index(argc-1, argv+1);
+	else if (strcmp(argv[1], "aln") == 0) ret = bwa_aln(argc-1, argv+1);
+	else if (strcmp(argv[1], "samse") == 0) ret = bwa_sai2sam_se(argc-1, argv+1);
+	else if (strcmp(argv[1], "sampe") == 0) ret = bwa_sai2sam_pe(argc-1, argv+1);
+	else if (strcmp(argv[1], "bwtsw2") == 0) ret = bwa_bwtsw2(argc-1, argv+1);
+	else if (strcmp(argv[1], "dbwtsw") == 0) ret = bwa_bwtsw2(argc-1, argv+1);
+	else if (strcmp(argv[1], "bwasw") == 0) ret = bwa_bwtsw2(argc-1, argv+1);
 	else if (strcmp(argv[1], "fastmap") == 0) ret = main_fastmap(argc-1, argv+1);
 	else if (strcmp(argv[1], "mem") == 0) ret = main_mem(argc-1, argv+1);
+	else if (strcmp(argv[1], "pemerge") == 0) ret = main_pemerge(argc-1, argv+1);
 	else {
 		fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]);
 		return 1;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bwa.git



More information about the debian-med-commit mailing list