[med-svn] [bwa] 02/04: Imported Upstream version 0.7.7
Andreas Tille
tille at debian.org
Tue Mar 18 12:46:58 UTC 2014
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository bwa.
commit b93d335740137413db21e8136ce17f6c1ec9c0bc
Author: Andreas Tille <tille at debian.org>
Date: Tue Mar 18 13:47:03 2014 +0100
Imported Upstream version 0.7.7
---
NEWS | 17 +++++++++++++++++
README.md | 3 ++-
bwa.1 | 2 +-
bwa.c | 19 ++++++++++++-------
bwamem.c | 4 ++++
fastmap.c | 4 +++-
main.c | 22 +++++++++++++++++++++-
7 files changed, 60 insertions(+), 11 deletions(-)
diff --git a/NEWS b/NEWS
index eb9c37a..a7c64ed 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,20 @@
+Release 0.7.7 (25 February, 2014)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+This release fixes incorrect MD tags in the BWA-MEM output.
+
+A note about short-read mapping to GRCh38. The new human reference genome
+GRCh38 contains 60Mbp of program-generated alpha repeat arrays, some of which are
+hard masked as they cannot be localized. These highly repetitive arrays make
+BWA-MEM ~50% slower. If you are concerned about the performance of BWA-MEM, you
+may consider using option "-c2000 -m50". On simulated data, this setting helps
+the performance at a very minor cost in accuracy. I may consider changing the
+default in future releases.
+
+(0.7.7: 25 February 2014, r441)
+
+
+
Release 0.7.6 (31 Januaray, 2014)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/README.md b/README.md
index 009a4ca..ac1e57e 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ different sub-commands: **aln/samse/sampe** for BWA-backtrack,
###Availability
BWA is released under [GPLv3][1]. The latest souce code is [freely
-available][2] at github. Released packages can [be downloaded ][3] at
+available][2] at github. Released packages can [be downloaded][3] at
SourceForge. After you acquire the source code, simply use `make` to compile
and copy the single executable `bwa` to the destination you want. The only
dependency of BWA is [zlib][14].
@@ -73,3 +73,4 @@ do not have plan to submit it to a peer-reviewed journal in the near future.
[12]: http://arxiv.org/abs/1303.3997
[13]: http://arxiv.org/
[14]: http://zlib.net/
+[15]: https://github.com/lh3/bwa/tree/mem
diff --git a/bwa.1 b/bwa.1
index 5949a1b..601a529 100644
--- a/bwa.1
+++ b/bwa.1
@@ -1,4 +1,4 @@
-.TH bwa 1 "31 January 2014" "bwa-0.7.6" "Bioinformatics tools"
+.TH bwa 1 "25 February 2014" "bwa-0.7.7" "Bioinformatics tools"
.SH NAME
.PP
bwa - Burrows-Wheeler Alignment Tool
diff --git a/bwa.c b/bwa.c
index aec04d8..140d57e 100644
--- a/bwa.c
+++ b/bwa.c
@@ -93,6 +93,7 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa
int i;
int64_t rlen;
kstring_t str;
+ const char *int2base;
*n_cigar = 0; *NM = -1;
if (l_query <= 0 || rb >= re || (rb < l_pac && re > l_pac)) return 0; // reject if negative length or bridging the forward and reverse strand
@@ -124,9 +125,10 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa
// NW alignment
*score = ksw_global(l_query, query, rlen, rseq, 5, mat, q, r, w, n_cigar, &cigar);
}
- {// compute NM
+ {// compute NM and MD
int k, x, y, u, n_mm = 0, n_gap = 0;
str.l = str.m = *n_cigar * 4; str.s = (char*)cigar; // append MD to CIGAR
+ int2base = rb < l_pac? "ACGTN" : "TGCAN";
for (k = 0, x = y = u = 0; k < *n_cigar; ++k) {
int op, len;
cigar = (uint32_t*)str.s;
@@ -134,17 +136,20 @@ uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pa
if (op == 0) { // match
for (i = 0; i < len; ++i) {
if (query[x + i] != rseq[y + i]) {
- kputw(u, &str); kputc("ACGTN"[rseq[y+i]], &str);
+ kputw(u, &str);
+ kputc(int2base[rseq[y+i]], &str);
++n_mm; u = 0;
} else ++u;
}
x += len; y += len;
} else if (op == 2) { // deletion
- kputw(u, &str); kputc('^', &str);
- for (i = 0; i < len; ++i)
- kputc("ACGTN"[rseq[y+i]], &str);
- u = 0;
- y += len, n_gap += len;
+ if (k > 0 && k < *n_cigar - 1) { // don't do the following if D is the first or the last CIGAR
+ kputw(u, &str); kputc('^', &str);
+ for (i = 0; i < len; ++i)
+ kputc(int2base[rseq[y+i]], &str);
+ u = 0; n_gap += len;
+ }
+ y += len;
} else if (op == 1) x += len, n_gap += len; // insertion
}
kputw(u, &str); kputc(0, &str);
diff --git a/bwamem.c b/bwamem.c
index 6f77064..19ca561 100644
--- a/bwamem.c
+++ b/bwamem.c
@@ -1024,7 +1024,9 @@ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn
worker_t w;
mem_alnreg_v *regs;
mem_pestat_t pes[4];
+ double ctime, rtime;
+ ctime = cputime(); rtime = realtime();
regs = malloc(n * sizeof(mem_alnreg_v));
w.opt = opt; w.bwt = bwt; w.bns = bns; w.pac = pac;
w.seqs = seqs; w.regs = regs; w.n_processed = n_processed;
@@ -1036,4 +1038,6 @@ void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn
}
kt_for(opt->n_threads, worker2, &w, (opt->flag&MEM_F_PE)? n>>1 : n); // generate alignment
free(regs);
+ if (bwa_verbose >= 3)
+ fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime);
}
diff --git a/fastmap.c b/fastmap.c
index 40cea8c..72d850c 100644
--- a/fastmap.c
+++ b/fastmap.c
@@ -30,7 +30,7 @@ int main_mem(int argc, char *argv[])
int64_t n_processed = 0;
opt = mem_opt_init();
- while ((c = getopt(argc, argv, "paMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:")) >= 0) {
+ while ((c = getopt(argc, argv, "paMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:")) >= 0) {
if (c == 'k') opt->min_seed_len = atoi(optarg);
else if (c == 'w') opt->w = atoi(optarg);
else if (c == 'A') opt->a = atoi(optarg);
@@ -50,6 +50,7 @@ int main_mem(int argc, char *argv[])
else if (c == 'v') bwa_verbose = atoi(optarg);
else if (c == 'r') opt->split_factor = atof(optarg);
else if (c == 'D') opt->chain_drop_ratio = atof(optarg);
+ else if (c == 'm') opt->max_matesw = atoi(optarg);
else if (c == 'C') copy_comment = 1;
else if (c == 'Q') {
opt->mapQ_coef_len = atoi(optarg);
@@ -77,6 +78,7 @@ int main_mem(int argc, char *argv[])
// fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width);
fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ);
fprintf(stderr, " -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [%.2f]\n", opt->chain_drop_ratio);
+ fprintf(stderr, " -m INT perform at most INT rounds of mate rescues for each read [%d]\n", opt->max_matesw);
fprintf(stderr, " -S skip mate rescue\n");
fprintf(stderr, " -P skip pairing; mate rescue performed unless -S also in use\n");
fprintf(stderr, " -A INT score for a sequence match [%d]\n", opt->a);
diff --git a/main.c b/main.c
index f872917..a8df9c0 100644
--- a/main.c
+++ b/main.c
@@ -4,7 +4,7 @@
#include "utils.h"
#ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.7.6a-r433"
+#define PACKAGE_VERSION "0.7.7-r441"
#endif
int bwa_fa2pac(int argc, char *argv[]);
@@ -14,9 +14,17 @@ int bwa_bwt2sa(int argc, char *argv[]);
int bwa_index(int argc, char *argv[]);
int bwt_bwtgen_main(int argc, char *argv[]);
+int bwa_aln(int argc, char *argv[]);
+int bwa_sai2sam_se(int argc, char *argv[]);
+int bwa_sai2sam_pe(int argc, char *argv[]);
+
+int bwa_bwtsw2(int argc, char *argv[]);
+
int main_fastmap(int argc, char *argv[]);
int main_mem(int argc, char *argv[]);
+int main_pemerge(int argc, char *argv[]);
+
char *bwa_pg;
static int usage()
@@ -29,6 +37,11 @@ static int usage()
fprintf(stderr, "Command: index index sequences in the FASTA format\n");
fprintf(stderr, " mem BWA-MEM algorithm\n");
fprintf(stderr, " fastmap identify super-maximal exact matches\n");
+ fprintf(stderr, " pemerge merge overlapping paired ends (EXPERIMENTAL)\n");
+ fprintf(stderr, " aln gapped/ungapped alignment\n");
+ fprintf(stderr, " samse generate alignment (single ended)\n");
+ fprintf(stderr, " sampe generate alignment (paired ended)\n");
+ fprintf(stderr, " bwasw BWA-SW for long queries\n");
fprintf(stderr, "\n");
fprintf(stderr, " fa2pac convert FASTA to PAC format\n");
fprintf(stderr, " pac2bwt generate BWT from PAC\n");
@@ -60,8 +73,15 @@ int main(int argc, char *argv[])
else if (strcmp(argv[1], "bwtupdate") == 0) ret = bwa_bwtupdate(argc-1, argv+1);
else if (strcmp(argv[1], "bwt2sa") == 0) ret = bwa_bwt2sa(argc-1, argv+1);
else if (strcmp(argv[1], "index") == 0) ret = bwa_index(argc-1, argv+1);
+ else if (strcmp(argv[1], "aln") == 0) ret = bwa_aln(argc-1, argv+1);
+ else if (strcmp(argv[1], "samse") == 0) ret = bwa_sai2sam_se(argc-1, argv+1);
+ else if (strcmp(argv[1], "sampe") == 0) ret = bwa_sai2sam_pe(argc-1, argv+1);
+ else if (strcmp(argv[1], "bwtsw2") == 0) ret = bwa_bwtsw2(argc-1, argv+1);
+ else if (strcmp(argv[1], "dbwtsw") == 0) ret = bwa_bwtsw2(argc-1, argv+1);
+ else if (strcmp(argv[1], "bwasw") == 0) ret = bwa_bwtsw2(argc-1, argv+1);
else if (strcmp(argv[1], "fastmap") == 0) ret = main_fastmap(argc-1, argv+1);
else if (strcmp(argv[1], "mem") == 0) ret = main_mem(argc-1, argv+1);
+ else if (strcmp(argv[1], "pemerge") == 0) ret = main_pemerge(argc-1, argv+1);
else {
fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]);
return 1;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/bwa.git
More information about the debian-med-commit
mailing list