[med-svn] [libssw] 01/03: Imported Upstream version 1.1
Sascha Steinbiss
satta at debian.org
Mon Aug 22 18:25:51 UTC 2016
This is an automated email from the git hooks/post-receive script.
satta pushed a commit to branch master
in repository libssw.
commit f863fae12dca70c124d2011a075e8d8b9cd3609a
Author: Sascha Steinbiss <satta at debian.org>
Date: Mon Aug 22 18:03:48 2016 +0000
Imported Upstream version 1.1
---
README.md | 5 ++-
src/Makefile | 2 +-
src/main.c | 9 ++--
src/ssw.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
src/ssw.h | 56 ++++++++++++------------
src/ssw_cpp.cpp | 1 -
6 files changed, 163 insertions(+), 42 deletions(-)
diff --git a/README.md b/README.md
index 83e67b7..3534f6a 100644
--- a/README.md
+++ b/README.md
@@ -22,11 +22,14 @@ Wan-Ping Lee <wanping.lee at gmail.com>
Yongan Zhao <zhaoyanswill at gmail.com>
Daniel Cameron <cameron.d at wehi.edu.au>
-Last revision: 04/05/2016
+Last revision: 06/29/2016
##Overview
SSW is a fast implementation of the Smith-Waterman algorithm, which uses the Single-Instruction Multiple-Data (SIMD) instructions to parallelize the algorithm at the instruction level. SSW library provides an API that can be flexibly used by programs written in C, C++ and other languages. We also provide a software that can do protein and genome alignment directly. Current version of our implementation is ~50 times faster than an ordinary Smith-Waterman. It can return the Smith-Waterman [...]
+The Debian package of this library can be achieved here:
+https://tracker.debian.org/pkg/libssw
+
Note: When SSW open a gap, the gap open penalty alone is applied.
## C/C++ interface
diff --git a/src/Makefile b/src/Makefile
index f726378..f4e763b 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,6 +1,6 @@
CC = gcc
CXX = g++
-CFLAGS := -Wall -pipe -O3
+CFLAGS := -Wall -pipe -O2
CXXFLAGS := $(CFLAGS)
LOBJS = ssw.o
LCPPOBJS = ssw_cpp.o
diff --git a/src/main.c b/src/main.c
index 2ed7cc1..b03c219 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,7 +1,7 @@
/* main.c
* Created by Mengyao Zhao on 06/23/11.
- * Version 0.1.5
- * Last revision by Mengyao Zhao on 06/24/16.
+ * Version 1.0
+ * Last revision by Mengyao Zhao on 07/19/16.
*/
#include <stdlib.h>
@@ -56,7 +56,7 @@ static void reverse_comple(const char* seq, char* rc) {
if (start == end) rc[start] = (char)rc_table[(int8_t)seq[start]];
}
-static void ssw_write (const s_align* a,
+static void ssw_write (s_align* a,
const kseq_t* ref_seq,
const kseq_t* read,
const char* read_seq, // strand == 0: original read; strand == 1: reverse complement read
@@ -64,6 +64,7 @@ static void ssw_write (const s_align* a,
int8_t strand, // 0: forward aligned ; 1: reverse complement aligned
int8_t sam) { // 0: Blast like output; 1: Sam format output
+ int32_t mismatch;
if (sam == 0) { // Blast like output
fprintf(stdout, "target_name: %s\nquery_name: %s\noptimal_alignment_score: %d\t", ref_seq->name.s, read->name.s, a->score1);
if (a->score2 > 0) fprintf(stdout, "suboptimal_alignment_score: %d\t", a->score2);
@@ -161,11 +162,13 @@ end:
if (strand) fprintf(stdout, "16\t");
else fprintf(stdout, "0\t");
fprintf(stdout, "%s\t%d\t%d\t", ref_seq->name.s, a->ref_begin1 + 1, mapq);
+ mismatch = mark_mismatch(a->ref_begin1, a->read_begin1, a->read_end1, ref_seq->seq.s, read_seq, read->seq.l, &a->cigar, &a->cigarLen);
for (c = 0; c < a->cigarLen; ++c) {
char letter = cigar_int_to_op(a->cigar[c]);
uint32_t length = cigar_int_to_len(a->cigar[c]);
fprintf(stdout, "%lu%c", (unsigned long)length, letter);
}
+ // fprintf(stderr, "%s\tmismatch: %d\n", read->name.s, mismatch);
fprintf(stdout, "\t*\t0\t0\t");
for (c = a->read_begin1; c <= a->read_end1; ++c) fprintf(stdout, "%c", read_seq[c]);
fprintf(stdout, "\t");
diff --git a/src/ssw.c b/src/ssw.c
index 825c39b..b1fa2d1 100644
--- a/src/ssw.c
+++ b/src/ssw.c
@@ -31,7 +31,7 @@
* Created by Mengyao Zhao on 6/22/10.
* Copyright 2010 Boston College. All rights reserved.
* Version 0.1.4
- * Last revision by Mengyao Zhao on 02/11/16.
+ * Last revision by Mengyao Zhao on 07/19/16.
*
*/
@@ -529,6 +529,37 @@ end:
return bests;
}
+/*! @function Produce CIGAR 32-bit unsigned integer from CIGAR operation and CIGAR length
+ @param length length of CIGAR
+ @param op_letter CIGAR operation character ('M', 'I', etc)
+ @return 32-bit unsigned integer, representing encoded CIGAR operation and length
+*/
+uint32_t to_cigar_int (uint32_t length, char op_letter)
+{
+ switch (op_letter) {
+ case 'M': /* alignment match (can be a sequence match or mismatch */
+ default:
+ return length << BAM_CIGAR_SHIFT;
+ case 'S': /* soft clipping (clipped sequences present in SEQ) */
+ return (length << BAM_CIGAR_SHIFT) | (4u);
+ case 'D': /* deletion from the reference */
+ return (length << BAM_CIGAR_SHIFT) | (2u);
+ case 'I': /* insertion to the reference */
+ return (length << BAM_CIGAR_SHIFT) | (1u);
+ case 'H': /* hard clipping (clipped sequences NOT present in SEQ) */
+ return (length << BAM_CIGAR_SHIFT) | (5u);
+ case 'N': /* skipped region from the reference */
+ return (length << BAM_CIGAR_SHIFT) | (3u);
+ case 'P': /* padding (silent deletion from padded reference) */
+ return (length << BAM_CIGAR_SHIFT) | (6u);
+ case '=': /* sequence match */
+ return (length << BAM_CIGAR_SHIFT) | (7u);
+ case 'X': /* sequence mismatch */
+ return (length << BAM_CIGAR_SHIFT) | (8u);
+ }
+ return (uint32_t)-1; // This never happens
+}
+
static cigar* banded_sw (const int8_t* ref,
const int8_t* read,
int32_t refLen,
@@ -855,13 +886,98 @@ void align_destroy (s_align* a) {
free(a->cigar);
free(a);
}
-/*
-inline char cigar_int_to_op(uint32_t cigar_int) {
- return UNLIKELY((cigar_int & 0xfU) > 8) ? 'M': MAPSTR[cigar_int & 0xfU];
+
+uint32_t* add_cigar (uint32_t* new_cigar, int32_t* p, int32_t* s, uint32_t length, char op) {
+ if ((*p) >= (*s)) {
+ ++(*s);
+ kroundup32(*s);
+ new_cigar = (uint32_t*)realloc(new_cigar, (*s)*sizeof(uint32_t));
+ }
+ new_cigar[(*p) ++] = to_cigar_int(length, op);
+ return new_cigar;
}
+uint32_t* store_previous_m (int8_t choice, // 0: current not M, 1: current match, 2: current mismatch
+ uint32_t* length_m,
+ uint32_t* length_x,
+ int32_t* p,
+ int32_t* s,
+ uint32_t* new_cigar) {
+
+ if ((*length_m) && (choice == 2 || !choice)) {
+ new_cigar = add_cigar (new_cigar, p, s, (*length_m), '=');
+ (*length_m) = 0;
+ } else if ((*length_x) && (choice == 1 || !choice)) {
+ new_cigar = add_cigar (new_cigar, p, s, (*length_x), 'X');
+ (*length_x) = 0;
+ }
+ return new_cigar;
+}
+
+/*! @function:
+ 1. Calculate the number of mismatches.
+ 2. Modify the cigar string:
+ differentiate matches (=) and mismatches(X); add softclip(S) at the beginning and ending of the original cigar.
+ @return:
+ The number of mismatches.
+ The cigar and cigarLen are modified.
+*/
+int32_t mark_mismatch (int32_t ref_begin1,
+ int32_t read_begin1,
+ int32_t read_end1,
+ const char* ref,
+ const char* read,
+ int32_t readLen,
+ uint32_t** cigar,
+ int32_t* cigarLen) {
+
+ int32_t mismatch_length = 0, p = 0, i, length, j, s = *cigarLen + 2;
+ uint32_t *new_cigar = (uint32_t*)malloc(s*sizeof(uint32_t)), length_m = 0, length_x = 0;
+ char op;
+
+ ref += ref_begin1;
+ read += read_begin1;
+ if (read_begin1 > 0) new_cigar[p ++] = to_cigar_int(read_begin1, 'S');
+ for (i = 0; i < (*cigarLen); ++i) {
+ op = cigar_int_to_op((*cigar)[i]);
+ length = cigar_int_to_len((*cigar)[i]);
+ if (op == 'M') {
+ for (j = 0; j < length; ++j) {
+ fprintf(stderr, "ref[%d]: %c\tread[%d]: %c\n", j, *ref, j, *read);
+ if (*ref != *read) {
+ ++ mismatch_length;
+ fprintf(stderr, "length_m: %d\n", length_m);
+ // the previous is match; however the current one is mismatche
+ new_cigar = store_previous_m (2, &length_m, &length_x, &p, &s, new_cigar);
+ ++ length_x;
+ } else {
+ // the previous is mismatch; however the current one is matche
+ new_cigar = store_previous_m (1, &length_m, &length_x, &p, &s, new_cigar);
+ ++ length_m;
+ }
+ ++ ref;
+ ++ read;
+ }
+ }else if (op == 'I') {
+ read += length;
+ mismatch_length += length;
+ new_cigar = store_previous_m (0, &length_m, &length_x, &p, &s, new_cigar);
+ new_cigar = add_cigar (new_cigar, &p, &s, length, 'I');
+ }else if (op == 'D') {
+ ref += length;
+ mismatch_length += length;
+ new_cigar = store_previous_m (0, &length_m, &length_x, &p, &s, new_cigar);
+ new_cigar = add_cigar (new_cigar, &p, &s, length, 'D');
+ }
+ }
+ new_cigar = store_previous_m (0, &length_m, &length_x, &p, &s, new_cigar);
+
+ length = readLen - read_end1 - 1;
+ if (length > 0) new_cigar = add_cigar(new_cigar, &p, &s, length, 'S');
+
+ (*cigarLen) = p;
+ free(*cigar);
+ (*cigar) = new_cigar;
+ return mismatch_length;
+}
-inline uint32_t cigar_int_to_len (uint32_t cigar_int)
-{
- return cigar_int >> BAM_CIGAR_SHIFT;
-}*/
diff --git a/src/ssw.h b/src/ssw.h
index 685ecf3..43653e6 100644
--- a/src/ssw.h
+++ b/src/ssw.h
@@ -4,7 +4,7 @@
* Created by Mengyao Zhao on 6/22/10.
* Copyright 2010 Boston College. All rights reserved.
* Version 0.1.4
- * Last revision by Mengyao Zhao on 02/11/16.
+ * Last revision by Mengyao Zhao on 07/19/16.
*
*/
@@ -22,7 +22,7 @@ extern "C" {
#define MAPSTR "MIDNSHP=X"
#ifndef BAM_CIGAR_SHIFT
-#define BAM_CIGAR_SHIFT 4
+#define BAM_CIGAR_SHIFT 4u
#endif
@@ -129,37 +129,37 @@ s_align* ssw_align (const s_profile* prof,
*/
void align_destroy (s_align* a);
+/*! @function:
+ 1. Calculate the number of mismatches.
+ 2. Modify the cigar string:
+ differentiate matches (=), mismatches(X), and softclip(S).
+ @param ref_begin1 0-based best alignment beginning position on the reference sequence
+ @param read_begin1 0-based best alignment beginning position on the read sequence
+ @param read_end1 0-based best alignment ending position on the read sequence
+ @param ref pointer to the reference sequence
+ @param read pointer to the read sequence
+ @param readLen length of the read
+ @param cigar best alignment cigar; stored the same as that in BAM format, high 28 bits: length, low 4 bits: M/I/D (0/1/2)
+ @param cigarLen length of the cigar string
+ @return:
+ The number of mismatches.
+ The cigar and cigarLen are modified.
+*/
+int32_t mark_mismatch (int32_t ref_begin1,
+ int32_t read_begin1,
+ int32_t read_end1,
+ const char* ref,
+ const char* read,
+ int32_t readLen,
+ uint32_t** cigar,
+ int32_t* cigarLen);
+
/*! @function Produce CIGAR 32-bit unsigned integer from CIGAR operation and CIGAR length
@param length length of CIGAR
@param op_letter CIGAR operation character ('M', 'I', etc)
@return 32-bit unsigned integer, representing encoded CIGAR operation and length
*/
-static inline uint32_t to_cigar_int (uint32_t length, char op_letter)
-{
- switch (op_letter) {
- case 'M': /* alignment match (can be a sequence match or mismatch */
- default:
- return length << BAM_CIGAR_SHIFT;
- case 'S': /* soft clipping (clipped sequences present in SEQ) */
- return (length << BAM_CIGAR_SHIFT) | (4u);
- case 'D': /* deletion from the reference */
- return (length << BAM_CIGAR_SHIFT) | (2u);
- case 'I': /* insertion to the reference */
- return (length << BAM_CIGAR_SHIFT) | (1u);
- case 'H': /* hard clipping (clipped sequences NOT present in SEQ) */
- return (length << BAM_CIGAR_SHIFT) | (5u);
- case 'N': /* skipped region from the reference */
- return (length << BAM_CIGAR_SHIFT) | (3u);
- case 'P': /* padding (silent deletion from padded reference) */
- return (length << BAM_CIGAR_SHIFT) | (6u);
- case '=': /* sequence match */
- return (length << BAM_CIGAR_SHIFT) | (7u);
- case 'X': /* sequence mismatch */
- return (length << BAM_CIGAR_SHIFT) | (8u);
- }
- return (uint32_t)-1; // This never happens
-}
-
+uint32_t to_cigar_int (uint32_t length, char op_letter);
/*! @function Extract CIGAR operation character from CIGAR 32-bit unsigned integer
@param cigar_int 32-bit unsigned integer, representing encoded CIGAR operation and length
diff --git a/src/ssw_cpp.cpp b/src/ssw_cpp.cpp
index 7bd3beb..4e3df7d 100644
--- a/src/ssw_cpp.cpp
+++ b/src/ssw_cpp.cpp
@@ -117,7 +117,6 @@ void CleanPreviousMOperator(
// 1. Calculate the number of mismatches.
// 2. Modify the cigar string:
// differentiate matches (M) and mismatches(X).
-// Note that SSW does not differentiate matches and mismatches.
// @Return:
// The number of mismatches.
int CalculateNumberMismatch(
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libssw.git
More information about the debian-med-commit
mailing list