[med-svn] [libvcflib] 05/09: Imported Upstream version 1.0.0~rc1+dfsg1
Andreas Tille
tille at debian.org
Tue Nov 22 12:23:29 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository libvcflib.
commit dd96c4485027624d10fdc38b2ea7197787f39141
Author: Andreas Tille <tille at debian.org>
Date: Tue Nov 22 11:22:20 2016 +0100
Imported Upstream version 1.0.0~rc1+dfsg1
---
src/ssw.c | 834 --------------------------------------------------------
src/ssw.h | 129 ---------
src/ssw_cpp.cpp | 399 ---------------------------
src/ssw_cpp.h | 216 ---------------
4 files changed, 1578 deletions(-)
diff --git a/src/ssw.c b/src/ssw.c
deleted file mode 100644
index 69646f1..0000000
--- a/src/ssw.c
+++ /dev/null
@@ -1,834 +0,0 @@
-/*
- * ssw.c
- *
- * Created by Mengyao Zhao on 6/22/10.
- * Copyright 2010 Boston College. All rights reserved.
- * Version 0.1.4
- * Last revision by Mengyao Zhao on 07/31/12.
- *
- */
-
-#include <emmintrin.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include "ssw.h"
-
-#ifdef __GNUC__
-#define LIKELY(x) __builtin_expect((x),1)
-#define UNLIKELY(x) __builtin_expect((x),0)
-#else
-#define LIKELY(x) (x)
-#define UNLIKELY(x) (x)
-#endif
-
-/* Convert the coordinate in the scoring matrix into the coordinate in one line of the band. */
-#define set_u(u, w, i, j) { int x=(i)-(w); x=x>0?x:0; (u)=(j)-x+1; }
-
-/* Convert the coordinate in the direction matrix into the coordinate in one line of the band. */
-#define set_d(u, w, i, j, p) { int x=(i)-(w); x=x>0?x:0; x=(j)-x; (u)=x*3+p; }
-
-/*! @function
- @abstract Round an integer to the next closest power-2 integer.
- @param x integer to be rounded (in place)
- @discussion x will be modified.
- */
-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
-
-typedef struct {
- uint16_t score;
- int32_t ref; //0-based position
- int32_t read; //alignment ending position on read, 0-based
-} alignment_end;
-
-typedef struct {
- uint32_t* seq;
- int32_t length;
-} cigar;
-
-struct _profile{
- __m128i* profile_byte; // 0: none
- __m128i* profile_word; // 0: none
- const int8_t* read;
- const int8_t* mat;
- int32_t readLen;
- int32_t n;
- uint8_t bias;
-};
-
-/* Generate query profile rearrange query sequence & calculate the weight of match/mismatch. */
-__m128i* qP_byte (const int8_t* read_num,
- const int8_t* mat,
- const int32_t readLen,
- const int32_t n, /* the edge length of the squre matrix mat */
- uint8_t bias) {
-
- int32_t segLen = (readLen + 15) / 16; /* Split the 128 bit register into 16 pieces.
- Each piece is 8 bit. Split the read into 16 segments.
- Calculat 16 segments in parallel.
- */
- __m128i* vProfile = (__m128i*)malloc(n * segLen * sizeof(__m128i));
- int8_t* t = (int8_t*)vProfile;
- int32_t nt, i, j, segNum;
-
- /* Generate query profile rearrange query sequence & calculate the weight of match/mismatch */
- for (nt = 0; LIKELY(nt < n); nt ++) {
- for (i = 0; i < segLen; i ++) {
- j = i;
- for (segNum = 0; LIKELY(segNum < 16) ; segNum ++) {
- *t++ = j>= readLen ? bias : mat[nt * n + read_num[j]] + bias;
- j += segLen;
- }
- }
- }
- return vProfile;
-}
-
-/* Striped Smith-Waterman
- Record the highest score of each reference position.
- Return the alignment score and ending position of the best alignment, 2nd best alignment, etc.
- Gap begin and gap extension are different.
- wight_match > 0, all other weights < 0.
- The returned positions are 0-based.
- */
-alignment_end* sw_sse2_byte (const int8_t* ref,
- int8_t ref_dir, // 0: forward ref; 1: reverse ref
- int32_t refLen,
- int32_t readLen,
- const uint8_t weight_gapO, /* will be used as - */
- const uint8_t weight_gapE, /* will be used as - */
- __m128i* vProfile,
- uint8_t terminate, /* the best alignment score: used to terminate
- the matrix calculation when locating the
- alignment beginning point. If this score
- is set to 0, it will not be used */
- uint8_t bias, /* Shift 0 point to a positive value. */
- int32_t maskLen) {
-
-#define max16(m, vm) (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 8)); \
- (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 4)); \
- (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 2)); \
- (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 1)); \
- (m) = _mm_extract_epi16((vm), 0)
-
- uint8_t max = 0; /* the max alignment score */
- int32_t end_read = readLen - 1;
- int32_t end_ref = -1; /* 0_based best alignment ending point; Initialized as isn't aligned -1. */
- int32_t segLen = (readLen + 15) / 16; /* number of segment */
-
- /* array to record the largest score of each reference position */
- uint8_t* maxColumn = (uint8_t*) calloc(refLen, 1);
-
- /* array to record the alignment read ending position of the largest score of each reference position */
- int32_t* end_read_column = (int32_t*) calloc(refLen, sizeof(int32_t));
-
- /* Define 16 byte 0 vector. */
- __m128i vZero = _mm_set1_epi32(0);
-
- __m128i* pvHStore = (__m128i*) calloc(segLen, sizeof(__m128i));
- __m128i* pvHLoad = (__m128i*) calloc(segLen, sizeof(__m128i));
- __m128i* pvE = (__m128i*) calloc(segLen, sizeof(__m128i));
- __m128i* pvHmax = (__m128i*) calloc(segLen, sizeof(__m128i));
-
- int32_t i, j;
- /* 16 byte insertion begin vector */
- __m128i vGapO = _mm_set1_epi8(weight_gapO);
-
- /* 16 byte insertion extension vector */
- __m128i vGapE = _mm_set1_epi8(weight_gapE);
-
- /* 16 byte bias vector */
- __m128i vBias = _mm_set1_epi8(bias);
-
- __m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */
- __m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */
- __m128i vTemp;
- int32_t edge, begin = 0, end = refLen, step = 1;
-// int32_t distance = readLen * 2 / 3;
-// int32_t distance = readLen / 2;
-// int32_t distance = readLen;
-
- /* outer loop to process the reference sequence */
- if (ref_dir == 1) {
- begin = refLen - 1;
- end = -1;
- step = -1;
- }
- for (i = begin; LIKELY(i != end); i += step) {
- int32_t cmp;
- __m128i e = vZero, vF = vZero, vMaxColumn = vZero; /* Initialize F value to 0.
- Any errors to vH values will be corrected in the Lazy_F loop.
- */
-// max16(maxColumn[i], vMaxColumn);
-// fprintf(stderr, "middle[%d]: %d\n", i, maxColumn[i]);
-
- __m128i vH = pvHStore[segLen - 1];
- vH = _mm_slli_si128 (vH, 1); /* Shift the 128-bit value in vH left by 1 byte. */
- __m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */
-
- /* Swap the 2 H buffers. */
- __m128i* pv = pvHLoad;
- pvHLoad = pvHStore;
- pvHStore = pv;
-
- /* inner loop to process the query sequence */
- for (j = 0; LIKELY(j < segLen); ++j) {
- vH = _mm_adds_epu8(vH, _mm_load_si128(vP + j));
- vH = _mm_subs_epu8(vH, vBias); /* vH will be always > 0 */
- // max16(maxColumn[i], vH);
- // fprintf(stderr, "H[%d]: %d\n", i, maxColumn[i]);
-// int8_t* t;
-// int32_t ti;
-//for (t = (int8_t*)&vH, ti = 0; ti < 16; ++ti) fprintf(stderr, "%d\t", *t++);
-
- /* Get max from vH, vE and vF. */
- e = _mm_load_si128(pvE + j);
- vH = _mm_max_epu8(vH, e);
- vH = _mm_max_epu8(vH, vF);
- vMaxColumn = _mm_max_epu8(vMaxColumn, vH);
-
- // max16(maxColumn[i], vMaxColumn);
- // fprintf(stderr, "middle[%d]: %d\n", i, maxColumn[i]);
-// for (t = (int8_t*)&vMaxColumn, ti = 0; ti < 16; ++ti) fprintf(stderr, "%d\t", *t++);
-
- /* Save vH values. */
- _mm_store_si128(pvHStore + j, vH);
-
- /* Update vE value. */
- vH = _mm_subs_epu8(vH, vGapO); /* saturation arithmetic, result >= 0 */
- e = _mm_subs_epu8(e, vGapE);
- e = _mm_max_epu8(e, vH);
- _mm_store_si128(pvE + j, e);
-
- /* Update vF value. */
- vF = _mm_subs_epu8(vF, vGapE);
- vF = _mm_max_epu8(vF, vH);
-
- /* Load the next vH. */
- vH = _mm_load_si128(pvHLoad + j);
- }
-
- /* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */
- /* reset pointers to the start of the saved data */
- j = 0;
- vH = _mm_load_si128 (pvHStore + j);
-
- /* the computed vF value is for the given column. since */
- /* we are at the end, we need to shift the vF value over */
- /* to the next column. */
- vF = _mm_slli_si128 (vF, 1);
- vTemp = _mm_subs_epu8 (vH, vGapO);
- vTemp = _mm_subs_epu8 (vF, vTemp);
- vTemp = _mm_cmpeq_epi8 (vTemp, vZero);
- cmp = _mm_movemask_epi8 (vTemp);
-
- while (cmp != 0xffff)
- {
- vH = _mm_max_epu8 (vH, vF);
- vMaxColumn = _mm_max_epu8(vMaxColumn, vH);
- _mm_store_si128 (pvHStore + j, vH);
- vF = _mm_subs_epu8 (vF, vGapE);
- j++;
- if (j >= segLen)
- {
- j = 0;
- vF = _mm_slli_si128 (vF, 1);
- }
- vH = _mm_load_si128 (pvHStore + j);
-
- vTemp = _mm_subs_epu8 (vH, vGapO);
- vTemp = _mm_subs_epu8 (vF, vTemp);
- vTemp = _mm_cmpeq_epi8 (vTemp, vZero);
- cmp = _mm_movemask_epi8 (vTemp);
- }
-
- vMaxScore = _mm_max_epu8(vMaxScore, vMaxColumn);
- vTemp = _mm_cmpeq_epi8(vMaxMark, vMaxScore);
- cmp = _mm_movemask_epi8(vTemp);
- if (cmp != 0xffff) {
- uint8_t temp;
- vMaxMark = vMaxScore;
- max16(temp, vMaxScore);
- vMaxScore = vMaxMark;
-
- if (LIKELY(temp > max)) {
- max = temp;
- if (max + bias >= 255) break; //overflow
- end_ref = i;
-
- /* Store the column with the highest alignment score in order to trace the alignment ending position on read. */
- for (j = 0; LIKELY(j < segLen); ++j) pvHmax[j] = pvHStore[j];
- }
- }
-
- /* Record the max score of current column. */
- max16(maxColumn[i], vMaxColumn);
-// fprintf(stderr, "maxColumn[%d]: %d\n", i, maxColumn[i]);
- if (maxColumn[i] == terminate) break;
- }
-
- /* Trace the alignment ending position on read. */
- uint8_t *t = (uint8_t*)pvHmax;
- int32_t column_len = segLen * 16;
- for (i = 0; LIKELY(i < column_len); ++i, ++t) {
- int32_t temp;
- if (*t == max) {
- temp = i / 16 + i % 16 * segLen;
- if (temp < end_read) end_read = temp;
- }
- }
-
- free(pvHmax);
- free(pvE);
- free(pvHLoad);
- free(pvHStore);
-
- /* Find the most possible 2nd best alignment. */
- alignment_end* bests = (alignment_end*) calloc(2, sizeof(alignment_end));
- bests[0].score = max + bias >= 255 ? 255 : max;
- bests[0].ref = end_ref;
- bests[0].read = end_read;
-
- bests[1].score = 0;
- bests[1].ref = 0;
- bests[1].read = 0;
-
- edge = (end_ref - maskLen) > 0 ? (end_ref - maskLen) : 0;
- for (i = 0; i < edge; i ++) {
-// fprintf (stderr, "maxColumn[%d]: %d\n", i, maxColumn[i]);
- if (maxColumn[i] > bests[1].score) {
- bests[1].score = maxColumn[i];
- bests[1].ref = i;
- }
- }
- edge = (end_ref + maskLen) > refLen ? refLen : (end_ref + maskLen);
- for (i = edge + 1; i < refLen; i ++) {
-// fprintf (stderr, "refLen: %d\tmaxColumn[%d]: %d\n", refLen, i, maxColumn[i]);
- if (maxColumn[i] > bests[1].score) {
- bests[1].score = maxColumn[i];
- bests[1].ref = i;
- }
- }
-
- free(maxColumn);
- free(end_read_column);
- return bests;
-}
-
-__m128i* qP_word (const int8_t* read_num,
- const int8_t* mat,
- const int32_t readLen,
- const int32_t n) {
-
- int32_t segLen = (readLen + 7) / 8;
- __m128i* vProfile = (__m128i*)malloc(n * segLen * sizeof(__m128i));
- int16_t* t = (int16_t*)vProfile;
- int32_t nt, i, j;
- int32_t segNum;
-
- /* Generate query profile rearrange query sequence & calculate the weight of match/mismatch */
- for (nt = 0; LIKELY(nt < n); nt ++) {
- for (i = 0; i < segLen; i ++) {
- j = i;
- for (segNum = 0; LIKELY(segNum < 8) ; segNum ++) {
- *t++ = j>= readLen ? 0 : mat[nt * n + read_num[j]];
- j += segLen;
- }
- }
- }
- return vProfile;
-}
-
-alignment_end* sw_sse2_word (const int8_t* ref,
- int8_t ref_dir, // 0: forward ref; 1: reverse ref
- int32_t refLen,
- int32_t readLen,
- const uint8_t weight_gapO, /* will be used as - */
- const uint8_t weight_gapE, /* will be used as - */
- __m128i* vProfile,
- uint16_t terminate,
- int32_t maskLen) {
-
-#define max8(m, vm) (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 8)); \
- (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 4)); \
- (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 2)); \
- (m) = _mm_extract_epi16((vm), 0)
-
- uint16_t max = 0; /* the max alignment score */
- int32_t end_read = readLen - 1;
- int32_t end_ref = 0; /* 1_based best alignment ending point; Initialized as isn't aligned - 0. */
- int32_t segLen = (readLen + 7) / 8; /* number of segment */
-
- /* array to record the largest score of each reference position */
- uint16_t* maxColumn = (uint16_t*) calloc(refLen, 2);
-
- /* array to record the alignment read ending position of the largest score of each reference position */
- int32_t* end_read_column = (int32_t*) calloc(refLen, sizeof(int32_t));
-
- /* Define 16 byte 0 vector. */
- __m128i vZero = _mm_set1_epi32(0);
-
- __m128i* pvHStore = (__m128i*) calloc(segLen, sizeof(__m128i));
- __m128i* pvHLoad = (__m128i*) calloc(segLen, sizeof(__m128i));
- __m128i* pvE = (__m128i*) calloc(segLen, sizeof(__m128i));
- __m128i* pvHmax = (__m128i*) calloc(segLen, sizeof(__m128i));
-
- int32_t i, j, k;
- /* 16 byte insertion begin vector */
- __m128i vGapO = _mm_set1_epi16(weight_gapO);
-
- /* 16 byte insertion extension vector */
- __m128i vGapE = _mm_set1_epi16(weight_gapE);
-
- /* 16 byte bias vector */
- __m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */
- __m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */
- __m128i vTemp;
- int32_t edge, begin = 0, end = refLen, step = 1;
-
- /* outer loop to process the reference sequence */
- if (ref_dir == 1) {
- begin = refLen - 1;
- end = -1;
- step = -1;
- }
- for (i = begin; LIKELY(i != end); i += step) {
- int32_t cmp;
- __m128i e = vZero, vF = vZero; /* Initialize F value to 0.
- Any errors to vH values will be corrected in the Lazy_F loop.
- */
- __m128i vH = pvHStore[segLen - 1];
- vH = _mm_slli_si128 (vH, 2); /* Shift the 128-bit value in vH left by 2 byte. */
-
- /* Swap the 2 H buffers. */
- __m128i* pv = pvHLoad;
-
- __m128i vMaxColumn = vZero; /* vMaxColumn is used to record the max values of column i. */
-
- __m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */
- pvHLoad = pvHStore;
- pvHStore = pv;
-
- /* inner loop to process the query sequence */
- for (j = 0; LIKELY(j < segLen); j ++) {
- vH = _mm_adds_epi16(vH, _mm_load_si128(vP + j));
-
- /* Get max from vH, vE and vF. */
- e = _mm_load_si128(pvE + j);
- vH = _mm_max_epi16(vH, e);
- vH = _mm_max_epi16(vH, vF);
- vMaxColumn = _mm_max_epi16(vMaxColumn, vH);
-
- /* Save vH values. */
- _mm_store_si128(pvHStore + j, vH);
-
- /* Update vE value. */
- vH = _mm_subs_epu16(vH, vGapO); /* saturation arithmetic, result >= 0 */
- e = _mm_subs_epu16(e, vGapE);
- e = _mm_max_epi16(e, vH);
- _mm_store_si128(pvE + j, e);
-
- /* Update vF value. */
- vF = _mm_subs_epu16(vF, vGapE);
- vF = _mm_max_epi16(vF, vH);
-
- /* Load the next vH. */
- vH = _mm_load_si128(pvHLoad + j);
- }
-
- /* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */
- for (k = 0; LIKELY(k < 8); ++k) {
- vF = _mm_slli_si128 (vF, 2);
- for (j = 0; LIKELY(j < segLen); ++j) {
- vH = _mm_load_si128(pvHStore + j);
- vH = _mm_max_epi16(vH, vF);
- _mm_store_si128(pvHStore + j, vH);
- vH = _mm_subs_epu16(vH, vGapO);
- vF = _mm_subs_epu16(vF, vGapE);
- if (UNLIKELY(! _mm_movemask_epi8(_mm_cmpgt_epi16(vF, vH)))) goto end;
- }
- }
-
-end:
- vMaxScore = _mm_max_epi16(vMaxScore, vMaxColumn);
- vTemp = _mm_cmpeq_epi16(vMaxMark, vMaxScore);
- cmp = _mm_movemask_epi8(vTemp);
- if (cmp != 0xffff) {
- uint16_t temp;
- vMaxMark = vMaxScore;
- max8(temp, vMaxScore);
- vMaxScore = vMaxMark;
-
- if (LIKELY(temp > max)) {
- max = temp;
- end_ref = i;
- for (j = 0; LIKELY(j < segLen); ++j) pvHmax[j] = pvHStore[j];
- }
- }
-
- /* Record the max score of current column. */
- max8(maxColumn[i], vMaxColumn);
- if (maxColumn[i] == terminate) break;
- }
-
- /* Trace the alignment ending position on read. */
- uint16_t *t = (uint16_t*)pvHmax;
- int32_t column_len = segLen * 8;
- for (i = 0; LIKELY(i < column_len); ++i, ++t) {
- int32_t temp;
- if (*t == max) {
- temp = i / 8 + i % 8 * segLen;
- if (temp < end_read) end_read = temp;
- }
- }
-
- free(pvHmax);
- free(pvE);
- free(pvHLoad);
- free(pvHStore);
-
- /* Find the most possible 2nd best alignment. */
- alignment_end* bests = (alignment_end*) calloc(2, sizeof(alignment_end));
- bests[0].score = max;
- bests[0].ref = end_ref;
- bests[0].read = end_read;
-
- bests[1].score = 0;
- bests[1].ref = 0;
- bests[1].read = 0;
-
- edge = (end_ref - maskLen) > 0 ? (end_ref - maskLen) : 0;
- for (i = 0; i < edge; i ++) {
- if (maxColumn[i] > bests[1].score) {
- bests[1].score = maxColumn[i];
- bests[1].ref = i;
- }
- }
- edge = (end_ref + maskLen) > refLen ? refLen : (end_ref + maskLen);
- for (i = edge; i < refLen; i ++) {
- if (maxColumn[i] > bests[1].score) {
- bests[1].score = maxColumn[i];
- bests[1].ref = i;
- }
- }
-
- free(maxColumn);
- free(end_read_column);
- return bests;
-}
-
-cigar* banded_sw (const int8_t* ref,
- const int8_t* read,
- int32_t refLen,
- int32_t readLen,
- int32_t score,
- const uint32_t weight_gapO, /* will be used as - */
- const uint32_t weight_gapE, /* will be used as - */
- int32_t band_width,
- const int8_t* mat, /* pointer to the weight matrix */
- int32_t n) {
-
- uint32_t *c = (uint32_t*)malloc(16 * sizeof(uint32_t)), *c1;
- int32_t i, j, e, f, temp1, temp2, s = 16, s1 = 8, s2 = 1024, l, max = 0;
- int32_t width, width_d, *h_b, *e_b, *h_c;
- int8_t *direction, *direction_line;
- cigar* result = (cigar*)malloc(sizeof(cigar));
- h_b = (int32_t*)malloc(s1 * sizeof(int32_t));
- e_b = (int32_t*)malloc(s1 * sizeof(int32_t));
- h_c = (int32_t*)malloc(s1 * sizeof(int32_t));
- direction = (int8_t*)malloc(s2 * sizeof(int8_t));
-
- do {
- width = band_width * 2 + 3, width_d = band_width * 2 + 1;
- while (width >= s1) {
- ++s1;
- kroundup32(s1);
- h_b = (int32_t*)realloc(h_b, s1 * sizeof(int32_t));
- e_b = (int32_t*)realloc(e_b, s1 * sizeof(int32_t));
- h_c = (int32_t*)realloc(h_c, s1 * sizeof(int32_t));
- }
- while (width_d * readLen * 3 >= s2) {
- ++s2;
- kroundup32(s2);
- if (s2 < 0) {
- fprintf(stderr, "Alignment score and position are not consensus.\n");
- exit(1);
- }
- direction = (int8_t*)realloc(direction, s2 * sizeof(int8_t));
- }
- direction_line = direction;
- for (j = 1; LIKELY(j < width - 1); j ++) h_b[j] = 0;
- for (i = 0; LIKELY(i < readLen); i ++) {
- int32_t beg = 0, end = refLen - 1, u = 0, edge;
- j = i - band_width; beg = beg > j ? beg : j; // band start
- j = i + band_width; end = end < j ? end : j; // band end
- edge = end + 1 < width - 1 ? end + 1 : width - 1;
- f = h_b[0] = e_b[0] = h_b[edge] = e_b[edge] = h_c[0] = 0;
- direction_line = direction + width_d * i * 3;
-
- for (j = beg; LIKELY(j <= end); j ++) {
- int32_t b, e1, f1, d, de, df, dh;
- set_u(u, band_width, i, j); set_u(e, band_width, i - 1, j);
- set_u(b, band_width, i, j - 1); set_u(d, band_width, i - 1, j - 1);
- set_d(de, band_width, i, j, 0);
- set_d(df, band_width, i, j, 1);
- set_d(dh, band_width, i, j, 2);
-
- temp1 = i == 0 ? -weight_gapO : h_b[e] - weight_gapO;
- temp2 = i == 0 ? -weight_gapE : e_b[e] - weight_gapE;
- e_b[u] = temp1 > temp2 ? temp1 : temp2;
- direction_line[de] = temp1 > temp2 ? 3 : 2;
-
- temp1 = h_c[b] - weight_gapO;
- temp2 = f - weight_gapE;
- f = temp1 > temp2 ? temp1 : temp2;
- direction_line[df] = temp1 > temp2 ? 5 : 4;
-
- e1 = e_b[u] > 0 ? e_b[u] : 0;
- f1 = f > 0 ? f : 0;
- temp1 = e1 > f1 ? e1 : f1;
- temp2 = h_b[d] + mat[ref[j] * n + read[i]];
- h_c[u] = temp1 > temp2 ? temp1 : temp2;
-
- if (h_c[u] > max) max = h_c[u];
-
- if (temp1 <= temp2) direction_line[dh] = 1;
- else direction_line[dh] = e1 > f1 ? direction_line[de] : direction_line[df];
- }
- for (j = 1; j <= u; j ++) h_b[j] = h_c[j];
- }
- band_width *= 2;
- } while (LIKELY(max < score));
- band_width /= 2;
-
- // trace back
- i = readLen - 1;
- j = refLen - 1;
- e = 0; // Count the number of M, D or I.
- l = 0; // record length of current cigar
- f = max = 0; // M
- temp2 = 2; // h
- while (LIKELY(i > 0)) {
- set_d(temp1, band_width, i, j, temp2);
- switch (direction_line[temp1]) {
- case 1:
- --i;
- --j;
- temp2 = 2;
- direction_line -= width_d * 3;
- f = 0; // M
- break;
- case 2:
- --i;
- temp2 = 0; // e
- direction_line -= width_d * 3;
- f = 1; // I
- break;
- case 3:
- --i;
- temp2 = 2;
- direction_line -= width_d * 3;
- f = 1; // I
- break;
- case 4:
- --j;
- temp2 = 1;
- f = 2; // D
- break;
- case 5:
- --j;
- temp2 = 2;
- f = 2; // D
- break;
- default:
- fprintf(stderr, "Trace back error: %d.\n", direction_line[temp1 - 1]);
- return 0;
- }
- if (f == max) ++e;
- else {
- ++l;
- while (l >= s) {
- ++s;
- kroundup32(s);
- c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
- }
- c[l - 1] = e<<4|max;
- max = f;
- e = 1;
- }
- }
- if (f == 0) {
- ++l;
- while (l >= s) {
- ++s;
- kroundup32(s);
- c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
- }
- c[l - 1] = (e+1)<<4;
- }else {
- l += 2;
- while (l >= s) {
- ++s;
- kroundup32(s);
- c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
- }
- c[l - 2] = e<<4|f;
- c[l - 1] = 16; // 1M
- }
-
- // reverse cigar
- c1 = (uint32_t*)malloc(l * sizeof(uint32_t));
- s = 0;
- e = l - 1;
- while (LIKELY(s <= e)) {
- c1[s] = c[e];
- c1[e] = c[s];
- ++ s;
- -- e;
- }
- result->seq = c1;
- result->length = l;
-
- free(direction);
- free(h_c);
- free(e_b);
- free(h_b);
- free(c);
- return result;
-}
-
-int8_t* seq_reverse(const int8_t* seq, int32_t end) /* end is 0-based alignment ending position */
-{
- int8_t* reverse = (int8_t*)calloc(end + 1, sizeof(int8_t));
- int32_t start = 0;
- while (LIKELY(start <= end)) {
- reverse[start] = seq[end];
- reverse[end] = seq[start];
- ++ start;
- -- end;
- }
- return reverse;
-}
-
-s_profile* ssw_init (const int8_t* read, const int32_t readLen, const int8_t* mat, const int32_t n, const int8_t score_size) {
- s_profile* p = (s_profile*)calloc(1, sizeof(struct _profile));
- p->profile_byte = 0;
- p->profile_word = 0;
- p->bias = 0;
-
- if (score_size == 0 || score_size == 2) {
- /* Find the bias to use in the substitution matrix */
- int32_t bias = 0, i;
- for (i = 0; i < n*n; i++) if (mat[i] < bias) bias = mat[i];
- bias = abs(bias);
-
- p->bias = bias;
- p->profile_byte = qP_byte (read, mat, readLen, n, bias);
- }
- if (score_size == 1 || score_size == 2) p->profile_word = qP_word (read, mat, readLen, n);
- p->read = read;
- p->mat = mat;
- p->readLen = readLen;
- p->n = n;
- return p;
-}
-
-void init_destroy (s_profile* p) {
- free(p->profile_byte);
- free(p->profile_word);
- free(p);
-}
-
-s_align* ssw_align (const s_profile* prof,
- const int8_t* ref,
- int32_t refLen,
- const uint8_t weight_gapO,
- const uint8_t weight_gapE,
- const uint8_t flag, // (from high to low) bit 5: return the best alignment beginning position; 6: if (ref_end1 - ref_begin1 <= filterd) && (read_end1 - read_begin1 <= filterd), return cigar; 7: if max score >= filters, return cigar; 8: always return cigar; if 6 & 7 are both setted, only return cigar when both filter fulfilled
- const uint16_t filters,
- const int32_t filterd,
- const int32_t maskLen) {
-
- alignment_end* bests = 0, *bests_reverse = 0;
- __m128i* vP = 0;
- int32_t word = 0, band_width = 0, readLen = prof->readLen;
- int8_t* read_reverse = 0;
- cigar* path;
- s_align* r = (s_align*)calloc(1, sizeof(s_align));
- r->ref_begin1 = -1;
- r->read_begin1 = -1;
- r->cigar = 0;
- r->cigarLen = 0;
- if (maskLen < 15) {
- fprintf(stderr, "When maskLen < 15, the function ssw_align doesn't return 2nd best alignment information.\n");
- }
-
- // Find the alignment scores and ending positions
- if (prof->profile_byte) {
- bests = sw_sse2_byte(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_byte, -1, prof->bias, maskLen);
- if (prof->profile_word && bests[0].score == 255) {
- free(bests);
- bests = sw_sse2_word(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_word, -1, maskLen);
- word = 1;
- } else if (bests[0].score == 255) {
- fprintf(stderr, "Please set 2 to the score_size parameter of the function ssw_init, otherwise the alignment results will be incorrect.\n");
- return 0;
- }
- }else if (prof->profile_word) {
- bests = sw_sse2_word(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_word, -1, maskLen);
- word = 1;
- }else {
- fprintf(stderr, "Please call the function ssw_init before ssw_align.\n");
- return 0;
- }
- r->score1 = bests[0].score;
- r->ref_end1 = bests[0].ref;
- r->read_end1 = bests[0].read;
- if (maskLen >= 15) {
- r->score2 = bests[1].score;
- r->ref_end2 = bests[1].ref;
- } else {
- r->score2 = 0;
- r->ref_end2 = -1;
- }
- free(bests);
- if (flag == 0 || (flag == 2 && r->score1 < filters)) goto end;
-
- // Find the beginning position of the best alignment.
- read_reverse = seq_reverse(prof->read, r->read_end1);
- if (word == 0) {
- vP = qP_byte(read_reverse, prof->mat, r->read_end1 + 1, prof->n, prof->bias);
- bests_reverse = sw_sse2_byte(ref, 1, r->ref_end1 + 1, r->read_end1 + 1, weight_gapO, weight_gapE, vP, r->score1, prof->bias, maskLen);
- } else {
- vP = qP_word(read_reverse, prof->mat, r->read_end1 + 1, prof->n);
- bests_reverse = sw_sse2_word(ref, 1, r->ref_end1 + 1, r->read_end1 + 1, weight_gapO, weight_gapE, vP, r->score1, maskLen);
- }
- free(vP);
- free(read_reverse);
- r->ref_begin1 = bests_reverse[0].ref;
- r->read_begin1 = r->read_end1 - bests_reverse[0].read;
- free(bests_reverse);
- if ((7&flag) == 0 || ((2&flag) != 0 && r->score1 < filters) || ((4&flag) != 0 && (r->ref_end1 - r->ref_begin1 > filterd || r->read_end1 - r->read_begin1 > filterd))) goto end;
-
- // Generate cigar.
- refLen = r->ref_end1 - r->ref_begin1 + 1;
- readLen = r->read_end1 - r->read_begin1 + 1;
- band_width = abs(refLen - readLen) + 1;
- path = banded_sw(ref + r->ref_begin1, prof->read + r->read_begin1, refLen, readLen, r->score1, weight_gapO, weight_gapE, band_width, prof->mat, prof->n);
- if (path == 0) r = 0;
- else {
- r->cigar = path->seq;
- r->cigarLen = path->length;
- free(path);
- }
-
-end:
- return r;
-}
-
-void align_destroy (s_align* a) {
- free(a->cigar);
- free(a);
-}
diff --git a/src/ssw.h b/src/ssw.h
deleted file mode 100644
index 3cb45c8..0000000
--- a/src/ssw.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * ssw.h
- *
- * Created by Mengyao Zhao on 6/22/10.
- * Copyright 2010 Boston College. All rights reserved.
- * Version 0.1.4
- * Last revision by Mengyao Zhao on 07/31/12.
- *
- */
-
-#ifndef SSW_H
-#define SSW_H
-
-#include <stdio.h>
-#include <stdint.h>
-#include <string.h>
-#include <emmintrin.h>
-
-/*! @typedef structure of the query profile */
-struct _profile;
-typedef struct _profile s_profile;
-
-/*! @typedef structure of the alignment result
- @field score1 the best alignment score
- @field score2 sub-optimal alignment score
- @field ref_begin1 0-based best alignment beginning position on reference; ref_begin1 = -1 when the best alignment beginning
- position is not available
- @field ref_end1 0-based best alignment ending position on reference
- @field read_begin1 0-based best alignment beginning position on read; read_begin1 = -1 when the best alignment beginning
- position is not available
- @field read_end1 0-based best alignment ending position on read
- @field read_end2 0-based sub-optimal alignment ending position on read
- @field cigar best alignment cigar; stored the same as that in BAM format, high 28 bits: length, low 4 bits: M/I/D (0/1/2);
- cigar = 0 when the best alignment path is not available
- @field cigarLen length of the cigar string; cigarLen = 0 when the best alignment path is not available
-*/
-typedef struct {
- uint16_t score1;
- uint16_t score2;
- int32_t ref_begin1;
- int32_t ref_end1;
- int32_t read_begin1;
- int32_t read_end1;
- int32_t ref_end2;
- uint32_t* cigar;
- int32_t cigarLen;
-} s_align;
-
-#ifdef __cplusplus
-extern "C" {
-#endif // __cplusplus
-
-/*! @function Create the query profile using the query sequence.
- @param read pointer to the query sequence; the query sequence needs to be numbers
- @param readLen length of the query sequence
- @param mat pointer to the substitution matrix; mat needs to be corresponding to the read sequence
- @param n the square root of the number of elements in mat (mat has n*n elements)
- @param score_size estimated Smith-Waterman score; if your estimated best alignment score is surely < 255 please set 0; if
- your estimated best alignment score >= 255, please set 1; if you don't know, please set 2
- @return pointer to the query profile structure
- @note example for parameter read and mat:
- If the query sequence is: ACGTATC, the sequence that read points to can be: 1234142
- Then if the penalty for match is 2 and for mismatch is -2, the substitution matrix of parameter mat will be:
- //A C G T
- 2 -2 -2 -2 //A
- -2 2 -2 -2 //C
- -2 -2 2 -2 //G
- -2 -2 -2 2 //T
- mat is the pointer to the array {2, -2, -2, -2, -2, 2, -2, -2, -2, -2, 2, -2, -2, -2, -2, 2}
-*/
-s_profile* ssw_init (const int8_t* read, const int32_t readLen, const int8_t* mat, const int32_t n, const int8_t score_size);
-
-/*! @function Release the memory allocated by function ssw_init.
- @param p pointer to the query profile structure
-*/
-void init_destroy (s_profile* p);
-
-// @function ssw alignment.
-/*! @function Do Striped Smith-Waterman alignment.
- @param prof pointer to the query profile structure
- @param ref pointer to the target sequence; the target sequence needs to be numbers and corresponding to the mat parameter of
- function ssw_init
- @param refLen length of the target sequence
- @param weight_gapO the absolute value of gap open penalty
- @param weight_gapE the absolute value of gap extension penalty
- @param flag bitwise FLAG; (from high to low) bit 5: when setted as 1, function ssw_align will return the best alignment
- beginning position; bit 6: when setted as 1, if (ref_end1 - ref_begin1 < filterd && read_end1 - read_begin1
- < filterd), (whatever bit 5 is setted) the function will return the best alignment beginning position and
- cigar; bit 7: when setted as 1, if the best alignment score >= filters, (whatever bit 5 is setted) the function
- will return the best alignment beginning position and cigar; bit 8: when setted as 1, (whatever bit 5, 6 or 7 is
- setted) the function will always return the best alignment beginning position and cigar
- @param filters score filter: when bit 7 of flag is setted as 1 and bit 8 is setted as 0, filters will be used (Please check the
- decription of the flag parameter for detailed usage.)
- @param filterd distance filter: when bit 6 of flag is setted as 1 and bit 8 is setted as 0, filterd will be used (Please check
- the decription of the flag parameter for detailed usage.)
- @param maskLen The distance between the optimal and suboptimal alignment ending position >= maskLen. We suggest to use
- readLen/2, if you don't have special concerns. Note: maskLen has to be >= 15, otherwise this function will NOT
- return the suboptimal alignment information. Detailed description of maskLen: After locating the optimal
- alignment ending position, the suboptimal alignment score can be heuristically found by checking the second
- largest score in the array that contains the maximal score of each column of the SW matrix. In order to avoid
- picking the scores that belong to the alignments sharing the partial best alignment, SSW C library masks the
- reference loci nearby (mask length = maskLen) the best alignment ending position and locates the second largest
- score from the unmasked elements.
- @return pointer to the alignment result structure
- @note Whatever the parameter flag is setted, this function will at least return the optimal and sub-optimal alignment score,
- and the optimal alignment ending positions on target and query sequences. If both bit 6 and 7 of the flag are setted
- while bit 8 is not, the function will return cigar only when both criteria are fulfilled. All returned positions are
- 0-based coordinate.
-*/
-s_align* ssw_align (const s_profile* prof,
- const int8_t* ref,
- int32_t refLen,
- const uint8_t weight_gapO,
- const uint8_t weight_gapE,
- const uint8_t flag,
- const uint16_t filters,
- const int32_t filterd,
- const int32_t maskLen);
-
-/*! @function Release the memory allocated by function ssw_align.
- @param a pointer to the alignment result structure
-*/
-void align_destroy (s_align* a);
-
-#ifdef __cplusplus
-}
-#endif // __cplusplus
-
-#endif // SSW_H
diff --git a/src/ssw_cpp.cpp b/src/ssw_cpp.cpp
deleted file mode 100644
index ea260de..0000000
--- a/src/ssw_cpp.cpp
+++ /dev/null
@@ -1,399 +0,0 @@
-#include "ssw_cpp.h"
-
-#include <sstream>
-
-extern "C" {
-#include "ssw.h"
-}
-
-namespace {
-
-static int8_t kBaseTranslation[128] = {
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- // A C G
- 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
- // T
- 4, 4, 4, 4, 3, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- // a c g
- 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
- // t
- 4, 4, 4, 4, 3, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
-};
-
-void BuildSwScoreMatrix(const uint8_t& match_score,
- const uint8_t& mismatch_penalty,
- int8_t* matrix) {
-
- // The score matrix looks like
- // // A, C, G, T, N
- // score_matrix_ = { 2, -2, -2, -2, 0, // A
- // -2, 2, -2, -2, 0, // C
- // -2, -2, 2, -2, 0, // G
- // -2, -2, -2, 2, 0, // T
- // 0, 0, 0, 0, 0};// N
-
- int id = 0;
- for (int i = 0; i < 4; ++i) {
- for (int j = 0; j < 4; ++j) {
- matrix[id] = ((i == j) ? match_score : static_cast<int8_t>(-mismatch_penalty));
- ++id;
- }
- matrix[id] = 0;
- ++id;
- }
-
- for (int i = 0; i < 5; ++i)
- matrix[id++] = 0;
-
-}
-
-void ConvertAlignment(const s_align& s_al,
- const int& query_len,
- StripedSmithWaterman::Alignment* al) {
- al->sw_score = s_al.score1;
- al->sw_score_next_best = s_al.score2;
- al->ref_begin = s_al.ref_begin1;
- al->ref_end = s_al.ref_end1;
- al->query_begin = s_al.read_begin1;
- al->query_end = s_al.read_end1;
- al->ref_end_next_best = s_al.ref_end2;
-
- al->cigar.clear();
- al->cigar_string.clear();
-
- if (s_al.cigarLen > 0) {
- std::ostringstream cigar_string;
- if (al->query_begin > 0) {
- uint32_t cigar = (al->query_begin << 4) | 0x0004;
- al->cigar.push_back(cigar);
- cigar_string << al->query_begin << 'S';
- }
-
- for (int i = 0; i < s_al.cigarLen; ++i) {
- al->cigar.push_back(s_al.cigar[i]);
- cigar_string << (s_al.cigar[i] >> 4);
- uint8_t op = s_al.cigar[i] & 0x000f;
- switch(op) {
- case 0: cigar_string << 'M'; break;
- case 1: cigar_string << 'I'; break;
- case 2: cigar_string << 'D'; break;
- }
- }
-
- int end = query_len - al->query_end - 1;
- if (end > 0) {
- uint32_t cigar = (end << 4) | 0x0004;
- al->cigar.push_back(cigar);
- cigar_string << end << 'S';
- }
-
- al->cigar_string = cigar_string.str();
- } // end if
-}
-
-int CalculateNumberMismatch(
- const StripedSmithWaterman::Alignment& al,
- const int8_t* matrix,
- int8_t const *ref,
- int8_t const *query) {
-
- ref += al.ref_begin;
- query += al.query_begin;
- int mismatch_length = 0;
- for (unsigned int i = 0; i < al.cigar.size(); ++i) {
- int32_t op = al.cigar[i] & 0x0000000f;
- int32_t length = (al.cigar[i] >> 4) & 0x0fffffff;
- if (op == 0) { // M
- for (int j = 0; j < length; ++j) {
- if (matrix[*ref] != matrix[*query]) ++mismatch_length;
- ++ref;
- ++query;
- }
- } else if (op == 1) { // I
- query += length;
- mismatch_length += length;
- } else if (op == 2) { // D
- ref += length;
- mismatch_length += length;
- }
- }
-
- return mismatch_length;
-}
-
-void SetFlag(const StripedSmithWaterman::Filter& filter, uint8_t* flag) {
- if (filter.report_begin_position) *flag |= 0x08;
- if (filter.report_cigar) *flag |= 0x0f;
-}
-
-} // namespace
-
-
-
-namespace StripedSmithWaterman {
-
-Aligner::Aligner(void)
- : score_matrix_(NULL)
- , score_matrix_size_(5)
- , translation_matrix_(NULL)
- , default_matrix_(false)
- , matrix_built_(false)
- , match_score_(2)
- , mismatch_penalty_(2)
- , gap_opening_penalty_(3)
- , gap_extending_penalty_(1)
- , translated_reference_(NULL)
- , reference_length_(0)
-{
- BuildDefaultMatrix();
-}
-
-Aligner::Aligner(
- const uint8_t& match_score,
- const uint8_t& mismatch_penalty,
- const uint8_t& gap_opening_penalty,
- const uint8_t& gap_extending_penalty)
-
- : score_matrix_(NULL)
- , score_matrix_size_(5)
- , translation_matrix_(NULL)
- , default_matrix_(false)
- , matrix_built_(false)
- , match_score_(match_score)
- , mismatch_penalty_(mismatch_penalty)
- , gap_opening_penalty_(gap_opening_penalty)
- , gap_extending_penalty_(gap_extending_penalty)
- , translated_reference_(NULL)
- , reference_length_(0)
-{
- BuildDefaultMatrix();
-}
-
-Aligner::Aligner(const int8_t* score_matrix,
- const int& score_matrix_size,
- const int8_t* translation_matrix,
- const int& translation_matrix_size)
-
- : score_matrix_(NULL)
- , score_matrix_size_(score_matrix_size)
- , translation_matrix_(NULL)
- , default_matrix_(true)
- , matrix_built_(false)
- , match_score_(2)
- , mismatch_penalty_(2)
- , gap_opening_penalty_(3)
- , gap_extending_penalty_(1)
- , translated_reference_(NULL)
- , reference_length_(0)
-{
- score_matrix_ = new int8_t[score_matrix_size_ * score_matrix_size_];
- memcpy(score_matrix_, score_matrix, sizeof(int8_t) * score_matrix_size_ * score_matrix_size_);
- translation_matrix_ = new int8_t[translation_matrix_size];
- memcpy(translation_matrix_, translation_matrix, sizeof(int8_t) * translation_matrix_size);
- matrix_built_ = true;
-}
-
-
-Aligner::~Aligner(void){
- Clear();
-}
-
-int Aligner::SetReferenceSequence(const char* seq, const int& length) {
-
- int len = 0;
- if (matrix_built_) {
- // calculate the valid length
- int calculated_ref_length = static_cast<int>(strlen(seq));
- int valid_length = (calculated_ref_length > length)
- ? length : calculated_ref_length;
- // delete the current buffer
- CleanReferenceSequence();
- // allocate a new buffer
- translated_reference_ = new int8_t[valid_length];
-
- len = TranslateBase(seq, valid_length, translated_reference_);
- } else {
- // nothing
- }
-
- reference_length_ = len;
- return len;
-
-
-}
-
-int Aligner::TranslateBase(const char* bases, const int& length,
- int8_t* translated) const {
-
- char* ptr = (char*)bases;
- int len = 0;
- for (int i = 0; i < length; ++i) {
- translated[i] = translation_matrix_[(int) *ptr];
- ++ptr;
- ++len;
- }
-
- return len;
-}
-
-
-bool Aligner::Align(const char* query, const Filter& filter,
- Alignment* alignment) const
-{
- if (!matrix_built_) return false;
- if (reference_length_ == 0) return false;
-
- int query_len = strlen(query);
- if (query_len == 0) return false;
- int8_t* translated_query = new int8_t[query_len];
- TranslateBase(query, query_len, translated_query);
-
- const int8_t score_size = 2;
- s_profile* profile = ssw_init(translated_query, query_len, score_matrix_,
- score_matrix_size_, score_size);
-
- uint8_t flag = 0;
- SetFlag(filter, &flag);
- s_align* s_al = ssw_align(profile, translated_reference_, reference_length_,
- static_cast<int>(gap_opening_penalty_),
- static_cast<int>(gap_extending_penalty_),
- flag, filter.score_filter, filter.distance_filter, query_len);
-
- alignment->Clear();
- ConvertAlignment(*s_al, query_len, alignment);
- alignment->mismatches = CalculateNumberMismatch(*alignment, score_matrix_, translated_reference_, translated_query);
-
-
- // Free memory
- if (query_len > 1) delete [] translated_query;
- else delete translated_query;
- align_destroy(s_al);
- init_destroy(profile);
-
- return true;
-}
-
-
-bool Aligner::Align(const char* query, const char* ref, const int& ref_len,
- const Filter& filter, Alignment* alignment) const
-{
- if (!matrix_built_) return false;
-
- int query_len = strlen(query);
- if (query_len == 0) return false;
- int8_t* translated_query = new int8_t[query_len];
- TranslateBase(query, query_len, translated_query);
-
- // calculate the valid length
- int calculated_ref_length = static_cast<int>(strlen(ref));
- int valid_ref_len = (calculated_ref_length > ref_len)
- ? ref_len : calculated_ref_length;
- int8_t* translated_ref = new int8_t[valid_ref_len];
- TranslateBase(ref, valid_ref_len, translated_ref);
-
-
- const int8_t score_size = 2;
- s_profile* profile = ssw_init(translated_query, query_len, score_matrix_,
- score_matrix_size_, score_size);
-
- uint8_t flag = 0;
- SetFlag(filter, &flag);
- s_align* s_al = ssw_align(profile, translated_ref, valid_ref_len,
- static_cast<int>(gap_opening_penalty_),
- static_cast<int>(gap_extending_penalty_),
- flag, filter.score_filter, filter.distance_filter, query_len);
-
- alignment->Clear();
- ConvertAlignment(*s_al, query_len, alignment);
- alignment->mismatches = CalculateNumberMismatch(*alignment, score_matrix_, translated_ref, translated_query);
-
- // Free memory
- if (query_len > 1) delete [] translated_query;
- else delete translated_query;
- if (valid_ref_len > 1) delete [] translated_ref;
- else delete translated_ref;
- align_destroy(s_al);
- init_destroy(profile);
-
- return true;
-}
-
-void Aligner::Clear(void) {
- if (score_matrix_) delete [] score_matrix_;
- score_matrix_ = NULL;
-
- if (!default_matrix_ && translation_matrix_)
- delete [] translation_matrix_;
- translation_matrix_ = NULL;
-
- CleanReferenceSequence();
-
- default_matrix_ = false;
- matrix_built_ = false;
-}
-
-void Aligner::SetAllDefault(void) {
- score_matrix_size_ = 5;
- default_matrix_ = false;
- matrix_built_ = false;
- match_score_ = 2;
- mismatch_penalty_ = 2;
- gap_opening_penalty_ = 3;
- gap_extending_penalty_ = 1;
- reference_length_ = 0;
-}
-
-bool Aligner::ReBuild(void) {
- if (matrix_built_) return false;
-
- SetAllDefault();
- BuildDefaultMatrix();
-
- return true;
-}
-
-bool Aligner::ReBuild(
- const uint8_t& match_score,
- const uint8_t& mismatch_penalty,
- const uint8_t& gap_opening_penalty,
- const uint8_t& gap_extending_penalty) {
- if (matrix_built_) return false;
-
- SetAllDefault();
-
- match_score_ = match_score;
- mismatch_penalty_ = mismatch_penalty;
- gap_opening_penalty_ = gap_opening_penalty;
- gap_extending_penalty_ = gap_extending_penalty;
-
- BuildDefaultMatrix();
-
- return true;
-}
-
-bool Aligner::ReBuild(
- const int8_t* score_matrix,
- const int& score_matrix_size,
- const int8_t* translation_matrix,
- const int& translation_matrix_size) {
-
- score_matrix_ = new int8_t[score_matrix_size_ * score_matrix_size_];
- memcpy(score_matrix_, score_matrix, sizeof(int8_t) * score_matrix_size_ * score_matrix_size_);
- translation_matrix_ = new int8_t[translation_matrix_size];
- memcpy(translation_matrix_, translation_matrix, sizeof(int8_t) * translation_matrix_size);
- matrix_built_ = true;
-
- return true;
-}
-
-void Aligner::BuildDefaultMatrix(void) {
- score_matrix_ = new int8_t[score_matrix_size_ * score_matrix_size_];
- BuildSwScoreMatrix(match_score_, mismatch_penalty_, score_matrix_);
- translation_matrix_ = kBaseTranslation;
- matrix_built_ = true;
- default_matrix_ = true;
-}
-} // namespace StripedSmithWaterman
diff --git a/src/ssw_cpp.h b/src/ssw_cpp.h
deleted file mode 100644
index fb10f4f..0000000
--- a/src/ssw_cpp.h
+++ /dev/null
@@ -1,216 +0,0 @@
-#ifndef COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
-#define COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
-
-#include <stdint.h>
-#include <string>
-#include <vector>
-
-namespace StripedSmithWaterman {
-
-struct Alignment {
- uint16_t sw_score; // The best alignment score
- uint16_t sw_score_next_best; // The next best alignment score
- int32_t ref_begin; // Reference begin position of the best alignment
- int32_t ref_end; // Reference end position of the best alignment
- int32_t query_begin; // Query begin position of the best alignment
- int32_t query_end; // Query end position of the best alignment
- int32_t ref_end_next_best; // Reference end position of the next best alignment
- int32_t mismatches; // Number of mismatches of the alignment
- std::string cigar_string; // Cigar string of the best alignment
- std::vector<uint32_t> cigar; // Cigar stored in the BAM format
- // high 28 bits: length
- // low 4 bits: M/I/D/S/X (0/1/2/4/8);
- void Clear() {
- sw_score = 0;
- sw_score_next_best = 0;
- ref_begin = 0;
- ref_end = 0;
- query_begin = 0;
- query_end = 0;
- ref_end_next_best = 0;
- mismatches = 0;
- cigar_string.clear();
- cigar.clear();
- };
-};
-
-struct Filter {
- // NOTE: No matter the filter, those five fields will be given anyway.
- // sw_score; sw_score_next_best; ref_end; query_end; ref_end_next_best.
-
- bool report_begin_position; // Give ref_begin and query_begin.
- // If it is not set, ref_begin and query_begin are -1.
- bool report_cigar; // Give cigar_string and cigar.
- // report_begin_position is automatically TRUE.
-
- // When *report_cigar* is true and alignment passes these two filters,
- // cigar_string and cigar will be given.
- uint16_t score_filter; // score >= score_filter
- uint16_t distance_filter; // ((ref_end - ref_begin) < distance_filter) &&
- // ((query_end - read_begin) < distance_filter)
-
- Filter()
- : report_begin_position(true)
- , report_cigar(true)
- , score_filter(0)
- , distance_filter(32767)
- {};
-};
-
-class Aligner {
- public:
- // =========
- // @function Construct an Aligner on default values.
- // The function will build the {A.C,G,T,N} aligner.
- // If you target for other character aligners, then please
- // use the other constructor and pass the corresponding matrix in.
- // =========
- Aligner(void);
-
- // =========
- // @function Construct an Aligner by assigning scores.
- // The function will build the {A.C,G,T,N} aligner.
- // If you target for other character aligners, then please
- // use the other constructor and pass the corresponding matrix in.
- // =========
- Aligner(const uint8_t& match_score,
- const uint8_t& mismatch_penalty,
- const uint8_t& gap_opening_penalty,
- const uint8_t& gap_extending_penalty);
-
- // =========
- // @function Construct an Aligner by the specific matrixs.
- // =========
- Aligner(const int8_t* score_matrix,
- const int& score_matrix_size,
- const int8_t* translation_matrix,
- const int& translation_matrix_size);
-
- ~Aligner(void);
-
- // =========
- // @function Build the reference sequence and thus make
- // Align(const char* query, s_align* alignment) function;
- // otherwise the reference should be given when aligning.
- // [NOTICE] If there exists a sequence, that one will be deleted
- // and replaced.
- // @param seq The reference bases;
- // [NOTICE] It is not necessary null terminated.
- // @param length The length of bases will be be built.
- // @return The length of the built bases.
- // =========
- int SetReferenceSequence(const char* seq, const int& length);
-
- void CleanReferenceSequence(void);
-
- // =========
- // @function Set penalties for opening and extending gaps
- // [NOTICE] The defaults are 3 and 1 respectively.
- // =========
- void SetGapPenalty(const uint8_t& opening, const uint8_t& extending) {
- gap_opening_penalty_ = opening;
- gap_extending_penalty_ = extending;
- };
-
- void SetMismatchPenalty(const uint8_t& match, const uint8_t& mismatch) {
- match_score_ = match;
- mismatch_penalty_ = mismatch;
- };
-
- // =========
- // @function Align the query againt the reference that is set by
- // SetReferenceSequence.
- // @param query The query sequence.
- // @param filter The filter for the alignment.
- // @param alignment The container contains the result.
- // @return True: succeed; false: fail.
- // =========
- bool Align(const char* query, const Filter& filter, Alignment* alignment) const;
-
- // =========
- // @function Align the query againt the reference.
- // [NOTICE] The reference won't replace the reference
- // set by SetReferenceSequence.
- // @param query The query sequence.
- // @param ref The reference sequence.
- // [NOTICE] It is not necessary null terminated.
- // @param ref_len The length of the reference sequence.
- // @param filter The filter for the alignment.
- // @param alignment The container contains the result.
- // @return True: succeed; false: fail.
- // =========
- bool Align(const char* query, const char* ref, const int& ref_len,
- const Filter& filter, Alignment* alignment) const;
-
- // @function Clear up all containers and thus the aligner is disabled.
- // To rebuild the aligner please use Build functions.
- void Clear(void);
-
- // =========
- // @function Rebuild the aligner's ability on default values.
- // [NOTICE] If the aligner is not cleaned, rebuilding will fail.
- // @return True: succeed; false: fail.
- // =========
- bool ReBuild(void);
-
- // =========
- // @function Rebuild the aligner's ability by the specific matrixs.
- // [NOTICE] If the aligner is not cleaned, rebuilding will fail.
- // @return True: succeed; false: fail.
- // =========
- bool ReBuild(
- const uint8_t& match_score,
- const uint8_t& mismatch_penalty,
- const uint8_t& gap_opening_penalty,
- const uint8_t& gap_extending_penalty);
-
- // =========
- // @function Construct an Aligner by the specific matrixs.
- // [NOTICE] If the aligner is not cleaned, rebuilding will fail.
- // @return True: succeed; false: fail.
- // =========
- bool ReBuild(
- const int8_t* score_matrix,
- const int& score_matrix_size,
- const int8_t* translation_matrix,
- const int& translation_matrix_size);
-
- private:
- int8_t* score_matrix_;
- int score_matrix_size_;
- int8_t* translation_matrix_;
- bool default_matrix_;
- bool matrix_built_;
-
- uint8_t match_score_; // default: 2
- uint8_t mismatch_penalty_; // default: 2
- uint8_t gap_opening_penalty_; // default: 3
- uint8_t gap_extending_penalty_; // default: 1
-
- int8_t* translated_reference_;
- int32_t reference_length_;
-
- int TranslateBase(const char* bases, const int& length, int8_t* translated) const;
- void SetAllDefault(void);
- void BuildDefaultMatrix(void);
-
- Aligner& operator= (const Aligner&);
- Aligner (const Aligner&);
-}; // class Aligner
-
-
-// ================
-// inline functions
-// ================
-inline void Aligner::CleanReferenceSequence(void) {
- if (reference_length_ == 0) return;
-
- // delete the current buffer
- if (reference_length_ > 1) delete [] translated_reference_;
- else delete translated_reference_;
-
- reference_length_ = 0;
-}
-} // namespace StripedSmithWaterman
-
-#endif // COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libvcflib.git
More information about the debian-med-commit
mailing list