[med-svn] [last-align] 01/03: New upstream version 830

Andreas Tille tille at debian.org
Mon Jan 23 07:59:03 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository last-align.

commit 857fbc19c03d6e07fd865dfbf4880f1ebeb5f66d
Author: Andreas Tille <tille at debian.org>
Date:   Mon Jan 23 08:47:05 2017 +0100

    New upstream version 830
---
 ChangeLog.txt                   | 13 +++++++-
 src/split/cbrc_split_aligner.cc | 74 +++++++++++++++++++++++++++++++++++++++--
 src/split/cbrc_split_aligner.hh | 10 ++++++
 src/split/last-split.cc         | 51 +++++++++++++++++++---------
 src/version.hh                  |  2 +-
 5 files changed, 131 insertions(+), 19 deletions(-)

diff --git a/ChangeLog.txt b/ChangeLog.txt
index 157198b..4b51ca3 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -1,8 +1,19 @@
+2017-01-19  Martin C. Frith  <Martin C. Frith>
+
+	* src/split/cbrc_split_aligner.cc, src/split/cbrc_split_aligner.hh,
+	src/split/last-split.cc:
+	Added don and acc to spliced alignment output.
+	[138b43db3812] [tip]
+
+	* src/split/cbrc_split_aligner.cc, src/split/last-split.cc:
+	Refactoring.
+	[39ef0c88bcaa]
+
 2016-12-19  Martin C. Frith  <Martin C. Frith>
 
 	* src/Centroid.cc:
 	Just re-ordered some probability calculations.
-	[c13b3ad70ddb] [tip]
+	[c13b3ad70ddb]
 
 	* src/Centroid.cc:
 	Refactoring.
diff --git a/src/split/cbrc_split_aligner.cc b/src/split/cbrc_split_aligner.cc
index bea2d07..7cfe2c7 100644
--- a/src/split/cbrc_split_aligner.cc
+++ b/src/split/cbrc_split_aligner.cc
@@ -707,6 +707,14 @@ void SplitAligner::initSpliceCoords(unsigned i) {
   assert(k == a.rend);  // xxx
 }
 
+static const uchar *seqBeg(const MultiSequence &m, size_t sequenceIndex) {
+  return m.seqReader() + m.seqBeg(sequenceIndex);
+}
+
+static const uchar *seqEnd(const MultiSequence &m, size_t sequenceIndex) {
+  return m.seqReader() + m.seqEnd(sequenceIndex);
+}
+
 void SplitAligner::initSpliceSignals(unsigned i) {
   const uchar *toUnmasked = alphabet.numbersToUppercase;
   const UnsplitAlignment& a = alns[i];
@@ -716,8 +724,8 @@ void SplitAligner::initSpliceSignals(unsigned i) {
     err("can't find " + std::string(a.rname) + " in the genome");
   size_t v = f->second % maxGenomeVolumes();
   size_t c = f->second / maxGenomeVolumes();
-  const uchar *chromBeg = genome[v].seqReader() + genome[v].seqBeg(c);
-  const uchar *chromEnd = genome[v].seqReader() + genome[v].seqEnd(c);
+  const uchar *chromBeg = seqBeg(genome[v], c);
+  const uchar *chromEnd = seqEnd(genome[v], c);
   if (a.rend > chromEnd - chromBeg)
     err("alignment beyond the end of " + std::string(a.rname));
 
@@ -741,6 +749,68 @@ void SplitAligner::initSpliceSignals(unsigned i) {
   }
 }
 
+const uchar sequenceEndSentinel = 4;
+
+static void getNextSignal(uchar *out, const uchar *seq) {
+  out[0] = seq[0];
+  out[1] = (seq[0] == sequenceEndSentinel) ? sequenceEndSentinel : seq[1];
+}
+
+static void getPrevSignal(uchar *out, const uchar *seq) {
+  out[1] = seq[-1];
+  out[0] = (seq[-1] == sequenceEndSentinel) ? sequenceEndSentinel : seq[-2];
+}
+
+static char decodeOneBase(const uchar *decode, uchar x) {
+  return (x == sequenceEndSentinel) ? 'N' : decode[x];
+}
+
+static void decodeSpliceSignal(char *out,
+			       const uchar *signal,
+			       const uchar *decode,
+			       const uchar *complement,
+			       bool isSameStrand) {
+  if (isSameStrand) {
+    out[0] = decodeOneBase(decode, signal[0]);
+    out[1] = decodeOneBase(decode, signal[1]);
+  } else {
+    out[0] = decodeOneBase(decode, complement[signal[1]]);
+    out[1] = decodeOneBase(decode, complement[signal[0]]);
+  }
+}
+
+void SplitAligner::spliceBegSignal(char *out,
+				   unsigned alnNum, unsigned queryPos,
+				   bool isSenseStrand) const {
+  const UnsplitAlignment& a = alns[alnNum];
+  bool isForwardStrand = (a.qstrand == '+');
+  StringNumMap::const_iterator f = chromosomeIndex.find(a.rname);
+  size_t v = f->second % maxGenomeVolumes();
+  size_t c = f->second / maxGenomeVolumes();
+  uchar signal[2];
+  unsigned coord = cell(spliceBegCoords, alnNum, queryPos);
+  if (isForwardStrand) getNextSignal(signal, seqBeg(genome[v], c) + coord);
+  else                 getPrevSignal(signal, seqEnd(genome[v], c) - coord);
+  decodeSpliceSignal(out, signal, alphabet.decode, alphabet.complement,
+		     isSenseStrand == isForwardStrand);
+}
+
+void SplitAligner::spliceEndSignal(char *out,
+				   unsigned alnNum, unsigned queryPos,
+				   bool isSenseStrand) const {
+  const UnsplitAlignment& a = alns[alnNum];
+  bool isForwardStrand = (a.qstrand == '+');
+  StringNumMap::const_iterator f = chromosomeIndex.find(a.rname);
+  size_t v = f->second % maxGenomeVolumes();
+  size_t c = f->second / maxGenomeVolumes();
+  uchar signal[2];
+  unsigned coord = cell(spliceEndCoords, alnNum, queryPos);
+  if (isForwardStrand) getPrevSignal(signal, seqBeg(genome[v], c) + coord);
+  else                 getNextSignal(signal, seqEnd(genome[v], c) - coord);
+  decodeSpliceSignal(out, signal, alphabet.decode, alphabet.complement,
+		     isSenseStrand == isForwardStrand);
+}
+
 struct RnameAndStrandLess {
   RnameAndStrandLess(const UnsplitAlignment *a) : alns(a) {}
 
diff --git a/src/split/cbrc_split_aligner.hh b/src/split/cbrc_split_aligner.hh
index a2ca7ac..6e6885b 100644
--- a/src/split/cbrc_split_aligner.hh
+++ b/src/split/cbrc_split_aligner.hh
@@ -107,6 +107,16 @@ public:
     // by flipSpliceSignals()
     double spliceSignalStrandLogOdds() const;
 
+    // Gets the 2 genome bases immediately downstream of queryPos in
+    // alnNum, and writes them into the buffer pointed to by "out"
+    void spliceBegSignal(char *out, unsigned alnNum, unsigned queryPos,
+			 bool isSenseStrand) const;
+
+    // Gets the 2 genome bases immediately upstream of queryPos in
+    // alnNum, and writes them into the buffer pointed to by "out"
+    void spliceEndSignal(char *out, unsigned alnNum, unsigned queryPos,
+			 bool isSenseStrand) const;
+
 private:
     static const int numQualCodes = 64;
     static int score_mat[64][64][numQualCodes];
diff --git a/src/split/last-split.cc b/src/split/last-split.cc
index 04790bc..1dd714e 100644
--- a/src/split/last-split.cc
+++ b/src/split/last-split.cc
@@ -66,11 +66,21 @@ static bool less(const cbrc::UnsplitAlignment& a,
   return a.linesBeg < b.linesBeg;  // stabilizes the sort
 }
 
+static void printSense(double senseStrandLogOdds) {
+  double b = senseStrandLogOdds / std::log(2.0);
+  if (b < 0.1 && b > -0.1) b = 0;
+  else if (b > 10) b = std::floor(b + 0.5);
+  else if (b < -10) b = std::ceil(b - 0.5);
+  int precision = (b < 10 && b > -10) ? 2 : 3;
+  std::cout << std::setprecision(precision) << " sense=" << b;
+}
+
 static void doOneAlignmentPart(cbrc::SplitAligner& sa,
 			       const cbrc::UnsplitAlignment& a,
+			       unsigned numOfParts, unsigned partNum,
 			       unsigned alnNum,
 			       unsigned qSliceBeg, unsigned qSliceEnd,
-			       double senseStrandLogOdds,
+			       bool isSenseStrand, double senseStrandLogOdds,
 			       const LastSplitOptions& opts,
 			       bool isAlreadySplit) {
   unsigned alnBeg, alnEnd;
@@ -118,15 +128,20 @@ static void doOneAlignmentPart(cbrc::SplitAligner& sa,
 
   std::cout << std::setprecision(mismapPrecision)
 	    << "a score=" << score << " mismap=" << mismap;
-  if (opts.direction == 2) {
-    double b = senseStrandLogOdds / std::log(2.0);
-    if (b < 0.1 && b > -0.1) b = 0;
-    else if (b > 10) b = std::floor(b + 0.5);
-    else if (b < -10) b = std::ceil(b - 0.5);
-    int sensePrecision = (b < 10 && b > -10) ? 2 : 3;
-    std::cout << std::setprecision(sensePrecision) << " sense=" << b;
+  if (opts.direction == 2) printSense(senseStrandLogOdds);
+  if (!opts.genome.empty() && !opts.no_split) {
+    char signal[3] = {0};
+    if (partNum > 0) {
+      sa.spliceEndSignal(signal, alnNum, qSliceBeg, isSenseStrand);
+      std::cout << (isSenseStrand ? " acc=" : " don=") << signal;
+    }
+    if (partNum + 1 < numOfParts) {
+      sa.spliceBegSignal(signal, alnNum, qSliceEnd, isSenseStrand);
+      std::cout << (isSenseStrand ? " don=" : " acc=") << signal;
+    }
   }
   std::cout << "\n" << std::setprecision(6);
+
   if (a.qstrand == '-') cbrc::flipMafStrands(s.begin(), s.end());
   if (opts.no_split && a.linesEnd[-1][0] == 'c') s.push_back(a.linesEnd[-1]);
   cbrc::printMaf(s);
@@ -164,9 +179,11 @@ static void doOneQuery(std::vector<cbrc::UnsplitAlignment>::const_iterator beg,
 
   if (opts.no_split) {
     if (opts.verbose) std::cerr << "\n";
-    for (unsigned i = 0; i < end - beg; ++i) {
-      doOneAlignmentPart(sa, beg[i], i, beg[i].qstart, beg[i].qend,
-			 senseStrandLogOdds, opts, isAlreadySplit);
+    unsigned numOfParts = end - beg;
+    for (unsigned i = 0; i < numOfParts; ++i) {
+      doOneAlignmentPart(sa, beg[i], numOfParts, i, i,
+			 beg[i].qstart, beg[i].qend,
+			 true, senseStrandLogOdds, opts, isAlreadySplit);
     }
   } else {
     long viterbiScore = LONG_MIN;
@@ -181,10 +198,11 @@ static void doOneQuery(std::vector<cbrc::UnsplitAlignment>::const_iterator beg,
       sa.flipSpliceSignals();
       if (opts.verbose) std::cerr << "\t" << viterbiScoreRev;
     }
+    bool isSenseStrand = (viterbiScore >= viterbiScoreRev);
     std::vector<unsigned> alnNums;
     std::vector<unsigned> queryBegs;
     std::vector<unsigned> queryEnds;
-    if (viterbiScore >= viterbiScoreRev) {
+    if (isSenseStrand) {
       sa.traceBack(viterbiScore, alnNums, queryBegs, queryEnds);
     } else {
       sa.flipSpliceSignals();
@@ -196,10 +214,13 @@ static void doOneQuery(std::vector<cbrc::UnsplitAlignment>::const_iterator beg,
     std::reverse(queryEnds.begin(), queryEnds.end());
 
     if (opts.verbose) std::cerr << "\n";
-    for (unsigned k = 0; k < alnNums.size(); ++k) {
+    unsigned numOfParts = alnNums.size();
+    for (unsigned k = 0; k < numOfParts; ++k) {
       unsigned i = alnNums[k];
-      doOneAlignmentPart(sa, beg[i], i, queryBegs[k], queryEnds[k],
-			 senseStrandLogOdds, opts, isAlreadySplit);
+      doOneAlignmentPart(sa, beg[i], numOfParts, k, i,
+			 queryBegs[k], queryEnds[k],
+			 isSenseStrand, senseStrandLogOdds,
+			 opts, isAlreadySplit);
     }
   }
 }
diff --git a/src/version.hh b/src/version.hh
index 25bdae7..d87fb52 100644
--- a/src/version.hh
+++ b/src/version.hh
@@ -1 +1 @@
-"828"
+"830"

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/last-align.git



More information about the debian-med-commit mailing list