[med-svn] [last-align] 01/03: New upstream version 830
Andreas Tille
tille at debian.org
Mon Jan 23 07:59:03 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository last-align.
commit 857fbc19c03d6e07fd865dfbf4880f1ebeb5f66d
Author: Andreas Tille <tille at debian.org>
Date: Mon Jan 23 08:47:05 2017 +0100
New upstream version 830
---
ChangeLog.txt | 13 +++++++-
src/split/cbrc_split_aligner.cc | 74 +++++++++++++++++++++++++++++++++++++++--
src/split/cbrc_split_aligner.hh | 10 ++++++
src/split/last-split.cc | 51 +++++++++++++++++++---------
src/version.hh | 2 +-
5 files changed, 131 insertions(+), 19 deletions(-)
diff --git a/ChangeLog.txt b/ChangeLog.txt
index 157198b..4b51ca3 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -1,8 +1,19 @@
+2017-01-19 Martin C. Frith <Martin C. Frith>
+
+ * src/split/cbrc_split_aligner.cc, src/split/cbrc_split_aligner.hh,
+ src/split/last-split.cc:
+ Added don and acc to spliced alignment output.
+ [138b43db3812] [tip]
+
+ * src/split/cbrc_split_aligner.cc, src/split/last-split.cc:
+ Refactoring.
+ [39ef0c88bcaa]
+
2016-12-19 Martin C. Frith <Martin C. Frith>
* src/Centroid.cc:
Just re-ordered some probability calculations.
- [c13b3ad70ddb] [tip]
+ [c13b3ad70ddb]
* src/Centroid.cc:
Refactoring.
diff --git a/src/split/cbrc_split_aligner.cc b/src/split/cbrc_split_aligner.cc
index bea2d07..7cfe2c7 100644
--- a/src/split/cbrc_split_aligner.cc
+++ b/src/split/cbrc_split_aligner.cc
@@ -707,6 +707,14 @@ void SplitAligner::initSpliceCoords(unsigned i) {
assert(k == a.rend); // xxx
}
+static const uchar *seqBeg(const MultiSequence &m, size_t sequenceIndex) {
+ return m.seqReader() + m.seqBeg(sequenceIndex);
+}
+
+static const uchar *seqEnd(const MultiSequence &m, size_t sequenceIndex) {
+ return m.seqReader() + m.seqEnd(sequenceIndex);
+}
+
void SplitAligner::initSpliceSignals(unsigned i) {
const uchar *toUnmasked = alphabet.numbersToUppercase;
const UnsplitAlignment& a = alns[i];
@@ -716,8 +724,8 @@ void SplitAligner::initSpliceSignals(unsigned i) {
err("can't find " + std::string(a.rname) + " in the genome");
size_t v = f->second % maxGenomeVolumes();
size_t c = f->second / maxGenomeVolumes();
- const uchar *chromBeg = genome[v].seqReader() + genome[v].seqBeg(c);
- const uchar *chromEnd = genome[v].seqReader() + genome[v].seqEnd(c);
+ const uchar *chromBeg = seqBeg(genome[v], c);
+ const uchar *chromEnd = seqEnd(genome[v], c);
if (a.rend > chromEnd - chromBeg)
err("alignment beyond the end of " + std::string(a.rname));
@@ -741,6 +749,68 @@ void SplitAligner::initSpliceSignals(unsigned i) {
}
}
+const uchar sequenceEndSentinel = 4;
+
+static void getNextSignal(uchar *out, const uchar *seq) {
+ out[0] = seq[0];
+ out[1] = (seq[0] == sequenceEndSentinel) ? sequenceEndSentinel : seq[1];
+}
+
+static void getPrevSignal(uchar *out, const uchar *seq) {
+ out[1] = seq[-1];
+ out[0] = (seq[-1] == sequenceEndSentinel) ? sequenceEndSentinel : seq[-2];
+}
+
+static char decodeOneBase(const uchar *decode, uchar x) {
+ return (x == sequenceEndSentinel) ? 'N' : decode[x];
+}
+
+static void decodeSpliceSignal(char *out,
+ const uchar *signal,
+ const uchar *decode,
+ const uchar *complement,
+ bool isSameStrand) {
+ if (isSameStrand) {
+ out[0] = decodeOneBase(decode, signal[0]);
+ out[1] = decodeOneBase(decode, signal[1]);
+ } else {
+ out[0] = decodeOneBase(decode, complement[signal[1]]);
+ out[1] = decodeOneBase(decode, complement[signal[0]]);
+ }
+}
+
+void SplitAligner::spliceBegSignal(char *out,
+ unsigned alnNum, unsigned queryPos,
+ bool isSenseStrand) const {
+ const UnsplitAlignment& a = alns[alnNum];
+ bool isForwardStrand = (a.qstrand == '+');
+ StringNumMap::const_iterator f = chromosomeIndex.find(a.rname);
+ size_t v = f->second % maxGenomeVolumes();
+ size_t c = f->second / maxGenomeVolumes();
+ uchar signal[2];
+ unsigned coord = cell(spliceBegCoords, alnNum, queryPos);
+ if (isForwardStrand) getNextSignal(signal, seqBeg(genome[v], c) + coord);
+ else getPrevSignal(signal, seqEnd(genome[v], c) - coord);
+ decodeSpliceSignal(out, signal, alphabet.decode, alphabet.complement,
+ isSenseStrand == isForwardStrand);
+}
+
+void SplitAligner::spliceEndSignal(char *out,
+ unsigned alnNum, unsigned queryPos,
+ bool isSenseStrand) const {
+ const UnsplitAlignment& a = alns[alnNum];
+ bool isForwardStrand = (a.qstrand == '+');
+ StringNumMap::const_iterator f = chromosomeIndex.find(a.rname);
+ size_t v = f->second % maxGenomeVolumes();
+ size_t c = f->second / maxGenomeVolumes();
+ uchar signal[2];
+ unsigned coord = cell(spliceEndCoords, alnNum, queryPos);
+ if (isForwardStrand) getPrevSignal(signal, seqBeg(genome[v], c) + coord);
+ else getNextSignal(signal, seqEnd(genome[v], c) - coord);
+ decodeSpliceSignal(out, signal, alphabet.decode, alphabet.complement,
+ isSenseStrand == isForwardStrand);
+}
+
struct RnameAndStrandLess {
RnameAndStrandLess(const UnsplitAlignment *a) : alns(a) {}
diff --git a/src/split/cbrc_split_aligner.hh b/src/split/cbrc_split_aligner.hh
index a2ca7ac..6e6885b 100644
--- a/src/split/cbrc_split_aligner.hh
+++ b/src/split/cbrc_split_aligner.hh
@@ -107,6 +107,16 @@ public:
// by flipSpliceSignals()
double spliceSignalStrandLogOdds() const;
+ // Gets the 2 genome bases immediately downstream of queryPos in
+ // alnNum, and writes them into the buffer pointed to by "out"
+ void spliceBegSignal(char *out, unsigned alnNum, unsigned queryPos,
+ bool isSenseStrand) const;
+
+ // Gets the 2 genome bases immediately upstream of queryPos in
+ // alnNum, and writes them into the buffer pointed to by "out"
+ void spliceEndSignal(char *out, unsigned alnNum, unsigned queryPos,
+ bool isSenseStrand) const;
+
private:
static const int numQualCodes = 64;
static int score_mat[64][64][numQualCodes];
diff --git a/src/split/last-split.cc b/src/split/last-split.cc
index 04790bc..1dd714e 100644
--- a/src/split/last-split.cc
+++ b/src/split/last-split.cc
@@ -66,11 +66,21 @@ static bool less(const cbrc::UnsplitAlignment& a,
return a.linesBeg < b.linesBeg; // stabilizes the sort
}
+static void printSense(double senseStrandLogOdds) {
+ double b = senseStrandLogOdds / std::log(2.0);
+ if (b < 0.1 && b > -0.1) b = 0;
+ else if (b > 10) b = std::floor(b + 0.5);
+ else if (b < -10) b = std::ceil(b - 0.5);
+ int precision = (b < 10 && b > -10) ? 2 : 3;
+ std::cout << std::setprecision(precision) << " sense=" << b;
+}
+
static void doOneAlignmentPart(cbrc::SplitAligner& sa,
const cbrc::UnsplitAlignment& a,
+ unsigned numOfParts, unsigned partNum,
unsigned alnNum,
unsigned qSliceBeg, unsigned qSliceEnd,
- double senseStrandLogOdds,
+ bool isSenseStrand, double senseStrandLogOdds,
const LastSplitOptions& opts,
bool isAlreadySplit) {
unsigned alnBeg, alnEnd;
@@ -118,15 +128,20 @@ static void doOneAlignmentPart(cbrc::SplitAligner& sa,
std::cout << std::setprecision(mismapPrecision)
<< "a score=" << score << " mismap=" << mismap;
- if (opts.direction == 2) {
- double b = senseStrandLogOdds / std::log(2.0);
- if (b < 0.1 && b > -0.1) b = 0;
- else if (b > 10) b = std::floor(b + 0.5);
- else if (b < -10) b = std::ceil(b - 0.5);
- int sensePrecision = (b < 10 && b > -10) ? 2 : 3;
- std::cout << std::setprecision(sensePrecision) << " sense=" << b;
+ if (opts.direction == 2) printSense(senseStrandLogOdds);
+ if (!opts.genome.empty() && !opts.no_split) {
+ char signal[3] = {0};
+ if (partNum > 0) {
+ sa.spliceEndSignal(signal, alnNum, qSliceBeg, isSenseStrand);
+ std::cout << (isSenseStrand ? " acc=" : " don=") << signal;
+ }
+ if (partNum + 1 < numOfParts) {
+ sa.spliceBegSignal(signal, alnNum, qSliceEnd, isSenseStrand);
+ std::cout << (isSenseStrand ? " don=" : " acc=") << signal;
+ }
}
std::cout << "\n" << std::setprecision(6);
+
if (a.qstrand == '-') cbrc::flipMafStrands(s.begin(), s.end());
if (opts.no_split && a.linesEnd[-1][0] == 'c') s.push_back(a.linesEnd[-1]);
cbrc::printMaf(s);
@@ -164,9 +179,11 @@ static void doOneQuery(std::vector<cbrc::UnsplitAlignment>::const_iterator beg,
if (opts.no_split) {
if (opts.verbose) std::cerr << "\n";
- for (unsigned i = 0; i < end - beg; ++i) {
- doOneAlignmentPart(sa, beg[i], i, beg[i].qstart, beg[i].qend,
- senseStrandLogOdds, opts, isAlreadySplit);
+ unsigned numOfParts = end - beg;
+ for (unsigned i = 0; i < numOfParts; ++i) {
+ doOneAlignmentPart(sa, beg[i], numOfParts, i, i,
+ beg[i].qstart, beg[i].qend,
+ true, senseStrandLogOdds, opts, isAlreadySplit);
}
} else {
long viterbiScore = LONG_MIN;
@@ -181,10 +198,11 @@ static void doOneQuery(std::vector<cbrc::UnsplitAlignment>::const_iterator beg,
sa.flipSpliceSignals();
if (opts.verbose) std::cerr << "\t" << viterbiScoreRev;
}
+ bool isSenseStrand = (viterbiScore >= viterbiScoreRev);
std::vector<unsigned> alnNums;
std::vector<unsigned> queryBegs;
std::vector<unsigned> queryEnds;
- if (viterbiScore >= viterbiScoreRev) {
+ if (isSenseStrand) {
sa.traceBack(viterbiScore, alnNums, queryBegs, queryEnds);
} else {
sa.flipSpliceSignals();
@@ -196,10 +214,13 @@ static void doOneQuery(std::vector<cbrc::UnsplitAlignment>::const_iterator beg,
std::reverse(queryEnds.begin(), queryEnds.end());
if (opts.verbose) std::cerr << "\n";
- for (unsigned k = 0; k < alnNums.size(); ++k) {
+ unsigned numOfParts = alnNums.size();
+ for (unsigned k = 0; k < numOfParts; ++k) {
unsigned i = alnNums[k];
- doOneAlignmentPart(sa, beg[i], i, queryBegs[k], queryEnds[k],
- senseStrandLogOdds, opts, isAlreadySplit);
+ doOneAlignmentPart(sa, beg[i], numOfParts, k, i,
+ queryBegs[k], queryEnds[k],
+ isSenseStrand, senseStrandLogOdds,
+ opts, isAlreadySplit);
}
}
}
diff --git a/src/version.hh b/src/version.hh
index 25bdae7..d87fb52 100644
--- a/src/version.hh
+++ b/src/version.hh
@@ -1 +1 @@
-"828"
+"830"
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/last-align.git
More information about the debian-med-commit
mailing list