[med-svn] [Git][med-team/last-align][upstream] New upstream version 1454

Nilesh Patra (@nilesh) gitlab at salsa.debian.org
Wed May 24 11:00:28 BST 2023



Nilesh Patra pushed to branch upstream at Debian Med / last-align


Commits:
d6583835 by Nilesh Patra at 2023-05-24T15:06:21+05:30
New upstream version 1454
- - - - -


23 changed files:

- bin/maf-convert
- doc/last-split.rst
- examples/last-bisulfite-paired.sh
- examples/last-bisulfite.sh
- src/Alignment.cc
- src/Centroid.cc
- src/Centroid.hh
- src/GappedXdropAlignerDna.cc
- src/LastEvaluer.cc
- src/LastEvaluer.hh
- src/LastalArguments.cc
- src/LastalArguments.hh
- src/lastal.cc
- src/lastdb.cc
- src/makefile
- src/split/cbrc_split_aligner.cc
- src/split/cbrc_split_aligner.hh
- src/split/last-split.cc
- src/split/mcf_last_splitter.cc
- src/split/mcf_last_splitter.hh
- test/last-test.out
- test/maf-convert-test.out
- test/maf-convert-test.sh


Changes:

=====================================
bin/maf-convert
=====================================
@@ -175,6 +175,7 @@ def mafInput(opts, lines):
             sLines.append(fields)
         elif line[0] == "a":
             aLine = line
+            opts.headerMode &= 1
         elif line[0] == "q":
             qLines.append(line)
         elif line[0] == "p":


=====================================
doc/last-split.rst
=====================================
@@ -303,15 +303,6 @@ Details
   You can combine both sources of error (roughly) by taking the
   maximum of the two error probabilities for each base.
 
-The following points matter only if you are doing something unusual
-(e.g. bisulfite alignment):
-
-* If the header has more than one score matrix, last-split will use
-  the first one.
-
-* It assumes this score matrix applies to all alignments, when the
-  alignments are oriented to use the forward strand of the query.
-
 last-split5
 -----------
 


=====================================
examples/last-bisulfite-paired.sh
=====================================
@@ -30,13 +30,13 @@ trap 'rm -f $tmp.*' EXIT
 cat > $tmp.script << 'EOF'
 t=$1.$$
 
-lastal -pBISF -s1 -Q1 -D1000 -i1 "$2" "$4" > $t.t1f
-lastal -pBISR -s0 -Q1 -D1000 -i1 "$3" "$4" > $t.t1r
+lastal -pBISF -S1 -s1 -Q1 -D1000 -i1 "$2" "$4" > $t.t1f
+lastal -pBISF -S1 -s0 -Q1 -D1000 -i1 "$3" "$4" > $t.t1r
 last-merge-batches $t.t1f $t.t1r > $t.t1
 rm $t.t1f $t.t1r
 
-lastal -pBISF -s0 -Q1 -D1000 -i1 "$2" "$5" > $t.t2f
-lastal -pBISR -s1 -Q1 -D1000 -i1 "$3" "$5" > $t.t2r
+lastal -pBISR -S1 -s0 -Q1 -D1000 -i1 "$2" "$5" > $t.t2f
+lastal -pBISR -S1 -s1 -Q1 -D1000 -i1 "$3" "$5" > $t.t2r
 last-merge-batches $t.t2f $t.t2r > $t.t2
 rm $t.t2f $t.t2r
 


=====================================
examples/last-bisulfite.sh
=====================================
@@ -30,8 +30,8 @@ trap 'rm -f $tmp.*' EXIT
 # Convert C to t, and all other letters to uppercase:
 perl -pe 'y/Cca-z/ttA-Z/ if $. % 4 == 2' "$@" > "$tmp".q
 
-lastal -pBISF -s1 -Q1 -D1000 -i1 "$my_f" "$tmp".q > "$tmp".f
-lastal -pBISR -s0 -Q1 -D1000 -i1 "$my_r" "$tmp".q > "$tmp".r
+lastal -pBISF -S1 -s1 -Q1 -D1000 -i1 "$my_f" "$tmp".q > "$tmp".f
+lastal -pBISF -S1 -s0 -Q1 -D1000 -i1 "$my_r" "$tmp".q > "$tmp".r
 
 last-merge-batches "$tmp".f "$tmp".r | last-split -m0.1 -fMAF+ |
 perl -F'(\s+)' -ane '$F[12] =~ y/ta/CG/ if /^s/ and $s++ % 2; print @F'


=====================================
src/Alignment.cc
=====================================
@@ -420,7 +420,10 @@ void Alignment::extend( std::vector< SegmentPair >& chunks,
     centroid.backward(isForward, probMat, gap, globality);
     if (outputType > 4 && outputType < 7) {  // gamma-centroid / LAMA alignment
       centroid.dp(outputType, gamma);
-      centroid.traceback(chunks, outputType, gamma);
+      size_t beg1, beg2, length;
+      while (centroid.traceback(beg1, beg2, length, outputType, gamma)) {
+	chunks.push_back(SegmentPair(beg1, beg2, length));
+      }
     }
     getColumnCodes(centroid, columnCodes, chunks, isForward);
     if (outputType == 7) {


=====================================
src/Centroid.cc
=====================================
@@ -336,33 +336,34 @@ namespace cbrc{
     return bestScore;
   }
 
-  void Centroid::traceback_centroid( std::vector< SegmentPair >& chunks,
-				     double gamma ) const{
-    //std::cout << "[c] bestAntiDiagonal=" << bestAntiDiagonal << ": bestPos1=" << bestPos1 << std::endl;
-
-    size_t k = bestAntiDiagonal;
-    size_t i = bestPos1;
-    size_t oldPos1 = i;
-
-    while( k > 0 ){
-      const size_t h = xa.hori( k, i );
-      const size_t v = xa.vert( k, i );
-      const size_t d = xa.diag( k, i );
+  bool Centroid::traceback_centroid(size_t &beg1, size_t &beg2, size_t &length,
+				    double gamma) {
+    size_t oldPos1 = bestPos1;
+
+    while (bestAntiDiagonal > 0) {
+      const size_t h = xa.hori(bestAntiDiagonal, bestPos1);
+      const size_t v = xa.vert(bestAntiDiagonal, bestPos1);
+      const size_t d = xa.diag(bestAntiDiagonal, bestPos1);
       const double matchProb = fM[d] * bM[d];
       const int m = maxIndex( X[d] + (gamma + 1) * matchProb - 1, X[h], X[v] );
       if( m == 0 ){
-	k -= 2;
-	i -= 1;
+	bestAntiDiagonal -= 2;
+	bestPos1 -= 1;
       }
-      if( (m > 0 && oldPos1 != i) || k == 0 ){
-	chunks.push_back( SegmentPair( i, k - i, oldPos1 - i ) );
+      if ((m > 0 && oldPos1 != bestPos1) || bestAntiDiagonal == 0) {
+	beg1 = bestPos1;
+	beg2 = bestAntiDiagonal - bestPos1;
+	length = oldPos1 - bestPos1;
+	return true;
       }
       if( m > 0 ){
-	k -= 1;
-	i -= (m == 1);
-	oldPos1 = i;
+	bestAntiDiagonal -= 1;
+	bestPos1 -= (m == 1);
+	oldPos1 = bestPos1;
       }
     }
+
+    return false;
   }
 
   double Centroid::dp_ama( double gamma ){
@@ -420,41 +421,42 @@ namespace cbrc{
     return bestScore;
   }
 
-  void Centroid::traceback_ama( std::vector< SegmentPair >& chunks,
-			    double gamma ) const{
-    //std::cout << "[c] bestAntiDiagonal=" << bestAntiDiagonal << ": bestPos1=" << bestPos1 << std::endl;
-
-    size_t k = bestAntiDiagonal;
-    size_t i = bestPos1;
-    size_t oldPos1 = i;
+  bool Centroid::traceback_ama(size_t &beg1, size_t &beg2, size_t &length,
+			       double gamma) {
+    size_t oldPos1 = bestPos1;
 
-    while( k > 0 ){
-      const size_t j = k - i;
-      const size_t h = xa.hori( k, i );
-      const size_t v = xa.vert( k, i );
-      const size_t d = xa.diag( k, i );
+    while (bestAntiDiagonal > 0) {
+      const size_t bestPos2 = bestAntiDiagonal - bestPos1;
+      const size_t h = xa.hori(bestAntiDiagonal, bestPos1);
+      const size_t v = xa.vert(bestAntiDiagonal, bestPos1);
+      const size_t d = xa.diag(bestAntiDiagonal, bestPos1);
       const double matchProb = fM[d] * bM[d];
-      const double thisD = mD[i];
-      const double thisI = mI[j];
-      const double thisXD = mX1[i] - thisD;
-      const double thisXI = mX2[j] - thisI;
+      const double thisD = mD[bestPos1];
+      const double thisI = mI[bestPos2];
+      const double thisXD = mX1[bestPos1] - thisD;
+      const double thisXI = mX2[bestPos2] - thisI;
       const double s = 2 * gamma * matchProb - (thisXD + thisXI);
       const double t = gamma * thisI - thisXI;
       const double u = gamma * thisD - thisXD;
       const int m = maxIndex( X[d] + s, X[h] + u, X[v] + t );
       if( m == 0 ){
-	k -= 2;
-	i -= 1;
+	bestAntiDiagonal -= 2;
+	bestPos1 -= 1;
       }
-      if( (m > 0 && oldPos1 != i) || k == 0 ){
-	chunks.push_back( SegmentPair( i, k - i, oldPos1 - i ) );
+      if ((m > 0 && oldPos1 != bestPos1) || bestAntiDiagonal == 0) {
+	beg1 = bestPos1;
+	beg2 = bestAntiDiagonal - bestPos1;
+	length = oldPos1 - bestPos1;
+	return true;
       }
       if( m > 0 ){
-	k -= 1;
-	i -= (m == 1);
-	oldPos1 = i;
+	bestAntiDiagonal -= 1;
+	bestPos1 -= (m == 1);
+	oldPos1 = bestPos1;
       }
     }
+
+    return false;
   }
 
   void Centroid::getMatchAmbiguities(std::vector<char>& ambiguityCodes,


=====================================
src/Centroid.hh
=====================================
@@ -6,7 +6,6 @@
 
 #include "GappedXdropAligner.hh"
 #include "mcf_gap_costs.hh"
-#include "SegmentPair.hh"
 #include "OneQualityScoreMatrix.hh"
 
 #include <assert.h>
@@ -68,17 +67,21 @@ namespace cbrc{
       return 0;
     }
 
-    void traceback(std::vector<SegmentPair> &chunks,
-		   int outputType, double gamma) const {
-      if (outputType==5) traceback_centroid(chunks, gamma);
-      if (outputType==6) traceback_ama(chunks, gamma);
+    // Get the next gapless chunk of the alignment, in far-to-near
+    // order, or return false if there isn't one
+    bool traceback(size_t &beg1, size_t &beg2, size_t &length,
+		   int outputType, double gamma) {
+      return (outputType < 6) ? traceback_centroid(beg1, beg2, length, gamma)
+	:                       traceback_ama(beg1, beg2, length, gamma);
     }
 
     double dp_centroid( double gamma );
-    void traceback_centroid( std::vector< SegmentPair >& chunks, double gamma ) const;
+    bool traceback_centroid(size_t &beg1, size_t &beg2, size_t &length,
+			    double gamma);
 
     double dp_ama( double gamma );
-    void traceback_ama( std::vector< SegmentPair >& chunks, double gamma ) const;
+    bool traceback_ama(size_t &beg1, size_t &beg2, size_t &length,
+		       double gamma);
 
     void getMatchAmbiguities(std::vector<char>& ambiguityCodes,
 			     size_t seq1end, size_t seq2end,


=====================================
src/GappedXdropAlignerDna.cc
=====================================
@@ -243,16 +243,15 @@ bool GappedXdropAligner::getNextChunkDna(size_t &end1,
   size_t h, d;
   int x, y, z;
 
-  while (1) {
+  do {
+    bestAntidiagonal -= 2;
+    bestSeq1position -= 1;
     h = origins[bestAntidiagonal * 2 + 2] + bestSeq1position - 1;
     d = origins[bestAntidiagonal * 2] + bestSeq1position - 1;
     x = xTinyScores[d] + scoreRises[bestAntidiagonal];
     y = yTinyScores[h] + delGrowCost;
     z = zTinyScores[h + 1] + insGrowCost;
-    if (x > y || x > z || bestAntidiagonal == 0) break;
-    bestAntidiagonal -= 2;
-    bestSeq1position -= 1;
-  }
+  } while (x <= y && x <= z && bestAntidiagonal > 0);
 
   length = end1 - bestSeq1position;
   if (bestAntidiagonal == 0) return true;


=====================================
src/LastEvaluer.cc
=====================================
@@ -422,10 +422,9 @@ void LastEvaluer::initFullScores(const const_dbl_ptr *substitutionProbs,
 				 const double *letterFreqs1, int alphabetSize1,
 				 const double *letterFreqs2, int alphabetSize2,
 				 const GapCosts &gapCosts, double scale,
+				 int numOfAlignments, int seqLength,
 				 int verbosity, bool isFrameshift) {
-  int numOfAlignments = 50;  // suggested by Y-K Yu, R Bundschuh, T Hwa, 2002
-  int seqLength1 = 200;  // xxx long enough to avoid edge effects ???
-
+  int seqLength1 = seqLength;
   int seqLength2 = seqLength1;
   int seqLength3 = seqLength2;
   if (isFrameshift) {


=====================================
src/LastEvaluer.hh
=====================================
@@ -71,19 +71,10 @@ public:
   void initFullScores(const const_dbl_ptr *substitutionProbs,
 		      const double *letterFreqs1, int alphabetSize1,
 		      const double *letterFreqs2, int alphabetSize2,
-		      const GapCosts &gapCosts, double scale, int verbosity,
+		      const GapCosts &gapCosts, double scale,
+		      int numOfAlignments, int seqLength, int verbosity,
 		      bool isFrameshift = false);
 
-  void initFrameshift(const const_dbl_ptr *substitutionProbs,
-		      const double *proteinLetterFreqs, int numProteinLetters,
-		      const double *tranDnaLetterFreqs, int numTranDnaLetters,
-		      const GapCosts &gapCosts, double scale, int verbosity) {
-    initFullScores(substitutionProbs,
-		   proteinLetterFreqs, numProteinLetters,
-		   tranDnaLetterFreqs, numTranDnaLetters,
-		   gapCosts, scale, verbosity, true);
-  }
-
   void setSearchSpace(double databaseLength,  // number of database letters
 		      double databaseMaxSeqLength,  // length of longest seq
 		      double numOfStrands) {  // 1 or 2


=====================================
src/LastalArguments.cc
=====================================
@@ -20,6 +20,10 @@ static void badopt( char opt, const char* arg ){
   ERR( std::string("bad option value: -") + opt + ' ' + arg );
 }
 
+static void badopt(const char *opt, const char *arg) {
+  ERR(std::string("bad option value: --") + opt  + '=' + arg);
+}
+
 static void parseIntList(const char *in, std::vector<int> &out) {
   std::istringstream s(in);
   out.clear();
@@ -107,6 +111,8 @@ LastalArguments::LastalArguments() :
   gamma(1),
   geneticCodeFile("1"),
   verbosity(0),
+  gumbelSimSequenceLength(0),
+  gumbelSimAlignmentCount(50),  // from Y-K Yu, R Bundschuh, T Hwa, 2002
   isSplit(false){}
 
 void LastalArguments::fromArgs( int argc, char** argv, bool optionsOnly ){
@@ -211,6 +217,8 @@ Split options:\n\
   static struct option lOpts[] = {
     { "help",    no_argument,       0, 'h' },
     { "version", no_argument,       0, 'V' },
+    { "gumbel-len", required_argument, 0, 'L' - 'A' },
+    { "gumbel-num", required_argument, 0, 'N' - 'A' },
     { "split",   no_argument,       0, 128 + 0 },
     { "splice",  no_argument,       0, 128 + 1 },
     { "split-f", required_argument, 0, 128 + 'f' },
@@ -226,8 +234,8 @@ Split options:\n\
     { 0, 0, 0, 0}
   };
 
-  int c;
-  while ((c = getopt_long(argc, argv, sOpts, lOpts, &c)) != -1) {
+  int c, lOptsIndex;
+  while ((c = getopt_long(argc, argv, sOpts, lOpts, &lOptsIndex)) != -1) {
     switch(c){
     case 'h':
       std::cout << help;
@@ -398,6 +406,15 @@ Split options:\n\
       unstringify( inputFormat, optarg );
       break;
 
+    case 'L' - 'A':
+      unstringify(gumbelSimSequenceLength, optarg);
+      if (gumbelSimSequenceLength <= 0) badopt(lOpts[lOptsIndex].name, optarg);
+      break;
+    case 'N' - 'A':
+      unstringify(gumbelSimAlignmentCount, optarg);
+      if (gumbelSimAlignmentCount <= 0) badopt(lOpts[lOptsIndex].name, optarg);
+      break;
+
     case 128 + 1:
       splitOpts.isSplicedAlignment = true;
       break;
@@ -600,6 +617,11 @@ void LastalArguments::setDefaultsFromAlphabet( bool isDna, bool isProtein,
   if (scoreType == 0 && frameshiftCosts.size() > 1)
     ERR("can't combine option -J0 with new-style frameshifts");
 
+  if (gumbelSimSequenceLength == 0) {
+    gumbelSimSequenceLength = isTranslated() ? 200 : 500;
+    // xxx long enough to avoid edge effects ???
+  }
+
   if (frameshiftCosts.size() == 1 && frameshiftCosts[0] > 0) {
     if (frameshiftCosts[0] < delGrowCosts[0])
       ERR("the frameshift cost must not be less than the gap extension cost");


=====================================
src/LastalArguments.hh
=====================================
@@ -121,6 +121,9 @@ struct LastalArguments{
   std::string geneticCodeFile;
   int verbosity;
 
+  int gumbelSimSequenceLength;
+  int gumbelSimAlignmentCount;
+
   bool isSplit;
   LastSplitOptions splitOpts;
 


=====================================
src/lastal.cc
=====================================
@@ -345,11 +345,15 @@ static void calculateScoreStatistics(const std::string& matrixName,
     gaplessEvaluer.init(0, 0, 0, alph.letters.c_str(), scoreMat, p1, p2, false,
 			0, 0, 0, 0, fsCost, geneticCode, 0, 0);
     if (args.scoreType != 0 && isGapped) {
-      if (args.temperature < 0) return;
+      if (args.temperature < 0 &&
+	  gapCosts.delProbPieces[0].openProb +
+	  gapCosts.insProbPieces[0].openProb > 0) return;
       unsigned alphSize2 = scoreMatrix.isCodonCols() ? 64 : alph.size;
       evaluer.initFullScores(fwdMatrices.ratios, p1, alph.size,
 			     stats.letterProbs2(), alphSize2,
-			     gapCosts, stats.lambda(), args.verbosity,
+			     gapCosts, stats.lambda(),
+			     args.gumbelSimAlignmentCount,
+			     args.gumbelSimSequenceLength, args.verbosity,
 			     gapCosts.isNewFrameshifts());
     } else {
       const mcf::GapCosts::Piece &del = gapCosts.delPieces[0];
@@ -1587,6 +1591,7 @@ void lastal( int argc, char** argv ){
     setLastSplitParams(splitParams, args.splitOpts, scoreMatrix.cells,
 		       scoreMatrix.rowSymbols.c_str(),
 		       scoreMatrix.colSymbols.c_str(),
+		       args.isQueryStrandMatrix,
 		       args.delOpenCosts[0], args.delGrowCosts[0],
 		       args.insOpenCosts[0], args.insGrowCosts[0],
 		       args.temperature, refLetters, args.inputFormat);


=====================================
src/lastdb.cc
=====================================
@@ -109,6 +109,7 @@ void writePrjFile( const std::string& fileName, const LastdbArguments& args,
 #include "version.hh"
     << '\n';
   f << "alphabet=" << alph << '\n';
+  if (args.strand != 1) f << "strand=" << args.strand << '\n';
   f << "numofsequences=" << sequenceCount << '\n';
   f << "numofletters=" << letterTotal << '\n';
   f << "maxsequenceletters=" << maxSeqLen << '\n';


=====================================
src/makefile
=====================================
@@ -26,19 +26,19 @@ mcf_substitution_matrix_stats.o tantan.o LastdbArguments.o
 indexObj4 = MultiSequence.o MultiSequenceQual.o SubsetSuffixArray.o	\
 SubsetSuffixArraySort.o lastdb.o
 
-alignObj0 = Alphabet.o CyclicSubsetSeed.o LambdaCalculator.o		\
-ScoreMatrix.o SubsetMinimizerFinder.o TantanMasker.o			\
-dna_words_finder.o fileMap.o tantan.o LastalArguments.o			\
-GappedXdropAligner.o GappedXdropAlignerDna.o GappedXdropAlignerPssm.o	\
-GappedXdropAligner2qual.o GappedXdropAligner3frame.o			\
-GappedXdropAlignerFrame.o mcf_aligment_path_adder.o			\
-mcf_frameshift_xdrop_aligner.o mcf_gap_costs.o GeneticCode.o		\
-GreedyXdropAligner.o LastEvaluer.o OneQualityScoreMatrix.o		\
-QualityPssmMaker.o TwoQualityScoreMatrix.o cbrc_linalg.o		\
-mcf_substitution_matrix_stats.o split/cbrc_unsplit_alignment.o		\
-split/last_split_options.o $(alpObj)
-
-alignObj4 = Alignment.o AlignmentPot.o AlignmentWrite.o Centroid.o	\
+alignObj0 = Alphabet.o Centroid.o CyclicSubsetSeed.o			\
+LambdaCalculator.o ScoreMatrix.o SubsetMinimizerFinder.o		\
+TantanMasker.o dna_words_finder.o fileMap.o tantan.o			\
+LastalArguments.o GappedXdropAligner.o GappedXdropAlignerDna.o		\
+GappedXdropAlignerPssm.o GappedXdropAligner2qual.o			\
+GappedXdropAligner3frame.o GappedXdropAlignerFrame.o			\
+mcf_aligment_path_adder.o mcf_frameshift_xdrop_aligner.o		\
+mcf_gap_costs.o GeneticCode.o GreedyXdropAligner.o LastEvaluer.o	\
+OneQualityScoreMatrix.o QualityPssmMaker.o TwoQualityScoreMatrix.o	\
+cbrc_linalg.o mcf_substitution_matrix_stats.o				\
+split/cbrc_unsplit_alignment.o split/last_split_options.o $(alpObj)
+
+alignObj4 = Alignment.o AlignmentPot.o AlignmentWrite.o			\
 DiagonalTable.o MultiSequence.o MultiSequenceQual.o SegmentPair.o	\
 SegmentPairPot.o SubsetSuffixArray.o SubsetSuffixArraySearch.o		\
 lastal.o split/cbrc_split_aligner.o split/mcf_last_splitter.o
@@ -143,7 +143,7 @@ ScoreMatrixData.hh: ../data/*.mat
 	../build/mat-inc.sh ../data/*.mat > $@
 
 VERSION1 = git describe --dirty
-VERSION2 = echo ' (HEAD -> main, tag: 1447) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
+VERSION2 = echo ' (HEAD -> main, tag: 1454) ' | sed -e 's/.*tag: *//' -e 's/[,) ].*//'
 
 VERSION = \"`test -e ../.git && $(VERSION1) || $(VERSION2)`\"
 
@@ -163,28 +163,28 @@ depend:
 	mv m makefile
 Alignment.o Alignment.o5 Alignment.o8: Alignment.cc Alignment.hh Centroid.hh GappedXdropAligner.hh \
  mcf_big_seq.hh mcf_contiguous_queue.hh mcf_reverse_queue.hh \
- mcf_gap_costs.hh mcf_simd.hh ScoreMatrixRow.hh SegmentPair.hh \
- OneQualityScoreMatrix.hh mcf_substitution_matrix_stats.hh \
- GreedyXdropAligner.hh mcf_frameshift_xdrop_aligner.hh Alphabet.hh \
- GeneticCode.hh TwoQualityScoreMatrix.hh
+ mcf_gap_costs.hh mcf_simd.hh ScoreMatrixRow.hh OneQualityScoreMatrix.hh \
+ mcf_substitution_matrix_stats.hh GreedyXdropAligner.hh SegmentPair.hh \
+ mcf_frameshift_xdrop_aligner.hh Alphabet.hh GeneticCode.hh \
+ TwoQualityScoreMatrix.hh
 AlignmentPot.o AlignmentPot.o5 AlignmentPot.o8: AlignmentPot.cc AlignmentPot.hh Alignment.hh Centroid.hh \
  GappedXdropAligner.hh mcf_big_seq.hh mcf_contiguous_queue.hh \
  mcf_reverse_queue.hh mcf_gap_costs.hh mcf_simd.hh ScoreMatrixRow.hh \
- SegmentPair.hh OneQualityScoreMatrix.hh mcf_substitution_matrix_stats.hh \
- GreedyXdropAligner.hh mcf_frameshift_xdrop_aligner.hh
+ OneQualityScoreMatrix.hh mcf_substitution_matrix_stats.hh \
+ GreedyXdropAligner.hh SegmentPair.hh mcf_frameshift_xdrop_aligner.hh
 AlignmentWrite.o AlignmentWrite.o5 AlignmentWrite.o8: AlignmentWrite.cc Alignment.hh Centroid.hh \
  GappedXdropAligner.hh mcf_big_seq.hh mcf_contiguous_queue.hh \
  mcf_reverse_queue.hh mcf_gap_costs.hh mcf_simd.hh ScoreMatrixRow.hh \
- SegmentPair.hh OneQualityScoreMatrix.hh mcf_substitution_matrix_stats.hh \
- GreedyXdropAligner.hh mcf_frameshift_xdrop_aligner.hh GeneticCode.hh \
- LastEvaluer.hh alp/sls_alignment_evaluer.hpp alp/sls_pvalues.hpp \
- alp/sls_basic.hpp MultiSequence.hh VectorOrMmap.hh Mmap.hh fileMap.hh \
- stringify.hh Alphabet.hh
+ OneQualityScoreMatrix.hh mcf_substitution_matrix_stats.hh \
+ GreedyXdropAligner.hh SegmentPair.hh mcf_frameshift_xdrop_aligner.hh \
+ GeneticCode.hh LastEvaluer.hh alp/sls_alignment_evaluer.hpp \
+ alp/sls_pvalues.hpp alp/sls_basic.hpp MultiSequence.hh VectorOrMmap.hh \
+ Mmap.hh fileMap.hh stringify.hh Alphabet.hh
 Alphabet.o Alphabet.o5 Alphabet.o8: Alphabet.cc Alphabet.hh mcf_big_seq.hh
 cbrc_linalg.o cbrc_linalg.o5 cbrc_linalg.o8: cbrc_linalg.cc cbrc_linalg.hh
 Centroid.o Centroid.o5 Centroid.o8: Centroid.cc Centroid.hh GappedXdropAligner.hh mcf_big_seq.hh \
  mcf_contiguous_queue.hh mcf_reverse_queue.hh mcf_gap_costs.hh \
- mcf_simd.hh ScoreMatrixRow.hh SegmentPair.hh OneQualityScoreMatrix.hh \
+ mcf_simd.hh ScoreMatrixRow.hh OneQualityScoreMatrix.hh \
  mcf_substitution_matrix_stats.hh GappedXdropAlignerInl.hh
 CyclicSubsetSeed.o CyclicSubsetSeed.o5 CyclicSubsetSeed.o8: CyclicSubsetSeed.cc CyclicSubsetSeed.hh \
  CyclicSubsetSeedData.hh zio.hh mcf_zstream.hh stringify.hh
@@ -236,7 +236,7 @@ lastal.o lastal.o5 lastal.o8: lastal.cc last.hh Alphabet.hh mcf_big_seq.hh \
  alp/sls_alignment_evaluer.hpp alp/sls_pvalues.hpp alp/sls_basic.hpp \
  GeneticCode.hh SubsetMinimizerFinder.hh AlignmentPot.hh Alignment.hh \
  Centroid.hh GappedXdropAligner.hh mcf_contiguous_queue.hh \
- mcf_reverse_queue.hh mcf_simd.hh SegmentPair.hh GreedyXdropAligner.hh \
+ mcf_reverse_queue.hh mcf_simd.hh GreedyXdropAligner.hh SegmentPair.hh \
  SegmentPairPot.hh ScoreMatrix.hh TantanMasker.hh tantan.hh \
  DiagonalTable.hh gaplessXdrop.hh gaplessPssmXdrop.hh \
  gaplessTwoQualityXdrop.hh zio.hh mcf_zstream.hh threadUtil.hh \


=====================================
src/split/cbrc_split_aligner.cc
=====================================
@@ -143,7 +143,7 @@ void mergeInto(unsigned* beg1,
   }
 }
 
-int SplitAlignerParams::score_mat[64][64][numQualCodes];
+int SplitAlignerParams::substitutionMatrix[2][64][64][numQualCodes];
 
 // The score for a cis-splice with the given distance (i.e. intron length)
 int SplitAlignerParams::calcSpliceScore(double dist) const {
@@ -778,6 +778,7 @@ void SplitAligner::calcBaseScores(const SplitAlignerParams &params,
 
   const UnsplitAlignment& a = alns[i];
   const size_t origin = matrixRowOrigins[i];
+  const bool isRev = !a.isForwardStrand();
 
   int *matBeg = &Smat[(origin + dpBeg(i)) * 2];
   int *alnBeg = &Smat[(origin + a.qstart) * 2];
@@ -815,7 +816,7 @@ void SplitAligner::calcBaseScores(const SplitAlignerParams &params,
       assert(q >= 0);
       if (q >= params.numQualCodes) q = params.numQualCodes - 1;
       *matBeg++ = delScore;
-      *matBeg++ = params.score_mat[x % 64][y % 64][q];
+      *matBeg++ = params.substitutionMatrix[isRev][x % 64][y % 64][q];
       delScore = 0;
       insCompensationScore = 0;
     }
@@ -1446,9 +1447,16 @@ static int matrixLookup(const std::vector< std::vector<int> >& matrix,
   return (r && c) ? matrix.at(r - rowNames).at(c - colNames) : min(matrix);
 }
 
+static int complementedMatrixIndex(int i) {
+  static const char fwd[] = "ACGTRYKMBDHVacgtrykmbdhv";
+  static const char rev[] = "TGCAYRMKVHDBtgcayrmkvhdb";
+  const char *p = strchr(fwd, i + 64);
+  return p ? rev[p - fwd] - 64 : i;
+}
+
 void SplitAlignerParams::setScoreMat(const std::vector< std::vector<int> > &sm,
 				     const char *rowNames,
-				     const char *colNames) {
+				     const char *colNames, bool isQrySeq) {
   const std::string bases = "ACGT";
 
   // Reverse-engineer the abundances of ACGT from the score matrix:
@@ -1462,6 +1470,7 @@ void SplitAlignerParams::setScoreMat(const std::vector< std::vector<int> > &sm,
 
   mcf::SubstitutionMatrixStats stats;
   stats.calcFromScale(&bmat[0], blen, scale);
+  const double *p2 = stats.letterProbs2();
 
   for (int i = 64; i < 128; ++i) {
     char x = std::toupper(i);
@@ -1471,16 +1480,22 @@ void SplitAlignerParams::setScoreMat(const std::vector< std::vector<int> > &sm,
       for (int q = 0; q < numQualCodes; ++q) {
 	std::string::size_type xc = bases.find(x);
 	std::string::size_type yc = bases.find(y);
-	if (xc == std::string::npos || yc == std::string::npos) {
-	  score_mat[i % 64][j % 64][q] = score;
-	} else {
-	  double p = stats.letterProbs2()[yc];
-	  score_mat[i % 64][j % 64][q] = generalizedScore(score, scale, q, p);
-	}
+	substitutionMatrix[0][i % 64][j % 64][q] =
+	  (xc == std::string::npos || yc == std::string::npos) ?
+	  score : generalizedScore(score, scale, q, p2[yc]);
       }
     }
   }
 
+  for (int i = 0; i < 64; ++i) {
+    for (int j = 0; j < 64; ++j) {
+      int x = isQrySeq ? i : complementedMatrixIndex(i);
+      int y = isQrySeq ? j : complementedMatrixIndex(j);
+      memcpy(substitutionMatrix[1][i][j], substitutionMatrix[0][x][y],
+	     sizeof substitutionMatrix[0][0][0]);
+    }
+  }
+
   maxMatchScore = max(sm);
 }
 


=====================================
src/split/cbrc_split_aligner.hh
=====================================
@@ -46,7 +46,7 @@ struct SplitAlignerParams {
 		       double meanLogDistIn, double sdevLogDistIn);
 
   void setScoreMat(const std::vector< std::vector<int> > &sm,
-		   const char *rowNames, const char *colNames);
+		   const char *rowNames, const char *colNames, bool isQrySeq);
 
   void readGenome(const std::string &baseName);
 
@@ -57,7 +57,7 @@ struct SplitAlignerParams {
   void print() const;
 
   static const int numQualCodes = 64;
-  static int score_mat[64][64][numQualCodes];
+  static int substitutionMatrix[2][64][64][numQualCodes];
   int maxMatchScore;
   int qualityOffset;
   int delOpenScore;


=====================================
src/split/last-split.cc
=====================================
@@ -177,6 +177,7 @@ void lastSplit(LastSplitOptions& opts) {
   std::string word, name, key;
   int state = 0;
   int sequenceFormat = 1;  // xxx ???
+  int substitutionMatrixStrand = 1;
   int gapExistenceCost = -1;
   int gapExtensionCost = -1;
   int insExistenceCost = -1;
@@ -231,6 +232,7 @@ void lastSplit(LastSplitOptions& opts) {
 	    if (key == "A") ws >> insExistenceCost;
 	    if (key == "B") ws >> insExtensionCost;
 	    if (key == "e") ws >> lastalScoreThreshold;
+	    if (key == "S") ws >> substitutionMatrixStrand;
 	    if (key == "t") ws >> scale;
 	    if (key == "Q") ws >> sequenceFormat;
 	    if (key == "letters") ws >> genomeSize;
@@ -253,6 +255,7 @@ void lastSplit(LastSplitOptions& opts) {
 	  }
 	  setLastSplitParams(params, opts,
 			     scoreMatrix, rowNames.c_str(), colNames.c_str(),
+			     substitutionMatrixStrand,
 			     gapExistenceCost, gapExtensionCost,
 			     insExistenceCost, insExtensionCost,
 			     scale, genomeSize, sequenceFormat);


=====================================
src/split/mcf_last_splitter.cc
=====================================
@@ -24,6 +24,8 @@ static bool less(const cbrc::UnsplitAlignment& a,
   if (qalignCmp != 0        ) return qalignCmp < 0;
   int ralignCmp = strcmp(a.ralign, b.ralign);
   if (ralignCmp != 0        ) return ralignCmp < 0;
+  int rnameCmp = strcmp(a.rname, b.rname);
+  if (rnameCmp  != 0        ) return rnameCmp  < 0;
   return a.linesBeg < b.linesBeg;  // stabilizes the sort
 }
 
@@ -272,6 +274,7 @@ void setLastSplitParams(cbrc::SplitAlignerParams &params,
 			const LastSplitOptions &opts,
 			const std::vector< std::vector<int> > &scoreMatrix,
 			const char *rowNames, const char *colNames,
+			int scoreMatrixStrand,
 			int delOpenCost, int delGrowCost,
 			int insOpenCost, int insGrowCost,
 			double scale, double genomeSize, int sequenceFormat) {
@@ -296,7 +299,7 @@ void setLastSplitParams(cbrc::SplitAlignerParams &params,
 		   -jumpCost, -restartCost, scale, qualityOffset);
   double splicePrior = opts.isSplicedAlignment ? opts.cis : 0.0;
   params.setSpliceParams(splicePrior, opts.mean, opts.sdev);
-  params.setScoreMat(scoreMatrix, rowNames, colNames);
+  params.setScoreMat(scoreMatrix, rowNames, colNames, scoreMatrixStrand);
   params.setSpliceSignals();
   if (!opts.genome.empty()) params.readGenome(opts.genome);
 }


=====================================
src/split/mcf_last_splitter.hh
=====================================
@@ -15,6 +15,7 @@ void setLastSplitParams(cbrc::SplitAlignerParams &params,
 			const LastSplitOptions &opts,
 			const std::vector< std::vector<int> > &scoreMatrix,
 			const char *rowNames, const char *colNames,
+			int scoreMatrixStrand,
 			int delOpenCost, int delGrowCost,
 			int insOpenCost, int insGrowCost,
 			double scale, double genomeSize, int sequenceFormat);


=====================================
test/last-test.out
=====================================
@@ -1576,11 +1576,11 @@ TEST lastal -e40 -u3 -fTAB /tmp/last-test hg19-M.fa
 # Query sequences=1 normal letters=16571
 
 #
-# a=29 b=2 A=28 B=1 e=119.332 d=63 x=119 y=47 z=119 D=1e+06 E=2.80852e+07
+# a=29 b=2 A=28 B=1 e=120.16 d=63 x=120 y=47 z=120 D=1e+06 E=2.80852e+07
 # R=01 u=2 s=2 S=1 M=0 T=0 m=10 l=1 n=10 k=1 w=1000 t=4.72795 j=3 Q=0
 # /tmp/last-test
 # Reference sequences=2 normal letters=17803
-# lambda=0.211508 K=2.56984
+# lambda=0.211508 K=3.06201
 #
 #     A   C   G   T   M   S   K   W   R   Y   B   D   H   V
 # A   4  -3  -2  -5   2  -3  -4   1   2  -4  -3   1   0   1
@@ -1603,17 +1603,17 @@ TEST lastal -e40 -u3 -fTAB /tmp/last-test hg19-M.fa
 #
 # score	name1	start1	alnSize1	strand1	seqSize1	name2	start2	alnSize2	strand2	seqSize2	blocks
 9472.4	chrM	1234	13651	+	16775	chrM	585	13564	+	16571	9,0:1,56,0:1,82,3:0,63,1:0,19,1:0,55,0:1,58,1:0,98,5:0,14,3:0,114,2:0,95,4:0,30,1:0,34,1:0,38,0:1,33,0:1,60,6:0,57,5:0,34,0:7,60,0:1,9,2:0,33,16:0,21,0:9,25,0:2,36,14:0,21,1:0,16,4:0,26,4:0,11,2:0,56,0:1,43,1:0,24,3:0,93,1:0,41,0:4,11,16:0,38,0:1,40,3:0,30,1:0,16,0:8,38,0:1,24,0:2,37,1:0,11,0:9,23,0:4,25,3:0,13,3:0,41,0:2,37,1:0,33,2:0,37,3:0,212,2:0,8,4:0,70,12:0,16,0:1,15,0:4,21,1:0,148,1:0,53,0:1,67,6:0,19,3:0,21,5:0,15,1:0,44,0:1,29,7:0,20,21:0,930,0:2,29,2:0,20,0:1,11,10:0,70,0:3,63,1:0,673,0:1,26,1:0,263,0:5,29,2:0,79,5:0,32,3:0,21,0:1,68,2:0,80,0:30,81,2:0,35,3:0,17,0:8,450,0:3,1085,0:3,67,5:0,58,1:0,18,1:0,152,0:3,514,0:17,18,0:4,14,2:0,27,0:4,62,0:9,33,0:5,52,8:0,138,3:0,1406,1:0,21,3:0,340,3:0,48,3:0,458,0:6,12,6:0,1281,8:0,129,9:0,51,3:0,23,0:4,29,0:5,515,0:3,900,8:0,17,0:8,171,3:0,84	EG2=0	E=0
-2501.4	chrM	14904	1273	+	16775	chrM	14756	1268	+	16571	1127,2:0,22,0:1,24,4:0,94	EG2=4.4e-212	E=2.6e-221
-247.4	chrM	16213	562	+	16775	chrM	14155	588	+	16571	135,0:27,75,0:5,29,5:0,246,2:0,54,0:1,16	EG2=4.8e-05	E=2.9e-14
-127.3	chrM	14396	89	+	16775	chrM	8953	95	+	16571	35,0:9,13,3:0,38	EG2=5.3e+06	E=0.0031
-121.1	chrM	7217	72	+	16775	chrM	3316	72	+	16571	72	EG2=1.9e+07	E=0.011
+2501.4	chrM	14904	1273	+	16775	chrM	14756	1268	+	16571	1127,2:0,22,0:1,24,4:0,94	EG2=5.2e-212	E=3.1e-221
+247.4	chrM	16213	562	+	16775	chrM	14155	588	+	16571	135,0:27,75,0:5,29,5:0,246,2:0,54,0:1,16	EG2=5.8e-05	E=3.4e-14
+127.3	chrM	14396	89	+	16775	chrM	8953	95	+	16571	35,0:9,13,3:0,38	EG2=6.3e+06	E=0.0037
+121.1	chrM	7217	72	+	16775	chrM	3316	72	+	16571	72	EG2=2.3e+07	E=0.014
 # Query sequences=1 normal letters=16571
 #
-# a=29 b=2 A=28 B=1 e=86.6721 d=63 x=86 y=47 z=86 D=1000 E=2.80852e+10
+# a=29 b=2 A=28 B=1 e=87.5006 d=63 x=87 y=47 z=87 D=1000 E=2.80852e+10
 # R=01 u=2 s=2 S=1 M=0 T=0 m=10 l=1 n=10 k=1 w=1000 t=4.72795 j=3 Q=1
 # /tmp/last-test
 # Reference sequences=2 normal letters=17803
-# lambda=0.211508 K=2.56984
+# lambda=0.211508 K=3.06201
 #
 #     A   C   G   T   M   S   K   W   R   Y   B   D   H   V
 # A   4  -3  -2  -5   2  -3  -4   1   2  -4  -3   1   0   1
@@ -1636,32 +1636,32 @@ TEST lastal -e40 -u3 -fTAB /tmp/last-test hg19-M.fa
 #
 # name start alnSize strand seqSize alignment
 #
-a score=99.2 EG2=2e+09 E=0.0026
+a score=99.2 EG2=2.4e+09 E=0.0031
 s chrM         2330 24 + 16775 ACAAGGTCGCCTTGACTTGCCCCC
 s SRR001981.51    1 24 -    36 ACTATTTCGCCTCGACTTACCCCC
 q SRR001981.51                 IC0I2D6AIII3II;I at IIIIIII
 
-a score=92.1 EG2=9e+09 E=0.011
+a score=92.1 EG2=1.1e+10 E=0.014
 s chrM          6998 26 + 16775 GTAGAAGCTGGGGCCGGCACAGGATG
 s SRR001981.100    7 26 +    36 GAAGAAGCCGGGCCAGACACAGGGAG
 q SRR001981.100                 (IIIIIII3IIII&II%II*<I(&&&
 
-a score=97.4 EG2=2.9e+09 E=0.0037
+a score=97.4 EG2=3.4e+09 E=0.0044
 s chrM          14374 22 + 16775 ACGAACCCCCAGCAATCACCCC
 s SRR001981.479    14 22 -    36 ACAAACCCCTAGCGATCACCTC
 q SRR001981.479                  F84IIIIIIII&IIDI)IIIII
 
-a score=93.8 EG2=6.2e+09 E=0.0079
+a score=93.8 EG2=7.3e+09 E=0.0094
 s chrM          7971 17 + 16775 ACCCAGACGCCTACACA
 s SRR001981.641    8 17 -    36 ACCCAGACGCCCACACA
 q SRR001981.641                 IIIII:4IEIIIIII/I
 
-a score=89.4 EG2=1.6e+10 E=0.02
+a score=89.4 EG2=1.9e+10 E=0.024
 s chrM          14413 31 + 16775 CCCAGCCATTCTCCCAATCATACGACTAGCC
 s SRR001981.641     5 31 -    36 CCGACCCAGACGCCCACACAGACAACTAGCC
 q SRR001981.641                  I8?IIIII:4IEIIIIII/IAIIIIIIIIII
 
-a score=88.4 EG2=2e+10 E=0.025
+a score=88.4 EG2=2.3e+10 E=0.03
 s chrM          2708 21 + 16775 AGCTGGGTGATAGCTGGTTAC
 s SRR001981.832   14 21 +    36 AGGTGAGTGATAGTTGGTTAC
 q SRR001981.832                 =IIII at IIIII2III,?IIH8


=====================================
test/maf-convert-test.out
=====================================
@@ -23343,6 +23343,80 @@ SRR359290.10000	16	chr12	110785492	255	27=1X47=	*	0	0	AATGTTTGCGCATGTTCGAGATGAGT
 @HD	VN:1.3	SO:unsorted
 90089	0	chrX	23075590	100	48H5=1X3=1I27=1X3=1D1=1X24=7D2=5I2=1X3=1X10=1X17=1I9=1D4=2D2=1X40=1D7=3D56=1D10=1D9=1D13=3D90=2D10=1I40=1D50=1D5=1D68=3D65=3D1X1=2D30=1808H	*	0	0	GCCGGAGGAAGTCCGAGAGGAAGCGGAGGCGCGAGCTAGAGCAGCGGCTCCCGTCGGCCTCCGGCAGCGTTAAACTAGGAGGCCGGAAGGCAGGCGCGCACGGCGGAGAAGGCGGGCGGAGGCGAACATATTAATGAAAAGTGCCATAAACTGAAAAACCAAACATAGGGTAGGTGCTGCAAAGTTGGTGGTAGCTGTGGCAGTGTTTTTACTGACATTTTATGTTATTCTCAAGTATTGAAATAAAATGGATGCAAGTTTAAATCTATTTGCAAGATCAGCATTGGACACAGCTGCACGTTCTACAAAGCCTCCCAGATATAAGTGTGGGATCTCAAAAGCTTGCCCTGAAGCATTTTGCTTTTTAAAATGGCAAGTGGAGCAGCCAACGTGGTGGGACCCAAATCTGCCTGGAAGATAATGTTTTAATGAGTGGTGTTAAGAATAATGTTGAAGAGGATCAATGTTGCCTTGGCAAATGGAAAAACAGGAGAAGTATTAGACACTAAATATTTTGACATGTGGGGAGATGTGGCACCATTTATTGAGTTTCTGAAGGCCATACAAGATGGAACAATAGTTTTAATGGGGTGATGATGGAGCAACCAAACTCAATGATGAG	*	NM:i:50	AS:i:3061
 90089	16	chr7	121348854	90	47H42=1X1=1X1D14=2D4=1D43=1D11=1D65=1I37=1D114=2D184=2D15=1D47=1D75=3D120=3D53=2D3=1D25=2D48=1X12=1X55=1D94=1D52=2D69=1D19=1D94=1D33=2X16=1D8=2D84=1D40=1D1=1X2=1D46=1D1=1D8=1D13=1I26=1D22=3D1X49=592N9=1D9=1X2D41=1D2=1D20=1D19=7D675H	*	0	0	ACAAAGCAAAACCAAGAGTATTTATTTTCATTATTTTAAAAAGCCCAACATTTATTTCAATCCTGGAACTGTAAAATAACTATTCTTTTAATTTACAGAATTGAGGAAGTATTTTAATGTCAAAATACACATGCTGAATGTGATTTAGCATATAAGCACCAAAAATCCCAAAAGTATACAAAAGATTGTTTAAACATATAAGGTGCATTTATTCATGAATAATTACATTCAGTTTGAAGGAAGAAACAGTACTCCAAAGTTCATCAATATTTAAGAAATTATCCGAATAGCTTCCAACTTGGTTTACAGCTAAATAACAGATACTGCTTTTAGGTTTTTATACAATGCATCTAAACTTTAGGTTCGAAATTTAAGTGCTGCCTTACACTGTGAATGGCAGTTTGATCACTAAATTTATGTTCCAGTTTGAGATACTTCATACACAACATATATTTAATGCCCTGAAAACTTCTGCTTAAAAACTGAGTTATTTAGTGAACATCTGAAGGTAACAAAAAGATAGATCTTCTATATTTTATGTAGAATTGGGTATAAAGTCATTTGGGGTCAAACACAATTATTTTTAAATGTTATCAGTCCGCCCAGTAATAAGAACTTTTGTAAATATGCCACTATAGCTTCGGGCAATAATTGCTATGAAGTTAGATAAACACAGGATTAATTTCTTGGTTTACAAATCAAAATTATTGTGATATTAAAGCATGCCTCCTTCCAAACCACTTGGTCAATGCTGATAACAATTAAGAAAAACGTATAGTATTGCTCTAAATATACTGACTTAGCTGTCCTGAATGACCTAATTAGGTTGGAGGGACTTGGCCTGGTTCTCCAGTTCTGAAGCTGCCACCTCCAGCCTCACTCTAAATAGCTTATACAAAATCAAATTATTCCCATAGTTAACACAAATCCAGCAAGCTGTGGTCCTAAATGCCATCTACATTTATATATTTACTTTTTACTGTGTACATTAAGAATTTTGTTGTGCTTTTCACATATCCGATGTATGTTTTCATATTAAACATTCCTCAGTTTCTTGGGAAGGAAAGAAAAGATCATGAAAAGAGGTACATCAGGAGAGTCGGACTGTAAAGTAACATTACACACAACTAGCCAAATCTCTTTTTAATTTAAAAGATAATGTCATCACAACGCAACATatagaaacataaagaaaATAAAGTATCCACCTAAAATCCCTATTATTCCATGATATTTTCATAGCAACTAGTATatatatcaatatatttttCACAAACCATTCAGTTACACATTGTGTATGCTTTAGAATTTAAAATATGTATTACCATAGCAGTTCAAACATTGTACTTTTAGGAAATGTGGAGACATGGTTCAGTGATTTGATCATTGTGATGATTATTTACAATGCTATCTATTTACGTTAGCAATAAATAATCCTGATATCAAAGAGACAATACTCATGCACACACCAAGATGGCTGCATGTTTTAAATATTTACACATAGCTCTGCCATTTatagctctcccattaagaGGAAGTGCGCTTCTTCAATTCTCTTCCACATTTCCATTAGTCTTGCTTCTGGGGATGCATCCTTCCATTTCTGCTTCAGGCCATCCTTCATATTTGTTTGTATCCTTATTGTTCTTTATGTGCTGTTCAAAGGGCTTTTTATTAATGCCCTTCCCACCACAGAAGACCCAGTTGTCTCTAAACCAGATTAGTAATAGATGTGCTCCCAAATCAGCAATGAGCC	*	NM:i:68
+102	0	chr3	23607557	255	673H23=3H	*	0	0	ACGTTCTGGTTTATGTTTCCTTG	*	NM:i:0	AS:i:138	EV:Z:0.13
+102	0	chr14	104715673	255	479H15=1X7=1D2=1D4=191H	*	0	0	ctcctgGCTGGCAGGGCAGGCTGGGTGCT	*	NM:i:3	AS:i:126	EV:Z:1.8
+102	0	chr2	240456632	255	91H19=2I6=581H	*	0	0	TGGGCAGCCTGGATGGGTGTCCACAGG	*	NM:i:2	AS:i:125	EV:Z:2.2
+102	0	chr11	67464147	255	476H16=1X6=200H	*	0	0	cggctcctgGCTGGCAGGGCAGG	*	NM:i:1	AS:i:124	EV:Z:2.7
+102	0	chr1	21590631	255	467H10=1X2=2D6=6D1=7D17=195H	*	0	0	TGGGAgccacggctcctgGCTGGCAGGGCAGGCTGGG	*	NM:i:16	AS:i:121	EV:Z:5.3
+102	0	chr10	122813035	255	472H17=1I3=1X5=200H	*	0	0	gccacggctcctgGCTGGCAGGGCAGG	*	NM:i:2	AS:i:120	EV:Z:6.5
+102	0	chr10	124281395	255	618H20=61H	*	0	0	GGCACCGCCATCTTGGCCTC	*	NM:i:0	AS:i:120	EV:Z:6.5
+102	0	chr1	161050906	255	458H10=3D2=3D10=1X9=209H	*	0	0	TGCTGGGTTTGGGAgccacggctcctgGCTGG	*	NM:i:7	AS:i:119	EV:Z:8.1
+102	0	chr5	93489543	255	6H8=1X13=671H	*	0	0	CGGTACCAGATCTGCAGTATTT	*	NM:i:1	AS:i:118	EV:Z:10
+102	0	chr14	80332130	255	640H9=1X7=1X9=32H	*	0	0	CCTTCCCCATGTATCTGCGTTGTTCAC	*	NM:i:2	AS:i:118	EV:Z:10
+102	0	chr15	45614750	255	467H10=1X13=208H	*	0	0	TGGGAgccacggctcctgGCTGGC	*	NM:i:1	AS:i:118	EV:Z:10
+102	0	chr8	76840859	255	425H16=1D5=7D6=4D2=1X2=1I7=234H	*	0	0	ATCCAGCTTGGCCACCGCCACACCAGCCCCGGCTGCTGGG	*	NM:i:14	AS:i:117	EV:Z:13
+102	0	chr3	152862894	255	87H16=1D3=1D6=587H	*	0	0	TAGGTGGGCAGCCTGGATGGGTGTC	*	NM:i:2	AS:i:116	EV:Z:16
+102	0	chr1	145870018	255	140H19=540H	*	0	0	ATAGGAATAGTGGTAGTAG	*	NM:i:0	AS:i:114	EV:Z:24
+102	0	chr5	4867426	255	472H19=208H	*	0	0	gccacggctcctgGCTGGC	*	NM:i:0	AS:i:114	EV:Z:24
+102	0	chr5	74376578	255	10H19=670H	*	0	0	ACCAGATCTGCAGTATTTT	*	NM:i:0	AS:i:114	EV:Z:24
+102	0	chr7	157969524	255	493H17=1X4=184H	*	0	0	GGCAGGCTGGGTGCTGCTTGGG	*	NM:i:1	AS:i:114	EV:Z:24
+102	0	chr13	113342216	255	470H19=210H	*	0	0	GAgccacggctcctgGCTG	*	NM:i:0	AS:i:114	EV:Z:24
+102	0	chr17	19079610	255	470H19=210H	*	0	0	GAgccacggctcctgGCTG	*	NM:i:0	AS:i:114	EV:Z:24
+102	0	chr17	19205792	255	470H19=210H	*	0	0	GAgccacggctcctgGCTG	*	NM:i:0	AS:i:114	EV:Z:24
+102	0	chr17	27793640	255	546H19=134H	*	0	0	GCCTGCACAGCCGGGGCCC	*	NM:i:0	AS:i:114	EV:Z:24
+102	0	chr18	8561209	255	459H6=1D2=5D10=1X12=209H	*	0	0	GCTGGGTTTGGGAgccacggctcctgGCTGG	*	NM:i:7	AS:i:114	EV:Z:24
+102	0	chr21	14291181	255	666H3=1X14=1D1=1D8=6H	*	0	0	CCTGCTTACGTTCTGGTTTATGTTTCC	*	NM:i:3	AS:i:114	EV:Z:24
+102	0	chr22	23657491	255	470H19=210H	*	0	0	GAgccacggctcctgGCTG	*	NM:i:0	AS:i:114	EV:Z:24
+102	0	chrX	83302602	255	680H19=	*	0	0	GGTTTATGTTTCCTTGAGT	*	NM:i:0	AS:i:114	EV:Z:24
+102	0	chr5	173701200	255	508H16=2I7=166H	*	0	0	GCTTGGGCCATGGTGGCCGCTCCTG	*	NM:i:2	AS:i:113	EV:Z:30
+102	0	chr3	127741376	255	485H5=4D18=191H	*	0	0	GCTGGCAGGGCAGGCTGGGTGCT	*	NM:i:4	AS:i:112	EV:Z:37
+102	0	chr4	3839063	255	433H8=1X12=245H	*	0	0	TGGCCACCGCCACACCAGCCC	*	NM:i:1	AS:i:112	EV:Z:37
+102	0	chr18	55335093	255	256H14=1X6=422H	*	0	0	AGTGTGCCGGCCACGATCATG	*	NM:i:1	AS:i:112	EV:Z:37
+102	0	chr17	28925930	255	464H5=1X3=4I17=205H	*	0	0	GTTTGGGAgccacggctcctgGCTGGCAGG	*	NM:i:5	AS:i:111	EV:Z:46
+102	0	chr3	186865652	255	442H4=1X16=236H	*	0	0	CCACACCAGCCCCGGCTGCTG	*	NM:i:1	AS:i:110	EV:Z:58
+102	0	chr5	169763616	255	487H16=3D1=1D8=187H	*	0	0	TGGCAGGGCAGGCTGGGTGCTGCTT	*	NM:i:4	AS:i:110	EV:Z:58
+102	0	chr7	65260307	255	442H4=1X16=236H	*	0	0	CCACACCAGCCCCGGCTGCTG	*	NM:i:1	AS:i:110	EV:Z:58
+102	0	chr19	47072202	255	371H3=1D17=1X3=3D2=1X5=296H	*	0	0	CAGGCCGAAGGTCATGGGCCACAGAGAACTCC	*	NM:i:6	AS:i:110	EV:Z:58
+102	0	chr20	4034148	255	442H4=1X16=236H	*	0	0	CCACACCAGCCCCGGCTGCTG	*	NM:i:1	AS:i:110	EV:Z:58
+102	16	chr19	1390856	255	289H5=4D3=2D56=1X31=2D1X3=2D56=3I21=1D9=221H	*	0	0	GCCCGCCGGAGTTCTCTGTGGCCCATGACCTTCGGCCTGGCCTGCTGCGCCGTGGAGATGATGCGCATGGCAGCACCCCGCTACGACATGGACCGCCGGCGGTCTTCCGCGCCAGCccgcgccagtccgACGTCATGATCGTGGCCGGCACACTCACCGCCAACAAGATGGCCCCAGCGCTCGCAAGGT	*	NM:i:16	AS:i:954	EV:Z:1e-78
+102	16	chr19	1388830	255	190H22=3D5=1I31=2X2=1X2=1I1X40=401H	*	0	0	CAGCACCCAGCCTGCCCTGCCAGCCAGGAGCCGTGGCTCCCAAACccagcagccggggcTGGTGTGGCGGTGGCCAAGCTGGATGACCTCGTCAACTGGGCCCGCCGG	*	NM:i:9	AS:i:495	EV:Z:2.5e-35
+102	16	chr19	1388524	255	124H13=1I31=1I3=2D7=1D13=506H	*	0	0	GCTCCAGCGTGGGCCCCGGCTGTGCAGGCACGAGGTGTCCATCAGGAGCGGCCACCATGGCCCAAGCAG	*	NM:i:5	AS:i:321	EV:Z:6.8e-19
+102	16	chr19	1393241	255	521H37=1D2=3D7=1X1=4D14=1X7=1X1=2D3=1D5=98H	*	0	0	GCTGCGCCAACGGAGGAGGCTACTACCACTATTCCTATCGGTGAGGCGGACCGCATCGTGCCTGTGGACACCCATCCAGG	*	NM:i:14	AS:i:320	EV:Z:8.4e-19
+102	16	chr19	1395388	255	597H30=1I7=2D3=1X5=1X1=1X2=1D1X7=2D1=1I8=1X7I1=2D20=3H	*	0	0	CAGGCTGCCCACCTACGGCCGAGGCCCTGCCTCTACGGTCCCGCAGCCGTAGTGAAGATCGCCGGGAGCGACAAAAATACTGCAGATCTGGTACCGCAG	*	NM:i:21	AS:i:291	EV:Z:4.6e-16
+102	16	chr19	1391117	255	474H7=1I30=1D9=178H	*	0	0	AGGTCTAGCGACCAGATGCCGGAGCCGCGCTACGTGGTTCCATGGGG	*	NM:i:2	AS:i:237	EV:Z:5.8e-11
+102	16	chr19	1387808	255	83H13=2I27=574H	*	0	0	CAGCTCCTGGCCTGCGCGCGGCTTCCGGATCCTTGGTCTGCG	*	NM:i:2	AS:i:215	EV:Z:7e-09
+102	16	chr19	1383911	255	54H26=1X5=613H	*	0	0	GAAGGCCGAGGCCAAGATGGCGGTGCCGTCAG	*	NM:i:1	AS:i:178	EV:Z:2.2e-05
+102	16	chr7	101590380	255	175H6=1X2=1D1X3=1D18=2I8=483H	*	0	0	CCACCATGGCCCAAGCAGCACCCAGCCTGCCCTGCCAGCCA	*	NM:i:6	AS:i:133	EV:Z:0.39
+102	16	chr12	71544987	255	209H3=1D17=3D2=3D10=458H	*	0	0	CCAGCCAGGAGCCGTGGCTCCCAAACccagca	*	NM:i:7	AS:i:129	EV:Z:0.92
+102	16	chr19	17615431	255	53H21=625H	*	0	0	GGAAGGCCGAGGCCAAGATGG	*	NM:i:0	AS:i:126	EV:Z:1.8
+102	16	chr19	18430560	255	194H7=4D18=480H	*	0	0	ACCCAGCCTGCCCTGCCAGCCAGGA	*	NM:i:4	AS:i:124	EV:Z:2.7
+102	16	chr8	98433262	255	190H7=5D18=484H	*	0	0	CAGCACCCAGCCTGCCCTGCCAGCC	*	NM:i:5	AS:i:121	EV:Z:5.3
+102	16	chr4	54783705	255	211H10=1X11=5D1=1D7=458H	*	0	0	AGCCAGGAGCCGTGGCTCCCAAACccagca	*	NM:i:7	AS:i:120	EV:Z:6.5
+102	16	chr3	130609409	255	189H7=1X11=1D6=485H	*	0	0	GCAGCACCCAGCCTGCCCTGCCAGC	*	NM:i:2	AS:i:119	EV:Z:8.1
+102	16	chr3	37588377	255	573H4=1D4=2I16=1X3=2D1=1X5=89H	*	0	0	GCATCGTGCCTGTGGACACCCATCCAGGCTGCCCACC	*	NM:i:7	AS:i:117	EV:Z:13
+102	16	chr16	722484	255	586H3=1X17=4D5=87H	*	0	0	GGACACCCATCCAGGCTGCCCACCTA	*	NM:i:5	AS:i:116	EV:Z:16
+102	16	chr2	192414463	255	173H5=1D11=1X9=500H	*	0	0	GGCCACCATGGCCCAAGCAGCACCCA	*	NM:i:2	AS:i:115	EV:Z:19
+102	16	chr7	101723246	255	208H4=1D17=3D2=3D2=1X8=457H	*	0	0	GCCAGCCAGGAGCCGTGGCTCCCAAACccagcag	*	NM:i:8	AS:i:115	EV:Z:19
+102	16	chr10	126932870	255	62H9=2D3=3D1=1X3=3D16=604H	*	0	0	AGGCCAAGATGGCGGTGCCGTCAGCTCCTGGCC	*	NM:i:9	AS:i:115	EV:Z:19
+102	16	chr22	20726589	255	604H4=2D9=1X13=68H	*	0	0	CCCACCTACGGCCGAGGCCCTGCCTCT	*	NM:i:3	AS:i:115	EV:Z:19
+102	16	chr22	46458801	255	42H12=1X9=635H	*	0	0	CAGATACATGGGGAAGGCCGAG	*	NM:i:1	AS:i:115	EV:Z:19
+102	16	chr3	89815480	255	178H19=502H	*	0	0	CCATGGCCCAAGCAGCACC	*	NM:i:0	AS:i:114	EV:Z:24
+102	16	chr5	171574455	255	224H19=456H	*	0	0	GGCTCCCAAACccagcagc	*	NM:i:0	AS:i:114	EV:Z:24
+102	16	chr6	1842832	255	210H19=470H	*	0	0	CAGCCAGGAGCCGTGGCTC	*	NM:i:0	AS:i:114	EV:Z:24
+102	16	chr7	100370597	255	210H19=470H	*	0	0	CAGCCAGGAGCCGTGGCTC	*	NM:i:0	AS:i:114	EV:Z:24
+102	16	chr12	19357536	255	192H19=488H	*	0	0	GCACCCAGCCTGCCCTGCC	*	NM:i:0	AS:i:114	EV:Z:24
+102	16	chr17	19172451	255	210H19=470H	*	0	0	CAGCCAGGAGCCGTGGCTC	*	NM:i:0	AS:i:114	EV:Z:24
+102	16	chr17	29190070	255	203H19=477H	*	0	0	GCCCTGCCAGCCAGGAGCC	*	NM:i:0	AS:i:114	EV:Z:24
+102	16	chr17	58670491	255	210H19=470H	*	0	0	CAGCCAGGAGCCGTGGCTC	*	NM:i:0	AS:i:114	EV:Z:24
+102	16	chr20	17684745	255	578H19=102H	*	0	0	GTGCCTGTGGACACCCATC	*	NM:i:0	AS:i:114	EV:Z:24
+102	16	chr21	43027935	255	196H19=484H	*	0	0	CCAGCCTGCCCTGCCAGCC	*	NM:i:0	AS:i:114	EV:Z:24
+102	16	chrX	39928417	255	210H19=470H	*	0	0	CAGCCAGGAGCCGTGGCTC	*	NM:i:0	AS:i:114	EV:Z:24
+102	16	chr3	33026625	255	173H4=1X6=1X12=502H	*	0	0	GGCCACCATGGCCCAAGCAGCACC	*	NM:i:2	AS:i:112	EV:Z:37
+102	16	chr5	177958542	255	136H12=1X10=540H	*	0	0	GCCCCGGCTGTGCAGGCACGAGG	*	NM:i:1	AS:i:111	EV:Z:46
+102	16	chr1	155997874	255	175H8=1I4=1I2=3D1X15=1X6=485H	*	0	0	CCACCATGGCCCAAGCAGCACCCAGCCTGCCCTGCCAGC	*	NM:i:7	AS:i:110	EV:Z:58
+102	16	chr6	65786102	255	453H13=1X5=1X3=223H	*	0	0	AAGATGGCCCCAGCGCTCGCAAG	*	NM:i:2	AS:i:110	EV:Z:58
+102	16	chr6	170257702	255	191H17=1D1=1D6=484H	*	0	0	AGCACCCAGCCTGCCCTGCCAGCC	*	NM:i:2	AS:i:110	EV:Z:58
+102	16	chr15	78230486	255	166H7=1X12=1D4=509H	*	0	0	CAGGAGCGGCCACCATGGCCCAAG	*	NM:i:2	AS:i:110	EV:Z:58
 221	chr1	160106735	75	+	249250621	SRR359290.9001	0	75	-	75	75
 450	chr1	231468663	75	+	249250621	SRR359290.9002	0	75	+	75	75
 120	chr15	50775672	24	+	102531392	SRR359290.9002	22	24	+	75	24


=====================================
test/maf-convert-test.sh
=====================================
@@ -45,7 +45,7 @@ maf2=bs100.maf
     $r -n sam $maf2
     head -n999 $maf1 | $r -r 'ID:1 PL:ILLUMINA SM:x' sam
     $r -d sam $maf1
-    $r -j1e9 sam 90089.maf
+    $r -j1e9 sam 90089.maf 102.maf
     head -n999 $maf1 | $r -n tab
     head -n999 $maf1 | $r tab
     $r -n tab frameshift-new.maf



View it on GitLab: https://salsa.debian.org/med-team/last-align/-/commit/d65838350533e510953bb0de20d993bb80acd1af

-- 
View it on GitLab: https://salsa.debian.org/med-team/last-align/-/commit/d65838350533e510953bb0de20d993bb80acd1af
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230524/cb851c81/attachment-0001.htm>


More information about the debian-med-commit mailing list