[med-svn] [Git][med-team/lambda-align2][upstream] New upstream version 2.0.1

Fri Jul 21 11:31:32 BST 2023


Sascha Steinbiss pushed to branch upstream at Debian Med / lambda-align2


Commits:
3412b707 by Sascha Steinbiss at 2023-07-21T11:30:59+02:00
New upstream version 2.0.1
- - - - -


11 changed files:

- .gitmodules
- .travis.yml
- INFO
- README.rst
- bin/lambda2.in
- src/CMakeLists.txt
- src/search_algo.hpp
- src/search_datastructures.hpp
- src/search_options.hpp
- src/search_output.hpp
- src/shared_misc.hpp


Changes:

=====================================
.gitmodules
=====================================
@@ -1,3 +1,3 @@
 [submodule "seqan"]
 	path = include/seqan
-	url = git://github.com/seqan/seqan.git
+	url = ../../seqan/seqan.git


=====================================
.travis.yml
=====================================
@@ -42,9 +42,8 @@ matrix:
       addons:
         apt:
           sources: ['ubuntu-toolchain-r-test']
-          packages: ['g++-7', 'cmake', 'cmake-data', 'zlib1g-dev', 'libbz2-dev', 'libboost-dev', 'python', 'python-nose', 'python-jinja2', 'python-pip'] # g++ required for newer libstdc++
-      install: export CXX="/usr/local/clang-5.0.0/bin/clang++"
-      env: CMAKE_ARGS="-DCMAKE_BUILD_TYPE=RelWithDebInfo" LD_LIBRARY_PATH="/usr/local/clang-5.0.0/lib/:${LD_LIBRARY_PATH}"
+          packages: ['clang-5.0', 'g++-7', 'cmake', 'cmake-data', 'zlib1g-dev', 'libbz2-dev', 'libboost-dev', 'python', 'python-nose', 'python-jinja2', 'python-pip'] # g++ required for newer libstdc++
+      install: export CXX="clang++-5.0"
 
     - os: osx
       compiler: gcc-4.9
@@ -55,6 +54,7 @@ matrix:
       env: CMAKE_ARGS="-DLAMBDA_FASTBUILD=1 -DLAMBDA_STATIC_BUILD=1"
 
     - os: osx
+      osx_image: xcode10.1
       compiler: gcc-7
       before_install:
         - brew update


=====================================
INFO
=====================================
@@ -6,7 +6,7 @@ Copyright: 2013-2019, Hannes Hauswedell; 2016-2019 Knut Reinert, FU-Berlin
 Status: under development
 Description: Lambda is a biological sequence aligner optimized for many
 query sequences and searches in protein space. It is highly compatible
-to BLAST (bitscore and e-value statistics, tab seperated and verbose
+to BLAST (bitscore and e-value statistics, tab separated and verbose
 output formats), much faster than BLAST and many other comparable tools
 and supports many other input and output formats, including standards-
 conformant .sam and .bam and many compression types


=====================================
README.rst
=====================================
@@ -3,7 +3,7 @@ Lambda: the Local Aligner for Massive Biological DatA
 
 Lambda is a local aligner optimized for many query sequences and searches in protein space. It...
 
-* is highly compatible to BLAST (bitscore and e-value statistics, tab seperated and verbose output formats)
+* is highly compatible to BLAST (bitscore and e-value statistics, tab separated and verbose output formats)
 * is much faster than BLAST and many other comparable tools
 * supports many other input and output formats, including standards-conformant ``.sam`` and ``.bam`` and many compression types
 * has special features for species annotation and taxonomic analysis


=====================================
bin/lambda2.in
=====================================
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-CURDIR="$(readlink -f $(dirname "$0"))/"
+CURDIR="$(cd "$(dirname "$0")" && pwd -P)/"
 SYSTEM_BIN_DIR="@CMAKE_INSTALL_FULL_BINDIR@/"
 
 if [ "${CURDIR}" = "${SYSTEM_BIN_DIR}" ]; then          # we are installed


=====================================
src/CMakeLists.txt
=====================================
@@ -13,7 +13,7 @@
 # change this after every release
 set (SEQAN_APP_VERSION_MAJOR "2")
 set (SEQAN_APP_VERSION_MINOR "0")
-set (SEQAN_APP_VERSION_PATCH "0")
+set (SEQAN_APP_VERSION_PATCH "1")
 
 # don't change the following
 set (SEQAN_APP_VERSION "${SEQAN_APP_VERSION_MAJOR}.${SEQAN_APP_VERSION_MINOR}.${SEQAN_APP_VERSION_PATCH}")
@@ -31,7 +31,7 @@ find_package(ZLIB   QUIET)
 find_package(BZip2  QUIET)
 find_package(SeqAn  QUIET REQUIRED CONFIG)
 
-message(STATUS "These dependencies where found:")
+message(STATUS "These dependencies were found:")
 message(   "     OPENMP     ${OPENMP_FOUND}      ${OpenMP_CXX_FLAGS}")
 message(   "     ZLIB       ${ZLIB_FOUND}      ${ZLIB_VERSION_STRING}")
 message(   "     BZIP2      ${BZIP2_FOUND}      ${BZIP2_VERSION_STRING}")
@@ -78,7 +78,7 @@ if (SEQAN_VERSION_STRING VERSION_LESS "${MINIMUM_SEQAN_VERSION}")
     return ()
 endif ()
 
-message(STATUS "The requirements where met.")
+message(STATUS "The requirements were met.")
 
 # ----------------------------------------------------------------------------
 # App-Level Configuration


=====================================
src/search_algo.hpp
=====================================
@@ -1058,7 +1058,7 @@ _searchSingleIndex(LocalDataHolder<TGlobalHolder, TScoreExtension> & lH)
                 desiredOccs = (length(lH.matches) - oldTotalMatches) >= lH.options.maxMatches
                             ? minResults
                             : (lH.options.maxMatches - (length(lH.matches) - oldTotalMatches)) * seedHeurFactor /
-                                std::max((needlesSum - needlesPos - seedBegin) / lH.options.seedOffset, 1ul);
+                                std::max((needlesSum - needlesPos - seedBegin) / lH.options.seedOffset, static_cast<size_t>(1));
 
                 if (desiredOccs == 0)
                     desiredOccs = minResults;
@@ -1127,7 +1127,7 @@ _searchSingleIndex(LocalDataHolder<TGlobalHolder, TScoreExtension> & lH)
                     desiredOccs = (length(lH.matches) - oldTotalMatches) >= lH.options.maxMatches
                                 ? minResults
                                 : (lH.options.maxMatches - (length(lH.matches) - oldTotalMatches)) * seedHeurFactor /
-                                    std::max((needlesSum - needlesPos - seedBegin) / lH.options.seedOffset, 1ul);
+                                    std::max((needlesSum - needlesPos - seedBegin) / lH.options.seedOffset, static_cast<size_t>(1));
 
                     if (desiredOccs == 0)
                         desiredOccs = minResults;
@@ -1736,7 +1736,7 @@ computeBlastMatch(typename TBlastRecord::TBlastMatch  & bm,
     computeBitScore(bm, context(lH.gH.outfile));
 
     computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile));
-    if (bm.eValue > lH.options.eCutOff)
+    if (bm.eValue > lH.options.maxEValue)
         return EVALUE;
 
     _setFrames(bm, m, lH);
@@ -2376,7 +2376,11 @@ iterateMatchesFullSimd(TLocalHolder & lH)
 
         bm.sLength = sIsTranslated(TGlobalHolder::blastProgram)
                         ? lH.gH.untransSubjSeqLengths[bm._n_sId]
-                        : length(lH.gH.subjSeqs[it->subjId]);
+                        : length(lH.gH.subjSeqs[_untrueSubjId(bm, lH)]);
+
+        bm.qLength = qIsTranslated(TGlobalHolder::blastProgram)
+                        ? lH.gH.untransQrySeqLengths[bm._n_qId ]
+                        : length(lH.gH.qrySeqs[it->qryId]);
 
         _setupAlignInfix(bm, *it, lH);
 
@@ -2427,17 +2431,28 @@ iterateMatchesFullSimd(TLocalHolder & lH)
     {
         TBlastMatch & bm = *it;
 
-        computeEValueThreadSafe(bm,
-                                qIsTranslated(TGlobalHolder::blastProgram)
-                                    ? lH.gH.untransQrySeqLengths[bm._n_qId]
-                                    : length(lH.gH.qrySeqs[bm._n_qId]),
-                                context(lH.gH.outfile));
+        if (lH.options.minBitScore > 0)
+        {
+            seqan::computeBitScore(bm, seqan::context(lH.gH.outfile));
 
-        if (bm.eValue > lH.options.eCutOff)
+            if (bm.bitScore < lH.options.minBitScore)
+            {
+                ++lH.stats.hitsFailedExtendBitScoreTest;
+                it = blastMatches.erase(it);
+                continue;
+            }
+        }
+
+        if (lH.options.maxEValue < 100)
         {
-            ++lH.stats.hitsFailedExtendEValueTest;
-            it = blastMatches.erase(it);
-            continue;
+            computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfile));
+
+            if (bm.eValue > lH.options.maxEValue)
+            {
+                ++lH.stats.hitsFailedExtendEValueTest;
+                it = blastMatches.erase(it);
+                continue;
+            }
         }
 
         ++it;
@@ -2480,9 +2495,12 @@ iterateMatchesFullSimd(TLocalHolder & lH)
             continue;
         }
 
-        computeBitScore(bm, context(lH.gH.outfile));
+        // not computed previously
+        if (lH.options.minBitScore == 0)
+            seqan::computeBitScore(bm, seqan::context(lH.gH.outfile));
 
-        // evalue computed previously
+        if (lH.options.maxEValue == 100)
+            computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfile));
 
         ++it;
     }
@@ -2504,7 +2522,7 @@ iterateMatchesFullSimd(TLocalHolder & lH)
             TBlastRecord record(lH.gH.qryIds[itLast->_n_qId]);
             record.qLength = (qIsTranslated(TGlobalHolder::blastProgram)
                                 ? lH.gH.untransQrySeqLengths[itLast->_n_qId]
-                                : length(lH.gH.qrySeqs[itLast->_n_qId]));
+                                : length(lH.gH.qrySeqs[_untrueQryId(*itLast, lH)]));
             // move the matches into the record
             record.matches.splice(record.matches.begin(),
                                   blastMatches,
@@ -2555,7 +2573,7 @@ iterateMatchesFullSerial(TLocalHolder & lH)
                         ? lH.gH.untransQrySeqLengths[trueQryId]
                         : length(lH.gH.qrySeqs[lH.matches[0].qryId]));
 
-    unsigned band = _bandSize(record.qLength, lH);
+    size_t band = _bandSize(length(lH.gH.qrySeqs[lH.matches[0].qryId]), lH);
 
 #ifdef LAMBDA_MICRO_STATS
     double start = sysTime();
@@ -2578,29 +2596,31 @@ iterateMatchesFullSerial(TLocalHolder & lH)
                         ? lH.gH.untransSubjSeqLengths[bm._n_sId]
                         : length(lH.gH.subjSeqs[it->subjId]);
 
+        bm.qLength = qIsTranslated(TGlobalHolder::blastProgram)
+                        ? lH.gH.untransQrySeqLengths[bm._n_qId ]
+                        : length(lH.gH.qrySeqs[it->qryId]);
+
         _setupAlignInfix(bm, *it, lH);
 
         _setFrames(bm, m, lH);
 
         // Run extension WITHOUT TRACEBACK
-        typedef AlignConfig2<LocalAlignment_<>,
-                            DPBandConfig<BandOn>,
-                            FreeEndGaps_<True, True, True, True>,
-                            TracebackOff> TAlignConfig;
-
-        DPScoutState_<Default> scoutState;
+        bm.alignStats.alignmentScore = localAlignmentScore(bm.alignRow0,
+                                                           bm.alignRow1,
+                                                           seqanScheme(context(lH.gH.outfile).scoringScheme),
+                                                           -band,
+                                                           +band);
 
-        bm.alignStats.alignmentScore = _setUpAndRunAlignment(lH.alignContext.dpContext,
-                                                             lH.alignContext.traceSegment,
-                                                             scoutState,
-                                                             source(bm.alignRow0),
-                                                             source(bm.alignRow1),
-                                                             seqanScheme(context(lH.gH.outfile).scoringScheme),
-                                                             TAlignConfig(-band, +band));
+        computeBitScore(bm, context(lH.gH.outfile));
+        if (bm.bitScore < lH.options.minBitScore)
+        {
+            ++lH.stats.hitsFailedExtendBitScoreTest;
+            record.matches.pop_back();
+            continue;
+        }
 
         computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile));
-
-        if (bm.eValue > lH.options.eCutOff)
+        if (bm.eValue > lH.options.maxEValue)
         {
             ++lH.stats.hitsFailedExtendEValueTest;
             record.matches.pop_back();
@@ -2625,7 +2645,6 @@ iterateMatchesFullSerial(TLocalHolder & lH)
             continue;
         }
 
-        computeBitScore(bm, context(lH.gH.outfile));
 
         if (lH.options.hasSTaxIds)
             bm.sTaxIds = lH.gH.sTaxIds[bm._n_sId];


=====================================
src/search_datastructures.hpp
=====================================
@@ -111,6 +111,7 @@ struct StatsHolder
 
 // post-extension
     uint64_t hitsFailedExtendPercentIdentTest;
+    uint64_t hitsFailedExtendBitScoreTest;
     uint64_t hitsFailedExtendEValueTest;
     uint64_t hitsAbundant;
     uint64_t hitsDuplicate;
@@ -150,6 +151,7 @@ struct StatsHolder
         hitsPutativeAbundant = 0;
 
         hitsFailedExtendPercentIdentTest = 0;
+        hitsFailedExtendBitScoreTest = 0;
         hitsFailedExtendEValueTest = 0;
         hitsAbundant = 0;
         hitsDuplicate = 0;
@@ -183,6 +185,7 @@ struct StatsHolder
         hitsPutativeAbundant += rhs.hitsPutativeAbundant;
 
         hitsFailedExtendPercentIdentTest += rhs.hitsFailedExtendPercentIdentTest;
+        hitsFailedExtendBitScoreTest += rhs.hitsFailedExtendBitScoreTest;
         hitsFailedExtendEValueTest += rhs.hitsFailedExtendEValueTest;
         hitsAbundant += rhs.hitsAbundant;
         hitsDuplicate += rhs.hitsDuplicate;
@@ -253,12 +256,15 @@ void printStats(StatsHolder const & stats, LambdaOptions const & options)
             std::cout << "\n - failed pre-extend test   " << R
                       << stats.hitsFailedPreExtendTest  << RR
                       << (rem -= stats.hitsFailedPreExtendTest);
-        std::cout << "\n - failed %-identity test   " << R
-                  << stats.hitsFailedExtendPercentIdentTest << RR
-                  << (rem -= stats.hitsFailedExtendPercentIdentTest);
         std::cout << "\n - failed e-value test      " << R
                   << stats.hitsFailedExtendEValueTest << RR
                   << (rem -= stats.hitsFailedExtendEValueTest);
+        std::cout << "\n - failed bitScore test     " << R
+                  << stats.hitsFailedExtendBitScoreTest << RR
+                  << (rem -= stats.hitsFailedExtendBitScoreTest);
+        std::cout << "\n - failed %-identity test   " << R
+                  << stats.hitsFailedExtendPercentIdentTest << RR
+                  << (rem -= stats.hitsFailedExtendPercentIdentTest);
         std::cout << "\n - duplicates               " << R
                   << stats.hitsDuplicate              << RR
                   << (rem -= stats.hitsDuplicate);


=====================================
src/search_options.hpp
=====================================
@@ -96,7 +96,8 @@ struct LambdaOptions : public SharedOptions
 
     int             xDropOff    = 0;
     int             band        = -1;
-    double          eCutOff     = 0;
+    double          minBitScore = 0;
+    double          maxEValue   = 1e-04;
     int             idCutOff    = 0;
     unsigned long   maxMatches  = 500;
 
@@ -203,7 +204,7 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
 
     addSection(parser, "Output Options");
     addOption(parser, ArgParseOption("o", "output",
-        "File to hold reports on hits (.m* are blastall -m* formats; .m8 is tab-seperated, .m9 is tab-seperated with "
+        "File to hold reports on hits (.m* are blastall -m* formats; .m8 is tab-separated, .m9 is tab-separated with "
         "with comments, .m0 is pairwise format).",
         ArgParseArgument::OUTPUT_FILE,
         "OUT"));
@@ -247,6 +248,14 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
     setMinValue(parser, "e-value", "0");
     setMaxValue(parser, "e-value", "100");
 
+    addOption(parser, ArgParseOption("", "bit-score",
+        "Output only matches that score above this threshold.",
+        ArgParseArgument::DOUBLE));
+    setDefaultValue(parser, "bit-score", "0");
+    setMinValue(parser, "bit-score", "0");
+    setMaxValue(parser, "bit-score", "1000");
+
+
     addOption(parser, ArgParseOption("n", "num-matches",
         "Print at most this number of matches per query.",
         ArgParseArgument::INTEGER));
@@ -646,7 +655,7 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
     getOptionValue(buffer, parser, "output-columns");
     if (buffer == "help")
     {
-        std::cout << "Please specify the columns in this format -oc 'column1 column2', i.e. space-seperated and "
+        std::cout << "Please specify the columns in this format -oc 'column1 column2', i.e. space-separated and "
                   << "enclosed in single quotes.\nThe specifiers are the same as in NCBI Blast, currently "
                   << "the following are supported:\n";
         for (unsigned i = 0; i < length(BlastMatchField<>::implemented); ++i)
@@ -693,7 +702,7 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
     getOptionValue(buffer, parser, "sam-bam-tags");
     if (buffer == "help")
     {
-        std::cout << "Please specify the tags in this format -oc 'tag1 tag2', i.e. space-seperated and "
+        std::cout << "Please specify the tags in this format -oc 'tag1 tag2', i.e. space-separated and "
                   << "enclosed in quotes. The order of tags is not preserved.\nThe following specifiers are "
                   << "supported:\n";
 
@@ -771,7 +780,8 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
 
     getOptionValue(options.seedDeltaIncreasesLength, parser, "seed-delta-increases-length");
 
-    getOptionValue(options.eCutOff, parser, "e-value");
+    getOptionValue(options.maxEValue, parser, "e-value");
+    getOptionValue(options.minBitScore, parser, "bit-score");
     getOptionValue(options.idCutOff, parser, "percent-identity");
 
     getOptionValue(options.xDropOff, parser, "x-drop");
@@ -835,10 +845,9 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
     // TODO always prescore 1
     getOptionValue(options.preScoring, parser, "pre-scoring");
 
-    //TODO reactivate
-//     if ((!isSet(parser, "pre-scoring")) &&
-//         (options.alphReduction == 0))
-//         options.preScoring = 1;
+    if ((!isSet(parser, "pre-scoring")) &&
+        (options.reducedAlphabet == options.transAlphabet))
+        options.preScoring = 1;
 
     getOptionValue(options.preScoringThresh, parser, "pre-scoring-threshold");
 //     if (options.preScoring == 0)
@@ -904,8 +913,9 @@ printOptions(LambdaOptions const & options)
               << "  db index type:            " << _indexEnumToName(options.dbIndexType) << "\n"
               << " OUTPUT (file)\n"
               << "  output file:              " << options.output << "\n"
+              << "  maximum e-value:          " << options.maxEValue << "\n"
+              << "  minimum bit-score:        " << options.minBitScore << "\n"
               << "  minimum % identity:       " << options.idCutOff << "\n"
-              << "  maximum e-value:          " << options.eCutOff << "\n"
               << "  max #matches per query:   " << options.maxMatches << "\n"
               << "  include subj names in sam:" << options.samWithRefHeader << "\n"
               << "  include seq in sam/bam:   " << options.samBamSeq << "\n"


=====================================
src/search_output.hpp
=====================================
@@ -330,7 +330,7 @@ myWriteHeader(TGH & globalHolder, TLambdaOptions const & options)
         if (sIsTranslated(TGH::blastProgram))
         {
             //TODO can we get around a copy?
-            subjSeqLengths = globalHolder.untransSubjSeqLengths;
+            subjSeqLengths = prefix(globalHolder.untransSubjSeqLengths, length(globalHolder.untransSubjSeqLengths) - 1);
         } else
         {
             // compute lengths ultra-fast


=====================================
src/shared_misc.hpp
=====================================
@@ -26,7 +26,10 @@
 #include <locale>
 #include <type_traits>
 #include <forward_list>
-#include <sys/sysctl.h>
+
+#if __has_include(<sys/sysctl.h>)
+    #include <sys/sysctl.h>
+#endif
 
 #include <seqan/basic.h>
 #include <seqan/sequence.h>



View it on GitLab: https://salsa.debian.org/med-team/lambda-align2/-/commit/3412b7070adc55dc43b247812220879a6396e52a

-- 
View it on GitLab: https://salsa.debian.org/med-team/lambda-align2/-/commit/3412b7070adc55dc43b247812220879a6396e52a
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230721/88ef2872/attachment-0001.htm>