[med-svn] [Git][med-team/lambda-align2][upstream] New upstream version 2.0.1
Sascha Steinbiss (@satta)
gitlab at salsa.debian.org
Fri Jul 21 11:31:32 BST 2023
Sascha Steinbiss pushed to branch upstream at Debian Med / lambda-align2
Commits:
3412b707 by Sascha Steinbiss at 2023-07-21T11:30:59+02:00
New upstream version 2.0.1
- - - - -
11 changed files:
- .gitmodules
- .travis.yml
- INFO
- README.rst
- bin/lambda2.in
- src/CMakeLists.txt
- src/search_algo.hpp
- src/search_datastructures.hpp
- src/search_options.hpp
- src/search_output.hpp
- src/shared_misc.hpp
Changes:
=====================================
.gitmodules
=====================================
@@ -1,3 +1,3 @@
[submodule "seqan"]
path = include/seqan
- url = git://github.com/seqan/seqan.git
+ url = ../../seqan/seqan.git
=====================================
.travis.yml
=====================================
@@ -42,9 +42,8 @@ matrix:
addons:
apt:
sources: ['ubuntu-toolchain-r-test']
- packages: ['g++-7', 'cmake', 'cmake-data', 'zlib1g-dev', 'libbz2-dev', 'libboost-dev', 'python', 'python-nose', 'python-jinja2', 'python-pip'] # g++ required for newer libstdc++
- install: export CXX="/usr/local/clang-5.0.0/bin/clang++"
- env: CMAKE_ARGS="-DCMAKE_BUILD_TYPE=RelWithDebInfo" LD_LIBRARY_PATH="/usr/local/clang-5.0.0/lib/:${LD_LIBRARY_PATH}"
+ packages: ['clang-5.0', 'g++-7', 'cmake', 'cmake-data', 'zlib1g-dev', 'libbz2-dev', 'libboost-dev', 'python', 'python-nose', 'python-jinja2', 'python-pip'] # g++ required for newer libstdc++
+ install: export CXX="clang++-5.0"
- os: osx
compiler: gcc-4.9
@@ -55,6 +54,7 @@ matrix:
env: CMAKE_ARGS="-DLAMBDA_FASTBUILD=1 -DLAMBDA_STATIC_BUILD=1"
- os: osx
+ osx_image: xcode10.1
compiler: gcc-7
before_install:
- brew update
=====================================
INFO
=====================================
@@ -6,7 +6,7 @@ Copyright: 2013-2019, Hannes Hauswedell; 2016-2019 Knut Reinert, FU-Berlin
Status: under development
Description: Lambda is a biological sequence aligner optimized for many
query sequences and searches in protein space. It is highly compatible
-to BLAST (bitscore and e-value statistics, tab seperated and verbose
+to BLAST (bitscore and e-value statistics, tab separated and verbose
output formats), much faster than BLAST and many other comparable tools
and supports many other input and output formats, including standards-
conformant .sam and .bam and many compression types
=====================================
README.rst
=====================================
@@ -3,7 +3,7 @@ Lambda: the Local Aligner for Massive Biological DatA
Lambda is a local aligner optimized for many query sequences and searches in protein space. It...
-* is highly compatible to BLAST (bitscore and e-value statistics, tab seperated and verbose output formats)
+* is highly compatible to BLAST (bitscore and e-value statistics, tab separated and verbose output formats)
* is much faster than BLAST and many other comparable tools
* supports many other input and output formats, including standards-conformant ``.sam`` and ``.bam`` and many compression types
* has special features for species annotation and taxonomic analysis
=====================================
bin/lambda2.in
=====================================
@@ -1,6 +1,6 @@
#!/bin/sh
-CURDIR="$(readlink -f $(dirname "$0"))/"
+CURDIR="$(cd "$(dirname "$0")" && pwd -P)/"
SYSTEM_BIN_DIR="@CMAKE_INSTALL_FULL_BINDIR@/"
if [ "${CURDIR}" = "${SYSTEM_BIN_DIR}" ]; then # we are installed
=====================================
src/CMakeLists.txt
=====================================
@@ -13,7 +13,7 @@
# change this after every release
set (SEQAN_APP_VERSION_MAJOR "2")
set (SEQAN_APP_VERSION_MINOR "0")
-set (SEQAN_APP_VERSION_PATCH "0")
+set (SEQAN_APP_VERSION_PATCH "1")
# don't change the following
set (SEQAN_APP_VERSION "${SEQAN_APP_VERSION_MAJOR}.${SEQAN_APP_VERSION_MINOR}.${SEQAN_APP_VERSION_PATCH}")
@@ -31,7 +31,7 @@ find_package(ZLIB QUIET)
find_package(BZip2 QUIET)
find_package(SeqAn QUIET REQUIRED CONFIG)
-message(STATUS "These dependencies where found:")
+message(STATUS "These dependencies were found:")
message( " OPENMP ${OPENMP_FOUND} ${OpenMP_CXX_FLAGS}")
message( " ZLIB ${ZLIB_FOUND} ${ZLIB_VERSION_STRING}")
message( " BZIP2 ${BZIP2_FOUND} ${BZIP2_VERSION_STRING}")
@@ -78,7 +78,7 @@ if (SEQAN_VERSION_STRING VERSION_LESS "${MINIMUM_SEQAN_VERSION}")
return ()
endif ()
-message(STATUS "The requirements where met.")
+message(STATUS "The requirements were met.")
# ----------------------------------------------------------------------------
# App-Level Configuration
=====================================
src/search_algo.hpp
=====================================
@@ -1058,7 +1058,7 @@ _searchSingleIndex(LocalDataHolder<TGlobalHolder, TScoreExtension> & lH)
desiredOccs = (length(lH.matches) - oldTotalMatches) >= lH.options.maxMatches
? minResults
: (lH.options.maxMatches - (length(lH.matches) - oldTotalMatches)) * seedHeurFactor /
- std::max((needlesSum - needlesPos - seedBegin) / lH.options.seedOffset, 1ul);
+ std::max((needlesSum - needlesPos - seedBegin) / lH.options.seedOffset, static_cast<size_t>(1));
if (desiredOccs == 0)
desiredOccs = minResults;
@@ -1127,7 +1127,7 @@ _searchSingleIndex(LocalDataHolder<TGlobalHolder, TScoreExtension> & lH)
desiredOccs = (length(lH.matches) - oldTotalMatches) >= lH.options.maxMatches
? minResults
: (lH.options.maxMatches - (length(lH.matches) - oldTotalMatches)) * seedHeurFactor /
- std::max((needlesSum - needlesPos - seedBegin) / lH.options.seedOffset, 1ul);
+ std::max((needlesSum - needlesPos - seedBegin) / lH.options.seedOffset, static_cast<size_t>(1));
if (desiredOccs == 0)
desiredOccs = minResults;
@@ -1736,7 +1736,7 @@ computeBlastMatch(typename TBlastRecord::TBlastMatch & bm,
computeBitScore(bm, context(lH.gH.outfile));
computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile));
- if (bm.eValue > lH.options.eCutOff)
+ if (bm.eValue > lH.options.maxEValue)
return EVALUE;
_setFrames(bm, m, lH);
@@ -2376,7 +2376,11 @@ iterateMatchesFullSimd(TLocalHolder & lH)
bm.sLength = sIsTranslated(TGlobalHolder::blastProgram)
? lH.gH.untransSubjSeqLengths[bm._n_sId]
- : length(lH.gH.subjSeqs[it->subjId]);
+ : length(lH.gH.subjSeqs[_untrueSubjId(bm, lH)]);
+
+ bm.qLength = qIsTranslated(TGlobalHolder::blastProgram)
+ ? lH.gH.untransQrySeqLengths[bm._n_qId ]
+ : length(lH.gH.qrySeqs[it->qryId]);
_setupAlignInfix(bm, *it, lH);
@@ -2427,17 +2431,28 @@ iterateMatchesFullSimd(TLocalHolder & lH)
{
TBlastMatch & bm = *it;
- computeEValueThreadSafe(bm,
- qIsTranslated(TGlobalHolder::blastProgram)
- ? lH.gH.untransQrySeqLengths[bm._n_qId]
- : length(lH.gH.qrySeqs[bm._n_qId]),
- context(lH.gH.outfile));
+ if (lH.options.minBitScore > 0)
+ {
+ seqan::computeBitScore(bm, seqan::context(lH.gH.outfile));
- if (bm.eValue > lH.options.eCutOff)
+ if (bm.bitScore < lH.options.minBitScore)
+ {
+ ++lH.stats.hitsFailedExtendBitScoreTest;
+ it = blastMatches.erase(it);
+ continue;
+ }
+ }
+
+ if (lH.options.maxEValue < 100)
{
- ++lH.stats.hitsFailedExtendEValueTest;
- it = blastMatches.erase(it);
- continue;
+ computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfile));
+
+ if (bm.eValue > lH.options.maxEValue)
+ {
+ ++lH.stats.hitsFailedExtendEValueTest;
+ it = blastMatches.erase(it);
+ continue;
+ }
}
++it;
@@ -2480,9 +2495,12 @@ iterateMatchesFullSimd(TLocalHolder & lH)
continue;
}
- computeBitScore(bm, context(lH.gH.outfile));
+ // not computed previously
+ if (lH.options.minBitScore == 0)
+ seqan::computeBitScore(bm, seqan::context(lH.gH.outfile));
- // evalue computed previously
+ if (lH.options.maxEValue == 100)
+ computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfile));
++it;
}
@@ -2504,7 +2522,7 @@ iterateMatchesFullSimd(TLocalHolder & lH)
TBlastRecord record(lH.gH.qryIds[itLast->_n_qId]);
record.qLength = (qIsTranslated(TGlobalHolder::blastProgram)
? lH.gH.untransQrySeqLengths[itLast->_n_qId]
- : length(lH.gH.qrySeqs[itLast->_n_qId]));
+ : length(lH.gH.qrySeqs[_untrueQryId(*itLast, lH)]));
// move the matches into the record
record.matches.splice(record.matches.begin(),
blastMatches,
@@ -2555,7 +2573,7 @@ iterateMatchesFullSerial(TLocalHolder & lH)
? lH.gH.untransQrySeqLengths[trueQryId]
: length(lH.gH.qrySeqs[lH.matches[0].qryId]));
- unsigned band = _bandSize(record.qLength, lH);
+ size_t band = _bandSize(length(lH.gH.qrySeqs[lH.matches[0].qryId]), lH);
#ifdef LAMBDA_MICRO_STATS
double start = sysTime();
@@ -2578,29 +2596,31 @@ iterateMatchesFullSerial(TLocalHolder & lH)
? lH.gH.untransSubjSeqLengths[bm._n_sId]
: length(lH.gH.subjSeqs[it->subjId]);
+ bm.qLength = qIsTranslated(TGlobalHolder::blastProgram)
+ ? lH.gH.untransQrySeqLengths[bm._n_qId ]
+ : length(lH.gH.qrySeqs[it->qryId]);
+
_setupAlignInfix(bm, *it, lH);
_setFrames(bm, m, lH);
// Run extension WITHOUT TRACEBACK
- typedef AlignConfig2<LocalAlignment_<>,
- DPBandConfig<BandOn>,
- FreeEndGaps_<True, True, True, True>,
- TracebackOff> TAlignConfig;
-
- DPScoutState_<Default> scoutState;
+ bm.alignStats.alignmentScore = localAlignmentScore(bm.alignRow0,
+ bm.alignRow1,
+ seqanScheme(context(lH.gH.outfile).scoringScheme),
+ -band,
+ +band);
- bm.alignStats.alignmentScore = _setUpAndRunAlignment(lH.alignContext.dpContext,
- lH.alignContext.traceSegment,
- scoutState,
- source(bm.alignRow0),
- source(bm.alignRow1),
- seqanScheme(context(lH.gH.outfile).scoringScheme),
- TAlignConfig(-band, +band));
+ computeBitScore(bm, context(lH.gH.outfile));
+ if (bm.bitScore < lH.options.minBitScore)
+ {
+ ++lH.stats.hitsFailedExtendBitScoreTest;
+ record.matches.pop_back();
+ continue;
+ }
computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile));
-
- if (bm.eValue > lH.options.eCutOff)
+ if (bm.eValue > lH.options.maxEValue)
{
++lH.stats.hitsFailedExtendEValueTest;
record.matches.pop_back();
@@ -2625,7 +2645,6 @@ iterateMatchesFullSerial(TLocalHolder & lH)
continue;
}
- computeBitScore(bm, context(lH.gH.outfile));
if (lH.options.hasSTaxIds)
bm.sTaxIds = lH.gH.sTaxIds[bm._n_sId];
=====================================
src/search_datastructures.hpp
=====================================
@@ -111,6 +111,7 @@ struct StatsHolder
// post-extension
uint64_t hitsFailedExtendPercentIdentTest;
+ uint64_t hitsFailedExtendBitScoreTest;
uint64_t hitsFailedExtendEValueTest;
uint64_t hitsAbundant;
uint64_t hitsDuplicate;
@@ -150,6 +151,7 @@ struct StatsHolder
hitsPutativeAbundant = 0;
hitsFailedExtendPercentIdentTest = 0;
+ hitsFailedExtendBitScoreTest = 0;
hitsFailedExtendEValueTest = 0;
hitsAbundant = 0;
hitsDuplicate = 0;
@@ -183,6 +185,7 @@ struct StatsHolder
hitsPutativeAbundant += rhs.hitsPutativeAbundant;
hitsFailedExtendPercentIdentTest += rhs.hitsFailedExtendPercentIdentTest;
+ hitsFailedExtendBitScoreTest += rhs.hitsFailedExtendBitScoreTest;
hitsFailedExtendEValueTest += rhs.hitsFailedExtendEValueTest;
hitsAbundant += rhs.hitsAbundant;
hitsDuplicate += rhs.hitsDuplicate;
@@ -253,12 +256,15 @@ void printStats(StatsHolder const & stats, LambdaOptions const & options)
std::cout << "\n - failed pre-extend test " << R
<< stats.hitsFailedPreExtendTest << RR
<< (rem -= stats.hitsFailedPreExtendTest);
- std::cout << "\n - failed %-identity test " << R
- << stats.hitsFailedExtendPercentIdentTest << RR
- << (rem -= stats.hitsFailedExtendPercentIdentTest);
std::cout << "\n - failed e-value test " << R
<< stats.hitsFailedExtendEValueTest << RR
<< (rem -= stats.hitsFailedExtendEValueTest);
+ std::cout << "\n - failed bitScore test " << R
+ << stats.hitsFailedExtendBitScoreTest << RR
+ << (rem -= stats.hitsFailedExtendBitScoreTest);
+ std::cout << "\n - failed %-identity test " << R
+ << stats.hitsFailedExtendPercentIdentTest << RR
+ << (rem -= stats.hitsFailedExtendPercentIdentTest);
std::cout << "\n - duplicates " << R
<< stats.hitsDuplicate << RR
<< (rem -= stats.hitsDuplicate);
=====================================
src/search_options.hpp
=====================================
@@ -96,7 +96,8 @@ struct LambdaOptions : public SharedOptions
int xDropOff = 0;
int band = -1;
- double eCutOff = 0;
+ double minBitScore = 0;
+ double maxEValue = 1e-04;
int idCutOff = 0;
unsigned long maxMatches = 500;
@@ -203,7 +204,7 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
addSection(parser, "Output Options");
addOption(parser, ArgParseOption("o", "output",
- "File to hold reports on hits (.m* are blastall -m* formats; .m8 is tab-seperated, .m9 is tab-seperated with "
+ "File to hold reports on hits (.m* are blastall -m* formats; .m8 is tab-separated, .m9 is tab-separated with "
"with comments, .m0 is pairwise format).",
ArgParseArgument::OUTPUT_FILE,
"OUT"));
@@ -247,6 +248,14 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
setMinValue(parser, "e-value", "0");
setMaxValue(parser, "e-value", "100");
+ addOption(parser, ArgParseOption("", "bit-score",
+ "Output only matches that score above this threshold.",
+ ArgParseArgument::DOUBLE));
+ setDefaultValue(parser, "bit-score", "0");
+ setMinValue(parser, "bit-score", "0");
+ setMaxValue(parser, "bit-score", "1000");
+
+
addOption(parser, ArgParseOption("n", "num-matches",
"Print at most this number of matches per query.",
ArgParseArgument::INTEGER));
@@ -646,7 +655,7 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
getOptionValue(buffer, parser, "output-columns");
if (buffer == "help")
{
- std::cout << "Please specify the columns in this format -oc 'column1 column2', i.e. space-seperated and "
+ std::cout << "Please specify the columns in this format -oc 'column1 column2', i.e. space-separated and "
<< "enclosed in single quotes.\nThe specifiers are the same as in NCBI Blast, currently "
<< "the following are supported:\n";
for (unsigned i = 0; i < length(BlastMatchField<>::implemented); ++i)
@@ -693,7 +702,7 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
getOptionValue(buffer, parser, "sam-bam-tags");
if (buffer == "help")
{
- std::cout << "Please specify the tags in this format -oc 'tag1 tag2', i.e. space-seperated and "
+ std::cout << "Please specify the tags in this format -oc 'tag1 tag2', i.e. space-separated and "
<< "enclosed in quotes. The order of tags is not preserved.\nThe following specifiers are "
<< "supported:\n";
@@ -771,7 +780,8 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
getOptionValue(options.seedDeltaIncreasesLength, parser, "seed-delta-increases-length");
- getOptionValue(options.eCutOff, parser, "e-value");
+ getOptionValue(options.maxEValue, parser, "e-value");
+ getOptionValue(options.minBitScore, parser, "bit-score");
getOptionValue(options.idCutOff, parser, "percent-identity");
getOptionValue(options.xDropOff, parser, "x-drop");
@@ -835,10 +845,9 @@ parseCommandLine(LambdaOptions & options, int argc, char const ** argv)
// TODO always prescore 1
getOptionValue(options.preScoring, parser, "pre-scoring");
- //TODO reactivate
-// if ((!isSet(parser, "pre-scoring")) &&
-// (options.alphReduction == 0))
-// options.preScoring = 1;
+ if ((!isSet(parser, "pre-scoring")) &&
+ (options.reducedAlphabet == options.transAlphabet))
+ options.preScoring = 1;
getOptionValue(options.preScoringThresh, parser, "pre-scoring-threshold");
// if (options.preScoring == 0)
@@ -904,8 +913,9 @@ printOptions(LambdaOptions const & options)
<< " db index type: " << _indexEnumToName(options.dbIndexType) << "\n"
<< " OUTPUT (file)\n"
<< " output file: " << options.output << "\n"
+ << " maximum e-value: " << options.maxEValue << "\n"
+ << " minimum bit-score: " << options.minBitScore << "\n"
<< " minimum % identity: " << options.idCutOff << "\n"
- << " maximum e-value: " << options.eCutOff << "\n"
<< " max #matches per query: " << options.maxMatches << "\n"
<< " include subj names in sam:" << options.samWithRefHeader << "\n"
<< " include seq in sam/bam: " << options.samBamSeq << "\n"
=====================================
src/search_output.hpp
=====================================
@@ -330,7 +330,7 @@ myWriteHeader(TGH & globalHolder, TLambdaOptions const & options)
if (sIsTranslated(TGH::blastProgram))
{
//TODO can we get around a copy?
- subjSeqLengths = globalHolder.untransSubjSeqLengths;
+ subjSeqLengths = prefix(globalHolder.untransSubjSeqLengths, length(globalHolder.untransSubjSeqLengths) - 1);
} else
{
// compute lengths ultra-fast
=====================================
src/shared_misc.hpp
=====================================
@@ -26,7 +26,10 @@
#include <locale>
#include <type_traits>
#include <forward_list>
-#include <sys/sysctl.h>
+
+#if __has_include(<sys/sysctl.h>)
+ #include <sys/sysctl.h>
+#endif
#include <seqan/basic.h>
#include <seqan/sequence.h>
View it on GitLab: https://salsa.debian.org/med-team/lambda-align2/-/commit/3412b7070adc55dc43b247812220879a6396e52a
--
View it on GitLab: https://salsa.debian.org/med-team/lambda-align2/-/commit/3412b7070adc55dc43b247812220879a6396e52a
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20230721/88ef2872/attachment-0001.htm>
More information about the debian-med-commit mailing list