[med-svn] [Git][med-team/gatb-core][upstream] New upstream version 1.4.1+git20180206.6f8fce8+dfsg
Andreas Tille
gitlab at salsa.debian.org
Tue Jan 22 19:43:33 GMT 2019
Andreas Tille pushed to branch upstream at Debian Med / gatb-core
Commits:
75cbd09b by Andreas Tille at 2019-01-22T15:21:36Z
New upstream version 1.4.1+git20180206.6f8fce8+dfsg
- - - - -
20 changed files:
- − gatb-core/.gitignore
- gatb-core/CMakeLists.txt
- − gatb-core/doc/book/.gitignore
- gatb-core/src/gatb/bank/api/IBank.hpp
- gatb-core/src/gatb/bank/impl/AbstractBank.hpp
- gatb-core/src/gatb/bank/impl/BankComposite.hpp
- gatb-core/src/gatb/bcalm2/bglue_algo.cpp
- gatb-core/src/gatb/bcalm2/bglue_algo.hpp
- gatb-core/src/gatb/bcalm2/ograph.cpp
- gatb-core/src/gatb/bcalm2/unionFind.hpp
- gatb-core/src/gatb/debruijn/impl/Graph.cpp
- gatb-core/src/gatb/debruijn/impl/UnitigsConstructionAlgorithm.cpp
- gatb-core/src/gatb/kmer/impl/MPHFAlgorithm.cpp
- gatb-core/src/gatb/kmer/impl/SortingCountAlgorithm.cpp
- gatb-core/src/gatb/template/TemplateSpecialization10.cpp.in
- gatb-core/src/gatb/tools/collections/impl/MapMPHF.hpp
- gatb-core/src/gatb/tools/compression/DnaCoder.cpp
- gatb-core/src/gatb/tools/misc/api/StringsRepository.hpp
- gatb-core/test/unit/src/bcalm/TestBcalm.cpp
- gatb-core/test/unit/src/debruijn/TestDebruijn.cpp
Changes:
=====================================
gatb-core/.gitignore deleted
=====================================
@@ -1,7 +0,0 @@
-CMakeLists.txt.user
-/build*
-/.settings/
-/.cproject
-*.leon
-.DS_Store
-
=====================================
gatb-core/CMakeLists.txt
=====================================
@@ -274,8 +274,11 @@ IF (EXISTS "${PROJECT_SOURCE_DIR}/examples")
ADD_SUBDIRECTORY(examples)
ENDIF()
ENDIF()
-# add example snippets into binary archive (use by CPack directive)
-INSTALL(DIRECTORY "${PROJECT_SOURCE_DIR}/examples/" DESTINATION "examples")
+
+IF (NOT DEFINED GATB_CORE_EXCLUDE_EXAMPLES)
+ # add example snippets into binary archive (use by CPack directive)
+ INSTALL(DIRECTORY "${PROJECT_SOURCE_DIR}/examples/" DESTINATION "examples")
+ENDIF()
################################################################################
# INSTALL
=====================================
gatb-core/doc/book/.gitignore deleted
=====================================
@@ -1,3 +0,0 @@
-/main.aux
-/main.log
-/main.pdf
=====================================
gatb-core/src/gatb/bank/api/IBank.hpp
=====================================
@@ -92,6 +92,10 @@ public:
* \return estimation of the number of sequences of sub bank i */
virtual int64_t estimateNbItemsBanki (int i) = 0;
+ /** Return the vector of sub IBank objects (in case of bank composite), or a vector containing only the bank itself
+ * \return the IBank objects. */
+ virtual const std::vector<IBank*> getBanks() const = 0;
+
/** \copydoc tools::collections::Iterable::iterator */
virtual tools::dp::Iterator<Sequence>* iterator () = 0;
=====================================
gatb-core/src/gatb/bank/impl/AbstractBank.hpp
=====================================
@@ -29,6 +29,7 @@
/********************************************************************************/
#include <gatb/bank/api/IBank.hpp>
+#include <vector>
/********************************************************************************/
namespace gatb {
@@ -58,7 +59,13 @@ public:
int64_t estimateNbItemsBanki (int i) { return this->estimateNbItems(); }
-
+ /** \copydoc IBank::getBanks */
+ const std::vector<IBank*> getBanks() const {
+ std::vector<IBank*> _banks;
+ _banks.push_back((IBank *)this);
+ return _banks;
+ };
+
/** \copydoc IBank::estimateNbItems */
int64_t estimateNbItems ()
=====================================
gatb-core/src/gatb/bank/impl/BankComposite.hpp
=====================================
@@ -157,7 +157,7 @@ public:
/** Return the vector of IBank objects.
* \return the IBank objects. */
- const std::vector<IBank*>& getBanks() const { return _banks; }
+ const std::vector<IBank*> getBanks() const { return _banks; }
/** Get the number of referred banks.
* \return the number of referred banks */
=====================================
gatb-core/src/gatb/bcalm2/bglue_algo.cpp
=====================================
@@ -16,7 +16,7 @@
/*#include "ctpl_stl.h" // alternative to threadpool // https://github.com/vit-vit/CTPL/blob/master/ctpl_stl.h // didn't commit because didnt use
#include "buffer_allocator.h" // memory pool from https://github.com/vincetse/allocator, didn't commit the files because didnt use
-#include "buffer_manager.h" // memory pool
+#include "buffer_manager.h" // same, memory pool from https://github.com/vincetse/allocator/blob/master/include/lazy/memory/
*/
#include <gatb/tools/designpattern/impl/Command.hpp>
@@ -27,14 +27,8 @@
#include <gatb/tools/storage/impl/Storage.hpp>
#include <gatb/tools/storage/impl/StorageTools.hpp>
-#include <gatb/tools/math/NativeInt64.hpp>
-#include <gatb/tools/math/NativeInt128.hpp>
-#include <gatb/tools/math/LargeInt.hpp>
-
-#include <gatb/bank/impl/Banks.hpp>
#include <gatb/bank/impl/Bank.hpp>
-#include <gatb/bank/impl/BankHelpers.hpp>
-#include <gatb/bank/impl/BankConverterAlgorithm.hpp>
+#include <gatb/bank/impl/Banks.hpp>
#include <gatb/kmer/impl/Model.hpp>
@@ -658,6 +652,7 @@ template<size_t SPAN>
void bglue(Storage *storage,
std::string prefix,
int kmerSize,
+ int nb_glue_partitions,
int nb_threads,
bool verbose
)
@@ -673,12 +668,19 @@ void bglue(Storage *storage,
bool debug_uf_stats = false; // formerly cmdline parameter
bool only_uf = false; // idem
+ logging("Starting bglue with " + std::to_string( nb_threads) + " threads");
+
//int nbGluePartitions=200; // no longer fixed
// autodetecting number of partitions
int max_open_files = System::file().getMaxFilesNumber() / 2;
int nbGluePartitions = std::min(2000, max_open_files); // ceil it at 2000 anyhow
- logging("Starting bglue with " + std::to_string( nb_threads) + " threads");
+ if (nb_glue_partitions > 0)
+ {
+ nbGluePartitions = nb_glue_partitions;
+ logging("Using user-defined number of glue partitions: " + std::to_string( nb_glue_partitions));
+ }
+
// create a hasher for UF
typedef typename Kmer<SPAN>::ModelCanonical ModelCanon;
@@ -764,9 +766,15 @@ void bglue(Storage *storage,
// actually, in the current implementation, partition_t is not used, but values are indeed hardcoded in 32 bits (the UF implementation uses a 64 bits hash table for internal stuff)
// We loop over sequences.
- /*for (it.first(); !it.isDone(); it.next())
+
+ /* // uncomment for non-dispatcher version
+ auto it = in->iterator();
+ for (it->first(); !it->isDone(); it->next())
{
- string seq = it->toString();*/
+ const string seq = (*it)->toString();
+ const string comment = (*it)->getComment();
+ */
+
auto createUF = [k, &modelCanon, \
&uf_mphf, &ufkmers, &hasher](const Sequence& sequence)
{
@@ -838,7 +846,7 @@ void bglue(Storage *storage,
if (debug_uf_stats) // for debugging
{
ufkmers.printStats("uf kmers");
- //ufkmers.dumpUF("uf.dump");
+ //ufkmers.dump("uf.dump");
logging("after computing UF stats");
}
@@ -934,9 +942,10 @@ void bglue(Storage *storage,
// partition the glue into many files, à la dsk
+ std::mutex mtx; // lock to avoid a nasty bug when calling output()
auto partitionGlue = [k, &modelCanon /* crashes if copied!*/, \
&get_UFclass, &gluePartitions,
- &out, &outLock, &nb_seqs_in_partition, &out_id, nbGluePartitions]
+ &out, &outLock, &nb_seqs_in_partition, &out_id, nbGluePartitions, &mtx]
(const Sequence& sequence)
{
const string &seq = sequence.toString();
@@ -961,6 +970,10 @@ void bglue(Storage *storage,
const string abundances = comment.substr(3);
float mean_abundance = get_mean_abundance(abundances);
uint32_t sum_abundances = get_sum_abundance(abundances);
+
+ // for some reason i do need that lock_guard here.. even though output is itself lock guarded. maybe some lazyness in the evauation of the to_string(out_id++)? who kon
+ // anyway this fixes the problem, i'll understand it some other time.
+ std::lock_guard<std::mutex> lock(mtx);
output(seq, out, std::to_string(out_id++) + " LN:i:" + to_string(seq.size()) + " KC:i:" + to_string(sum_abundances) + " km:f:" + to_string_with_precision(mean_abundance));
// km is not a standard GFA field so i'm putting it in lower case as per the spec
// maybe could optimize by writing to disk using queues, if that's ever a bottleneck
@@ -1014,7 +1027,6 @@ void bglue(Storage *storage,
// glue all partitions using a thread pool
ThreadPool pool(nb_threads);
- std::mutex mtx; // lock to avoid a nasty bug when calling output()
for (int partition = 0; partition < nbGluePartitions; partition++)
{
auto glue_partition = [&modelCanon, &ufkmers, partition, &gluePartition_prefix, nbGluePartitions, &copy_nb_seqs_in_partition,
=====================================
gatb-core/src/gatb/bcalm2/bglue_algo.hpp
=====================================
@@ -137,6 +137,7 @@ class UnbufferedFastaIterator
void bglue(gatb::core::tools::storage::impl::Storage* storage,
std::string prefix,
int kmerSize,
+ int nb_glue_partitions,
int nb_threads,
bool verbose
);
=====================================
gatb-core/src/gatb/bcalm2/ograph.cpp
=====================================
@@ -217,7 +217,7 @@ inline void graph3<span>::update_connected(kmerIndiceT<span> &ki)
{
if (ki.position == SEQ_LEFT)
connected_left[ki.indice] = true;
- else
+ if (ki.position == SEQ_RIGHT)
connected_right[ki.indice] = true;
}
@@ -249,25 +249,33 @@ void graph3<span>::debruijn(){
kL=left[iL];
kR=right[iR];
- if (debug_index > 0) if (kL.indice == debug_index || kR.indice == debug_index ) std::cout << " kl / kR " << kL.indice << " " << kR.indice << " " << kL.kmmer << " " << kR.kmmer << " unitigs " << unitigs[kL.indice] << " " << unitigs[kR.indice] << std::endl;
+ //~ std::cout << " kl / kR " << kL.indice << " " << kR.indice << " " << kL.kmmer << " " << kR.kmmer << " unitigs " << unitigs[kL.indice] << " " << unitigs[kR.indice] << std::endl;
if(kL.kmmer==kR.kmmer){
if (debug_index > 0) if (kL.indice == debug_index || kR.indice == debug_index ) std::cout << " identical, kl / kR " << kL.indice << " " << kR.indice << " unitigs " << unitigs[kL.indice] << " " << unitigs[kR.indice] << " positions " << kL.position << " " << kR.position << std::endl;
- update_connected(kL);
- update_connected(kR);
+ if(isNumber (unitigs[kL.indice][0])){
+ }
+ if(not kL.indice==kR.indice){
+ update_connected(kL);
+ update_connected(kR);
+ }
// found the same (k-1)-mer in the left and right array, it means that two sequences end with those and could be potentially compacted
bool go(true);
++iL;++iR;
if(left[iL].kmmer==kL.kmmer){
go=false;
- update_connected(left[iL]);
- while(left[++iL].kmmer<=kR.kmmer ){if(iL==sizeLeft){return;}}
+ if(not left[iL].indice==right[iR].indice){
+ update_connected(left[iL]);
+ }
+ while(left[++iL].kmmer<=kR.kmmer ){}
}
if(right[iR].kmmer==kL.kmmer){
go=false;
- update_connected(right[iR]);
- while(right[++iR].kmmer<=kL.kmmer ){if(iR==sizeRight){return;}}
+ if(not left[iL].indice==right[iR].indice){
+ update_connected(right[iR]);
+ }
+ while(right[++iR].kmmer<=kL.kmmer ){}
}
if(go){
compaction(kL.indice,kR.indice,kL.kmmer);
@@ -348,7 +356,7 @@ void graph3<span>::addtuple(tuple<string,uint,uint,uint>& tuple){
// input tuple: <unitigs string, left minimizer, right minimizer, abundance>
unitigs[indiceUnitigs]=get<0>(tuple);
unitigs_abundances[indiceUnitigs].push_back(get<3>(tuple));
-
+
bool debug = false;
string debug_kmer = "GTTTTTTAGATTCTGAGTGGAACGATGAATG";
=====================================
gatb-core/src/gatb/bcalm2/unionFind.hpp
=====================================
@@ -131,16 +131,52 @@ public:
std::cout << "raw space of UF hash data: " << ( 2*getNumKeys * sizeof(T) ) /1024/1024 << " MB" << std::endl; // 2x because each key of type T is associated to a value of type T
}
- // debug function
- void dumpUF(std::string file)
+ // normalize the UF first: the class id is the element of the smallest id
+ // added to make the UF deterministic when populated by multiple threads
+ // requires 3xUF memory while doing this operation, so i'm not enabling it by default
+ void normalize()
+ {
+ std::vector<uint32_t> smallest_elt_in_class(size());
+ std::vector<uint32_t> mDataMirror(size());
+ for (uint32_t i=0; i<size(); ++i)
+ {
+ smallest_elt_in_class[find(i)] = size();
+ mDataMirror[i] = mData[i];
+ }
+ for (uint32_t i=0; i<size(); ++i)
+ smallest_elt_in_class[find(i)] = std::min(smallest_elt_in_class[find(i)],i);
+ for (uint32_t i=0; i<size(); ++i)
+ //mData[i] = smallest_for_class[find(i)]; // this is fishy to modify mData at same time we do the find
+ mDataMirror[i] = smallest_elt_in_class[find(i)]; // this is fishy to modify mData at same time we do the find
+ for (uint32_t i=0; i<size(); ++i)
+ mData[i] = mDataMirror[i];
+ }
+
+ void dump(std::string file)
{
std::ofstream dumpfile;
dumpfile.open (file);
+ dumpfile << size() << std::endl;
for (uint32_t i=0; i<size(); ++i)
dumpfile << i << " " << mData[i] << std::endl;
dumpfile.close();
}
+ void load(std::string file)
+ {
+ std::ifstream dumpfile(file);
+ uint32_t uf_size;
+ dumpfile >> uf_size;
+ if (size() != uf_size) {std::cout << "error: loading a UF of the wrong size" << std::endl; exit(1);}
+ uint64_t osef, value;
+ for (uint32_t i=0; i<size(); ++i)
+ {
+ dumpfile >> osef >> value;
+ mData[i] = value;
+ }
+ }
+
+
mutable std::vector<std::atomic<uint64_t>> mData;
};
=====================================
gatb-core/src/gatb/debruijn/impl/Graph.cpp
=====================================
@@ -656,10 +656,11 @@ IOptionsParser* GraphTemplate<Node, Edge, GraphDataVariant>::getOptionsParser (b
OptionsParser* parserDebug = new OptionsParser ("debug ");
// those are only valid for GraphUnitigs, but GraphUnitigs doesn't have custom options (yet) so i'm adding here
+ parserDebug->push_front (new OptionOneParam ("-nb-glue-partitions", "number of glue partitions (automatically calculated by default)", false, "0"));
parserDebug->push_front (new OptionNoParam ("-skip-links", "same, but skip links"));
parserDebug->push_front (new OptionNoParam ("-redo-links", "same, but redo links"));
parserDebug->push_front (new OptionNoParam ("-skip-bglue", "same, but skip bglue"));
- parserDebug->push_front (new OptionNoParam ("-redo-bglue", "same, but redo bglue "));
+ parserDebug->push_front (new OptionNoParam ("-redo-bglue", "same, but redo bglue"));
parserDebug->push_front (new OptionNoParam ("-skip-bcalm", "same, but skip bcalm"));
parserDebug->push_front (new OptionNoParam ("-redo-bcalm", "debug function, redo the bcalm algo"));
@@ -3245,7 +3246,7 @@ struct queryAbundance_visitor : public boost::static_visitor<int> {
unsigned long hashIndex = getNodeIndex<span>(data, node);
if(hashIndex == ULLONG_MAX) return 0; // node was not found in the mphf
- unsigned char value = (*(data._abundance)).at(hashIndex);
+ int value = data._abundance->abundanceAt(hashIndex); // uses discretized abundance
return value;
}
=====================================
gatb-core/src/gatb/debruijn/impl/UnitigsConstructionAlgorithm.cpp
=====================================
@@ -102,13 +102,16 @@ void UnitigsConstructionAlgorithm<span>::execute ()
int minimizer_type =
getInput()->getInt(STR_MINIMIZER_TYPE);
bool verbose = getInput()->getInt(STR_VERBOSE);
+ int nb_glue_partitions = 0;
+ if (getInput()->get("-nb-glue-partitions"))
+ nb_glue_partitions = getInput()->getInt("-nb-glue-partitions");
unsigned int nbThreads = this->getDispatcher()->getExecutionUnitsNumber();
if ((unsigned int)nb_threads > nbThreads)
std::cout << "Uh. Unitigs graph construction called with nb_threads " << nb_threads << " but dispatcher has nbThreads " << nbThreads << std::endl;
if (do_bcalm) bcalm2<span>(&_storage, unitigs_filename, kmerSize, abundance, minimizerSize, nbThreads, minimizer_type, verbose);
- if (do_bglue) bglue<span> (&_storage, unitigs_filename, kmerSize, nbThreads, verbose);
+ if (do_bglue) bglue<span> (&_storage, unitigs_filename, kmerSize, nb_glue_partitions, nbThreads, verbose);
if (do_links) link_tigs<span>(unitigs_filename, kmerSize, nbThreads, nb_unitigs, verbose);
/** We gather some statistics. */
=====================================
gatb-core/src/gatb/kmer/impl/MPHFAlgorithm.cpp
=====================================
@@ -252,6 +252,7 @@ void MPHFAlgorithm<span,Abundance_t,NodeState_t>::populate ()
if (abundance > max_abundance_discrete)
{
_nb_abundances_above_precision++;
+ //std::cout << "found abundance larger than discrete: " << abundance << std::endl;
abundance = max_abundance_discrete;
}
=====================================
gatb-core/src/gatb/kmer/impl/SortingCountAlgorithm.cpp
=====================================
@@ -222,7 +222,7 @@ IOptionsParser* SortingCountAlgorithm<span>::getOptionsParser (bool mandatory)
parser->push_back (new OptionOneParam (STR_COMPRESS_LEVEL, "h5 compression level (0:none, 9:best)", false, "0"));
parser->push_back (new OptionOneParam (STR_STORAGE_TYPE, "storage type of kmer counts ('hdf5' or 'file')", false, "hdf5" ));
- IOptionsParser* devParser = new OptionsParser ("kmer count, algorithmic options");
+ IOptionsParser* devParser = new OptionsParser ("kmer count, advanced performance tweaks");
devParser->push_back (new OptionOneParam (STR_MINIMIZER_TYPE, "minimizer type (0=lexi, 1=freq)", false, "0"));
devParser->push_back (new OptionOneParam (STR_MINIMIZER_SIZE, "size of a minimizer", false, "10"));
=====================================
gatb-core/src/gatb/template/TemplateSpecialization10.cpp.in
=====================================
@@ -23,6 +23,7 @@ template void bcalm2<${KSIZE}>(Storage* storage,
template void bglue<${KSIZE}>(Storage* storage,
std::string prefix,
int kmerSize,
+ int nb_glue_partitions,
int nb_threads,
bool verbose
);
=====================================
gatb-core/src/gatb/tools/collections/impl/MapMPHF.hpp
=====================================
@@ -194,9 +194,8 @@ namespace gatb {
/** Get the value for a given key
* \param[in] key : the key
* \return the value associated to the key. */
- int operator[] (const Key& key) {
- return floorf((_abundanceDiscretization [data[hash(key)]] + _abundanceDiscretization [data[hash(key)]+1])/2.0);
- //return data[hash(key)];
+ Value& operator[] (const Key& key) {
+ return data[hash(key)];
}
/** Get the value for a given index
@@ -207,11 +206,17 @@ namespace gatb {
}
-
Value& at (const Key& key) {
return data[hash(key)];
}
+ int abundanceAt (const Key& key) {
+ return floorf((_abundanceDiscretization [data[hash(key)]] + _abundanceDiscretization [data[hash(key)]+1])/2.0);
+ }
+
+ int abundanceAt (typename Hash::Code code) {
+ return floorf((_abundanceDiscretization [data[code]] + _abundanceDiscretization [data[code]+1])/2.0);
+ }
/** Get the hash code of the given key. */
typename Hash::Code getCode (const Key& key) { return hash(key); }
=====================================
gatb-core/src/gatb/tools/compression/DnaCoder.cpp
=====================================
@@ -362,7 +362,10 @@ void DnaEncoder::execute(){
if(_readSize < _kmerSize){
encodeNoAnchorRead();
- smoothQuals();
+ if(! _leon->_isFasta)
+ {
+ smoothQuals();
+ }
endRead();
return;
}
=====================================
gatb-core/src/gatb/tools/misc/api/StringsRepository.hpp
=====================================
@@ -103,6 +103,7 @@ public:
const char* uri_output_tmp () { return "-out-tmp"; }
const char* verbose () { return "-verbose"; }
const char* help () { return "-help"; }
+ const char* help_short () { return "-h"; }
const char* version () { return "-version"; }
const char* bloom_type () { return "-bloom"; }
const char* debloom_type () { return "-debloom"; }
@@ -156,6 +157,7 @@ public:
#define STR_URI_OUTPUT_TMP gatb::core::tools::misc::StringRepository::singleton().uri_output_tmp ()
#define STR_VERBOSE gatb::core::tools::misc::StringRepository::singleton().verbose ()
#define STR_HELP gatb::core::tools::misc::StringRepository::singleton().help ()
+#define STR_HELP_SHORT gatb::core::tools::misc::StringRepository::singleton().help_short ()
#define STR_VERSION gatb::core::tools::misc::StringRepository::singleton().version ()
#define STR_BLOOM_TYPE gatb::core::tools::misc::StringRepository::singleton().bloom_type()
#define STR_DEBLOOM_TYPE gatb::core::tools::misc::StringRepository::singleton().debloom_type()
=====================================
gatb-core/test/unit/src/bcalm/TestBcalm.cpp
=====================================
@@ -74,7 +74,7 @@ public:
/********************************************************************************/
- void bcalm_test1 () // i wanna test de UF because it was buggy in multithreads
+ void bcalm_test1 () // i wanna test de UF because there was a weird behavior in bcalm. turns out: it wasn't the UF that was faulty, but let's keep this test.
{
int nb_uf_elts = 3000000;
unionFind<uint32_t> uf(nb_uf_elts);
@@ -90,9 +90,11 @@ public:
};
- //doJoins(0,nb_uf_elts/2);
- //doJoins(nb_uf_elts/2,nb_uf_elts-1);
- //
+ doJoins(nb_uf_elts/2,nb_uf_elts-1);
+ doJoins(0,nb_uf_elts/2);
+
+ // what if it's not a multithread bug but an order of operations bug, somehow
+ /* // threaded
std::thread first(doJoins,0,nb_uf_elts/3);
std::thread second(doJoins,nb_uf_elts/3,2*(nb_uf_elts/3));
std::thread third(doJoins,2*(nb_uf_elts/3),nb_uf_elts-1);
@@ -100,6 +102,7 @@ public:
first.join();
second.join();
third.join();
+ */
int foundclass = uf.find(0); // not always 0, depends which thread starts first
for (int i = 1; i < nb_uf_elts; i++)
=====================================
gatb-core/test/unit/src/debruijn/TestDebruijn.cpp
=====================================
@@ -87,6 +87,7 @@ class TestDebruijn : public Test
/********************************************************************************/
CPPUNIT_TEST_SUITE_GATB (TestDebruijn);
+ CPPUNIT_TEST_GATB (debruijn_large_abundance_query);
CPPUNIT_TEST_GATB (debruijn_test7);
CPPUNIT_TEST_GATB (debruijn_deletenode);
//CPPUNIT_TEST_GATB (debruijn_checksum); // FIXME removed it because it's a damn long test
@@ -1248,6 +1249,38 @@ public:
debruijn_deletenode2_fct (graph2);
}
+
+ /********************************************************************************/
+
+ /** */
+ void debruijn_large_abundance_query ()
+ {
+ const char* sequence = "TTGCTCACATGTTCTTTCCTGCGTTATCCCG";
+ char *bigseq= (char *)calloc(strlen(sequence)*1001,1);
+ bigseq[0]='\0';
+ for (int i = 0; i < 1000; i++)
+ strcat(bigseq,sequence);
+
+ size_t kmerSize = strlen (sequence);
+
+ // We create the graph.
+ Graph graph = Graph::create (new BankStrings (bigseq, 0), "-kmer-size %d -abundance-min 1 -verbose 0 -max-memory %d", kmerSize, MAX_MEMORY);
+
+ GraphIterator<Node> it = graph.iterator();
+
+ // debugging
+ /*for (it.first(); !it.isDone(); it.next())
+ {
+ //std::cout << graph.toString (it.item()) << " test printing node abundance " << it.item().abundance << std::endl;
+ }*/
+
+ // random access to nodes
+ Node node = graph.buildNode ((char*)sequence);
+ CPPUNIT_ASSERT (graph.toString(node) == sequence);
+ int abundance = graph.queryAbundance(node);
+ //std::cout << graph.toString(node) << " test printing node abundance " << abundance << " expected abundance:" << 1000 << std::endl;
+ CPPUNIT_ASSERT (abundance > 600 && abundance < 2000); // allow for imprecision
+ }
};
View it on GitLab: https://salsa.debian.org/med-team/gatb-core/commit/75cbd09b8831e9a75766136964ebe4f3a6566e6b
--
View it on GitLab: https://salsa.debian.org/med-team/gatb-core/commit/75cbd09b8831e9a75766136964ebe4f3a6566e6b
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190122/9001751e/attachment-0001.html>
More information about the debian-med-commit
mailing list