[med-svn] [Git][med-team/gatb-core][upstream] New upstream version 1.4.1+git20180206.6f8fce8+dfsg

Tue Jan 22 19:43:33 GMT 2019

Andreas Tille pushed to branch upstream at Debian Med / gatb-core


Commits:
75cbd09b by Andreas Tille at 2019-01-22T15:21:36Z
New upstream version 1.4.1+git20180206.6f8fce8+dfsg
- - - - -


20 changed files:

- − gatb-core/.gitignore
- gatb-core/CMakeLists.txt
- − gatb-core/doc/book/.gitignore
- gatb-core/src/gatb/bank/api/IBank.hpp
- gatb-core/src/gatb/bank/impl/AbstractBank.hpp
- gatb-core/src/gatb/bank/impl/BankComposite.hpp
- gatb-core/src/gatb/bcalm2/bglue_algo.cpp
- gatb-core/src/gatb/bcalm2/bglue_algo.hpp
- gatb-core/src/gatb/bcalm2/ograph.cpp
- gatb-core/src/gatb/bcalm2/unionFind.hpp
- gatb-core/src/gatb/debruijn/impl/Graph.cpp
- gatb-core/src/gatb/debruijn/impl/UnitigsConstructionAlgorithm.cpp
- gatb-core/src/gatb/kmer/impl/MPHFAlgorithm.cpp
- gatb-core/src/gatb/kmer/impl/SortingCountAlgorithm.cpp
- gatb-core/src/gatb/template/TemplateSpecialization10.cpp.in
- gatb-core/src/gatb/tools/collections/impl/MapMPHF.hpp
- gatb-core/src/gatb/tools/compression/DnaCoder.cpp
- gatb-core/src/gatb/tools/misc/api/StringsRepository.hpp
- gatb-core/test/unit/src/bcalm/TestBcalm.cpp
- gatb-core/test/unit/src/debruijn/TestDebruijn.cpp


Changes:

=====================================
gatb-core/.gitignore deleted
=====================================
@@ -1,7 +0,0 @@
-CMakeLists.txt.user
-/build*
-/.settings/
-/.cproject
-*.leon
-.DS_Store
-


=====================================
gatb-core/CMakeLists.txt
=====================================
@@ -274,8 +274,11 @@ IF (EXISTS "${PROJECT_SOURCE_DIR}/examples")
         ADD_SUBDIRECTORY(examples)
     ENDIF()
 ENDIF()
-# add example snippets into binary archive (use by CPack directive)
-INSTALL(DIRECTORY "${PROJECT_SOURCE_DIR}/examples/" DESTINATION "examples")
+
+IF (NOT DEFINED GATB_CORE_EXCLUDE_EXAMPLES)
+    # add example snippets into binary archive (used by the CPack directive)
+    INSTALL(DIRECTORY "${PROJECT_SOURCE_DIR}/examples/" DESTINATION "examples")
+ENDIF()
 
 ################################################################################
 #  INSTALL 


=====================================
gatb-core/doc/book/.gitignore deleted
=====================================
@@ -1,3 +0,0 @@
-/main.aux
-/main.log
-/main.pdf


=====================================
gatb-core/src/gatb/bank/api/IBank.hpp
=====================================
@@ -92,6 +92,10 @@ public:
 	 * \return estimation of the number of sequences of sub bank i */
 	virtual int64_t estimateNbItemsBanki (int i) = 0;
 	
+	/** Return the vector of  sub IBank objects (in case of bank composite), or a vector containing only the bank itself
+	 * \return the IBank objects. */
+	virtual const std::vector<IBank*> getBanks() const  = 0;
+	
     /** \copydoc tools::collections::Iterable::iterator */
     virtual tools::dp::Iterator<Sequence>* iterator () = 0;
 


=====================================
gatb-core/src/gatb/bank/impl/AbstractBank.hpp
=====================================
@@ -29,6 +29,7 @@
 /********************************************************************************/
 
 #include <gatb/bank/api/IBank.hpp>
+#include <vector>
 
 /********************************************************************************/
 namespace gatb      {
@@ -58,7 +59,13 @@ public:
 	
 	int64_t estimateNbItemsBanki (int i)  { return this->estimateNbItems(); }
 
-	
+	/** \copydoc IBank::getBanks */
+	const std::vector<IBank*> getBanks() const  {
+		std::vector<IBank*> _banks;
+		_banks.push_back((IBank *)this);
+		return _banks;
+	};
+
 	
     /** \copydoc IBank::estimateNbItems */
     int64_t estimateNbItems ()


=====================================
gatb-core/src/gatb/bank/impl/BankComposite.hpp
=====================================
@@ -157,7 +157,7 @@ public:
 
     /** Return the vector of IBank objects.
      * \return the IBank objects. */
-    const std::vector<IBank*>& getBanks() const { return _banks; }
+    const std::vector<IBank*> getBanks() const { return _banks; }
 
     /** Get the number of referred banks.
      * \return the number of referred banks */


=====================================
gatb-core/src/gatb/bcalm2/bglue_algo.cpp
=====================================
@@ -16,7 +16,7 @@
 
 /*#include "ctpl_stl.h" // alternative to threadpool // https://github.com/vit-vit/CTPL/blob/master/ctpl_stl.h // didn't commit because didnt use
 #include "buffer_allocator.h" // memory pool from https://github.com/vincetse/allocator, didn't commit the files because didnt use
-#include "buffer_manager.h" // memory pool
+#include "buffer_manager.h" // same, memory pool from https://github.com/vincetse/allocator/blob/master/include/lazy/memory/
 */
 
 #include <gatb/tools/designpattern/impl/Command.hpp>
@@ -27,14 +27,8 @@
 #include <gatb/tools/storage/impl/Storage.hpp>
 #include <gatb/tools/storage/impl/StorageTools.hpp>
 
-#include <gatb/tools/math/NativeInt64.hpp>
-#include <gatb/tools/math/NativeInt128.hpp>
-#include <gatb/tools/math/LargeInt.hpp>
-
-#include <gatb/bank/impl/Banks.hpp>
 #include <gatb/bank/impl/Bank.hpp>
-#include <gatb/bank/impl/BankHelpers.hpp>
-#include <gatb/bank/impl/BankConverterAlgorithm.hpp>
+#include <gatb/bank/impl/Banks.hpp>
 
 #include <gatb/kmer/impl/Model.hpp>
 
@@ -658,6 +652,7 @@ template<size_t SPAN>
 void bglue(Storage *storage, 
         std::string prefix,
         int kmerSize, 
+        int nb_glue_partitions, 
         int nb_threads, 
         bool verbose
         )
@@ -673,12 +668,19 @@ void bglue(Storage *storage,
     bool debug_uf_stats = false; // formerly cmdline parameter
     bool only_uf = false; // idem
 
+    logging("Starting bglue with " + std::to_string( nb_threads) + " threads");
+
     //int nbGluePartitions=200; // no longer fixed 
     // autodetecting number of partitions
     int max_open_files = System::file().getMaxFilesNumber() / 2;
     int nbGluePartitions = std::min(2000, max_open_files); // ceil it at 2000 anyhow
 
-    logging("Starting bglue with " + std::to_string( nb_threads) + " threads");
+    if (nb_glue_partitions > 0)
+    {
+        nbGluePartitions = nb_glue_partitions;
+        logging("Using user-defined number of glue partitions: " + std::to_string( nb_glue_partitions));
+    }
+
 
     // create a hasher for UF
     typedef typename Kmer<SPAN>::ModelCanonical ModelCanon;
@@ -764,9 +766,15 @@ void bglue(Storage *storage,
     // actually, in the current implementation, partition_t is not used, but values are indeed hardcoded in 32 bits (the UF implementation uses a 64 bits hash table for internal stuff)
 
     // We loop over sequences.
-    /*for (it.first(); !it.isDone(); it.next())
+
+    /* // uncomment for non-dispatcher version
+    auto it = in->iterator();
+    for (it->first(); !it->isDone(); it->next())
     {
-        string seq = it->toString();*/
+        const string seq = (*it)->toString();
+        const string comment = (*it)->getComment();
+    */
+    
     auto createUF = [k, &modelCanon, \
         &uf_mphf, &ufkmers, &hasher](const Sequence& sequence)
     {
@@ -838,7 +846,7 @@ void bglue(Storage *storage,
     if (debug_uf_stats) // for debugging
     {
         ufkmers.printStats("uf kmers");
-        //ufkmers.dumpUF("uf.dump");
+        //ufkmers.dump("uf.dump");
         logging("after computing UF stats");
     }
 
@@ -934,9 +942,10 @@ void bglue(Storage *storage,
 
 
     // partition the glue into many files, à la dsk
+    std::mutex mtx; // lock to avoid a nasty bug when calling output()
     auto partitionGlue = [k, &modelCanon /* crashes if copied!*/, \
         &get_UFclass, &gluePartitions,
-        &out, &outLock, &nb_seqs_in_partition, &out_id, nbGluePartitions]
+        &out, &outLock, &nb_seqs_in_partition, &out_id, nbGluePartitions, &mtx]
             (const Sequence& sequence)
     {
         const string &seq = sequence.toString();
@@ -961,6 +970,10 @@ void bglue(Storage *storage,
             const string abundances = comment.substr(3);
             float mean_abundance = get_mean_abundance(abundances);
             uint32_t sum_abundances = get_sum_abundance(abundances);
+            
+            // for some reason i do need that lock_guard here.. even though output is itself lock guarded. maybe some laziness in the evaluation of the to_string(out_id++)? who knows.
+            // anyway this fixes the problem, i'll understand it some other time.
+            std::lock_guard<std::mutex> lock(mtx);
             output(seq, out, std::to_string(out_id++) + " LN:i:" + to_string(seq.size()) + " KC:i:" + to_string(sum_abundances) + " km:f:" + to_string_with_precision(mean_abundance)); 
             // km is not a standard GFA field so i'm putting it in lower case as per the spec
             // maybe could optimize by writing to disk using queues, if that's ever a bottleneck
@@ -1014,7 +1027,6 @@ void bglue(Storage *storage,
 
     // glue all partitions using a thread pool
     ThreadPool pool(nb_threads);
-    std::mutex mtx; // lock to avoid a nasty bug when calling output()
     for (int partition = 0; partition < nbGluePartitions; partition++)
     {
         auto glue_partition = [&modelCanon, &ufkmers, partition, &gluePartition_prefix, nbGluePartitions, &copy_nb_seqs_in_partition,


=====================================
gatb-core/src/gatb/bcalm2/bglue_algo.hpp
=====================================
@@ -137,6 +137,7 @@ class UnbufferedFastaIterator
 void bglue(gatb::core::tools::storage::impl::Storage* storage, 
         std::string prefix,
         int kmerSize, 
+        int nb_glue_partitions, 
         int nb_threads, 
         bool verbose
         );


=====================================
gatb-core/src/gatb/bcalm2/ograph.cpp
=====================================
@@ -217,7 +217,7 @@ inline void graph3<span>::update_connected(kmerIndiceT<span> &ki)
 {
     if (ki.position == SEQ_LEFT)
         connected_left[ki.indice] = true;
-    else
+    if (ki.position == SEQ_RIGHT)
         connected_right[ki.indice] = true;
 }
 
@@ -249,25 +249,33 @@ void graph3<span>::debruijn(){
 		kL=left[iL];
 		kR=right[iR];
 
-        if (debug_index > 0) if (kL.indice == debug_index || kR.indice == debug_index ) std::cout << " kl / kR " << kL.indice << " " << kR.indice << " " << kL.kmmer << " " << kR.kmmer << " unitigs " << unitigs[kL.indice] << " " << unitigs[kR.indice] << std::endl;
+        //~ std::cout << " kl / kR " << kL.indice << " " << kR.indice << " " << kL.kmmer << " " << kR.kmmer << " unitigs " << unitigs[kL.indice] << " " << unitigs[kR.indice] << std::endl;
 
 		if(kL.kmmer==kR.kmmer){
             if (debug_index > 0) if (kL.indice == debug_index || kR.indice == debug_index ) std::cout << " identical, kl / kR " << kL.indice << " " << kR.indice << " unitigs " << unitigs[kL.indice] << " " << unitigs[kR.indice] << " positions "  << kL.position << " " << kR.position << std::endl;
-            update_connected(kL);
-            update_connected(kR);
+            if(isNumber (unitigs[kL.indice][0])){
+			}
+            if(not kL.indice==kR.indice){
+				update_connected(kL);
+				update_connected(kR);
+			}
 
             // found the same (k-1)-mer in the left and right array, it means that two sequences end with those and could be potentially compacted
 			bool go(true);
 			++iL;++iR;
 			if(left[iL].kmmer==kL.kmmer){
 				go=false;
-				update_connected(left[iL]);
-				while(left[++iL].kmmer<=kR.kmmer ){if(iL==sizeLeft){return;}}
+				if(not left[iL].indice==right[iR].indice){
+					update_connected(left[iL]);
+				}
+				while(left[++iL].kmmer<=kR.kmmer ){}
 			}
 			if(right[iR].kmmer==kL.kmmer){
 				go=false;
-				update_connected(right[iR]);
-				while(right[++iR].kmmer<=kL.kmmer ){if(iR==sizeRight){return;}}
+				if(not left[iL].indice==right[iR].indice){
+					update_connected(right[iR]);
+				}
+				while(right[++iR].kmmer<=kL.kmmer ){}
 			}
 			if(go){
 				compaction(kL.indice,kR.indice,kL.kmmer);
@@ -348,7 +356,7 @@ void graph3<span>::addtuple(tuple<string,uint,uint,uint>& tuple){
     // input tuple: <unitigs string, left minimizer, right minimizer, abundance>
 	unitigs[indiceUnitigs]=get<0>(tuple);
 	unitigs_abundances[indiceUnitigs].push_back(get<3>(tuple));
-    
+
     bool debug = false;
     string debug_kmer = "GTTTTTTAGATTCTGAGTGGAACGATGAATG";
 


=====================================
gatb-core/src/gatb/bcalm2/unionFind.hpp
=====================================
@@ -131,16 +131,52 @@ public:
         std::cout << "raw space of UF hash data: " << ( 2*getNumKeys * sizeof(T)  ) /1024/1024 << " MB" << std::endl; // 2x because each key of type T is associated to a value of type T
     }
 
-    // debug function
-    void dumpUF(std::string file)
+    // normalize the UF first: the class id is the element of the smallest id
+    // added to make the UF deterministic when populated by multiple threads
+    // requires 3xUF memory while doing this operation, so i'm not enabling it by default
+    void normalize()
+    {
+        std::vector<uint32_t> smallest_elt_in_class(size());
+        std::vector<uint32_t> mDataMirror(size());
+        for (uint32_t i=0; i<size(); ++i)
+        {
+            smallest_elt_in_class[find(i)] = size();
+            mDataMirror[i] = mData[i]; 
+        }
+        for (uint32_t i=0; i<size(); ++i)
+            smallest_elt_in_class[find(i)] = std::min(smallest_elt_in_class[find(i)],i);
+        for (uint32_t i=0; i<size(); ++i)
+            //mData[i] = smallest_for_class[find(i)]; // this is fishy to modify mData at same time we do the find
+            mDataMirror[i] = smallest_elt_in_class[find(i)]; // this is fishy to modify mData at same time we do the find
+        for (uint32_t i=0; i<size(); ++i)
+            mData[i] = mDataMirror[i];
+    }
+
+    void dump(std::string file)
     {
         std::ofstream dumpfile;
         dumpfile.open (file);
+        dumpfile << size()  << std::endl;
         for (uint32_t i=0; i<size(); ++i)
             dumpfile << i << " " << mData[i]  << std::endl;
         dumpfile.close();
     }
 
+    void load(std::string file)
+    {
+        std::ifstream dumpfile(file);
+        uint32_t uf_size;
+        dumpfile >> uf_size;
+        if (size() != uf_size) {std::cout << "error: loading a UF of the wrong size" << std::endl; exit(1);}
+        uint64_t osef, value;
+        for (uint32_t i=0; i<size(); ++i)
+        {
+            dumpfile >> osef >> value;
+            mData[i] = value;
+        }
+    }
+
+
 
     mutable std::vector<std::atomic<uint64_t>> mData;
 };


=====================================
gatb-core/src/gatb/debruijn/impl/Graph.cpp
=====================================
@@ -656,10 +656,11 @@ IOptionsParser* GraphTemplate<Node, Edge, GraphDataVariant>::getOptionsParser (b
     OptionsParser* parserDebug = new OptionsParser ("debug ");
 
     // those are only valid for GraphUnitigs, but GraphUnitigs doesn't have custom options (yet) so i'm adding here
+    parserDebug->push_front (new OptionOneParam ("-nb-glue-partitions",       "number of glue partitions (automatically calculated by default)", false, "0"));
     parserDebug->push_front (new OptionNoParam  ("-skip-links",       "same, but       skip     links"));
     parserDebug->push_front (new OptionNoParam  ("-redo-links",       "same, but       redo     links"));
     parserDebug->push_front (new OptionNoParam  ("-skip-bglue",       "same, but       skip     bglue"));
-    parserDebug->push_front (new OptionNoParam  ("-redo-bglue",       "same, but       redo     bglue     "));
+    parserDebug->push_front (new OptionNoParam  ("-redo-bglue",       "same, but       redo     bglue"));
     parserDebug->push_front (new OptionNoParam  ("-skip-bcalm",       "same, but       skip     bcalm"));
     parserDebug->push_front (new OptionNoParam  ("-redo-bcalm",       "debug function, redo the bcalm algo"));
 
@@ -3245,7 +3246,7 @@ struct queryAbundance_visitor : public boost::static_visitor<int>    {
         unsigned long hashIndex = getNodeIndex<span>(data, node);
     	if(hashIndex == ULLONG_MAX) return 0; // node was not found in the mphf 
 
-        unsigned char value = (*(data._abundance)).at(hashIndex);
+        int value = data._abundance->abundanceAt(hashIndex); // uses discretized abundance
 
         return value;
     }


=====================================
gatb-core/src/gatb/debruijn/impl/UnitigsConstructionAlgorithm.cpp
=====================================
@@ -102,13 +102,16 @@ void UnitigsConstructionAlgorithm<span>::execute ()
     int minimizer_type =
         getInput()->getInt(STR_MINIMIZER_TYPE);
     bool verbose = getInput()->getInt(STR_VERBOSE);
+    int nb_glue_partitions = 0;
+    if (getInput()->get("-nb-glue-partitions"))
+        nb_glue_partitions = getInput()->getInt("-nb-glue-partitions");
     
     unsigned int nbThreads = this->getDispatcher()->getExecutionUnitsNumber();
     if ((unsigned int)nb_threads > nbThreads)
         std::cout << "Uh. Unitigs graph construction called with nb_threads " << nb_threads << " but dispatcher has nbThreads " << nbThreads << std::endl;
 
     if (do_bcalm) bcalm2<span>(&_storage, unitigs_filename, kmerSize, abundance, minimizerSize, nbThreads, minimizer_type, verbose); 
-    if (do_bglue) bglue<span> (&_storage, unitigs_filename, kmerSize,                           nbThreads,                 verbose);
+    if (do_bglue) bglue<span> (&_storage, unitigs_filename, kmerSize, nb_glue_partitions,       nbThreads,                 verbose);
     if (do_links) link_tigs<span>(unitigs_filename, kmerSize, nbThreads, nb_unitigs, verbose);
 
     /** We gather some statistics. */


=====================================
gatb-core/src/gatb/kmer/impl/MPHFAlgorithm.cpp
=====================================
@@ -252,6 +252,7 @@ void MPHFAlgorithm<span,Abundance_t,NodeState_t>::populate ()
         if (abundance > max_abundance_discrete)
         {
             _nb_abundances_above_precision++;
+            //std::cout << "found abundance larger than discrete: " << abundance << std::endl;
             abundance = max_abundance_discrete;
         }
 


=====================================
gatb-core/src/gatb/kmer/impl/SortingCountAlgorithm.cpp
=====================================
@@ -222,7 +222,7 @@ IOptionsParser* SortingCountAlgorithm<span>::getOptionsParser (bool mandatory)
     parser->push_back (new OptionOneParam (STR_COMPRESS_LEVEL,    "h5 compression level (0:none, 9:best)",          false, "0"));
     parser->push_back (new OptionOneParam (STR_STORAGE_TYPE,      "storage type of kmer counts ('hdf5' or 'file')", false, "hdf5"  ));
 
-    IOptionsParser* devParser = new OptionsParser ("kmer count, algorithmic options");
+    IOptionsParser* devParser = new OptionsParser ("kmer count, advanced performance tweaks");
 
     devParser->push_back (new OptionOneParam (STR_MINIMIZER_TYPE,    "minimizer type (0=lexi, 1=freq)",                false, "0"));
     devParser->push_back (new OptionOneParam (STR_MINIMIZER_SIZE,    "size of a minimizer",                            false, "10"));


=====================================
gatb-core/src/gatb/template/TemplateSpecialization10.cpp.in
=====================================
@@ -23,6 +23,7 @@ template void bcalm2<${KSIZE}>(Storage* storage,
 template void bglue<${KSIZE}>(Storage* storage, 
         std::string prefix,
         int kmerSize, 
+        int nb_glue_partitions, 
         int nb_threads, 
         bool verbose
         );


=====================================
gatb-core/src/gatb/tools/collections/impl/MapMPHF.hpp
=====================================
@@ -194,9 +194,8 @@ namespace gatb        {
 						/** Get the value for a given key
 						 * \param[in] key : the key
 						 * \return the value associated to the key. */
-						int operator[] (const Key& key)  {
-							return floorf((_abundanceDiscretization [data[hash(key)]]  +  _abundanceDiscretization [data[hash(key)]+1])/2.0);
-							//return data[hash(key)];
+						Value& operator[] (const Key& key)  {
+							return data[hash(key)];
 						}
 						
 						/** Get the value for a given index
@@ -207,11 +206,17 @@ namespace gatb        {
 						
 						}
 						
-						
 						Value& at (const Key& key)  {
 							return data[hash(key)];
 						}
 						
+                        int abundanceAt (const Key& key)  {
+							return floorf((_abundanceDiscretization [data[hash(key)]]  +  _abundanceDiscretization [data[hash(key)]+1])/2.0);
+						}
+	
+                        int abundanceAt (typename Hash::Code code)  {
+							return floorf((_abundanceDiscretization [data[code]]  +  _abundanceDiscretization [data[code]+1])/2.0);
+						}
 						
 						/** Get the hash code of the given key. */
 						typename Hash::Code getCode (const Key& key) { return hash(key); }


=====================================
gatb-core/src/gatb/tools/compression/DnaCoder.cpp
=====================================
@@ -362,7 +362,10 @@ void DnaEncoder::execute(){
 	
 	if(_readSize < _kmerSize){
 		encodeNoAnchorRead();
-		smoothQuals();
+		if(! _leon->_isFasta)
+		{
+			smoothQuals();
+		}
 		endRead();
 		return;
 	}


=====================================
gatb-core/src/gatb/tools/misc/api/StringsRepository.hpp
=====================================
@@ -103,6 +103,7 @@ public:
     const char* uri_output_tmp ()  { return "-out-tmp";        }
     const char* verbose        ()  { return "-verbose";        }
     const char* help           ()  { return "-help";           }
+	const char* help_short     ()  { return "-h";              }
     const char* version        ()  { return "-version";        }
     const char* bloom_type     ()  { return "-bloom";          }
     const char* debloom_type   ()  { return "-debloom";        }
@@ -156,6 +157,7 @@ public:
 #define STR_URI_OUTPUT_TMP      gatb::core::tools::misc::StringRepository::singleton().uri_output_tmp ()
 #define STR_VERBOSE             gatb::core::tools::misc::StringRepository::singleton().verbose ()
 #define STR_HELP                gatb::core::tools::misc::StringRepository::singleton().help ()
+#define STR_HELP_SHORT          gatb::core::tools::misc::StringRepository::singleton().help_short ()
 #define STR_VERSION             gatb::core::tools::misc::StringRepository::singleton().version ()
 #define STR_BLOOM_TYPE          gatb::core::tools::misc::StringRepository::singleton().bloom_type()
 #define STR_DEBLOOM_TYPE        gatb::core::tools::misc::StringRepository::singleton().debloom_type()


=====================================
gatb-core/test/unit/src/bcalm/TestBcalm.cpp
=====================================
@@ -74,7 +74,7 @@ public:
 
 
     /********************************************************************************/
-    void bcalm_test1 () // i wanna test de UF because it was buggy in multithreads
+    void bcalm_test1 () // i wanna test de UF because there was a weird behavior in bcalm. turns out: it wasn't the UF that was faulty, but let's keep this test.
     {
         int nb_uf_elts = 3000000;
         unionFind<uint32_t> uf(nb_uf_elts);
@@ -90,9 +90,11 @@ public:
 
         };
 
-        //doJoins(0,nb_uf_elts/2);
-        //doJoins(nb_uf_elts/2,nb_uf_elts-1);
-        //
+        doJoins(nb_uf_elts/2,nb_uf_elts-1);
+        doJoins(0,nb_uf_elts/2);
+        
+        // what if it's not a multithread bug but an order of operations bug, somehow
+        /* // threaded
         std::thread first(doJoins,0,nb_uf_elts/3);
         std::thread second(doJoins,nb_uf_elts/3,2*(nb_uf_elts/3));
         std::thread third(doJoins,2*(nb_uf_elts/3),nb_uf_elts-1);
@@ -100,6 +102,7 @@ public:
         first.join();
         second.join();
         third.join();
+        */
 
         int foundclass = uf.find(0); // not always 0, depends which thread starts first
         for (int i = 1; i < nb_uf_elts; i++)


=====================================
gatb-core/test/unit/src/debruijn/TestDebruijn.cpp
=====================================
@@ -87,6 +87,7 @@ class TestDebruijn : public Test
     /********************************************************************************/
     CPPUNIT_TEST_SUITE_GATB (TestDebruijn);
 
+        CPPUNIT_TEST_GATB (debruijn_large_abundance_query);
         CPPUNIT_TEST_GATB (debruijn_test7); 
         CPPUNIT_TEST_GATB (debruijn_deletenode);
         //CPPUNIT_TEST_GATB (debruijn_checksum); // FIXME removed it because it's a damn long test
@@ -1248,6 +1249,38 @@ public:
         debruijn_deletenode2_fct (graph2);
     }
 
+    
+    /********************************************************************************/
+        
+    /** */
+    void debruijn_large_abundance_query ()
+    {
+        const char* sequence = "TTGCTCACATGTTCTTTCCTGCGTTATCCCG";
+        char *bigseq= (char *)calloc(strlen(sequence)*1001,1);
+        bigseq[0]='\0';
+        for (int i = 0; i < 1000; i++)
+            strcat(bigseq,sequence);
+
+        size_t kmerSize = strlen (sequence);
+
+        // We create the graph.
+        Graph graph = Graph::create (new BankStrings (bigseq, 0),  "-kmer-size %d  -abundance-min 1  -verbose 0 -max-memory %d", kmerSize, MAX_MEMORY);
+
+        GraphIterator<Node> it = graph.iterator();
+
+        // debugging
+        /*for (it.first(); !it.isDone(); it.next())
+        {
+            //std::cout << graph.toString (it.item()) << " test printing node abundance " << it.item().abundance << std::endl;
+        }*/
+
+        // random access to nodes
+        Node node = graph.buildNode ((char*)sequence);
+        CPPUNIT_ASSERT (graph.toString(node) == sequence);
+        int abundance = graph.queryAbundance(node);
+        //std::cout << graph.toString(node) << " test printing node abundance " << abundance << " expected abundance:" << 1000 << std::endl;
+        CPPUNIT_ASSERT (abundance > 600 && abundance < 2000); // allow for imprecision
+    }
 
 
 };



View it on GitLab: https://salsa.debian.org/med-team/gatb-core/commit/75cbd09b8831e9a75766136964ebe4f3a6566e6b

-- 
View it on GitLab: https://salsa.debian.org/med-team/gatb-core/commit/75cbd09b8831e9a75766136964ebe4f3a6566e6b
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190122/9001751e/attachment-0001.html>