[med-svn] [Git][med-team/megahit][upstream] New upstream version 1.2.9

Shayan Doust gitlab at salsa.debian.org
Fri Oct 25 13:20:39 BST 2019



Shayan Doust pushed to branch upstream at Debian Med / megahit


Commits:
f9cdfa4a by Shayan Doust at 2019-10-22T12:19:10Z
New upstream version 1.2.9
- - - - -


17 changed files:

- .travis.yml
- CHANGELOG.md
- CMakeLists.txt
- README.md
- + azure-pipelines.yml
- + codecov.yml
- src/assembly/low_depth_remover.h
- src/assembly/unitig_graph.cpp
- src/definitions.h
- src/localasm/local_assemble.cpp
- src/megahit
- src/sequence/io/binary_reader.h
- src/sequence/kmer.h
- src/sequence/sequence_package.h
- src/sorting/base_engine.cpp
- + test_data/empty.fa
- test_data/r4.fa


Changes:

=====================================
.travis.yml
=====================================
@@ -13,17 +13,4 @@ script:
   - sudo make install
   - megahit --test
   - megahit --test --kmin-1pass
-  - megahit --test --no-hw-accel
-after_success:
-  # Create lcov report
-  - wget http://downloads.sourceforge.net/ltp/lcov-1.14.tar.gz
-  - tar zvxf lcov-1.14.tar.gz
-  - export PATH=lcov-1.14/bin/:${PATH}
-  - lcov --capture --directory . --output-file coverage.info
-  - lcov --remove coverage.info '/usr/*' --output-file coverage.info # filter system-files
-  - lcov --remove coverage.info '*xxhash/*' --output-file coverage.info # filter xxhash-files
-  - lcov --remove coverage.info '*parallel_hashmap/*' --output-file coverage.info # filter parallel-hashmap-files
-  - lcov --remove coverage.info '*pprintpp/*' --output-file coverage.info # filter pprintpp files
-  - lcov --list coverage.info # debug info
-  # Uploading report to CodeCov
-  - bash <(curl -s https://codecov.io/bash) -f coverage.info || echo "Codecov did not collect coverage reports"
\ No newline at end of file
+  - megahit --test --no-hw-accel


=====================================
CHANGELOG.md
=====================================
@@ -1,3 +1,9 @@
+### 1.2.9 / 2019-10-13
+-   Fix segfault triggered by length-zero sequences
+-   Fix memory detection problem for some outdated MacOS versions
+-   Fix an incorrect assertion in unitig graph refreshing
+-   Added `--verbose` to output full log to the screen
+
 ### 1.2.8 / 2019-08-10
 -   Add intermediate `megahit_core_popcnt` for CPUs that have ABM but not BMI2
 -   Allow new assembly task with `--continue`


=====================================
CMakeLists.txt
=====================================
@@ -101,9 +101,11 @@ add_custom_target(
         simple_test
         COMMAND ./megahit --test -t 2
         COMMAND MEGAHIT_NUM_MERCY_FACTOR=1.5 ./megahit --test -t 4 --mem-flag 0 --no-hw-accel
-        COMMAND ./megahit --test -t 2 --kmin-1pass
-        COMMAND rm -rf test-random && python3 ../test_data/generate_random_fasta.py > random.fa && ./megahit -r random.fa --k-list 255 --min-count 1 -o test-random
+        COMMAND ./megahit --test -t 2 --kmin-1pass --prune-level 3 --prune-depth 0
+        COMMAND rm -rf test-random && python3 ${TEST_DATA}/generate_random_fasta.py > random.fa && ./megahit -r random.fa --k-list 255 --min-count 1 -o test-random
         COMMAND rm -rf test-fastg && ./megahit --test -t 2 --mem-flag 2 --keep-tmp-files -o test-fastg
+        COMMAND rm -rf test-empty && ./megahit -r ${TEST_DATA}/empty.fa -o test-empty
+        COMMAND rm -rf test-no-contig && ./megahit -r ${TEST_DATA}/r4.fa -o test-no-contig
         COMMAND ./megahit_toolkit contig2fastg 59 test-fastg/intermediate_contigs/k59.contigs.fa > 59.fastg
         COMMAND ./megahit_toolkit readstat < test-fastg/intermediate_contigs/k59.contigs.fa
 )


=====================================
README.md
=====================================
@@ -19,9 +19,9 @@ conda install -c bioconda megahit
 ### Pre-built binaries for x86_64 Linux
 
 ```sh
-wget https://github.com/voutcn/megahit/releases/download/v1.2.8/MEGAHIT-1.2.8-Linux-x86_64-static.tar.gz
-tar zvxf MEGAHIT-1.2.8-Linux-x86_64-static.tar.gz
-cd MEGAHIT-1.2.8-Linux-x86_64-static/bin/
+wget https://github.com/voutcn/megahit/releases/download/v1.2.9/MEGAHIT-1.2.9-Linux-x86_64-static.tar.gz
+tar zvxf MEGAHIT-1.2.9-Linux-x86_64-static.tar.gz
+cd MEGAHIT-1.2.9-Linux-x86_64-static/bin/
 ./megahit --test  # run on a toy dataset
 ./megahit -1 MY_PE_READ_1.fq.gz -2 MY_PE_READ_2.fq.gz -o MY_OUTPUT_DIR
 ```
@@ -82,4 +82,4 @@ Publications
 License
 -------
 
-This project is licensed under the GPLv3 License - see the [LICENSE](LICENSE) file for details
\ No newline at end of file
+This project is licensed under the GPLv3 License - see the [LICENSE](LICENSE) file for details


=====================================
azure-pipelines.yml
=====================================
@@ -0,0 +1,90 @@
+jobs:
+  - job: ubuntu_1604
+    pool:
+      vmImage: 'Ubuntu-16.04'
+    strategy:
+      matrix:
+        python36:
+          python.version: '3.6'
+          build.type: 'Debug'
+          sanitizer: 'ON'
+          static: 'OFF'
+        Python27:
+          python.version: '2.7'
+          build.type: 'Release'
+          sanitizer: 'OFF'
+          static: 'ON'
+    steps:
+      - task: UsePythonVersion@0
+        inputs:
+          versionSpec: '$(python.version)'
+          addToPath: true
+      - script: |
+          mkdir build
+          cd build
+          cmake -DCMAKE_BUILD_TYPE=$(build.type) -DSANITIZER=$(sanitizer) -DSTATIC_BUILD=$(static) ..
+          make simple_test -j `nproc`
+        displayName: 'build and test'
+
+  - job: macos
+    strategy:
+      matrix:
+        1013:
+          image: macos-10.13
+        latest:
+          image: macos-latest
+    pool:
+      vmImage: $(image)
+    steps:
+      - script: |
+          brew install cmake gcc@9 zlib bzip2
+        displayName: 'install dependencies'
+      - script: |
+          mkdir build
+          cd build
+          CC=gcc-9 CXX=g++-9 cmake ..
+          make simple_test -j `sysctl -n hw.physicalcpu`
+        displayName: 'build and test'
+
+  - job: assembly
+    timeoutInMinutes: 0
+    strategy:
+      matrix:
+        codecov:
+          build.type: 'Release'
+          sanitizer: 'OFF'
+          coverage: 'ON'
+        sanitize:
+          build.type: 'Debug'
+          sanitizer: 'ON'
+          coverage: 'OFF'
+    pool:
+      vmImage: 'Ubuntu-16.04'
+    steps:
+      - script: |
+          mkdir build
+          cd build
+          cmake -DCMAKE_BUILD_TYPE=$(build.type) -DSANITIZER=$(sanitizer) -DCOVERAGE=$(coverage) ..
+          make -j `nproc`
+          make simple_test
+          sudo make install
+        displayName: 'build and test'
+      - script: |
+          curl -o- ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR752/007/SRR7521507/SRR7521507_1.fastq.gz | gzip -cd | head -4000000 | gzip -1 > 1.fq.gz
+          curl -o- ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR752/007/SRR7521507/SRR7521507_2.fastq.gz | gzip -cd | head -4000000 | gzip -1 > 2.fq.gz
+          megahit --presets meta-large -1 1.fq.gz -2 2.fq.gz -m5e9 --verbose
+        displayName: 'assemble'
+      - script: |
+          if [ $(coverage) = 'ON' ]; then
+            wget http://downloads.sourceforge.net/ltp/lcov-1.14.tar.gz
+            tar zvxf lcov-1.14.tar.gz
+            export PATH=lcov-1.14/bin/:${PATH}
+            lcov --capture --directory . --output-file coverage.info
+            lcov --remove coverage.info '/usr/*' --output-file coverage.info # filter system-files
+            lcov --remove coverage.info '*xxhash/*' --output-file coverage.info # filter xxhash-files
+            lcov --remove coverage.info '*parallel_hashmap/*' --output-file coverage.info # filter parallel-hashmap-files
+            lcov --remove coverage.info '*pprintpp/*' --output-file coverage.info # filter pprintpp files
+            lcov --list coverage.info # debug info
+            bash <(curl -s https://codecov.io/bash) -f coverage.info -t $(CODECOV_TOKEN) || echo "Codecov did not collect coverage reports"
+          fi
+        displayName: 'codecov'


=====================================
codecov.yml
=====================================
@@ -0,0 +1,8 @@
+coverage:
+  status:
+    patch:
+      default:
+        target: 0%
+    project:
+      default:
+        target: 0%
\ No newline at end of file


=====================================
src/assembly/low_depth_remover.h
=====================================
@@ -16,6 +16,5 @@ bool RemoveLocalLowDepth(UnitigGraph &graph, double min_depth, uint32_t max_len,
 uint32_t IterateLocalLowDepth(UnitigGraph &graph, double min_depth,
                               uint32_t min_len, uint32_t local_width,
                               double local_ratio, bool permanent_rm = false);
-uint32_t RemoveLowDepth(UnitigGraph &graph, double min_depth);
 
 #endif  // MEGAHIT_LOW_DEPTH_REMOVER_H


=====================================
src/assembly/unitig_graph.cpp
=====================================
@@ -312,7 +312,6 @@ void UnitigGraph::Refresh(bool set_changed) {
       while (true) {
         next_adapter = NextSimplePathAdapter(next_adapter);
         assert(next_adapter.IsValid());
-        assert(!(next_adapter.GetFlag() & kDeleted));
         if (next_adapter.b() == adapter.b()) {
           break;
         }


=====================================
src/definitions.h
=====================================
@@ -25,7 +25,7 @@
 #include <stdint.h>
 
 #ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "v1.2.8"
+#define PACKAGE_VERSION "v1.2.9"
 #endif
 
 #include "sdbg/sdbg_def.h"


=====================================
src/localasm/local_assemble.cpp
=====================================
@@ -224,9 +224,11 @@ void MapToContigs(const HashMapper &mapper,
 
 void AssembleAndOutput(const HashMapper &mapper, const SeqPackage &read_pkg,
                        MappingResultCollector &result_collector,
-                       const std::string &output_file, int32_t local_range,
+                       const std::string &output_file,
+                       const int32_t local_range,
                        const LocalAsmOption &opt) {
-  size_t min_num_reads = local_range / read_pkg.max_length();
+  const size_t min_num_reads = read_pkg.max_length() > 0 ?
+      local_range / read_pkg.max_length(): 1;
   xinfo("Minimum number of reads to do local assembly: {}\n", min_num_reads);
 
   Sequence seq, contig_end;


=====================================
src/megahit
=====================================
@@ -195,6 +195,7 @@ class Options:
         self.pe12 = []
         self.se = []
         self.presets = ''
+        self.verbose = False
 
     @property
     def log_file_name(self):
@@ -321,6 +322,7 @@ def parse_option(argv):
                                     'mem-flag=',
                                     'continue',
                                     'version',
+                                    'verbose',
                                     'out-prefix=',
                                     'presets=',
                                     'test',
@@ -398,6 +400,8 @@ def parse_option(argv):
         elif option in ('-v', '--version'):
             print(software_info.megahit_version)
             exit(0)
+        elif option == '--verbose':
+            opt.verbose = True
         elif option == '--continue':
             opt.continue_mode = True
         elif option == '--out-prefix':
@@ -591,11 +595,19 @@ def check_reads():
 
 
 def detect_available_mem():
-    psize = os.sysconf('SC_PAGE_SIZE')
-    pcount = os.sysconf('SC_PHYS_PAGES')
-    if psize < 0 or pcount < 0:
-        raise SystemError
-    return psize * pcount
+    try:
+        psize = os.sysconf('SC_PAGE_SIZE')
+        pcount = os.sysconf('SC_PHYS_PAGES')
+        if psize < 0 or pcount < 0:
+            raise SystemError
+        return psize * pcount
+    except ValueError:
+        if sys.platform.find("darwin") != -1:
+            return int(float(os.popen("sysctl hw.memsize").readlines()[0].split()[1]))
+        elif sys.platform.find("linux") != -1:
+            return int(float(os.popen("free").readlines()[1].split()[1]) * 1024)
+        else:
+            raise
 
 
 def cpu_dispatch():
@@ -926,6 +938,8 @@ def merge_final(final_k):
 
 
 def run_sub_command(cmd, msg, verbose=False):
+    if opt.verbose:
+        verbose = True
     logger.info(msg)
     logger.debug('command %s' % ' '.join(cmd))
 


=====================================
src/sequence/io/binary_reader.h
=====================================
@@ -12,7 +12,8 @@
 
 class BinaryReader : public BaseSequenceReader {
  public:
-  explicit BinaryReader(const std::string &filename) : is_(filename) {
+  explicit BinaryReader(const std::string &filename)
+      : is_(filename), buf_(120) {
     if (is_.bad()) {
       throw std::invalid_argument("Failed to open file " + filename);
     }
@@ -33,14 +34,14 @@ class BinaryReader : public BaseSequenceReader {
       if (buf_.size() < num_words) {
         buf_.resize(num_words);
       }
-      auto bytes_read = reader_.read(&buf_[0], num_words);
+      auto bytes_read = reader_.read(buf_.data(), num_words);
       assert(bytes_read == num_words * sizeof(buf_[0]));
       (void)(bytes_read);
 
       if (!reverse) {
-        pkg->AppendCompactSequence(&buf_[0], read_len);
+        pkg->AppendCompactSequence(buf_.data(), read_len);
       } else {
-        pkg->AppendReversedCompactSequence(&buf_[0], read_len);
+        pkg->AppendReversedCompactSequence(buf_.data(), read_len);
       }
 
       num_bases += read_len;


=====================================
src/sequence/kmer.h
=====================================
@@ -22,7 +22,10 @@ class Kmer {
   using word_type = TWord;
   static const unsigned kNumWords = NWords;
 
-  Kmer() { std::memset(data_, 0, sizeof(data_)); }
+  Kmer() {
+    static_assert(sizeof(*this) == sizeof(TWord) * NWords, "");
+    std::memset(data_, 0, sizeof(data_));
+  }
 
   Kmer(const Kmer &kmer) { std::memcpy(data_, kmer.data_, sizeof(data_)); }
 
@@ -214,7 +217,7 @@ class Kmer {
 
  private:
   word_type data_[kNumWords];
-} __attribute__((packed));
+};
 
 namespace std {
 template <const unsigned NumWords, typename T>


=====================================
src/sequence/sequence_package.h
=====================================
@@ -259,6 +259,12 @@ class SequencePackage {
   }
 
   void AppendStringSequence(const char *from, const char *to, unsigned len) {
+    if (len == 0) {
+      // Fake a sequence whose length is 1, as we need all sequences' length > 0
+      // to make `GetSeqID` working
+      auto fake_sequence = "A";
+      return AppendStringSequence(fake_sequence, fake_sequence + 1, 1);
+    }
     UpdateLength(len);
     std::ptrdiff_t step = from < to ? 1 : -1;
     for (auto ptr = from; ptr != to; ptr += step) {
@@ -267,7 +273,14 @@ class SequencePackage {
   }
 
   void AppendCompactSequence(const TWord *ptr, unsigned len, bool rev) {
+    if (len == 0) {
+      // Fake a sequence whose length is 1, as we need all sequences' length > 0
+      // to make `GetSeqID` working
+      TWord fake_sequence = 0;
+      return AppendCompactSequence(&fake_sequence, 1, false);
+    }
     UpdateLength(len);
+
     if (rev) {
       auto rptr = ptr + DivCeiling(len, kBasesPerWord) - 1;
       unsigned bases_in_last_word = len % kBasesPerWord;


=====================================
src/sorting/base_engine.cpp
=====================================
@@ -218,7 +218,8 @@ void BaseSequenceSortingEngine::Lv0PrepareThreadPartition() {
     int64_t average = meta_.num_sequences / n_threads_;
     meta.seq_from = t * average;
     meta.seq_to = t < n_threads_ - 1 ? (t + 1) * average : meta_.num_sequences;
-    meta.offset_base = Lv0EncodeDiffBase(meta.seq_from);
+    meta.offset_base = meta.seq_from < meta_.num_sequences ?
+        Lv0EncodeDiffBase(meta.seq_from) : std::numeric_limits<int64_t>::max();
   }
 
   for (unsigned i = 0; i < kNumBuckets; ++i) {


=====================================
test_data/empty.fa
=====================================


=====================================
test_data/r4.fa
=====================================
@@ -1,2 +1,4 @@
 >megahit_ref_example_42_498_1:0:0_2:0:0_12b/1
 GGTTTTTTCAATCATCGCCACCAGGTGGTTGGTGATTTTGGGGGGGGCAGAGATGACGGTGGCCACCTGCCCCTGCCTGGCATTGCTTTCCAGAATATCG
+>1
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN



View it on GitLab: https://salsa.debian.org/med-team/megahit/commit/f9cdfa4a0452326b2c9c514f866eaa85aacdd6e5

-- 
View it on GitLab: https://salsa.debian.org/med-team/megahit/commit/f9cdfa4a0452326b2c9c514f866eaa85aacdd6e5
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20191025/3bf21c9a/attachment-0001.html>


More information about the debian-med-commit mailing list