[med-svn] [Git][med-team/megahit][upstream] New upstream version 1.2.9
Shayan Doust
gitlab at salsa.debian.org
Fri Oct 25 13:20:39 BST 2019
Shayan Doust pushed to branch upstream at Debian Med / megahit
Commits:
f9cdfa4a by Shayan Doust at 2019-10-22T12:19:10Z
New upstream version 1.2.9
- - - - -
17 changed files:
- .travis.yml
- CHANGELOG.md
- CMakeLists.txt
- README.md
- + azure-pipelines.yml
- + codecov.yml
- src/assembly/low_depth_remover.h
- src/assembly/unitig_graph.cpp
- src/definitions.h
- src/localasm/local_assemble.cpp
- src/megahit
- src/sequence/io/binary_reader.h
- src/sequence/kmer.h
- src/sequence/sequence_package.h
- src/sorting/base_engine.cpp
- + test_data/empty.fa
- test_data/r4.fa
Changes:
=====================================
.travis.yml
=====================================
@@ -13,17 +13,4 @@ script:
- sudo make install
- megahit --test
- megahit --test --kmin-1pass
- - megahit --test --no-hw-accel
-after_success:
- # Create lcov report
- - wget http://downloads.sourceforge.net/ltp/lcov-1.14.tar.gz
- - tar zvxf lcov-1.14.tar.gz
- - export PATH=lcov-1.14/bin/:${PATH}
- - lcov --capture --directory . --output-file coverage.info
- - lcov --remove coverage.info '/usr/*' --output-file coverage.info # filter system-files
- - lcov --remove coverage.info '*xxhash/*' --output-file coverage.info # filter xxhash-files
- - lcov --remove coverage.info '*parallel_hashmap/*' --output-file coverage.info # filter parallel-hashmap-files
- - lcov --remove coverage.info '*pprintpp/*' --output-file coverage.info # filter pprintpp files
- - lcov --list coverage.info # debug info
- # Uploading report to CodeCov
- - bash <(curl -s https://codecov.io/bash) -f coverage.info || echo "Codecov did not collect coverage reports"
\ No newline at end of file
+ - megahit --test --no-hw-accel
=====================================
CHANGELOG.md
=====================================
@@ -1,3 +1,9 @@
+### 1.2.9 / 2019-10-13
+- Fix segfault triggered by length-zero sequences
+- Fix memory detection problem for some outdated MacOS versions
+- Fix an incorrect assertion in unitig graph refreshing
+- Added `--verbose` to output full log to the screen
+
### 1.2.8 / 2019-08-10
- Add intermediate `megahit_core_popcnt` for CPUs that have ABM but not BMI2
- Allow new assembly task with `--continue`
=====================================
CMakeLists.txt
=====================================
@@ -101,9 +101,11 @@ add_custom_target(
simple_test
COMMAND ./megahit --test -t 2
COMMAND MEGAHIT_NUM_MERCY_FACTOR=1.5 ./megahit --test -t 4 --mem-flag 0 --no-hw-accel
- COMMAND ./megahit --test -t 2 --kmin-1pass
- COMMAND rm -rf test-random && python3 ../test_data/generate_random_fasta.py > random.fa && ./megahit -r random.fa --k-list 255 --min-count 1 -o test-random
+ COMMAND ./megahit --test -t 2 --kmin-1pass --prune-level 3 --prune-depth 0
+ COMMAND rm -rf test-random && python3 ${TEST_DATA}/generate_random_fasta.py > random.fa && ./megahit -r random.fa --k-list 255 --min-count 1 -o test-random
COMMAND rm -rf test-fastg && ./megahit --test -t 2 --mem-flag 2 --keep-tmp-files -o test-fastg
+ COMMAND rm -rf test-empty && ./megahit -r ${TEST_DATA}/empty.fa -o test-empty
+ COMMAND rm -rf test-no-contig && ./megahit -r ${TEST_DATA}/r4.fa -o test-no-contig
COMMAND ./megahit_toolkit contig2fastg 59 test-fastg/intermediate_contigs/k59.contigs.fa > 59.fastg
COMMAND ./megahit_toolkit readstat < test-fastg/intermediate_contigs/k59.contigs.fa
)
=====================================
README.md
=====================================
@@ -19,9 +19,9 @@ conda install -c bioconda megahit
### Pre-built binaries for x86_64 Linux
```sh
-wget https://github.com/voutcn/megahit/releases/download/v1.2.8/MEGAHIT-1.2.8-Linux-x86_64-static.tar.gz
-tar zvxf MEGAHIT-1.2.8-Linux-x86_64-static.tar.gz
-cd MEGAHIT-1.2.8-Linux-x86_64-static/bin/
+wget https://github.com/voutcn/megahit/releases/download/v1.2.9/MEGAHIT-1.2.9-Linux-x86_64-static.tar.gz
+tar zvxf MEGAHIT-1.2.9-Linux-x86_64-static.tar.gz
+cd MEGAHIT-1.2.9-Linux-x86_64-static/bin/
./megahit --test # run on a toy dataset
./megahit -1 MY_PE_READ_1.fq.gz -2 MY_PE_READ_2.fq.gz -o MY_OUTPUT_DIR
```
@@ -82,4 +82,4 @@ Publications
License
-------
-This project is licensed under the GPLv3 License - see the [LICENSE](LICENSE) file for details
\ No newline at end of file
+This project is licensed under the GPLv3 License - see the [LICENSE](LICENSE) file for details
=====================================
azure-pipelines.yml
=====================================
@@ -0,0 +1,90 @@
+jobs:
+ - job: ubuntu_1604
+ pool:
+ vmImage: 'Ubuntu-16.04'
+ strategy:
+ matrix:
+ python36:
+ python.version: '3.6'
+ build.type: 'Debug'
+ sanitizer: 'ON'
+ static: 'OFF'
+ Python27:
+ python.version: '2.7'
+ build.type: 'Release'
+ sanitizer: 'OFF'
+ static: 'ON'
+ steps:
+ - task: UsePythonVersion@0
+ inputs:
+ versionSpec: '$(python.version)'
+ addToPath: true
+ - script: |
+ mkdir build
+ cd build
+ cmake -DCMAKE_BUILD_TYPE=$(build.type) -DSANITIZER=$(sanitizer) -DSTATIC_BUILD=$(static) ..
+ make simple_test -j `nproc`
+ displayName: 'build and test'
+
+ - job: macos
+ strategy:
+ matrix:
+ 1013:
+ image: macos-10.13
+ latest:
+ image: macos-latest
+ pool:
+ vmImage: $(image)
+ steps:
+ - script: |
+ brew install cmake gcc@9 zlib bzip2
+ displayName: 'install dependencies'
+ - script: |
+ mkdir build
+ cd build
+ CC=gcc-9 CXX=g++-9 cmake ..
+ make simple_test -j `sysctl -n hw.physicalcpu`
+ displayName: 'build and test'
+
+ - job: assembly
+ timeoutInMinutes: 0
+ strategy:
+ matrix:
+ codecov:
+ build.type: 'Release'
+ sanitizer: 'OFF'
+ coverage: 'ON'
+ sanitize:
+ build.type: 'Debug'
+ sanitizer: 'ON'
+ coverage: 'OFF'
+ pool:
+ vmImage: 'Ubuntu-16.04'
+ steps:
+ - script: |
+ mkdir build
+ cd build
+ cmake -DCMAKE_BUILD_TYPE=$(build.type) -DSANITIZER=$(sanitizer) -DCOVERAGE=$(coverage) ..
+ make -j `nproc`
+ make simple_test
+ sudo make install
+ displayName: 'build and test'
+ - script: |
+ curl -o- ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR752/007/SRR7521507/SRR7521507_1.fastq.gz | gzip -cd | head -4000000 | gzip -1 > 1.fq.gz
+ curl -o- ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR752/007/SRR7521507/SRR7521507_2.fastq.gz | gzip -cd | head -4000000 | gzip -1 > 2.fq.gz
+ megahit --presets meta-large -1 1.fq.gz -2 2.fq.gz -m5e9 --verbose
+ displayName: 'assemble'
+ - script: |
+ if [ $(coverage) = 'ON' ]; then
+ wget http://downloads.sourceforge.net/ltp/lcov-1.14.tar.gz
+ tar zvxf lcov-1.14.tar.gz
+ export PATH=lcov-1.14/bin/:${PATH}
+ lcov --capture --directory . --output-file coverage.info
+ lcov --remove coverage.info '/usr/*' --output-file coverage.info # filter system-files
+ lcov --remove coverage.info '*xxhash/*' --output-file coverage.info # filter xxhash-files
+ lcov --remove coverage.info '*parallel_hashmap/*' --output-file coverage.info # filter parallel-hashmap-files
+ lcov --remove coverage.info '*pprintpp/*' --output-file coverage.info # filter pprintpp files
+ lcov --list coverage.info # debug info
+ bash <(curl -s https://codecov.io/bash) -f coverage.info -t $(CODECOV_TOKEN) || echo "Codecov did not collect coverage reports"
+ fi
+ displayName: 'codecov'
=====================================
codecov.yml
=====================================
@@ -0,0 +1,8 @@
+coverage:
+ status:
+ patch:
+ default:
+ target: 0%
+ project:
+ default:
+ target: 0%
\ No newline at end of file
=====================================
src/assembly/low_depth_remover.h
=====================================
@@ -16,6 +16,5 @@ bool RemoveLocalLowDepth(UnitigGraph &graph, double min_depth, uint32_t max_len,
uint32_t IterateLocalLowDepth(UnitigGraph &graph, double min_depth,
uint32_t min_len, uint32_t local_width,
double local_ratio, bool permanent_rm = false);
-uint32_t RemoveLowDepth(UnitigGraph &graph, double min_depth);
#endif // MEGAHIT_LOW_DEPTH_REMOVER_H
=====================================
src/assembly/unitig_graph.cpp
=====================================
@@ -312,7 +312,6 @@ void UnitigGraph::Refresh(bool set_changed) {
while (true) {
next_adapter = NextSimplePathAdapter(next_adapter);
assert(next_adapter.IsValid());
- assert(!(next_adapter.GetFlag() & kDeleted));
if (next_adapter.b() == adapter.b()) {
break;
}
=====================================
src/definitions.h
=====================================
@@ -25,7 +25,7 @@
#include <stdint.h>
#ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "v1.2.8"
+#define PACKAGE_VERSION "v1.2.9"
#endif
#include "sdbg/sdbg_def.h"
=====================================
src/localasm/local_assemble.cpp
=====================================
@@ -224,9 +224,11 @@ void MapToContigs(const HashMapper &mapper,
void AssembleAndOutput(const HashMapper &mapper, const SeqPackage &read_pkg,
MappingResultCollector &result_collector,
- const std::string &output_file, int32_t local_range,
+ const std::string &output_file,
+ const int32_t local_range,
const LocalAsmOption &opt) {
- size_t min_num_reads = local_range / read_pkg.max_length();
+ const size_t min_num_reads = read_pkg.max_length() > 0 ?
+ local_range / read_pkg.max_length(): 1;
xinfo("Minimum number of reads to do local assembly: {}\n", min_num_reads);
Sequence seq, contig_end;
=====================================
src/megahit
=====================================
@@ -195,6 +195,7 @@ class Options:
self.pe12 = []
self.se = []
self.presets = ''
+ self.verbose = False
@property
def log_file_name(self):
@@ -321,6 +322,7 @@ def parse_option(argv):
'mem-flag=',
'continue',
'version',
+ 'verbose',
'out-prefix=',
'presets=',
'test',
@@ -398,6 +400,8 @@ def parse_option(argv):
elif option in ('-v', '--version'):
print(software_info.megahit_version)
exit(0)
+ elif option == '--verbose':
+ opt.verbose = True
elif option == '--continue':
opt.continue_mode = True
elif option == '--out-prefix':
@@ -591,11 +595,19 @@ def check_reads():
def detect_available_mem():
- psize = os.sysconf('SC_PAGE_SIZE')
- pcount = os.sysconf('SC_PHYS_PAGES')
- if psize < 0 or pcount < 0:
- raise SystemError
- return psize * pcount
+ try:
+ psize = os.sysconf('SC_PAGE_SIZE')
+ pcount = os.sysconf('SC_PHYS_PAGES')
+ if psize < 0 or pcount < 0:
+ raise SystemError
+ return psize * pcount
+ except ValueError:
+ if sys.platform.find("darwin") != -1:
+ return int(float(os.popen("sysctl hw.memsize").readlines()[0].split()[1]))
+ elif sys.platform.find("linux") != -1:
+ return int(float(os.popen("free").readlines()[1].split()[1]) * 1024)
+ else:
+ raise
def cpu_dispatch():
@@ -926,6 +938,8 @@ def merge_final(final_k):
def run_sub_command(cmd, msg, verbose=False):
+ if opt.verbose:
+ verbose = True
logger.info(msg)
logger.debug('command %s' % ' '.join(cmd))
=====================================
src/sequence/io/binary_reader.h
=====================================
@@ -12,7 +12,8 @@
class BinaryReader : public BaseSequenceReader {
public:
- explicit BinaryReader(const std::string &filename) : is_(filename) {
+ explicit BinaryReader(const std::string &filename)
+ : is_(filename), buf_(120) {
if (is_.bad()) {
throw std::invalid_argument("Failed to open file " + filename);
}
@@ -33,14 +34,14 @@ class BinaryReader : public BaseSequenceReader {
if (buf_.size() < num_words) {
buf_.resize(num_words);
}
- auto bytes_read = reader_.read(&buf_[0], num_words);
+ auto bytes_read = reader_.read(buf_.data(), num_words);
assert(bytes_read == num_words * sizeof(buf_[0]));
(void)(bytes_read);
if (!reverse) {
- pkg->AppendCompactSequence(&buf_[0], read_len);
+ pkg->AppendCompactSequence(buf_.data(), read_len);
} else {
- pkg->AppendReversedCompactSequence(&buf_[0], read_len);
+ pkg->AppendReversedCompactSequence(buf_.data(), read_len);
}
num_bases += read_len;
=====================================
src/sequence/kmer.h
=====================================
@@ -22,7 +22,10 @@ class Kmer {
using word_type = TWord;
static const unsigned kNumWords = NWords;
- Kmer() { std::memset(data_, 0, sizeof(data_)); }
+ Kmer() {
+ static_assert(sizeof(*this) == sizeof(TWord) * NWords, "");
+ std::memset(data_, 0, sizeof(data_));
+ }
Kmer(const Kmer &kmer) { std::memcpy(data_, kmer.data_, sizeof(data_)); }
@@ -214,7 +217,7 @@ class Kmer {
private:
word_type data_[kNumWords];
-} __attribute__((packed));
+};
namespace std {
template <const unsigned NumWords, typename T>
=====================================
src/sequence/sequence_package.h
=====================================
@@ -259,6 +259,12 @@ class SequencePackage {
}
void AppendStringSequence(const char *from, const char *to, unsigned len) {
+ if (len == 0) {
+ // Fake a sequence whose length is 1, as we need all sequences' length > 0
+ // to make `GetSeqID` working
+ auto fake_sequence = "A";
+ return AppendStringSequence(fake_sequence, fake_sequence + 1, 1);
+ }
UpdateLength(len);
std::ptrdiff_t step = from < to ? 1 : -1;
for (auto ptr = from; ptr != to; ptr += step) {
@@ -267,7 +273,14 @@ class SequencePackage {
}
void AppendCompactSequence(const TWord *ptr, unsigned len, bool rev) {
+ if (len == 0) {
+ // Fake a sequence whose length is 1, as we need all sequences' length > 0
+ // to make `GetSeqID` working
+ TWord fake_sequence = 0;
+ return AppendCompactSequence(&fake_sequence, 1, false);
+ }
UpdateLength(len);
+
if (rev) {
auto rptr = ptr + DivCeiling(len, kBasesPerWord) - 1;
unsigned bases_in_last_word = len % kBasesPerWord;
=====================================
src/sorting/base_engine.cpp
=====================================
@@ -218,7 +218,8 @@ void BaseSequenceSortingEngine::Lv0PrepareThreadPartition() {
int64_t average = meta_.num_sequences / n_threads_;
meta.seq_from = t * average;
meta.seq_to = t < n_threads_ - 1 ? (t + 1) * average : meta_.num_sequences;
- meta.offset_base = Lv0EncodeDiffBase(meta.seq_from);
+ meta.offset_base = meta.seq_from < meta_.num_sequences ?
+ Lv0EncodeDiffBase(meta.seq_from) : std::numeric_limits<int64_t>::max();
}
for (unsigned i = 0; i < kNumBuckets; ++i) {
=====================================
test_data/empty.fa
=====================================
=====================================
test_data/r4.fa
=====================================
@@ -1,2 +1,4 @@
>megahit_ref_example_42_498_1:0:0_2:0:0_12b/1
GGTTTTTTCAATCATCGCCACCAGGTGGTTGGTGATTTTGGGGGGGGCAGAGATGACGGTGGCCACCTGCCCCTGCCTGGCATTGCTTTCCAGAATATCG
+>1
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
View it on GitLab: https://salsa.debian.org/med-team/megahit/commit/f9cdfa4a0452326b2c9c514f866eaa85aacdd6e5
--
View it on GitLab: https://salsa.debian.org/med-team/megahit/commit/f9cdfa4a0452326b2c9c514f866eaa85aacdd6e5
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20191025/3bf21c9a/attachment-0001.html>
More information about the debian-med-commit
mailing list