[med-svn] [Git][med-team/seqwish][upstream] New upstream version 0.7.1
Steffen Möller (@moeller)
gitlab at salsa.debian.org
Mon Aug 30 20:08:51 BST 2021
Steffen Möller pushed to branch upstream at Debian Med / seqwish
Commits:
d133ab50 by Steffen Moeller at 2021-08-30T18:26:03+02:00
New upstream version 0.7.1
- - - - -
6 changed files:
- .gitmodules
- .travis.yml
- CMakeLists.txt
- src/alignments.cpp
- src/main.cpp
- src/seqindex.cpp
Changes:
=====================================
.gitmodules
=====================================
@@ -7,9 +7,6 @@
[submodule "deps/iitii"]
path = deps/iitii
url = https://github.com/ekg/iitii.git
-[submodule "deps/mmap_allocator"]
- path = deps/mmap_allocator
- url = https://github.com/ekg/mmap_allocator.git
[submodule "deps/sdsl-lite"]
path = deps/sdsl-lite
url = https://github.com/simongog/sdsl-lite.git
=====================================
.travis.yml
=====================================
@@ -8,7 +8,7 @@ before_install:
- git submodule update --init --recursive
- sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
- sudo apt-get update -qq
- - sudo apt-get install -qq gcc-9 g++-9
+ - sudo apt-get install -qq gcc-9 g++-9 libjemalloc-dev
- sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 60 --slave /usr/bin/g++ g++ /usr/bin/g++-9
script:
- sed -i 's/CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -mcx16 -g/CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O -mcx16 -g -fsanitize=address/g' CMakeLists.txt
=====================================
CMakeLists.txt
=====================================
@@ -77,14 +77,6 @@ ExternalProject_Add(iitii
ExternalProject_Get_property(iitii SOURCE_DIR)
set(iitii_INCLUDE "${SOURCE_DIR}/src")
-ExternalProject_Add(mmap_allocator
- SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/mmap_allocator"
- CMAKE_ARGS "${CMAKE_ARGS};-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>"
- UPDATE_COMMAND "")
-ExternalProject_Get_property(mmap_allocator INSTALL_DIR)
-set(mmap_allocator_INCLUDE "${INSTALL_DIR}/include/mmap_allocator")
-set(mmap_allocator_LIB "${INSTALL_DIR}/lib")
-
# In-place Parallel Super Scalar Samplesort (IPS⁴o), header only
ExternalProject_Add(ips4o
SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/ips4o"
@@ -184,7 +176,6 @@ add_dependencies(seqwish sdsl-lite)
add_dependencies(seqwish gzipreader)
add_dependencies(seqwish mmmulti)
add_dependencies(seqwish iitii)
-add_dependencies(seqwish mmap_allocator)
add_dependencies(seqwish ips4o)
add_dependencies(seqwish bbhash)
add_dependencies(seqwish atomicbitvector)
@@ -200,7 +191,6 @@ target_include_directories(seqwish PUBLIC
"${ips4o_INCLUDE}"
"${mmmulti_INCLUDE}"
"${iitii_INCLUDE}"
- "${mmap_allocator_INCLUDE}"
"${bbhash_INCLUDE}"
"${atomicbitvector_INCLUDE}"
"${atomicqueue_INCLUDE}"
@@ -211,9 +201,9 @@ target_link_libraries(seqwish
"${sdsl-lite_LIB}/libsdsl.a"
"${sdsl-lite-divsufsort_LIB}/libdivsufsort.a"
"${sdsl-lite-divsufsort_LIB}/libdivsufsort64.a"
- "${mmap_allocator_LIB}/libmmap_allocator.a"
"-latomic"
Threads::Threads
+ jemalloc
z)
if (BUILD_STATIC)
#set(CMAKE_EXE_LINKER_FLAGS "-static")
=====================================
src/alignments.cpp
=====================================
@@ -31,6 +31,7 @@ void paf_worker(
switch (c.op) {
case 'M':
case '=':
+ case 'X':
{
pos_t q_pos_match_start = q_pos;
pos_t t_pos_match_start = t_pos;
@@ -50,7 +51,10 @@ void paf_worker(
}
};
for (size_t i = 0; i < c.len; ++i) {
- if (seqidx.at_pos(q_pos) == seqidx.at_pos(t_pos)
+ char query_base = seqidx.at_pos(q_pos);
+ char target_base = seqidx.at_pos(t_pos);
+ if (query_base == target_base
+ && query_base != 'N'
&& offset(q_pos) != offset(t_pos)) { // guard against self mappings
if (match_len == 0) {
q_pos_match_start = q_pos;
@@ -71,10 +75,6 @@ void paf_worker(
add_match();
}
break;
- case 'X':
- incr_pos(q_pos, c.len);
- incr_pos(t_pos, c.len);
- break;
case 'I':
//std::cerr << "ins " << c.len << std::endl;
incr_pos(q_pos, c.len);
=====================================
src/main.cpp
=====================================
@@ -80,15 +80,23 @@ int main(int argc, char** argv) {
std::cerr << "[seqwish] ERROR: input alignment file " << args::get(paf_alns) << " does not exist" << std::endl;
return 4;
}else {
+ // Check if the first non-empty line has the CIGAR
+
igzstream paf_in(p.first.c_str());
- std::string line;
- std::getline(paf_in, line);
+ while (!paf_in.eof()) {
+ std::string line;
+ std::getline(paf_in, line);
+
+ if (!line.empty()) {
+ paf_row_t paf(line);
- paf_row_t paf(line);
- if (paf.cigar.empty()){
- std::cerr << "[seqwish] WARNING: input alignment file " << p.first << " does not have CIGAR strings. "
- << "The resulting graph will only represent the input sequences." << std::endl;
+ if (paf.cigar.empty()){
+ std::cerr << "[seqwish] WARNING: input alignment file " << p.first << " does not have CIGAR strings. "
+ << "The resulting graph will only represent the input sequences." << std::endl;
+ }
+ break;
+ }
}
}
}
=====================================
src/seqindex.cpp
=====================================
@@ -33,13 +33,11 @@ void seqindex_t::build_index(const std::string& filename, const std::string& idx
}
size_t seq_bytes_written = 0;
size_t seq_names_bytes_written = 0;
+ bool notified_empty_seqs = false;
while (in.good()) {
- seqname_offset.push_back(seq_names_bytes_written);
- seq_offset.push_back(seq_bytes_written);
line[0] = '>';
- line = line.substr(0, line.find(" "));
- seqnames << line << " ";
- seq_names_bytes_written += line.size() + 1;
+ std::string seq_name = line.substr(0, line.find(" "));
+
std::string seq;
// get the sequence
if (input_is_fasta) {
@@ -52,16 +50,29 @@ void seqindex_t::build_index(const std::string& filename, const std::string& idx
}
}
} else if (input_is_fastq) {
- std::getline(in, seq); // sequence
+ std::getline(in, seq); // sequence
std::getline(in, line); // delimiter
std::getline(in, line); // quality
std::getline(in, line);
}
- // force the sequence to be upper-case
- std::transform(seq.begin(), seq.end(), seq.begin(), [](char c) { return std::toupper(c); });
- seqout << seq;
- // record where the sequence starts
- seq_bytes_written += seq.size();
+ if (seq.empty()){
+ if (!notified_empty_seqs){
+ notified_empty_seqs = true;
+ std::cerr << "[seqwish] WARNING: input FASTA file contains empty sequences, which will be ignored." << std::endl;
+ }
+ } else {
+ seqname_offset.push_back(seq_names_bytes_written);
+ seq_offset.push_back(seq_bytes_written);
+
+ seqnames << seq_name << " ";
+ seq_names_bytes_written += seq_name.size() + 1;
+
+ // force the sequence to be upper-case
+ std::transform(seq.begin(), seq.end(), seq.begin(), [](char c) { return std::toupper(c); });
+ seqout << seq;
+ // record where the sequence starts
+ seq_bytes_written += seq.size();
+ }
}
in.close();
// add the last value so we can get sequence length for the last sequence and name
@@ -96,7 +107,7 @@ void seqindex_t::build_index(const std::string& filename, const std::string& idx
std::remove(seqnamefile.c_str());
if (duplicated_ids){
- std::cerr << "[seqwish] ERROR: the input sequences have duplicated IDs." << std::endl;
+ std::cerr << "[seqwish] ERROR: input sequences have duplicated IDs." << std::endl;
exit(1);
}
View it on GitLab: https://salsa.debian.org/med-team/seqwish/-/commit/d133ab50e5795f04776d0da701b05c11bba69938
--
View it on GitLab: https://salsa.debian.org/med-team/seqwish/-/commit/d133ab50e5795f04776d0da701b05c11bba69938
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210830/4a77d6ca/attachment-0001.htm>
More information about the debian-med-commit mailing list