[med-svn] [Git][med-team/seqwish][master] 5 commits: Cosmetics.
Steffen Möller (@moeller)
gitlab at salsa.debian.org
Mon Aug 30 20:08:45 BST 2021
Steffen Möller pushed to branch master at Debian Med / seqwish
Commits:
ee93fa0b by Steffen Moeller at 2021-08-30T18:25:38+02:00
Cosmetics.
- - - - -
4f04e125 by Steffen Moeller at 2021-08-30T18:26:01+02:00
routine-update: New upstream version
- - - - -
d133ab50 by Steffen Moeller at 2021-08-30T18:26:03+02:00
New upstream version 0.7.1
- - - - -
dbed67e1 by Steffen Moeller at 2021-08-30T18:26:07+02:00
Update upstream source from tag 'upstream/0.7.1'
Update to upstream version '0.7.1'
with Debian dir fad427772418b13715a7d6e42e850b1919e1753b
- - - - -
f70bd82e by Steffen Moeller at 2021-08-30T21:08:04+02:00
(cow-)builds.
- - - - -
13 changed files:
- .gitmodules
- .travis.yml
- CMakeLists.txt
- debian/changelog
- debian/control
- + debian/createmanpages
- + debian/manpages
- debian/patches/use_debian_packaged_libs.patch
- debian/rules
- + debian/seqwish.1
- src/alignments.cpp
- src/main.cpp
- src/seqindex.cpp
Changes:
=====================================
.gitmodules
=====================================
@@ -7,9 +7,6 @@
[submodule "deps/iitii"]
path = deps/iitii
url = https://github.com/ekg/iitii.git
-[submodule "deps/mmap_allocator"]
- path = deps/mmap_allocator
- url = https://github.com/ekg/mmap_allocator.git
[submodule "deps/sdsl-lite"]
path = deps/sdsl-lite
url = https://github.com/simongog/sdsl-lite.git
=====================================
.travis.yml
=====================================
@@ -8,7 +8,7 @@ before_install:
- git submodule update --init --recursive
- sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
- sudo apt-get update -qq
- - sudo apt-get install -qq gcc-9 g++-9
+ - sudo apt-get install -qq gcc-9 g++-9 libjemalloc-dev
- sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 60 --slave /usr/bin/g++ g++ /usr/bin/g++-9
script:
- sed -i 's/CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -mcx16 -g/CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O -mcx16 -g -fsanitize=address/g' CMakeLists.txt
=====================================
CMakeLists.txt
=====================================
@@ -77,14 +77,6 @@ ExternalProject_Add(iitii
ExternalProject_Get_property(iitii SOURCE_DIR)
set(iitii_INCLUDE "${SOURCE_DIR}/src")
-ExternalProject_Add(mmap_allocator
- SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/mmap_allocator"
- CMAKE_ARGS "${CMAKE_ARGS};-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>"
- UPDATE_COMMAND "")
-ExternalProject_Get_property(mmap_allocator INSTALL_DIR)
-set(mmap_allocator_INCLUDE "${INSTALL_DIR}/include/mmap_allocator")
-set(mmap_allocator_LIB "${INSTALL_DIR}/lib")
-
# In-place Parallel Super Scalar Samplesort (IPS⁴o), header only
ExternalProject_Add(ips4o
SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/ips4o"
@@ -184,7 +176,6 @@ add_dependencies(seqwish sdsl-lite)
add_dependencies(seqwish gzipreader)
add_dependencies(seqwish mmmulti)
add_dependencies(seqwish iitii)
-add_dependencies(seqwish mmap_allocator)
add_dependencies(seqwish ips4o)
add_dependencies(seqwish bbhash)
add_dependencies(seqwish atomicbitvector)
@@ -200,7 +191,6 @@ target_include_directories(seqwish PUBLIC
"${ips4o_INCLUDE}"
"${mmmulti_INCLUDE}"
"${iitii_INCLUDE}"
- "${mmap_allocator_INCLUDE}"
"${bbhash_INCLUDE}"
"${atomicbitvector_INCLUDE}"
"${atomicqueue_INCLUDE}"
@@ -211,9 +201,9 @@ target_link_libraries(seqwish
"${sdsl-lite_LIB}/libsdsl.a"
"${sdsl-lite-divsufsort_LIB}/libdivsufsort.a"
"${sdsl-lite-divsufsort_LIB}/libdivsufsort64.a"
- "${mmap_allocator_LIB}/libmmap_allocator.a"
"-latomic"
Threads::Threads
+ jemalloc
z)
if (BUILD_STATIC)
#set(CMAKE_EXE_LINKER_FLAGS "-static")
=====================================
debian/changelog
=====================================
@@ -1,11 +1,7 @@
-seqwish (0.7-1) UNRELEASED; urgency=medium
+seqwish (0.7.1-1) UNRELEASED; urgency=medium
[ Andreas Tille ]
* Initial release (Closes: #<bug>)
- TODO:
- [submodule "deps/gzip_reader"] -> missing
- path = deps/gzip_reader
- url = https://github.com/gatoravi/gzip_reader.git
[ routine-update ]
* Standards-Version: 4.5.1
@@ -15,4 +11,10 @@ seqwish (0.7-1) UNRELEASED; urgency=medium
* Set upstream metadata fields: Bug-Database, Bug-Submit, Repository,
Repository-Browse.
- -- Steffen Möller <moeller at debian.org> Sun, 20 Jun 2021 23:28:06 +0200
+ [ Steffen Moeller ]
+ * New upstream version
+ Missing dependencies:
+ ska
+ mio
+
+ -- Steffen Moeller <moeller at debian.org> Mon, 30 Aug 2021 18:26:01 +0200
=====================================
debian/control
=====================================
@@ -1,12 +1,14 @@
Source: seqwish
Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
+Uploaders: Andreas Tille <tille at debian.org>,
+ Steffen Moeller <moeller at debian.org>
Section: science
Priority: optional
Build-Depends: debhelper-compat (= 13),
cmake,
libsdsl-dev,
libargs-dev,
+ libatomicbitvector-dev,
libatomic-queue-dev,
libbbhash-dev,
libflathashmap-dev,
@@ -14,8 +16,11 @@ Build-Depends: debhelper-compat (= 13),
libiitii-dev,
libips4o-dev,
libmmmulti-dev,
- libparyfor-dev
-Standards-Version: 4.5.1
+ libparyfor-dev,
+ libomp-dev,
+ zlib1g-dev,
+ libjemalloc-dev
+Standards-Version: 4.6.0
Vcs-Browser: https://salsa.debian.org/med-team/seqwish
Vcs-Git: https://salsa.debian.org/med-team/seqwish.git
Homepage: https://github.com/ekg/seqwish
=====================================
debian/createmanpages
=====================================
@@ -0,0 +1,3 @@
+#!/bin/bash
+set -e
+help2man -n "$(grep Description debian/control | cut -f2 -d:)" --no-info --version-string=$(dpkg-parsechangelog | grep ^Version | cut -f2 -d' '| cut -f1 -d-) -s 1 --no-discard-stderr bin/seqwish > debian/seqwish.1
=====================================
debian/manpages
=====================================
@@ -0,0 +1 @@
+debian/seqwish.1
=====================================
debian/patches/use_debian_packaged_libs.patch
=====================================
@@ -45,8 +45,8 @@ Index: seqwish/CMakeLists.txt
ExternalProject_Get_property(gzipreader SOURCE_DIR)
set(gzipreader_INCLUDE "${SOURCE_DIR}")
+else()
-+message("Use Debian packaged libgzipreader")
-+message("=================================")
++message("Use Debian packaged libgzstream-dev, aka gzip_reader")
++message("====================================================")
+# FIXME: Not yet tested
+# add_custom_target(libgzipreader)
+endif()
@@ -70,22 +70,7 @@ Index: seqwish/CMakeLists.txt
# iitii, like cgranges but with interpolation and memory mapping
ExternalProject_Add(iitii
SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/iitii"
-@@ -84,7 +113,14 @@ ExternalProject_Add(mmap_allocator
- ExternalProject_Get_property(mmap_allocator INSTALL_DIR)
- set(mmap_allocator_INCLUDE "${INSTALL_DIR}/include/mmap_allocator")
- set(mmap_allocator_LIB "${INSTALL_DIR}/lib")
-+else()
-+message("Use Debian packaged libiitii (implicitly injecting mmap_allocator)")
-+message("==================================================================")
-+# FIXME: Not yet tested
-+# add_custom_target(libiitii)
-+endif()
-
-+if(!DEBIAN_BUILD)
- # In-place Parallel Super Scalar Samplesort (IPS⁴o), header only
- ExternalProject_Add(ips4o
- SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/ips4o"
-@@ -113,7 +149,9 @@ ExternalProject_Add(atomicbitvector
+@@ -105,7 +134,9 @@ ExternalProject_Add(atomicbitvector
CONFIGURE_COMMAND "")
ExternalProject_Get_property(atomicbitvector SOURCE_DIR)
set(atomicbitvector_INCLUDE "${SOURCE_DIR}/include")
@@ -95,16 +80,78 @@ Index: seqwish/CMakeLists.txt
# atomic queue library
ExternalProject_Add(atomicqueue
SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/atomic_queue"
-@@ -123,6 +161,12 @@ ExternalProject_Add(atomicqueue
+@@ -115,7 +146,14 @@ ExternalProject_Add(atomicqueue
CONFIGURE_COMMAND "")
ExternalProject_Get_property(atomicqueue SOURCE_DIR)
set(atomicqueue_INCLUDE "${SOURCE_DIR}")
+else()
+message("Use Debian packaged atomicqueue")
-+message("==================================================================")
++message("===============================")
+# FIXME: Not yet tested
+# add_custom_target(libatomic-queue)
+endif()
++if(!DEBIAN_BUILD)
# ska
ExternalProject_Add(ska
+ SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/flat_hash_map"
+@@ -125,7 +163,12 @@ ExternalProject_Add(ska
+ CONFIGURE_COMMAND "")
+ ExternalProject_Get_property(ska SOURCE_DIR)
+ set(ska_INCLUDE "${SOURCE_DIR}")
++else()
++message("Use Debian packaged flat_hash_map")
++message("=================================")
++endif()
+
++if(!DEBIAN_BUILD)
+ # paryfor parallel_for
+ ExternalProject_Add(paryfor
+ #GIT_REPOSITORY "https://github.com/ekg/paryfor"
+@@ -137,7 +180,12 @@ ExternalProject_Add(paryfor
+ CONFIGURE_COMMAND "")
+ ExternalProject_Get_property(paryfor SOURCE_DIR)
+ set(paryfor_INCLUDE "${SOURCE_DIR}")
++else()
++message("Use Debian packaged paryfor")
++message("===========================")
++endif()
+
++if(!DEBIAN_BUILD)
+ #add_subdirectory(deps/mmmulti/deps/mio)
+ ExternalProject_Add(mio
+ SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/mmmulti/deps/mio"
+@@ -147,6 +195,10 @@ ExternalProject_Add(mio
+ CONFIGURE_COMMAND "")
+ ExternalProject_Get_property(mio SOURCE_DIR)
+ set(mio_INCLUDE "${SOURCE_DIR}/include")
++else()
++message("Use Debian packaged mio")
++message("=======================")
++endif()
+
+ #set(CMAKE_BUILD_TYPE Debug)
+ set(CMAKE_BUILD_TYPE Release)
+@@ -198,16 +250,18 @@ target_include_directories(seqwish PUBLI
+ "${paryfor_INCLUDE}"
+ "${mio_INCLUDE}")
+ target_link_libraries(seqwish
+- "${sdsl-lite_LIB}/libsdsl.a"
+- "${sdsl-lite-divsufsort_LIB}/libdivsufsort.a"
+- "${sdsl-lite-divsufsort_LIB}/libdivsufsort64.a"
++ "-lsdsl"
++ "-ldivsufsort"
++ "-ldivsufsort64"
+ "-latomic"
++ "-lgzstream"
++ "-lgomp"
+ Threads::Threads
+ jemalloc
+ z)
+ if (BUILD_STATIC)
+ #set(CMAKE_EXE_LINKER_FLAGS "-static")
+- set(CMAKE_EXE_LINKER_FLAGS "-static -Wl,--whole-archive -lpthread -Wl,--no-whole-archive")
++ set(CMAKE_EXE_LINKER_FLAGS "-Wl,--whole-archive -lpthread -Wl,--no-whole-archive")
+ endif()
+
+ install(TARGETS seqwish DESTINATION bin)
=====================================
debian/rules
=====================================
@@ -1,21 +1,15 @@
#!/usr/bin/make -f
-# DH_VERBOSE := 1
+DH_VERBOSE := 1
export LC_ALL=C.UTF-8
include /usr/share/dpkg/default.mk
-# this provides:
-# DEB_SOURCE: the source package name
-# DEB_VERSION: the full version of the package (epoch + upstream vers. + revision)
-# DEB_VERSION_EPOCH_UPSTREAM: the package's version without the Debian revision
-# DEB_VERSION_UPSTREAM_REVISION: the package's version without the Debian epoch
-# DEB_VERSION_UPSTREAM: the package's upstream version
-# DEB_DISTRIBUTION: the distribution(s) listed in the current entry of debian/changelog
-# SOURCE_DATE_EPOCH: the source release date as seconds since the epoch, as
-# specified by <https://reproducible-builds.org/specs/source-date-epoch/>
-
-# for hardening you might like to uncomment this:
-# export DEB_BUILD_MAINT_OPTIONS=hardening=+all
+
+#export DEB_BUILD_MAINT_OPTIONS=hardening=+all
+
+export CFLAGS += -I/usr/include/atomic_queue -pthread
+export CXXFLAGS += -I/usr/include/atomic_queue -pthread
+export LDFLAGS += -pthread -lgomp
CMAKE_EXTRA_FLAGS += -DDEBIAN_BUILD=1
@@ -23,10 +17,14 @@ CMAKE_EXTRA_FLAGS += -DDEBIAN_BUILD=1
dh $@
override_dh_auto_configure:
- dh_auto_configure -- -DCMAKE_BUILD_TYPE=RelWithDebInfo $(CMAKE_EXTRA_FLAGS)
+ dh_auto_configure -- -DCMAKE_BUILD_TYPE=RelWithDebInfo $(CMAKE_EXTRA_FLAGS) || echo "I: Experienced error - ignored"
### When overriding auto_test make sure DEB_BUILD_OPTIONS will be respected
-#override_dh_auto_test:
-#ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
-# do_stuff_for_testing
-#endif
+override_dh_auto_test:
+ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
+ dh_auto_test
+endif
+
+override_dh_auto_clean:
+ dh_auto_clean
+ rm -f bin/seqwish
=====================================
debian/seqwish.1
=====================================
@@ -0,0 +1,75 @@
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.48.5.
+.TH SEQWISH "1" "August 2021" "seqwish 0.7.1" "User Commands"
+.SH NAME
+seqwish \- alignment to variation graph inducer
+.SH DESCRIPTION
+.IP
+bin/seqwish {OPTIONS}
+.IP
+seqwish: a variation graph inducer
+.IP
+OPTIONS:
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+display this help menu
+.TP
+\fB\-p[FILE]\fR, \fB\-\-paf\-alns\fR=\fI\,[FILE]\/\fR
+Induce the graph from these PAF
+formatted alignments. Optionally, a list
+of filenames and minimum match lengths:
+[file_1]:[min_match_length_1],... This
+allows the differential filtering of
+short matches from some but not all
+inputs, in effect allowing `\-k` to be
+specified differently for each input.
+.TP
+\fB\-s[FILE]\fR, \fB\-\-seqs\fR=\fI\,[FILE]\/\fR
+The sequences used to generate the
+alignments (FASTA, FASTQ, .seq)
+.TP
+\fB\-b[BASE]\fR, \fB\-\-base\fR=\fI\,[BASE]\/\fR
+Build graph using this basename
+.TP
+\fB\-g[FILE]\fR, \fB\-\-gfa\fR=\fI\,[FILE]\/\fR
+Write the graph in GFA to FILE
+.TP
+\fB\-m[FILE]\fR, \fB\-\-match\-list\fR=\fI\,[FILE]\/\fR
+Use the sequence match list in FILE to
+subset the input alignments
+.TP
+\fB\-o[BASE]\fR, \fB\-\-vgp\-out\fR=\fI\,[BASE]\/\fR
+Write the graph in VGP format with
+basename FILE
+.TP
+\fB\-t[N]\fR, \fB\-\-threads\fR=\fI\,[N]\/\fR
+Use this many threads during parallel
+steps
+.TP
+\fB\-r[N]\fR, \fB\-\-repeat\-max\fR=\fI\,[N]\/\fR
+Limit transitive closure to include no
+more than N copies of a given input base
+.TP
+\fB\-l[N]\fR, \fB\-\-min\-repeat\-distance\fR=\fI\,[N]\/\fR
+Prevent transitive closure for bases at
+least this far apart in input sequences
+.TP
+\fB\-k[N]\fR, \fB\-\-min\-match\-len\fR=\fI\,[N]\/\fR
+Filter exact matches below this length.
+This can smooth the graph locally and
+prevent the formation of complex local
+graph topologies from forming due to
+differential alignments.
+.TP
+\fB\-B[N]\fR, \fB\-\-transclose\-batch\fR=\fI\,[N]\/\fR
+Number of bp to use for transitive
+closure batch (default 1M)
+.TP
+\fB\-T\fR, \fB\-\-keep\-temp\fR
+keep intermediate files generated during
+graph induction
+.TP
+\fB\-P\fR, \fB\-\-show\-progress\fR
+log algorithm progress
+.TP
+\fB\-V\fR, \fB\-\-verbose\-debug\fR
+enable verbose debugging
=====================================
src/alignments.cpp
=====================================
@@ -31,6 +31,7 @@ void paf_worker(
switch (c.op) {
case 'M':
case '=':
+ case 'X':
{
pos_t q_pos_match_start = q_pos;
pos_t t_pos_match_start = t_pos;
@@ -50,7 +51,10 @@ void paf_worker(
}
};
for (size_t i = 0; i < c.len; ++i) {
- if (seqidx.at_pos(q_pos) == seqidx.at_pos(t_pos)
+ char query_base = seqidx.at_pos(q_pos);
+ char target_base = seqidx.at_pos(t_pos);
+ if (query_base == target_base
+ && query_base != 'N'
&& offset(q_pos) != offset(t_pos)) { // guard against self mappings
if (match_len == 0) {
q_pos_match_start = q_pos;
@@ -71,10 +75,6 @@ void paf_worker(
add_match();
}
break;
- case 'X':
- incr_pos(q_pos, c.len);
- incr_pos(t_pos, c.len);
- break;
case 'I':
//std::cerr << "ins " << c.len << std::endl;
incr_pos(q_pos, c.len);
=====================================
src/main.cpp
=====================================
@@ -80,15 +80,23 @@ int main(int argc, char** argv) {
std::cerr << "[seqwish] ERROR: input alignment file " << args::get(paf_alns) << " does not exist" << std::endl;
return 4;
}else {
+ // Check if the first non-empty line has the CIGAR
+
igzstream paf_in(p.first.c_str());
- std::string line;
- std::getline(paf_in, line);
+ while (!paf_in.eof()) {
+ std::string line;
+ std::getline(paf_in, line);
+
+ if (!line.empty()) {
+ paf_row_t paf(line);
- paf_row_t paf(line);
- if (paf.cigar.empty()){
- std::cerr << "[seqwish] WARNING: input alignment file " << p.first << " does not have CIGAR strings. "
- << "The resulting graph will only represent the input sequences." << std::endl;
+ if (paf.cigar.empty()){
+ std::cerr << "[seqwish] WARNING: input alignment file " << p.first << " does not have CIGAR strings. "
+ << "The resulting graph will only represent the input sequences." << std::endl;
+ }
+ break;
+ }
}
}
}
=====================================
src/seqindex.cpp
=====================================
@@ -33,13 +33,11 @@ void seqindex_t::build_index(const std::string& filename, const std::string& idx
}
size_t seq_bytes_written = 0;
size_t seq_names_bytes_written = 0;
+ bool notified_empty_seqs = false;
while (in.good()) {
- seqname_offset.push_back(seq_names_bytes_written);
- seq_offset.push_back(seq_bytes_written);
line[0] = '>';
- line = line.substr(0, line.find(" "));
- seqnames << line << " ";
- seq_names_bytes_written += line.size() + 1;
+ std::string seq_name = line.substr(0, line.find(" "));
+
std::string seq;
// get the sequence
if (input_is_fasta) {
@@ -52,16 +50,29 @@ void seqindex_t::build_index(const std::string& filename, const std::string& idx
}
}
} else if (input_is_fastq) {
- std::getline(in, seq); // sequence
+ std::getline(in, seq); // sequence
std::getline(in, line); // delimiter
std::getline(in, line); // quality
std::getline(in, line);
}
- // force the sequence to be upper-case
- std::transform(seq.begin(), seq.end(), seq.begin(), [](char c) { return std::toupper(c); });
- seqout << seq;
- // record where the sequence starts
- seq_bytes_written += seq.size();
+ if (seq.empty()){
+ if (!notified_empty_seqs){
+ notified_empty_seqs = true;
+ std::cerr << "[seqwish] WARNING: input FASTA file contains empty sequences, which will be ignored." << std::endl;
+ }
+ } else {
+ seqname_offset.push_back(seq_names_bytes_written);
+ seq_offset.push_back(seq_bytes_written);
+
+ seqnames << seq_name << " ";
+ seq_names_bytes_written += seq_name.size() + 1;
+
+ // force the sequence to be upper-case
+ std::transform(seq.begin(), seq.end(), seq.begin(), [](char c) { return std::toupper(c); });
+ seqout << seq;
+ // record where the sequence starts
+ seq_bytes_written += seq.size();
+ }
}
in.close();
// add the last value so we can get sequence length for the last sequence and name
@@ -96,7 +107,7 @@ void seqindex_t::build_index(const std::string& filename, const std::string& idx
std::remove(seqnamefile.c_str());
if (duplicated_ids){
- std::cerr << "[seqwish] ERROR: the input sequences have duplicated IDs." << std::endl;
+ std::cerr << "[seqwish] ERROR: input sequences have duplicated IDs." << std::endl;
exit(1);
}
View it on GitLab: https://salsa.debian.org/med-team/seqwish/-/compare/d566dc0d67b8cadf33b597fb124d6d2d97581b46...f70bd82ee3d16a977fb1215cf7da755cdb7570df
--
View it on GitLab: https://salsa.debian.org/med-team/seqwish/-/compare/d566dc0d67b8cadf33b597fb124d6d2d97581b46...f70bd82ee3d16a977fb1215cf7da755cdb7570df
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210830/80c14e0b/attachment-0001.htm>
More information about the debian-med-commit mailing list