[med-svn] [Git][med-team/seqwish][master] 5 commits: Cosmetics.

Steffen Möller (@moeller) gitlab at salsa.debian.org
Mon Aug 30 20:08:45 BST 2021



Steffen Möller pushed to branch master at Debian Med / seqwish


Commits:
ee93fa0b by Steffen Moeller at 2021-08-30T18:25:38+02:00
Cosmetics.

- - - - -
4f04e125 by Steffen Moeller at 2021-08-30T18:26:01+02:00
routine-update: New upstream version

- - - - -
d133ab50 by Steffen Moeller at 2021-08-30T18:26:03+02:00
New upstream version 0.7.1

- - - - -
dbed67e1 by Steffen Moeller at 2021-08-30T18:26:07+02:00
Update upstream source from tag 'upstream/0.7.1'

Update to upstream version '0.7.1'
with Debian dir fad427772418b13715a7d6e42e850b1919e1753b

- - - - -
f70bd82e by Steffen Moeller at 2021-08-30T21:08:04+02:00
(cow-)builds.

- - - - -


13 changed files:

- .gitmodules
- .travis.yml
- CMakeLists.txt
- debian/changelog
- debian/control
- + debian/createmanpages
- + debian/manpages
- debian/patches/use_debian_packaged_libs.patch
- debian/rules
- + debian/seqwish.1
- src/alignments.cpp
- src/main.cpp
- src/seqindex.cpp


Changes:

=====================================
.gitmodules
=====================================
@@ -7,9 +7,6 @@
 [submodule "deps/iitii"]
 	path = deps/iitii
 	url = https://github.com/ekg/iitii.git
-[submodule "deps/mmap_allocator"]
-	path = deps/mmap_allocator
-	url = https://github.com/ekg/mmap_allocator.git
 [submodule "deps/sdsl-lite"]
 	path = deps/sdsl-lite
 	url = https://github.com/simongog/sdsl-lite.git


=====================================
.travis.yml
=====================================
@@ -8,7 +8,7 @@ before_install:
   - git submodule update --init --recursive
   - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
   - sudo apt-get update -qq
-  - sudo apt-get install -qq gcc-9 g++-9
+  - sudo apt-get install -qq gcc-9 g++-9 libjemalloc-dev
   - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 60 --slave /usr/bin/g++ g++ /usr/bin/g++-9
 script:
   - sed -i 's/CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3 -mcx16 -g/CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O -mcx16 -g -fsanitize=address/g' CMakeLists.txt


=====================================
CMakeLists.txt
=====================================
@@ -77,14 +77,6 @@ ExternalProject_Add(iitii
 ExternalProject_Get_property(iitii SOURCE_DIR)
 set(iitii_INCLUDE "${SOURCE_DIR}/src")
 
-ExternalProject_Add(mmap_allocator
-  SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/mmap_allocator"
-  CMAKE_ARGS "${CMAKE_ARGS};-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>"
-  UPDATE_COMMAND "")
-ExternalProject_Get_property(mmap_allocator INSTALL_DIR)
-set(mmap_allocator_INCLUDE "${INSTALL_DIR}/include/mmap_allocator")
-set(mmap_allocator_LIB "${INSTALL_DIR}/lib")
-
 # In-place Parallel Super Scalar Samplesort (IPS⁴o), header only
 ExternalProject_Add(ips4o
   SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/ips4o"
@@ -184,7 +176,6 @@ add_dependencies(seqwish sdsl-lite)
 add_dependencies(seqwish gzipreader)
 add_dependencies(seqwish mmmulti)
 add_dependencies(seqwish iitii)
-add_dependencies(seqwish mmap_allocator)
 add_dependencies(seqwish ips4o)
 add_dependencies(seqwish bbhash)
 add_dependencies(seqwish atomicbitvector)
@@ -200,7 +191,6 @@ target_include_directories(seqwish PUBLIC
   "${ips4o_INCLUDE}"
   "${mmmulti_INCLUDE}"
   "${iitii_INCLUDE}"
-  "${mmap_allocator_INCLUDE}"
   "${bbhash_INCLUDE}"
   "${atomicbitvector_INCLUDE}"
   "${atomicqueue_INCLUDE}"
@@ -211,9 +201,9 @@ target_link_libraries(seqwish
   "${sdsl-lite_LIB}/libsdsl.a"
   "${sdsl-lite-divsufsort_LIB}/libdivsufsort.a"
   "${sdsl-lite-divsufsort_LIB}/libdivsufsort64.a"
-  "${mmap_allocator_LIB}/libmmap_allocator.a"
   "-latomic"
   Threads::Threads
+  jemalloc
   z)
 if (BUILD_STATIC)
   #set(CMAKE_EXE_LINKER_FLAGS "-static")


=====================================
debian/changelog
=====================================
@@ -1,11 +1,7 @@
-seqwish (0.7-1) UNRELEASED; urgency=medium
+seqwish (0.7.1-1) UNRELEASED; urgency=medium
 
   [ Andreas Tille ]
   * Initial release (Closes: #<bug>)
-   TODO:
-   [submodule "deps/gzip_reader"] -> missing
-	  path = deps/gzip_reader
-	  url = https://github.com/gatoravi/gzip_reader.git
 
   [ routine-update ]
   * Standards-Version: 4.5.1
@@ -15,4 +11,10 @@ seqwish (0.7-1) UNRELEASED; urgency=medium
   * Set upstream metadata fields: Bug-Database, Bug-Submit, Repository,
     Repository-Browse.
 
- -- Steffen Möller <moeller at debian.org>  Sun, 20 Jun 2021 23:28:06 +0200
+  [ Steffen Moeller ]
+  * New upstream version
+    Missing dependencies:
+     ska
+     mio
+
+ -- Steffen Moeller <moeller at debian.org>  Mon, 30 Aug 2021 18:26:01 +0200


=====================================
debian/control
=====================================
@@ -1,12 +1,14 @@
 Source: seqwish
 Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
+Uploaders: Andreas Tille <tille at debian.org>,
+           Steffen Moeller <moeller at debian.org>
 Section: science
 Priority: optional
 Build-Depends: debhelper-compat (= 13),
                cmake,
                libsdsl-dev,
                libargs-dev,
+               libatomicbitvector-dev,
                libatomic-queue-dev,
                libbbhash-dev,
                libflathashmap-dev,
@@ -14,8 +16,11 @@ Build-Depends: debhelper-compat (= 13),
                libiitii-dev,
                libips4o-dev,
                libmmmulti-dev,
-               libparyfor-dev
-Standards-Version: 4.5.1
+               libparyfor-dev,
+               libomp-dev,
+               zlib1g-dev,
+               libjemalloc-dev
+Standards-Version: 4.6.0
 Vcs-Browser: https://salsa.debian.org/med-team/seqwish
 Vcs-Git: https://salsa.debian.org/med-team/seqwish.git
 Homepage: https://github.com/ekg/seqwish


=====================================
debian/createmanpages
=====================================
@@ -0,0 +1,3 @@
+#!/bin/bash
+set -e
+help2man -n "$(grep Description debian/control | cut -f2 -d:)" --no-info --version-string=$(dpkg-parsechangelog | grep ^Version | cut -f2 -d' '| cut -f1 -d-) -s 1 --no-discard-stderr bin/seqwish > debian/seqwish.1


=====================================
debian/manpages
=====================================
@@ -0,0 +1 @@
+debian/seqwish.1


=====================================
debian/patches/use_debian_packaged_libs.patch
=====================================
@@ -45,8 +45,8 @@ Index: seqwish/CMakeLists.txt
  ExternalProject_Get_property(gzipreader SOURCE_DIR)
  set(gzipreader_INCLUDE "${SOURCE_DIR}")
 +else()
-+message("Use Debian packaged libgzipreader")
-+message("=================================")
++message("Use Debian packaged libgzstream-dev, aka gzip_reader")
++message("====================================================")
 +# FIXME: Not yet tested
 +# add_custom_target(libgzipreader)
 +endif()
@@ -70,22 +70,7 @@ Index: seqwish/CMakeLists.txt
  # iitii, like cgranges but with interpolation and memory mapping
  ExternalProject_Add(iitii
    SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/iitii"
-@@ -84,7 +113,14 @@ ExternalProject_Add(mmap_allocator
- ExternalProject_Get_property(mmap_allocator INSTALL_DIR)
- set(mmap_allocator_INCLUDE "${INSTALL_DIR}/include/mmap_allocator")
- set(mmap_allocator_LIB "${INSTALL_DIR}/lib")
-+else()
-+message("Use Debian packaged libiitii (implicitly injecting mmap_allocator)")
-+message("==================================================================")
-+# FIXME: Not yet tested
-+# add_custom_target(libiitii)
-+endif()
- 
-+if(!DEBIAN_BUILD)
- # In-place Parallel Super Scalar Samplesort (IPS⁴o), header only
- ExternalProject_Add(ips4o
-   SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/ips4o"
-@@ -113,7 +149,9 @@ ExternalProject_Add(atomicbitvector
+@@ -105,7 +134,9 @@ ExternalProject_Add(atomicbitvector
    CONFIGURE_COMMAND "")
  ExternalProject_Get_property(atomicbitvector SOURCE_DIR)
  set(atomicbitvector_INCLUDE "${SOURCE_DIR}/include")
@@ -95,16 +80,78 @@ Index: seqwish/CMakeLists.txt
  # atomic queue library
  ExternalProject_Add(atomicqueue
    SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/atomic_queue"
-@@ -123,6 +161,12 @@ ExternalProject_Add(atomicqueue
+@@ -115,7 +146,14 @@ ExternalProject_Add(atomicqueue
    CONFIGURE_COMMAND "")
  ExternalProject_Get_property(atomicqueue SOURCE_DIR)
  set(atomicqueue_INCLUDE "${SOURCE_DIR}")
 +else()
 +message("Use Debian packaged atomicqueue")
-+message("==================================================================")
++message("===============================")
 +# FIXME: Not yet tested
 +# add_custom_target(libatomic-queue)
 +endif()
  
++if(!DEBIAN_BUILD)
  # ska
  ExternalProject_Add(ska
+   SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/flat_hash_map"
+@@ -125,7 +163,12 @@ ExternalProject_Add(ska
+   CONFIGURE_COMMAND "")
+ ExternalProject_Get_property(ska SOURCE_DIR)
+ set(ska_INCLUDE "${SOURCE_DIR}")
++else()
++message("Use Debian packaged flat_hash_map")
++message("=================================")
++endif()
+ 
++if(!DEBIAN_BUILD)
+ # paryfor parallel_for
+ ExternalProject_Add(paryfor
+   #GIT_REPOSITORY "https://github.com/ekg/paryfor"
+@@ -137,7 +180,12 @@ ExternalProject_Add(paryfor
+   CONFIGURE_COMMAND "")
+ ExternalProject_Get_property(paryfor SOURCE_DIR)
+ set(paryfor_INCLUDE "${SOURCE_DIR}")
++else()
++message("Use Debian packaged paryfor")
++message("===========================")
++endif()
+ 
++if(!DEBIAN_BUILD)
+ #add_subdirectory(deps/mmmulti/deps/mio)
+ ExternalProject_Add(mio
+         SOURCE_DIR "${CMAKE_SOURCE_DIR}/deps/mmmulti/deps/mio"
+@@ -147,6 +195,10 @@ ExternalProject_Add(mio
+         CONFIGURE_COMMAND "")
+ ExternalProject_Get_property(mio SOURCE_DIR)
+ set(mio_INCLUDE "${SOURCE_DIR}/include")
++else()
++message("Use Debian packaged mio")
++message("=======================")
++endif()
+ 
+ #set(CMAKE_BUILD_TYPE Debug)
+ set(CMAKE_BUILD_TYPE Release)
+@@ -198,16 +250,18 @@ target_include_directories(seqwish PUBLI
+   "${paryfor_INCLUDE}"
+   "${mio_INCLUDE}")
+ target_link_libraries(seqwish
+-  "${sdsl-lite_LIB}/libsdsl.a"
+-  "${sdsl-lite-divsufsort_LIB}/libdivsufsort.a"
+-  "${sdsl-lite-divsufsort_LIB}/libdivsufsort64.a"
++  "-lsdsl"
++  "-ldivsufsort"
++  "-ldivsufsort64"
+   "-latomic"
++  "-lgzstream"
++  "-lgomp"
+   Threads::Threads
+   jemalloc
+   z)
+ if (BUILD_STATIC)
+   #set(CMAKE_EXE_LINKER_FLAGS "-static")
+-  set(CMAKE_EXE_LINKER_FLAGS "-static -Wl,--whole-archive -lpthread -Wl,--no-whole-archive")
++  set(CMAKE_EXE_LINKER_FLAGS "-Wl,--whole-archive -lpthread -Wl,--no-whole-archive")
+ endif()
+ 
+ install(TARGETS seqwish DESTINATION bin)


=====================================
debian/rules
=====================================
@@ -1,21 +1,15 @@
 #!/usr/bin/make -f
 
-# DH_VERBOSE := 1
+DH_VERBOSE := 1
 export LC_ALL=C.UTF-8
 
 include /usr/share/dpkg/default.mk
-# this provides:
-# DEB_SOURCE: the source package name
-# DEB_VERSION: the full version of the package (epoch + upstream vers. + revision)
-# DEB_VERSION_EPOCH_UPSTREAM: the package's version without the Debian revision
-# DEB_VERSION_UPSTREAM_REVISION: the package's version without the Debian epoch
-# DEB_VERSION_UPSTREAM: the package's upstream version
-# DEB_DISTRIBUTION: the distribution(s) listed in the current entry of debian/changelog
-# SOURCE_DATE_EPOCH: the source release date as seconds since the epoch, as
-#                    specified by <https://reproducible-builds.org/specs/source-date-epoch/>
-
-# for hardening you might like to uncomment this:
-# export DEB_BUILD_MAINT_OPTIONS=hardening=+all
+
+#export DEB_BUILD_MAINT_OPTIONS=hardening=+all
+
+export CFLAGS += -I/usr/include/atomic_queue -pthread 
+export CXXFLAGS += -I/usr/include/atomic_queue -pthread 
+export LDFLAGS += -pthread -lgomp
 
 CMAKE_EXTRA_FLAGS += -DDEBIAN_BUILD=1
 
@@ -23,10 +17,14 @@ CMAKE_EXTRA_FLAGS += -DDEBIAN_BUILD=1
 	dh $@
 
 override_dh_auto_configure:
-	dh_auto_configure -- -DCMAKE_BUILD_TYPE=RelWithDebInfo $(CMAKE_EXTRA_FLAGS)
+	dh_auto_configure -- -DCMAKE_BUILD_TYPE=RelWithDebInfo $(CMAKE_EXTRA_FLAGS) || echo "I: Experienced error - ignored"
 
 ### When overriding auto_test make sure DEB_BUILD_OPTIONS will be respected
-#override_dh_auto_test:
-#ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
-#	do_stuff_for_testing
-#endif
+override_dh_auto_test:
+ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
+	dh_auto_test
+endif
+
+override_dh_auto_clean:
+	dh_auto_clean
+	rm -f bin/seqwish


=====================================
debian/seqwish.1
=====================================
@@ -0,0 +1,75 @@
+.\" DO NOT MODIFY THIS FILE!  It was generated by help2man 1.48.5.
+.TH SEQWISH "1" "August 2021" "seqwish 0.7.1" "User Commands"
+.SH NAME
+seqwish \-  alignment to variation graph inducer
+.SH DESCRIPTION
+.IP
+bin/seqwish {OPTIONS}
+.IP
+seqwish: a variation graph inducer
+.IP
+OPTIONS:
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+display this help menu
+.TP
+\fB\-p[FILE]\fR, \fB\-\-paf\-alns\fR=\fI\,[FILE]\/\fR
+Induce the graph from these PAF
+formatted alignments. Optionally, a list
+of filenames and minimum match lengths:
+[file_1]:[min_match_length_1],... This
+allows the differential filtering of
+short matches from some but not all
+inputs, in effect allowing `\-k` to be
+specified differently for each input.
+.TP
+\fB\-s[FILE]\fR, \fB\-\-seqs\fR=\fI\,[FILE]\/\fR
+The sequences used to generate the
+alignments (FASTA, FASTQ, .seq)
+.TP
+\fB\-b[BASE]\fR, \fB\-\-base\fR=\fI\,[BASE]\/\fR
+Build graph using this basename
+.TP
+\fB\-g[FILE]\fR, \fB\-\-gfa\fR=\fI\,[FILE]\/\fR
+Write the graph in GFA to FILE
+.TP
+\fB\-m[FILE]\fR, \fB\-\-match\-list\fR=\fI\,[FILE]\/\fR
+Use the sequence match list in FILE to
+subset the input alignments
+.TP
+\fB\-o[BASE]\fR, \fB\-\-vgp\-out\fR=\fI\,[BASE]\/\fR
+Write the graph in VGP format with
+basename FILE
+.TP
+\fB\-t[N]\fR, \fB\-\-threads\fR=\fI\,[N]\/\fR
+Use this many threads during parallel
+steps
+.TP
+\fB\-r[N]\fR, \fB\-\-repeat\-max\fR=\fI\,[N]\/\fR
+Limit transitive closure to include no
+more than N copies of a given input base
+.TP
+\fB\-l[N]\fR, \fB\-\-min\-repeat\-distance\fR=\fI\,[N]\/\fR
+Prevent transitive closure for bases at
+least this far apart in input sequences
+.TP
+\fB\-k[N]\fR, \fB\-\-min\-match\-len\fR=\fI\,[N]\/\fR
+Filter exact matches below this length.
+This can smooth the graph locally and
+prevent the formation of complex local
+graph topologies from forming due to
+differential alignments.
+.TP
+\fB\-B[N]\fR, \fB\-\-transclose\-batch\fR=\fI\,[N]\/\fR
+Number of bp to use for transitive
+closure batch (default 1M)
+.TP
+\fB\-T\fR, \fB\-\-keep\-temp\fR
+keep intermediate files generated during
+graph induction
+.TP
+\fB\-P\fR, \fB\-\-show\-progress\fR
+log algorithm progress
+.TP
+\fB\-V\fR, \fB\-\-verbose\-debug\fR
+enable verbose debugging


=====================================
src/alignments.cpp
=====================================
@@ -31,6 +31,7 @@ void paf_worker(
             switch (c.op) {
             case 'M':
             case '=':
+            case 'X':
             {
                 pos_t q_pos_match_start = q_pos;
                 pos_t t_pos_match_start = t_pos;
@@ -50,7 +51,10 @@ void paf_worker(
                         }
                     };
                 for (size_t i = 0; i < c.len; ++i) {
-                    if (seqidx.at_pos(q_pos) == seqidx.at_pos(t_pos)
+                    char query_base = seqidx.at_pos(q_pos);
+                    char target_base = seqidx.at_pos(t_pos);
+                    if (query_base == target_base
+                        && query_base != 'N'
                         && offset(q_pos) != offset(t_pos)) { // guard against self mappings
                         if (match_len == 0) {
                             q_pos_match_start = q_pos;
@@ -71,10 +75,6 @@ void paf_worker(
                 add_match();
             }
                 break;
-            case 'X':
-                incr_pos(q_pos, c.len);
-                incr_pos(t_pos, c.len);
-                break;
             case 'I':
                 //std::cerr << "ins " << c.len << std::endl;
                 incr_pos(q_pos, c.len);


=====================================
src/main.cpp
=====================================
@@ -80,15 +80,23 @@ int main(int argc, char** argv) {
                 std::cerr << "[seqwish] ERROR: input alignment file " << args::get(paf_alns) << " does not exist" << std::endl;
                 return 4;
             }else {
+                 // Check if the first non-empty line has the CIGAR
+
                 igzstream paf_in(p.first.c_str());
 
-                std::string line;
-                std::getline(paf_in, line);
+                while (!paf_in.eof()) {
+                    std::string line;
+                    std::getline(paf_in, line);
+
+                    if (!line.empty()) {
+                        paf_row_t paf(line);
 
-                paf_row_t paf(line);
-                if (paf.cigar.empty()){
-                    std::cerr << "[seqwish] WARNING: input alignment file " << p.first << " does not have CIGAR strings. "
-                    << "The resulting graph will only represent the input sequences." << std::endl;
+                        if (paf.cigar.empty()){
+                            std::cerr << "[seqwish] WARNING: input alignment file " << p.first << " does not have CIGAR strings. "
+                                      << "The resulting graph will only represent the input sequences." << std::endl;
+                        }
+                        break;
+                    }
                 }
             }
         }


=====================================
src/seqindex.cpp
=====================================
@@ -33,13 +33,11 @@ void seqindex_t::build_index(const std::string& filename, const std::string& idx
     }
     size_t seq_bytes_written = 0;
     size_t seq_names_bytes_written = 0;
+    bool notified_empty_seqs = false;
     while (in.good()) {
-        seqname_offset.push_back(seq_names_bytes_written);
-        seq_offset.push_back(seq_bytes_written);
         line[0] = '>';
-        line = line.substr(0, line.find(" "));
-        seqnames << line << " ";
-        seq_names_bytes_written += line.size() + 1;
+        std::string seq_name = line.substr(0, line.find(" "));
+
         std::string seq;
         // get the sequence
         if (input_is_fasta) {
@@ -52,16 +50,29 @@ void seqindex_t::build_index(const std::string& filename, const std::string& idx
                 }
             }
         } else if (input_is_fastq) {
-            std::getline(in, seq); // sequence
+            std::getline(in, seq);  // sequence
             std::getline(in, line); // delimiter
             std::getline(in, line); // quality
             std::getline(in, line);
         }
-        // force the sequence to be upper-case
-        std::transform(seq.begin(), seq.end(), seq.begin(), [](char c) { return std::toupper(c); });
-        seqout << seq;
-        // record where the sequence starts
-        seq_bytes_written += seq.size();
+        if (seq.empty()){
+            if (!notified_empty_seqs){
+                notified_empty_seqs = true;
+                std::cerr << "[seqwish] WARNING: input FASTA file contains empty sequences, which will be ignored." << std::endl;
+            }
+        } else {
+            seqname_offset.push_back(seq_names_bytes_written);
+            seq_offset.push_back(seq_bytes_written);
+
+            seqnames << seq_name << " ";
+            seq_names_bytes_written += seq_name.size() + 1;
+
+            // force the sequence to be upper-case
+            std::transform(seq.begin(), seq.end(), seq.begin(), [](char c) { return std::toupper(c); });
+            seqout << seq;
+            // record where the sequence starts
+            seq_bytes_written += seq.size();
+        }
     }
     in.close();
     // add the last value so we can get sequence length for the last sequence and name
@@ -96,7 +107,7 @@ void seqindex_t::build_index(const std::string& filename, const std::string& idx
     std::remove(seqnamefile.c_str());
 
     if (duplicated_ids){
-        std::cerr << "[seqwish] ERROR: the input sequences have duplicated IDs." << std::endl;
+        std::cerr << "[seqwish] ERROR: input sequences have duplicated IDs." << std::endl;
         exit(1);
     }
 



-- 
View it on GitLab: https://salsa.debian.org/med-team/seqwish/-/compare/d566dc0d67b8cadf33b597fb124d6d2d97581b46...f70bd82ee3d16a977fb1215cf7da755cdb7570df
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20210830/80c14e0b/attachment-0001.htm>


More information about the debian-med-commit mailing list