[med-svn] [Git][med-team/spoa][upstream] New upstream version 1.1.5

Andreas Tille gitlab at salsa.debian.org
Mon Jan 28 20:12:59 GMT 2019


Andreas Tille pushed to branch upstream at Debian Med / spoa


Commits:
fe7cc825 by Andreas Tille at 2019-01-28T19:04:57Z
New upstream version 1.1.5
- - - - -


5 changed files:

- CMakeLists.txt
- README.md
- include/spoa/graph.hpp
- src/graph.cpp
- src/main.cpp


Changes:

=====================================
CMakeLists.txt
=====================================
@@ -1,5 +1,5 @@
 cmake_minimum_required(VERSION 3.2)
-project(spoa LANGUAGES CXX VERSION 1.1.3)
+project(spoa LANGUAGES CXX VERSION 1.1.5)
 
 include(GNUInstallDirs)
 


=====================================
README.md
=====================================
@@ -4,7 +4,7 @@
 [![Build status for c++/clang++](https://travis-ci.org/rvaser/spoa.svg?branch=master)](https://travis-ci.org/rvaser/spoa)
 [![Published in Genome Research](https://img.shields.io/badge/published%20in-Genome%20Research-blue.svg)](https://doi.org/10.1101/gr.214270.116)
 
-Spoa (SIMD POA) is a c++ implementation of the partial order alignment (POA) algorithm (as described in 10.1093/bioinformatics/18.3.452) which is used to generate consensus sequences (as described in 10.1093/bioinformatics/btg109). It supports three alignment modes: local (Smith-Waterman), global (Needleman-Wunsch) and semi-global alignment (overlap). It supports Intel SSE4.1+ and AVX2 (marginally faster due to high latency shifts) vectorization.
+Spoa (SIMD POA) is a c++ implementation of the partial order alignment (POA) algorithm (as described in 10.1093/bioinformatics/18.3.452) which is used to generate consensus sequences (as described in 10.1093/bioinformatics/btg109). It supports three alignment modes: local (Smith-Waterman), global (Needleman-Wunsch) and semi-global alignment (overlap). It supports Intel SSE4.1+ and AVX2 vectorization (marginally faster due to high latency shifts).
 
 ## Dependencies
 
@@ -39,6 +39,8 @@ Optionally, you can run `sudo make install` to install spoa library (and executa
 
 ***Note***: if you omitted `--recursive` from `git clone`, run `git submodule init` and `git submodule update` before proceeding with compilation.
 
+To build unit tests add `-Dspoa_build_tests=ON` while running `cmake`. After installation, an executable named `spoa_test` will be created in `build/bin`.
+
 ## Usage
 
 Usage of spoa is as following:
@@ -46,7 +48,8 @@ Usage of spoa is as following:
     spoa [options ...] <sequences>
 
         <sequences>
-            input file in FASTA/FASTQ format containing sequences
+            input file in FASTA/FASTQ format (can be compressed with gzip)
+            containing sequences
 
         options:
             -m, --match <int>
@@ -70,6 +73,8 @@ Usage of spoa is as following:
                     0 - consensus
                     1 - multiple sequence alignment
                     2 - 0 & 1
+            -d, --dot <file>
+                output file for the final POA graph in DOT format
             --version
                 prints the version number
             -h, --help


=====================================
include/spoa/graph.hpp
=====================================
@@ -78,7 +78,7 @@ public:
     void update_alignment(Alignment& alignment,
         const std::vector<int32_t>& subgraph_to_graph_mapping) const;
 
-    void print_graphviz() const;
+    void print_dot(const std::string& path) const;
 
     friend std::unique_ptr<Graph> createGraph();
 private:


=====================================
src/graph.cpp
=====================================
@@ -8,6 +8,7 @@
 #include <assert.h>
 #include <algorithm>
 #include <stack>
+#include <fstream>
 
 #include "spoa/graph.hpp"
 
@@ -679,7 +680,13 @@ void Graph::update_alignment(Alignment& alignment,
     }
 }
 
-void Graph::print_graphviz() const {
+void Graph::print_dot(const std::string& path) const {
+
+    if (path.empty()) {
+        return;
+    }
+
+    std::ofstream out(path);
 
     std::vector<int32_t> in_consensus(nodes_.size(), -1);
     int32_t rank = 0;
@@ -687,30 +694,34 @@ void Graph::print_graphviz() const {
         in_consensus[id] = rank++;
     }
 
-    printf("digraph %u {\n", num_sequences_);
-    printf("    graph [rankdir=LR]\n");
+    out << "digraph " << num_sequences_ << " {" << std::endl;
+    out << "    graph [rankdir=LR]" << std::endl;
     for (uint32_t i = 0; i < nodes_.size(); ++i) {
-        printf("    %u [label = \"%u - %c\"", i, i, decoder_[nodes_[i]->code_]);
+        out << "    " << i << " [label = \"" << i << " - ";
+        out << static_cast<char>(decoder_[nodes_[i]->code_]) << "\"";
         if (in_consensus[i] != -1) {
-            printf(", style=filled, fillcolor=goldenrod1");
+            out << ", style=filled, fillcolor=goldenrod1";
         }
-        printf("]\n");
+        out << "]" << std::endl;
 
         for (const auto& edge: nodes_[i]->out_edges_) {
-            printf("    %u -> %u [label = \"%lu\"", i, edge->end_node_id_,
-                edge->total_weight_);
+            out << "    " << i << " -> " << edge->end_node_id_;
+            out << " [label = \"" << edge->total_weight_ << "\"";
             if (in_consensus[i] + 1 == in_consensus[edge->end_node_id_]) {
-                printf(", color=goldenrod1");
+                out << ", color=goldenrod1";
             }
-            printf("]\n");
+            out << "]" << std::endl;
         }
         for (const auto& aid: nodes_[i]->aligned_nodes_ids_) {
             if (aid > i) {
-                printf("    %u -> %u [style = dotted, arrowhead = none]\n", i, aid);
+                out << "    " << i << " -> " << aid;
+                out << " [style = dotted, arrowhead = none]" << std::endl;
             }
         }
     }
-    printf("}\n");
+    out << "}" << std::endl;
+
+    out.close();
 }
 
 }


=====================================
src/main.cpp
=====================================
@@ -7,7 +7,7 @@
 #include "spoa/spoa.hpp"
 #include "bioparser/bioparser.hpp"
 
-static const char* version = "v1.1.3";
+static const char* version = "v1.1.5";
 
 static struct option options[] = {
     {"match", required_argument, 0, 'm'},
@@ -15,6 +15,7 @@ static struct option options[] = {
     {"gap", required_argument, 0, 'g'},
     {"algorithm", required_argument, 0, 'l'},
     {"result", required_argument, 0, 'r'},
+    {"dot", required_argument, 0, 'd'},
     {"version", no_argument, 0, 'v'},
     {"help", no_argument, 0, 'h'},
     {0, 0, 0, 0}
@@ -31,8 +32,10 @@ int main(int argc, char** argv) {
     uint8_t algorithm = 0;
     uint8_t result = 0;
 
+    std::string dot_path = "";
+
     char opt;
-    while ((opt = getopt_long(argc, argv, "m:x:g:l:r:h", options, nullptr)) != -1) {
+    while ((opt = getopt_long(argc, argv, "m:x:g:l:r:d:h", options, nullptr)) != -1) {
         switch (opt) {
             case 'm':
                 match = atoi(optarg);
@@ -49,6 +52,9 @@ int main(int argc, char** argv) {
             case 'r':
                 result = atoi(optarg);
                 break;
+            case 'd':
+                dot_path = optarg;
+                break;
             case 'v':
                 printf("%s\n", version);
                 exit(0);
@@ -66,22 +72,30 @@ int main(int argc, char** argv) {
         exit(1);
     }
 
-    std::string input_path = argv[optind];
-    auto extension = input_path.substr(std::min(input_path.rfind('.'),
-        input_path.size()));
+    std::string sequences_path = argv[optind];
+
+    auto is_suffix = [](const std::string& src, const std::string& suffix) -> bool {
+        if (src.size() < suffix.size()) {
+            return false;
+        }
+        return src.compare(src.size() - suffix.size(), suffix.size(), suffix) == 0;
+    };
 
     std::unique_ptr<bioparser::Parser<spoa::Sequence>> sparser = nullptr;
 
-    if (extension == ".fasta" || extension == ".fa") {
+    if (is_suffix(sequences_path, ".fasta") || is_suffix(sequences_path, ".fa") ||
+        is_suffix(sequences_path, ".fasta.gz") || is_suffix(sequences_path, ".fa.gz")) {
         sparser = bioparser::createParser<bioparser::FastaParser, spoa::Sequence>(
-            input_path);
-    } else if (extension == ".fastq" || extension == ".fq") {
+            sequences_path);
+    } else if (is_suffix(sequences_path, ".fastq") || is_suffix(sequences_path, ".fq") ||
+        is_suffix(sequences_path, ".fastq.gz") || is_suffix(sequences_path, ".fq.gz")) {
         sparser = bioparser::createParser<bioparser::FastqParser, spoa::Sequence>(
-            input_path);
+            sequences_path);
     } else {
         fprintf(stderr, "[spoa::] error: "
             "file %s has unsupported format extension (valid extensions: "
-            ".fasta, .fa, .fastq, .fq)!\n", input_path.c_str());
+            ".fasta, .fasta.gz, .fa, .fa.gz, .fastq, .fastq.gz, .fq, .fq.gz)!\n",
+            sequences_path.c_str());
         exit(1);
     }
 
@@ -120,6 +134,8 @@ int main(int argc, char** argv) {
         }
     }
 
+    graph->print_dot(dot_path);
+
     return 0;
 }
 
@@ -128,7 +144,8 @@ void help() {
         "usage: spoa [options ...] <sequences>\n"
         "\n"
         "    <sequences>\n"
-        "        input file in FASTA/FASTQ format containing sequences\n"
+        "        input file in FASTA/FASTQ format (can be compressed with gzip)\n"
+        "        containing sequences\n"
         "\n"
         "    options:\n"
         "        -m, --match <int>\n"
@@ -152,6 +169,8 @@ void help() {
         "                0 - consensus\n"
         "                1 - multiple sequence alignment\n"
         "                2 - 0 & 1\n"
+        "        -d, --dot <file>\n"
+        "            output file for the final POA graph in DOT format\n"
         "        --version\n"
         "            prints the version number\n"
         "        -h, --help\n"



View it on GitLab: https://salsa.debian.org/med-team/spoa/commit/fe7cc8259359e98cc23153eb653f66327673e360

-- 
View it on GitLab: https://salsa.debian.org/med-team/spoa/commit/fe7cc8259359e98cc23153eb653f66327673e360
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190128/52a8c97f/attachment-0001.html>


More information about the debian-med-commit mailing list