[med-svn] [Git][med-team/spoa][upstream] New upstream version 1.1.5
Andreas Tille
gitlab at salsa.debian.org
Mon Jan 28 20:12:59 GMT 2019
Andreas Tille pushed to branch upstream at Debian Med / spoa
Commits:
fe7cc825 by Andreas Tille at 2019-01-28T19:04:57Z
New upstream version 1.1.5
- - - - -
5 changed files:
- CMakeLists.txt
- README.md
- include/spoa/graph.hpp
- src/graph.cpp
- src/main.cpp
Changes:
=====================================
CMakeLists.txt
=====================================
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.2)
-project(spoa LANGUAGES CXX VERSION 1.1.3)
+project(spoa LANGUAGES CXX VERSION 1.1.5)
include(GNUInstallDirs)
=====================================
README.md
=====================================
@@ -4,7 +4,7 @@
[![Build status for c++/clang++](https://travis-ci.org/rvaser/spoa.svg?branch=master)](https://travis-ci.org/rvaser/spoa)
[![Published in Genome Research](https://img.shields.io/badge/published%20in-Genome%20Research-blue.svg)](https://doi.org/10.1101/gr.214270.116)
-Spoa (SIMD POA) is a c++ implementation of the partial order alignment (POA) algorithm (as described in 10.1093/bioinformatics/18.3.452) which is used to generate consensus sequences (as described in 10.1093/bioinformatics/btg109). It supports three alignment modes: local (Smith-Waterman), global (Needleman-Wunsch) and semi-global alignment (overlap). It supports Intel SSE4.1+ and AVX2 (marginally faster due to high latency shifts) vectorization.
+Spoa (SIMD POA) is a c++ implementation of the partial order alignment (POA) algorithm (as described in 10.1093/bioinformatics/18.3.452) which is used to generate consensus sequences (as described in 10.1093/bioinformatics/btg109). It supports three alignment modes: local (Smith-Waterman), global (Needleman-Wunsch) and semi-global alignment (overlap). It supports Intel SSE4.1+ and AVX2 vectorization (marginally faster due to high latency shifts).
## Dependencies
@@ -39,6 +39,8 @@ Optionally, you can run `sudo make install` to install spoa library (and executa
***Note***: if you omitted `--recursive` from `git clone`, run `git submodule init` and `git submodule update` before proceeding with compilation.
+To build unit tests add `-Dspoa_build_tests=ON` while running `cmake`. After installation, an executable named `spoa_test` will be created in `build/bin`.
+
## Usage
Usage of spoa is as following:
@@ -46,7 +48,8 @@ Usage of spoa is as following:
spoa [options ...] <sequences>
<sequences>
- input file in FASTA/FASTQ format containing sequences
+ input file in FASTA/FASTQ format (can be compressed with gzip)
+ containing sequences
options:
-m, --match <int>
@@ -70,6 +73,8 @@ Usage of spoa is as following:
0 - consensus
1 - multiple sequence alignment
2 - 0 & 1
+ -d, --dot <file>
+ output file for the final POA graph in DOT format
--version
prints the version number
-h, --help
=====================================
include/spoa/graph.hpp
=====================================
@@ -78,7 +78,7 @@ public:
void update_alignment(Alignment& alignment,
const std::vector<int32_t>& subgraph_to_graph_mapping) const;
- void print_graphviz() const;
+ void print_dot(const std::string& path) const;
friend std::unique_ptr<Graph> createGraph();
private:
=====================================
src/graph.cpp
=====================================
@@ -8,6 +8,7 @@
#include <assert.h>
#include <algorithm>
#include <stack>
+#include <fstream>
#include "spoa/graph.hpp"
@@ -679,7 +680,13 @@ void Graph::update_alignment(Alignment& alignment,
}
}
-void Graph::print_graphviz() const {
+void Graph::print_dot(const std::string& path) const {
+
+ if (path.empty()) {
+ return;
+ }
+
+ std::ofstream out(path);
std::vector<int32_t> in_consensus(nodes_.size(), -1);
int32_t rank = 0;
@@ -687,30 +694,34 @@ void Graph::print_graphviz() const {
in_consensus[id] = rank++;
}
- printf("digraph %u {\n", num_sequences_);
- printf(" graph [rankdir=LR]\n");
+ out << "digraph " << num_sequences_ << " {" << std::endl;
+ out << " graph [rankdir=LR]" << std::endl;
for (uint32_t i = 0; i < nodes_.size(); ++i) {
- printf(" %u [label = \"%u - %c\"", i, i, decoder_[nodes_[i]->code_]);
+ out << " " << i << " [label = \"" << i << " - ";
+ out << static_cast<char>(decoder_[nodes_[i]->code_]) << "\"";
if (in_consensus[i] != -1) {
- printf(", style=filled, fillcolor=goldenrod1");
+ out << ", style=filled, fillcolor=goldenrod1";
}
- printf("]\n");
+ out << "]" << std::endl;
for (const auto& edge: nodes_[i]->out_edges_) {
- printf(" %u -> %u [label = \"%lu\"", i, edge->end_node_id_,
- edge->total_weight_);
+ out << " " << i << " -> " << edge->end_node_id_;
+ out << " [label = \"" << edge->total_weight_ << "\"";
if (in_consensus[i] + 1 == in_consensus[edge->end_node_id_]) {
- printf(", color=goldenrod1");
+ out << ", color=goldenrod1";
}
- printf("]\n");
+ out << "]" << std::endl;
}
for (const auto& aid: nodes_[i]->aligned_nodes_ids_) {
if (aid > i) {
- printf(" %u -> %u [style = dotted, arrowhead = none]\n", i, aid);
+ out << " " << i << " -> " << aid;
+ out << " [style = dotted, arrowhead = none]" << std::endl;
}
}
}
- printf("}\n");
+ out << "}" << std::endl;
+
+ out.close();
}
}
=====================================
src/main.cpp
=====================================
@@ -7,7 +7,7 @@
#include "spoa/spoa.hpp"
#include "bioparser/bioparser.hpp"
-static const char* version = "v1.1.3";
+static const char* version = "v1.1.5";
static struct option options[] = {
{"match", required_argument, 0, 'm'},
@@ -15,6 +15,7 @@ static struct option options[] = {
{"gap", required_argument, 0, 'g'},
{"algorithm", required_argument, 0, 'l'},
{"result", required_argument, 0, 'r'},
+ {"dot", required_argument, 0, 'd'},
{"version", no_argument, 0, 'v'},
{"help", no_argument, 0, 'h'},
{0, 0, 0, 0}
@@ -31,8 +32,10 @@ int main(int argc, char** argv) {
uint8_t algorithm = 0;
uint8_t result = 0;
+ std::string dot_path = "";
+
char opt;
- while ((opt = getopt_long(argc, argv, "m:x:g:l:r:h", options, nullptr)) != -1) {
+ while ((opt = getopt_long(argc, argv, "m:x:g:l:r:d:h", options, nullptr)) != -1) {
switch (opt) {
case 'm':
match = atoi(optarg);
@@ -49,6 +52,9 @@ int main(int argc, char** argv) {
case 'r':
result = atoi(optarg);
break;
+ case 'd':
+ dot_path = optarg;
+ break;
case 'v':
printf("%s\n", version);
exit(0);
@@ -66,22 +72,30 @@ int main(int argc, char** argv) {
exit(1);
}
- std::string input_path = argv[optind];
- auto extension = input_path.substr(std::min(input_path.rfind('.'),
- input_path.size()));
+ std::string sequences_path = argv[optind];
+
+ auto is_suffix = [](const std::string& src, const std::string& suffix) -> bool {
+ if (src.size() < suffix.size()) {
+ return false;
+ }
+ return src.compare(src.size() - suffix.size(), suffix.size(), suffix) == 0;
+ };
std::unique_ptr<bioparser::Parser<spoa::Sequence>> sparser = nullptr;
- if (extension == ".fasta" || extension == ".fa") {
+ if (is_suffix(sequences_path, ".fasta") || is_suffix(sequences_path, ".fa") ||
+ is_suffix(sequences_path, ".fasta.gz") || is_suffix(sequences_path, ".fa.gz")) {
sparser = bioparser::createParser<bioparser::FastaParser, spoa::Sequence>(
- input_path);
- } else if (extension == ".fastq" || extension == ".fq") {
+ sequences_path);
+ } else if (is_suffix(sequences_path, ".fastq") || is_suffix(sequences_path, ".fq") ||
+ is_suffix(sequences_path, ".fastq.gz") || is_suffix(sequences_path, ".fq.gz")) {
sparser = bioparser::createParser<bioparser::FastqParser, spoa::Sequence>(
- input_path);
+ sequences_path);
} else {
fprintf(stderr, "[spoa::] error: "
"file %s has unsupported format extension (valid extensions: "
- ".fasta, .fa, .fastq, .fq)!\n", input_path.c_str());
+ ".fasta, .fasta.gz, .fa, .fa.gz, .fastq, .fastq.gz, .fq, .fq.gz)!\n",
+ sequences_path.c_str());
exit(1);
}
@@ -120,6 +134,8 @@ int main(int argc, char** argv) {
}
}
+ graph->print_dot(dot_path);
+
return 0;
}
@@ -128,7 +144,8 @@ void help() {
"usage: spoa [options ...] <sequences>\n"
"\n"
" <sequences>\n"
- " input file in FASTA/FASTQ format containing sequences\n"
+ " input file in FASTA/FASTQ format (can be compressed with gzip)\n"
+ " containing sequences\n"
"\n"
" options:\n"
" -m, --match <int>\n"
@@ -152,6 +169,8 @@ void help() {
" 0 - consensus\n"
" 1 - multiple sequence alignment\n"
" 2 - 0 & 1\n"
+ " -d, --dot <file>\n"
+ " output file for the final POA graph in DOT format\n"
" --version\n"
" prints the version number\n"
" -h, --help\n"
View it on GitLab: https://salsa.debian.org/med-team/spoa/commit/fe7cc8259359e98cc23153eb653f66327673e360
--
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20190128/52a8c97f/attachment-0001.html>
More information about the debian-med-commit mailing list