[med-svn] [Git][med-team/diamond-aligner][upstream] New upstream version 0.9.24+dfsg
Andreas Tille
gitlab at salsa.debian.org
Sat Dec 29 20:21:25 GMT 2018
Andreas Tille pushed to branch upstream at Debian Med / diamond-aligner
Commits:
0238729e by Andreas Tille at 2018-12-29T20:10:17Z
New upstream version 0.9.24+dfsg
- - - - -
29 changed files:
- CMakeLists.txt
- README.md
- build_simple.sh
- src/ChangeLog
- src/align/align.cpp
- src/basic/basic.cpp
- src/basic/config.cpp
- src/basic/config.h
- src/basic/const.h
- src/basic/sequence.h
- src/data/queries.cpp
- src/data/queries.h
- src/data/reference.cpp
- src/data/reference.h
- src/dp/score_vector.h
- src/output/blast_tab_format.cpp
- src/output/output_format.h
- src/output/target_culling.h
- src/run/main.cpp
- src/run/tools.cpp
- src/util/command_line_parser.cpp
- src/util/io/file_source.cpp
- src/util/io/input_file.cpp
- src/util/log_stream.h
- + src/util/sequence/sequence.cpp
- + src/util/sequence/sequence.h
- src/util/system/system.cpp
- src/util/util.cpp
- src/util/util.h
Changes:
=====================================
CMakeLists.txt
=====================================
@@ -3,6 +3,18 @@ project (DIAMOND)
option(BUILD_STATIC "BUILD_STATIC" OFF)
option(EXTRA "EXTRA" OFF)
+option(STATIC_LIBGCC "STATIC_LIBGCC" OFF)
+option(STATIC_LIBSTDC++ "STATIC_LIBSTDC++" OFF)
+option(SSSE3 "SSSE3" OFF)
+option(POPCNT "POPCNT" OFF)
+
+IF(STATIC_LIBSTDC++)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libstdc++")
+endif()
+
+IF(STATIC_LIBGCC)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libgcc")
+endif()
if(BUILD_STATIC)
set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
@@ -12,7 +24,17 @@ endif()
set(CMAKE_CXX_STANDARD 11)
-if(CMAKE_BUILD_MARCH)
+if (${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC)
+else()
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=gnu++11")
+endif()
+
+if(SSSE3)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3")
+ if(POPCNT)
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mpopcnt")
+ endif()
+elseif(CMAKE_BUILD_MARCH)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${CMAKE_BUILD_MARCH}")
else()
include(CheckCXXCompilerFlag)
@@ -123,6 +145,7 @@ add_executable(diamond src/run/main.cpp
src/run/cluster.cpp
src/util/algo/greedy_vortex_cover.cpp
src/util/algo/greedy_vortex_cover_weighted.cpp
+ src/util/sequence/sequence.cpp
)
if(EXTRA)
=====================================
README.md
=====================================
@@ -32,7 +32,7 @@ quick example for setting up and using the program on Linux.
Installing the software on your system may be done by downloading it in
binary format for immediate use:
- wget http://github.com/bbuchfink/diamond/releases/download/v0.9.23/diamond-linux64.tar.gz
+ wget http://github.com/bbuchfink/diamond/releases/download/v0.9.24/diamond-linux64.tar.gz
tar xzf diamond-linux64.tar.gz
The extracted `diamond` binary file should be moved to a directory
=====================================
build_simple.sh
=====================================
@@ -80,4 +80,5 @@ g++ -DNDEBUG -O3 -Wno-deprecated-declarations $1 $2 $3 \
src/run/cluster.cpp \
src/util/algo/greedy_vortex_cover.cpp \
src/util/algo/greedy_vortex_cover_weighted.cpp \
+ src/util/sequence/sequence.cpp \
-lz -lpthread -o diamond
=====================================
src/ChangeLog
=====================================
@@ -1,3 +1,16 @@
+[0.9.24]
+- Fixed a compiler error on macOS.
+- Added --header option to output header for tabular output format.
+- The quality string output in tabular format (qqual field) is clipped to the aligned part of the query.
+- Print '*' as quality string if quality values are not available in tabular output format.
+- Added field 'full_qqual' to print unclipped query quality values to the tabular format.
+- Added field 'full_qseq' to print unclipped query sequence to the tabular format.
+- Added support for using the hyphen character '-' to denote the standard input for input file parameters.
+- Status messages are written to stderr.
+- Fixed a bug that could incorrectly report queries as unaligned in the output of the --un option.
+- Added option '--al' to write aligned queries to file.
+- Added options '--alfmt' and '--unfmt' to set the format of the aligned/unaligned query file (supported values: fasta, fastq).
+
[0.9.23]
- Fixed an issue that could cause too high memory usage.
- Added output field 'qqual' to print query FASTQ quality values to the tabular format.
=====================================
src/align/align.cpp
=====================================
@@ -105,8 +105,11 @@ void align_worker(size_t thread_id, const Parameters *params, const Metadata *me
if (*output_format != Output_format::null) {
buf = new TextBuffer;
const bool aligned = mapper->generate_output(*buf, stat, *metadata);
- if (aligned && (!config.unaligned.empty() || !config.aligned_file.empty()))
+ if (aligned && (!config.unaligned.empty() || !config.aligned_file.empty())) {
+ query_aligned_mtx.lock();
query_aligned[hits.query] = true;
+ query_aligned_mtx.unlock();
+ }
}
delete mapper;
OutputSink::get().push(hits.query, buf);
=====================================
src/basic/basic.cpp
=====================================
@@ -24,7 +24,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "sequence.h"
#include "masking.h"
-const char* Const::version_string = "0.9.23";
+const char* Const::version_string = "0.9.24";
const char* Const::program_name = "diamond";
const char* Const::id_delimiters = " \a\b\f\n\r\t\v\1";
=====================================
src/basic/config.cpp
=====================================
@@ -70,7 +70,8 @@ Config::Config(int argc, const char **argv)
.add_command("seed-stat", "")
.add_command("smith-waterman", "")
.add_command("protein-snps", "")
- .add_command("cluster", "");
+ .add_command("cluster", "")
+ .add_command("translate", "");
Options_group general("General options");
general.add()
@@ -94,8 +95,9 @@ Config::Config(int argc, const char **argv)
\tsstart means Start of alignment in subject\n\
\tsend means End of alignment in subject\n\
\tqseq means Aligned part of query sequence\n\
+\tfull_qseq means Query sequence\n\
\tsseq means Aligned part of subject sequence\n\
-\tfull_sseq means Full subject sequence\n\
+\tfull_sseq means Subject sequence\n\
\tevalue means Expect value\n\
\tbitscore means Bit score\n\
\tscore means Raw score\n\
@@ -118,7 +120,8 @@ Config::Config(int argc, const char **argv)
\n\tDefault: qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore", output_format)
("verbose", 'v', "verbose console output", verbose)
("log", 0, "enable debug log", debug_log)
-("quiet", 0, "disable console output", quiet);
+("quiet", 0, "disable console output", quiet)
+("header", 0, "Write header lines to blast tabular format.", output_header);
Options_group makedb("Makedb options");
makedb.add()
@@ -130,6 +133,8 @@ Config::Config(int argc, const char **argv)
("strand", 0, "query strands to search (both/minus/plus)", query_strands, string("both"))
("un", 0, "file for unaligned queries", unaligned)
("al", 0, "file or aligned queries", aligned_file)
+ ("unfmt", 0, "format of unaligned query file (fasta/fastq)", unfmt, string("fasta"))
+ ("alfmt", 0, "format of aligned query file (fasta/fastq)", alfmt, string("fasta"))
("unal", 0, "report unaligned queries (0=no, 1=yes)", report_unaligned, -1)
("max-target-seqs", 'k', "maximum number of target sequences to report alignments for", max_alignments, uint64_t(25))
("top", 0, "report alignments within this percentage range of top alignment score (overrides --max-target-seqs)", toppercent, 100.0)
@@ -342,7 +347,7 @@ Config::Config(int argc, const char **argv)
;
}
- invocation = join(' ', vector<string>(&argv[0], &argv[argc]));
+ invocation = join(" ", vector<string>(&argv[0], &argv[argc]));
log_stream << invocation << endl;
if (!no_auto_append) {
@@ -423,6 +428,11 @@ Config::Config(int argc, const char **argv)
if (query_strands != "both" && query_strands != "minus" && query_strands != "plus")
throw std::runtime_error("Invalid value for parameter --strand");
+ if (unfmt == "fastq" || alfmt == "fastq")
+ store_query_quality = true;
+ if (!aligned_file.empty())
+ log_stream << "Aligned file format: " << alfmt << endl;
+
/*log_stream << "sizeof(hit)=" << sizeof(hit) << " sizeof(packed_uint40_t)=" << sizeof(packed_uint40_t)
<< " sizeof(sorted_list::entry)=" << sizeof(sorted_list::entry) << endl;*/
=====================================
src/basic/config.h
=====================================
@@ -168,11 +168,14 @@ struct Config
unsigned swipe_chunk_size;
unsigned query_parallel_limit;
bool long_reads;
+ bool output_header;
+ string alfmt;
+ string unfmt;
enum {
makedb = 0, blastp = 1, blastx = 2, view = 3, help = 4, version = 5, getseq = 6, benchmark = 7, random_seqs = 8, compare = 9, sort = 10, roc = 11, db_stat = 12, model_sim = 13,
match_file_stat = 14, model_seqs = 15, opt = 16, mask = 17, fastq2fasta = 18, dbinfo = 19, test_extra = 20, test_io = 21, db_annot_stats = 22, read_sim = 23, info = 24, seed_stat = 25,
- smith_waterman = 26, protein_snps = 27, cluster = 28
+ smith_waterman = 26, protein_snps = 27, cluster = 28, translate = 29
};
unsigned command;
=====================================
src/basic/const.h
=====================================
@@ -23,7 +23,7 @@ struct Const
{
enum {
- build_version = 124,
+ build_version = 125,
seedp_bits = 10,
seedp = 1<<seedp_bits,
max_seed_weight = 32,
=====================================
src/basic/sequence.h
=====================================
@@ -86,6 +86,9 @@ struct sequence
}
bool empty() const
{ return len_ == 0; }
+ bool present() const {
+ return len_ > 0;
+ }
const char* c_str() const
{ return reinterpret_cast<const char*>(data_); }
size_t print(char *ptr, unsigned begin, unsigned len) const
=====================================
src/data/queries.cpp
=====================================
@@ -17,6 +17,10 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
****/
#include "queries.h"
+#include "../util/sequence/sequence.h"
+#include "../basic/config.h"
+
+using namespace std;
unsigned current_query_chunk;
Sequence_set* query_source_seqs::data_ = 0;
@@ -24,6 +28,7 @@ Sequence_set* query_seqs::data_ = 0;
String_set<0>* query_ids::data_ = 0;
Partitioned_histogram query_hst;
vector<bool> query_aligned;
+tthread::mutex query_aligned_mtx;
Seed_set *query_seeds = 0;
Hashed_seed_set *query_seeds_hashed = 0;
String_set<0> *query_qual = nullptr;
@@ -35,11 +40,12 @@ void write_unaligned(OutputFile *file)
TextBuffer buf;
for (size_t i = 0; i < n; ++i) {
if (!query_aligned[i]) {
- buf << '>' << query_ids::get()[i].c_str() << '\n';
- (align_mode.query_translated ? query_source_seqs::get()[i] : query_seqs::get()[i]).print(buf, input_value_traits);
- buf << '\n';
- file->write(buf.get_begin(), buf.size());
- buf.clear();
+ Util::Sequence::format(align_mode.query_translated ? query_source_seqs::get()[i] : query_seqs::get()[i],
+ query_ids::get()[i].c_str(),
+ query_qual ? (*query_qual)[i].c_str() : nullptr,
+ *file,
+ config.unfmt,
+ input_value_traits);
}
}
}
@@ -50,11 +56,12 @@ void write_aligned(OutputFile *file)
TextBuffer buf;
for (size_t i = 0; i < n; ++i) {
if (query_aligned[i]) {
- buf << '>' << query_ids::get()[i].c_str() << '\n';
- (align_mode.query_translated ? query_source_seqs::get()[i] : query_seqs::get()[i]).print(buf, input_value_traits);
- buf << '\n';
- file->write(buf.get_begin(), buf.size());
- buf.clear();
+ Util::Sequence::format(align_mode.query_translated ? query_source_seqs::get()[i] : query_seqs::get()[i],
+ query_ids::get()[i].c_str(),
+ query_qual ? (*query_qual)[i].c_str() : nullptr,
+ *file,
+ config.alfmt,
+ input_value_traits);
}
}
}
\ No newline at end of file
=====================================
src/data/queries.h
=====================================
@@ -25,6 +25,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include "../basic/statistics.h"
#include "sequence_set.h"
#include "seed_set.h"
+#include "../util/tinythread.h"
extern Partitioned_histogram query_hst;
extern unsigned current_query_chunk;
@@ -50,6 +51,7 @@ struct query_ids
static String_set<0> *data_;
};
+extern tthread::mutex query_aligned_mtx;
extern vector<bool> query_aligned;
extern String_set<0> *query_qual;
=====================================
src/data/reference.cpp
=====================================
@@ -401,12 +401,19 @@ void db_info()
db_file.close();
}
-DatabaseFile* DatabaseFile::auto_create_from_fasta() {
- InputFile db_file(config.database);
+bool DatabaseFile::is_diamond_db(const string &file_name) {
+ if (file_name == "-")
+ return false;
+ InputFile db_file(file_name);
uint64_t magic_number;
- if (db_file.read(&magic_number, 1) != 1 || magic_number != ReferenceHeader::MAGIC_NUMBER) {
+ bool r = db_file.read(&magic_number, 1) == 1 && magic_number == ReferenceHeader::MAGIC_NUMBER;
+ db_file.close();
+ return r;
+}
+
+DatabaseFile* DatabaseFile::auto_create_from_fasta() {
+ if (!is_diamond_db(config.database)) {
message_stream << "Database file is not a DIAMOND database, treating as FASTA." << endl;
- db_file.close();
config.input_ref_file = config.database;
TempFile *db;
make_db(&db);
@@ -414,8 +421,6 @@ DatabaseFile* DatabaseFile::auto_create_from_fasta() {
delete db;
return r;
}
- else {
- db_file.close();
+ else
return new DatabaseFile(config.database);
- }
}
\ No newline at end of file
=====================================
src/data/reference.h
=====================================
@@ -22,6 +22,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include <vector>
#include <string>
#include <string.h>
+#include <stdint.h>
#include "../util/io/serializer.h"
#include "../util/io/input_file.h"
#include "../data/seed_histogram.h"
@@ -43,8 +44,8 @@ struct ReferenceHeader
uint64_t magic_number;
uint32_t build, db_version;
uint64_t sequences, letters, pos_array_offset;
- static const uint64_t MAGIC_NUMBER = 0x24af8a415ee186dllu;
enum { current_db_version = 2 };
+ static constexpr uint64_t MAGIC_NUMBER = 0x24af8a415ee186dllu;
};
struct ReferenceHeader2
@@ -76,6 +77,7 @@ struct DatabaseFile : public InputFile
DatabaseFile(TempFile &tmp_file);
static void read_header(InputFile &stream, ReferenceHeader &header);
static DatabaseFile* auto_create_from_fasta();
+ static bool is_diamond_db(const string &file_name);
void rewind();
bool load_seqs(vector<unsigned> &block_to_database_id, size_t max_letters, bool masked, Sequence_set **dst_seq, String_set<0> **dst_id, bool load_ids = true, const vector<bool> *filter = NULL);
void get_seq();
=====================================
src/dp/score_vector.h
=====================================
@@ -325,7 +325,10 @@ struct score_vector<int16_t>
template<typename _t>
struct ScoreTraits
-{};
+{
+ typedef void Score;
+ enum { CHANNELS = 0 };
+};
template<>
struct ScoreTraits<int32_t>
=====================================
src/output/blast_tab_format.cpp
=====================================
@@ -16,12 +16,16 @@ You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
****/
+#include <sstream>
#include <set>
+#include <numeric>
#include "../basic/match.h"
#include "output_format.h"
#include "../data/taxonomy.h"
#include "../data/queries.h"
+using namespace std;
+
const char* Blast_tab_format::field_str[] = {
"qseqid", // 0 means Query Seq - id
"qgi", // 1 means Query GI
@@ -75,7 +79,67 @@ const char* Blast_tab_format::field_str[] = {
"qqual", // 49
"qnum", // 50
"snum", // 51
- "scovhsp" // 52
+ "scovhsp", // 52
+ "full_qqual", // 53
+ "full_qseq" // 54
+};
+
+const char* Blast_tab_format::field_desc[] = {
+ "Query ID", // 0 means Query Seq - id
+ "qgi", // 1 means Query GI
+ "qacc", // 2 means Query accesion
+ "qaccver", // 3 means Query accesion.version
+ "Query length", // 4 means Query sequence length
+ "Subject ID", // 5 means Subject Seq - id
+ "Subject IDs", // 6 means All subject Seq - id(s), separated by a ';'
+ "sgi", // 7 means Subject GI
+ "sallgi", // 8 means All subject GIs
+ "sacc", // 9 means Subject accession
+ "saccver", // 10 means Subject accession.version
+ "sallacc", // 11 means All subject accessions
+ "Subject length", // 12 means Subject sequence length
+ "Start of alignment in query", // 13 means Start of alignment in query
+ "End of alignment in query", // 14 means End of alignment in query
+ "Start of alignment in subject", // 15 means Start of alignment in subject
+ "End of alignment in subject", // 16 means End of alignment in subject
+ "Aligned part of query sequence", // 17 means Aligned part of query sequence
+ "Aligned part of subject sequence", // 18 means Aligned part of subject sequence
+ "Expected value", // 19 means Expect value
+ "Bit score", // 20 means Bit score
+ "Raw score", // 21 means Raw score
+ "Alignment length", // 22 means Alignment length
+ "Percentage of identical matches", // 23 means Percentage of identical matches
+ "Number of identical matches", // 24 means Number of identical matches
+ "Number of mismatches", // 25 means Number of mismatches
+ "Number of positive-scoring matches", // 26 means Number of positive - scoring matches
+ "Number of gap openings", // 27 means Number of gap openings
+ "Total number of gaps", // 28 means Total number of gaps
+ "Percentage of positive-scoring matches", // 29 means Percentage of positive - scoring matches
+ "frames", // 30 means Query and subject frames separated by a '/'
+ "Query frame", // 31 means Query frame
+ "sframe", // 32 means Subject frame
+ "Blast traceback operations", // 33 means Blast traceback operations(BTOP)
+ "Subject Taxonomy IDs", // 34 means unique Subject Taxonomy ID(s), separated by a ';' (in numerical order)
+ "sscinames", // 35 means unique Subject Scientific Name(s), separated by a ';'
+ "scomnames", // 36 means unique Subject Common Name(s), separated by a ';'
+ "sblastnames", // 37 means unique Subject Blast Name(s), separated by a ';' (in alphabetical order)
+ "sskingdoms", // 38 means unique Subject Super Kingdom(s), separated by a ';' (in alphabetical order)
+ "Subject title", // 39 means Subject Title
+ "Subject titles", // 40 means All Subject Title(s), separated by a '<>'
+ "sstrand", // 41 means Subject Strand
+ "qcovs", // 42 means Query Coverage Per Subject
+ "Query coverage per HSP", // 43 means Query Coverage Per HSP
+ "qcovus", // 44 means Query Coverage Per Unique Subject(blastn only)
+ "Query title", // 45 means Query title
+ "swdiff", // 46
+ "time", // 47
+ "Subject sequence", // 48
+ "Aligned part of query quality values", // 49
+ "qnum", // 50
+ "snum", // 51
+ "scovhsp", // 52
+ "Query quality values", // 53
+ "Query sequence" // 54
};
Blast_tab_format::Blast_tab_format() :
@@ -98,7 +162,7 @@ Blast_tab_format::Blast_tab_format() :
config.salltitles = true;
if (j == 48)
config.use_lazy_dict = true;
- if (j == 49)
+ if (j == 49 || j == 53)
config.store_query_quality = true;
}
}
@@ -250,7 +314,7 @@ void Blast_tab_format::print_match(const Hsp_context& r, const Metadata &metadat
out << r.subject_seq;
break;
case 49:
- out << (*query_qual)[r.query_id].c_str();
+ out << (query_qual && (*query_qual)[r.query_id].present() ? (*query_qual)[r.query_id].subseq(r.query_source_range().begin_, r.query_source_range().end_).c_str() : "*");
break;
case 50:
out << query_block_to_database_id[r.query_id];
@@ -261,6 +325,12 @@ void Blast_tab_format::print_match(const Hsp_context& r, const Metadata &metadat
case 52:
out << (double)r.subject_range().length() * 100.0 / r.subject_len;
break;
+ case 53:
+ out << (query_qual && (*query_qual)[r.query_id].present() ? (*query_qual)[r.query_id].c_str() : "*");
+ break;
+ case 54:
+ r.query.source().print(out, input_value_traits);
+ break;
default:
throw std::runtime_error(string("Invalid output field: ") + field_str[*i]);
}
@@ -289,6 +359,7 @@ void Blast_tab_format::print_query_intro(size_t query_num, const char *query_nam
case 39:
case 40:
case 48:
+ case 49:
out << '*';
break;
case 12:
@@ -317,8 +388,11 @@ void Blast_tab_format::print_query_intro(size_t query_num, const char *query_nam
case 45:
out << query_name;
break;
- case 49:
- out << (*query_qual)[query_num].c_str();
+ case 53:
+ out << (query_qual && (*query_qual)[query_num].present() ? (*query_qual)[query_num].c_str() : "*");
+ break;
+ case 54:
+ (align_mode.query_translated ? query_source_seqs::get()[query_num] : query_seqs::get()[query_num]).print(out, input_value_traits);
break;
default:
throw std::runtime_error(string("Invalid output field: ") + field_str[*i]);
@@ -328,4 +402,15 @@ void Blast_tab_format::print_query_intro(size_t query_num, const char *query_nam
}
out << '\n';
}
+}
+
+void Blast_tab_format::print_header(Consumer &f, int mode, const char *matrix, int gap_open, int gap_extend, double evalue, const char *first_query_name, unsigned first_query_len) const {
+ if (!config.output_header)
+ return;
+ stringstream ss;
+ ss << "# DIAMOND v" << Const::version_string << ". http://github.com/bbuchfink/diamond" << endl;
+ ss << "# Invocation: " << config.invocation << endl;
+ ss << "# Fields: " << join(", ", apply(fields, [](unsigned i) -> string { return string(field_desc[i]); })) << endl;
+ const string s(ss.str());
+ f.consume(s.data(), s.length());
}
\ No newline at end of file
=====================================
src/output/output_format.h
=====================================
@@ -87,13 +87,14 @@ struct DAA_format : public Output_format
struct Blast_tab_format : public Output_format
{
- static const char* field_str[];
+ static const char* field_str[], *field_desc[];
Blast_tab_format();
- virtual void print_query_intro(size_t query_num, const char *query_name, unsigned query_len, TextBuffer &out, bool unaligned) const;
- virtual void print_match(const Hsp_context& r, const Metadata &metadata, TextBuffer &out);
+ virtual void print_header(Consumer &f, int mode, const char *matrix, int gap_open, int gap_extend, double evalue, const char *first_query_name, unsigned first_query_len) const override;
+ virtual void print_query_intro(size_t query_num, const char *query_name, unsigned query_len, TextBuffer &out, bool unaligned) const override;
+ virtual void print_match(const Hsp_context& r, const Metadata &metadata, TextBuffer &out) override;
virtual ~Blast_tab_format()
{ }
- virtual Output_format* clone() const
+ virtual Output_format* clone() const override
{
return new Blast_tab_format(*this);
}
=====================================
src/output/target_culling.h
=====================================
@@ -30,6 +30,7 @@ struct TargetCulling
virtual int cull(const vector<IntermediateRecord> &target_hsp) const = 0;
virtual void add(const Target &t) = 0;
virtual void add(const vector<IntermediateRecord> &target_hsp) = 0;
+ virtual ~TargetCulling() = default;
enum { FINISHED = 0, NEXT = 1, INCLUDE = 2};
static TargetCulling* get();
};
@@ -70,6 +71,7 @@ struct GlobalCulling : public TargetCulling
top_score_ = target_hsp[0].score;
++n_;
}
+ virtual ~GlobalCulling() = default;
private:
size_t n_;
int top_score_;
@@ -119,6 +121,7 @@ struct RangeCulling : public TargetCulling
for (std::vector<IntermediateRecord>::const_iterator i = target_hsp.begin(); i != target_hsp.end(); ++i)
p_.insert(i->absolute_query_range(), i->score);
}
+ virtual ~RangeCulling() = default;
private:
IntervalPartition p_;
};
=====================================
src/run/main.cpp
=====================================
@@ -42,11 +42,11 @@ void info();
void seed_stat();
void pairwise();
void protein_snps();
+void translate();
int main(int ac, const char* av[])
{
try {
-
check_simd();
config = Config(ac, av);
@@ -126,6 +126,9 @@ int main(int ac, const char* av[])
case Config::cluster:
Workflow::Cluster::run();
break;
+ case Config::translate:
+ translate();
+ break;
#ifdef EXTRA
case Config::test_extra:
test_main();
=====================================
src/run/tools.cpp
=====================================
@@ -360,3 +360,16 @@ void protein_snps()
}
call_protein_snps(current_gene, ref_genes[current_gene], snps, dataset);
}
+
+void translate() {
+ input_value_traits = nucleotide_traits;
+ TextInputFile in(config.query_file);
+ vector<char> id, seq;
+ vector<char> proteins[6];
+ while (FASTA_format().get_seq(id, seq, in)) {
+ Translator::translate(seq, proteins);
+ cout << ">" << string(id.data(), id.size()) << endl;
+ cout << sequence(proteins[0]) << endl;
+ }
+ in.close();
+}
\ No newline at end of file
=====================================
src/util/command_line_parser.cpp
=====================================
@@ -18,6 +18,7 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
#include <iostream>
#include <algorithm>
+#include <string.h>
#include "command_line_parser.h"
using namespace std;
@@ -87,7 +88,7 @@ void Command_line_parser::store(int count, const char ** str, unsigned &command)
vector<string> v;
for (int i = 2; i < count; ++i) {
- if (str[i][0] == '-') {
+ if (str[i][0] == '-' && strlen(str[i]) > 1) {
store_option(v);
v.clear();
}
=====================================
src/util/io/file_source.cpp
=====================================
@@ -38,9 +38,9 @@ FileSource::FileSource(const string &file_name) :
file_name_(file_name)
{
#ifdef _MSC_VER
- f_ = file_name.empty() ? stdin : fopen(file_name.c_str(), "rb");
+ f_ = (file_name.empty() || file_name == "-") ? stdin : fopen(file_name.c_str(), "rb");
#else
- int fd_ = file_name.empty() ? 0 : POSIX_OPEN2(file_name.c_str(), O_RDONLY);
+ int fd_ = (file_name.empty() || file_name == "-") ? 0 : POSIX_OPEN2(file_name.c_str(), O_RDONLY);
if (fd_ < 0) {
perror(0);
throw std::runtime_error(string("Error opening file ") + file_name);
=====================================
src/util/io/input_file.cpp
=====================================
@@ -41,7 +41,7 @@ InputFile::InputFile(const string &file_name, int flags) :
Deserializer(new InputStreamBuffer(new FileSource(file_name))),
file_name(file_name)
{
- if (file_name.empty())
+ if (file_name.empty() || file_name == "-")
return;
#ifndef _MSC_VER
struct stat buf;
=====================================
src/util/log_stream.h
=====================================
@@ -37,7 +37,7 @@ struct Message_stream
Message_stream& operator<<(const _t& x)
{
if(to_cout_)
- std::cout << x;
+ std::cerr << x;
if (to_file_) {
std::ofstream f("diamond.log", std::ios_base::out | std::ios_base::app);
f << x;
@@ -49,7 +49,7 @@ struct Message_stream
Message_stream& operator<<(std::ostream & (*_Pfn)(std::ostream&))
{
if(to_cout_)
- ((*_Pfn)(std::cout));
+ ((*_Pfn)(std::cerr));
if (to_file_) {
mtx.lock();
std::ofstream f("diamond.log", std::ios_base::out | std::ios_base::app);
=====================================
src/util/sequence/sequence.cpp
=====================================
@@ -0,0 +1,50 @@
+/****
+DIAMOND protein aligner
+Copyright (C) 2013-2018 Benjamin Buchfink <buchfink at gmail.com>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+****/
+
+#include <algorithm>
+#include <stdexcept>
+#include "sequence.h"
+
+using namespace std;
+
+namespace Util { namespace Sequence {
+
+void format(sequence seq, const char *id, const char *qual, OutputFile &out, const std::string &format, const Value_traits &value_traits) {
+ static TextBuffer buf;
+ constexpr size_t WRAP = 160;
+ const size_t l = seq.length();
+ if(format == "fasta") {
+ buf << '>' << id << '\n';
+ for (size_t i = 0; i < seq.length(); i += WRAP) {
+ seq.print(buf, i, min(i + WRAP, seq.length()), value_traits);
+ buf << '\n';
+ }
+ }
+ else if (format == "fastq") {
+ buf << '@' << id << '\n';
+ seq.print(buf, value_traits);
+ buf << "\n+\n";
+ buf << qual << '\n';
+ }
+ else
+ throw runtime_error("Invalid sequence file format");
+ out.write(buf.get_begin(), buf.size());
+ buf.clear();
+}
+
+}}
\ No newline at end of file
=====================================
src/util/sequence/sequence.h
=====================================
@@ -0,0 +1,32 @@
+/****
+DIAMOND protein aligner
+Copyright (C) 2013-2018 Benjamin Buchfink <buchfink at gmail.com>
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+****/
+
+#ifndef UTIL_SEQUENCE_SEQUENCE_H_
+#define UTIL_SEQUENCE_SEQUENCE_H_
+
+#include <string>
+#include "../../basic/sequence.h"
+#include "../io/output_file.h"
+
+namespace Util { namespace Sequence {
+
+void format(sequence seq, const char *id, const char *qual, OutputFile &out, const std::string &format, const Value_traits &value_traits);
+
+}}
+
+#endif
\ No newline at end of file
=====================================
src/util/system/system.cpp
=====================================
@@ -10,18 +10,16 @@ using namespace std;
#else
#include <unistd.h>
#include <sys/stat.h>
-#include <linux/limits.h>
#endif
string executable_path() {
-#ifdef _MSC_VER
char buf[4096];
+#ifdef _MSC_VER
if (GetModuleFileNameA(NULL, buf, sizeof(buf)) == 0)
throw runtime_error("Error executing GetModuleFileNameA.");
return string(buf);
#else
- char buf[PATH_MAX + 1];
- if (readlink("/proc/self/exe", buf, PATH_MAX) < 0)
+ if (readlink("/proc/self/exe", buf, sizeof(buf)) < 0)
throw runtime_error("Error executing readlink on /proc/self/exe.");
return string(buf);
#endif
=====================================
src/util/util.cpp
=====================================
@@ -70,7 +70,7 @@ const EscapeSequence EscapeSequences::xml_data[5] = {
const EscapeSequences EscapeSequences::XML(EscapeSequences::xml_data, 5);
-string join(char c, const vector<string> &v) {
+string join(const char *c, const vector<string> &v) {
string s;
if (v.empty())
return s;
=====================================
src/util/util.h
=====================================
@@ -526,6 +526,15 @@ inline set<unsigned> parse_csv(const string &s)
return r;
}
-std::string join(char c, const std::vector<std::string> &v);
+std::string join(const char *c, const std::vector<std::string> &v);
+
+template<typename _t, typename _f>
+auto apply(const std::vector<_t> &v, _f f) -> std::vector<typename std::result_of<_f(_t)>::type> {
+ std::vector<typename std::result_of<_f(_t)>::type> r;
+ r.reserve(v.size());
+ for (const auto &i : v)
+ r.push_back(f(i));
+ return r;
+}
#endif /* UTIL_H_ */
View it on GitLab: https://salsa.debian.org/med-team/diamond-aligner/commit/0238729ea8a318b983987b3e4ad3b030db5d4d8a
--
View it on GitLab: https://salsa.debian.org/med-team/diamond-aligner/commit/0238729ea8a318b983987b3e4ad3b030db5d4d8a
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20181229/e60b3793/attachment-0001.html>
More information about the debian-med-commit
mailing list