[med-svn] [diamond-aligner] 02/07: New upstream version 0.8.34+dfsg

Andreas Tille tille at debian.org
Fri Jan 27 11:04:12 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository diamond-aligner.

commit 982daa694aac70c8f06cda996335b132ef67dc4d
Author: Andreas Tille <tille at debian.org>
Date:   Wed Jan 25 21:19:34 2017 +0100

    New upstream version 0.8.34+dfsg
---
 CMakeLists.txt                       |   1 +
 README.rst                           |   4 +-
 build_simple.sh                      |   1 +
 diamond_manual.pdf                   | Bin 92606 -> 0 bytes
 src/ChangeLog                        |  11 ++
 src/align/align.cpp                  |  14 +-
 src/align/query_mapper.cpp           |  16 +-
 src/basic/basic.cpp                  |   2 +-
 src/basic/config.cpp                 |  33 +++-
 src/basic/config.h                   |   9 +-
 src/basic/const.h                    |   2 +-
 src/basic/hssp.cpp                   |   2 +
 src/basic/reduction.h                |   1 +
 src/basic/shape.h                    |  55 ++++++-
 src/basic/statistics.h               |   5 +-
 src/basic/value.h                    |   7 +
 src/data/frequent_seeds.h            |   2 +-
 src/data/load_seqs.h                 |  16 +-
 src/data/reference.h                 |  10 +-
 src/data/string_set.h                |   7 +
 src/dp/dp.h                          |   1 +
 src/dp/dp_matrix.h                   |   2 +
 src/dp/floating_sw.cpp               |   6 +-
 src/dp/growing_buffer.h              |   1 +
 src/dp/padded_banded_sw.cpp          |   2 +
 src/{basic/const.h => extra/extra.h} |  33 +---
 src/extra/model_sim.cpp              |  11 +-
 src/extra/opt.cpp                    | 307 +++++++++++++++++++++++++++++++++++
 src/output/daa_file.h                |   1 +
 src/output/join_blocks.cpp           |  25 ++-
 src/output/output.h                  |  11 +-
 src/run/benchmark.cpp                |  62 +++++--
 src/run/double_indexed.cpp           |  11 +-
 src/run/main.cpp                     |   4 +
 src/search/align_range.h             |  66 ++++++++
 src/search/search.cpp                |   3 -
 src/search/setup.cpp                 |   7 +-
 src/search/sse_dist.h                | 133 ---------------
 src/search/stage2.cpp                |  43 ++++-
 src/search/trace_pt_buffer.h         |   7 +-
 src/util/async_buffer.h              |  68 ++++----
 src/util/binary_file.h               |   3 +-
 src/util/double_buffer.h             |   1 +
 src/util/log_stream.h                |  20 ++-
 src/util/system.h                    |  28 ++++
 src/util/tinythread.cpp              |   2 +
 src/util/util.cpp                    |   1 +
 src/util/util.h                      |  14 +-
 48 files changed, 772 insertions(+), 299 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c667855..5de19c1 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -78,6 +78,7 @@ add_executable(diamond src/run/main.cpp
   src/output/sam_format.cpp
   src/align/align.cpp
   src/search/setup.cpp
+  src/extra/opt.cpp
 )
 
 target_link_libraries(diamond ${ZLIB_LIBRARY} ${CMAKE_THREAD_LIBS_INIT})
diff --git a/README.rst b/README.rst
index 11565a1..1fc2019 100644
--- a/README.rst
+++ b/README.rst
@@ -8,7 +8,7 @@ Please read the `manual <https://github.com/bbuchfink/diamond/raw/master/diamond
 
 Installing the software on your system may be done by downloading it in binary format for immediate use::
 
-    wget http://github.com/bbuchfink/diamond/releases/download/v0.8.31/diamond-linux64.tar.gz
+    wget http://github.com/bbuchfink/diamond/releases/download/v0.8.34/diamond-linux64.tar.gz
     tar xzf diamond-linux64.tar.gz
 
 The extracted ``diamond`` binary file should be moved to a directory contained in your executable search path (PATH environment variable).
@@ -28,7 +28,7 @@ The output file here is specified with the ``–o`` option and named ``matches.m
 *Note*:
   - The program may use quite a lot of memory and also temporary disk space. Should the program fail due to running out of either one, you need to set a lower value for the block size parameter ``-b`` (see the `manual <https://github.com/bbuchfink/diamond/raw/master/diamond_manual.pdf>`_).
   - The default (fast) mode was mainly designed for short reads. For longer sequences, the sensitive modes (options ``--sensitive`` or ``--more-sensitive``) are recommended.
-  - The full speed of the program is only attained on large query datasets. It is currently not efficient in mapping a smaller number of query sequences.
+  - The runtime of the program is not linear in the size of the query file and it is much more efficient for large query files (> 1 million sequences) than for smaller ones.
   - The default e-value cutoff of DIAMOND is 0.001 while that of BLAST is 10, so by default the program will search a lot more stringently than BLAST and not report weak hits.  
 About
 =====
diff --git a/build_simple.sh b/build_simple.sh
index c20e281..4d93287 100755
--- a/build_simple.sh
+++ b/build_simple.sh
@@ -46,4 +46,5 @@ g++ -DNDEBUG -O3 -mssse3 -Wno-deprecated-declarations -std=gnu++98 $1 \
   src/output/sam_format.cpp \
   src/align/align.cpp \
   src/search/setup.cpp \
+  src/extra/opt.cpp \
 -lz -lpthread -o diamond
diff --git a/diamond_manual.pdf b/diamond_manual.pdf
deleted file mode 100644
index c1d0538..0000000
Binary files a/diamond_manual.pdf and /dev/null differ
diff --git a/src/ChangeLog b/src/ChangeLog
index fa5261c..8b304d0 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,14 @@
+[0.8.34]
+- fixed a compiler error
+
+[0.8.33]
+- modified option --no-self-hits to also require matching sequence titles for filtering of a self hit
+- fixed a bug that could cause a crash in the `joining output blocks` stage
+
+[0.8.32]
+- improved speed and sensitivity
+- fixed an issue that could cause too high memory usage in certain cases
+
 [0.8.31]
 - renamed option --run-len to --min-orf
 - added compositional score adjustments (option --comp-based-stats (0,1), enabled by default)
diff --git a/src/align/align.cpp b/src/align/align.cpp
index 9be8d2b..3d5745a 100644
--- a/src/align/align.cpp
+++ b/src/align/align.cpp
@@ -143,21 +143,23 @@ void align_worker(Output_stream *out)
 
 void align_queries(const Trace_pt_buffer &trace_pts, Output_stream* output_file)
 {
-	Trace_pt_list v;
 	query_queue.last_query = (unsigned)-1;
 	for (unsigned bin = 0; bin < trace_pts.bins(); ++bin) {
 		log_stream << "Processing query bin " << bin + 1 << '/' << trace_pts.bins() << '\n';
 		task_timer timer("Loading trace points", 3);
-		statistics.max(Statistics::TEMP_SPACE, trace_pts.load(v, bin));
+		Trace_pt_list *v = new Trace_pt_list;
+		statistics.max(Statistics::TEMP_SPACE, trace_pts.load(*v, bin));
 		timer.go("Sorting trace points");
-		merge_sort(v.begin(), v.end(), config.threads_);
-		v.init();
+		merge_sort(v->begin(), v->end(), config.threads_);
+		v->init();
 		timer.go("Computing alignments");
-		query_queue.init(v.begin(), v.end());
+		query_queue.init(v->begin(), v->end());
 		Thread_pool threads;
 		for (unsigned i = 0; i < config.threads_; ++i)
 			threads.push_back(launch_thread(align_worker, output_file));
 		threads.join_all();
+		timer.go("Deallocating buffers");
+		delete v;
 	}
 	if (!blocked_processing && *output_format != Output_format::daa && config.report_unaligned != 0) {
 		Text_buffer buf;
@@ -167,4 +169,4 @@ void align_queries(const Trace_pt_buffer &trace_pts, Output_stream* output_file)
 		}
 		output_file->write(buf.get_begin(), buf.size());
 	}
-}
+}
\ No newline at end of file
diff --git a/src/align/query_mapper.cpp b/src/align/query_mapper.cpp
index 8b803b5..15efc45 100644
--- a/src/align/query_mapper.cpp
+++ b/src/align/query_mapper.cpp
@@ -137,9 +137,10 @@ bool Query_mapper::generate_output(Text_buffer &buffer, Statistics &stat)
 	unsigned n_hsp = 0, n_target_seq = 0, hit_hsps = 0;
 	const unsigned top_score = targets[0].filter_score, query_len = (unsigned)query_seq(0).length();
 	size_t seek_pos = 0;
+	const char *query_title = query_ids::get()[query_id].c_str();
 
 	for (size_t i = 0; i < targets.size(); ++i) {
-		if (config.min_bit_score == 0 && score_matrix.evalue(targets[i].filter_score, config.db_size, query_len) > config.max_evalue
+		if ((config.min_bit_score == 0 && score_matrix.evalue(targets[i].filter_score, config.db_size, query_len) > config.max_evalue)
 			|| score_matrix.bitscore(targets[i].filter_score) < config.min_bit_score)
 			break;
 
@@ -152,10 +153,11 @@ bool Query_mapper::generate_output(Text_buffer &buffer, Statistics &stat)
 		for (list<Hsp_data>::iterator j = targets[i].hsps.begin(); j != targets[i].hsps.end(); ++j) {
 			if (hit_hsps >= config.max_hsps)
 				break;
+			const char *ref_title = ref_ids::get()[targets[i].subject_id].c_str();
 			if (j->id_percent() < config.min_id
 				|| j->query_cover_percent(source_query_len) < config.query_cover
 				|| j->subject_cover_percent(subject_len) < config.subject_cover
-				|| (config.no_self_hits && j->identities == j->length && j->query_source_range.length() == source_query_len && j->subject_range.length() == subject_len))
+				|| (config.no_self_hits && j->identities == j->length && j->query_source_range.length() == source_query_len && j->subject_range.length() == subject_len && strcmp(query_title, ref_title) == 0))
 				continue;
 
 			if (blocked_processing) {
@@ -166,9 +168,9 @@ bool Query_mapper::generate_output(Text_buffer &buffer, Statistics &stat)
 			else {
 				if (n_hsp == 0) {
 					if (*output_format == Output_format::daa)
-						seek_pos = write_daa_query_record(buffer, query_ids::get()[query_id].c_str(), align_mode.query_translated ? query_source_seqs::get()[query_id] : query_seqs::get()[query_id]);
+						seek_pos = write_daa_query_record(buffer, query_title, align_mode.query_translated ? query_source_seqs::get()[query_id] : query_seqs::get()[query_id]);
 					else
-						output_format->print_query_intro(query_id, query_ids::get()[query_id].c_str(), source_query_len, buffer, false);
+						output_format->print_query_intro(query_id, query_title, source_query_len, buffer, false);
 				}
 				if (*output_format == Output_format::daa)
 					write_daa_record(buffer, *j, query_id, targets[i].subject_id);
@@ -177,10 +179,10 @@ bool Query_mapper::generate_output(Text_buffer &buffer, Statistics &stat)
 						query_id,
 						query_seq(j->frame),
 						query_source_seq(),
-						query_ids::get()[query_id].c_str(),
+						query_title,
 						targets[i].subject_id,
 						targets[i].subject_id,
-						ref_ids::get()[targets[i].subject_id].c_str(),
+						ref_title,
 						subject_len,
 						n_target_seq,
 						hit_hsps), buffer);
@@ -207,7 +209,7 @@ bool Query_mapper::generate_output(Text_buffer &buffer, Statistics &stat)
 			Intermediate_record::finish_query(buffer, seek_pos);
 	}
 	else if (!blocked_processing && *output_format != Output_format::daa && config.report_unaligned != 0) {
-		output_format->print_query_intro(query_id, query_ids::get()[query_id].c_str(), source_query_len, buffer, true);
+		output_format->print_query_intro(query_id, query_title, source_query_len, buffer, true);
 		output_format->print_query_epilog(buffer, true);
 	}
 
diff --git a/src/basic/basic.cpp b/src/basic/basic.cpp
index ca4338d..4d32aaa 100644
--- a/src/basic/basic.cpp
+++ b/src/basic/basic.cpp
@@ -23,7 +23,7 @@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR P
 #include "statistics.h"
 #include "sequence.h"
 
-const char* Const::version_string = "0.8.31";
+const char* Const::version_string = "0.8.34";
 const char* Const::program_name = "diamond";
 const char* Const::id_delimiters = " \a\b\f\n\r\t\v";
 
diff --git a/src/basic/config.cpp b/src/basic/config.cpp
index c480bad..8b925a2 100644
--- a/src/basic/config.cpp
+++ b/src/basic/config.cpp
@@ -52,7 +52,8 @@ Config::Config(int argc, const char **argv)
 		.add_command("dbstat", "")
 		.add_command("modelsim", "")
 		.add_command("match-file-stat", "")
-		.add_command("model-seqs", "");
+		.add_command("model-seqs", "")
+		.add_command("opt", "");
 
 	Options_group general("General options");
 	general.add()
@@ -135,6 +136,7 @@ Config::Config(int argc, const char **argv)
 
 	Options_group advanced("Advanced options");
 	advanced.add()
+		("bin", 0, "number of query bins for seed search", query_bins, 16u)
 		("min-orf", 'l', "ignore translated sequences without an open reading frame of at least this length", run_len)
 		("freq-sd", 0, "number of standard deviations for ignoring frequent seeds", freq_sd, 0.0)
 		("id2", 0, "minimum number of identities for stage 1 hit", min_identities)
@@ -183,8 +185,19 @@ Config::Config(int argc, const char **argv)
 		("neighborhood-score", 0, "", neighborhood_score)
 		("algo", 0, "", algo, 0u)
 		("seed-weight", 'w', "", seed_weight, 7u)
-		("very-sensitive", 0, "", mode_very_sensitive);
-
+		("very-sensitive", 0, "", mode_very_sensitive)
+		("idl", 0, "", id_left)
+		("idr", 0, "", id_right)
+		("idn", 0, "", id_n)
+		("bmatch", 0, "", bmatch)
+		("bmismatch", 0, "", bmismatch)
+		("bcutoff", 0, "", bcutoff)
+		("ants", 0, "", n_ants, uint64_t(100))
+		("rho", 0, "", rho, 0.99)
+		("p_best", 0, "", p_best, 0.05)
+		("d_exp", 0, "", d_exp, 1.0)
+		("d_new", 0, "", d_new, 1.0);
+		
 	parser.add(general).add(makedb).add(aligner).add(advanced).add(view_options).add(getseq_options).add(hidden_options);
 	parser.store(argc, argv, command);
 
@@ -244,13 +257,20 @@ Config::Config(int argc, const char **argv)
 		message_stream = Message_stream(false);
 		break;
 	case 3:
-		log_stream = Message_stream();
+		log_stream = Message_stream(true, true);
+		verbose_stream = Message_stream(true, true);
+		message_stream = Message_stream(true, true);
+		break;
 	case 2:
 		verbose_stream = Message_stream();
 	default:
 		;
 	}
 
+	for (int i = 0; i < argc; ++i)
+		log_stream << argv[i] << ' ';
+	log_stream << endl;
+
 	if (!no_auto_append) {
 		auto_append_extension(database, ".dmnd");
 		if (command == Config::view)
@@ -276,7 +296,7 @@ Config::Config(int argc, const char **argv)
 		;
 	}
 
-	if (command == Config::blastp || command == Config::blastx || command == Config::benchmark || command == Config::model_sim) {
+	if (command == Config::blastp || command == Config::blastx || command == Config::benchmark || command == Config::model_sim || command == Config::opt) {
 		if (tmpdir == "")
 			tmpdir = extract_dir(output_file);
 		if (gap_open == -1)
@@ -330,9 +350,6 @@ Config::Config(int argc, const char **argv)
 		}
 		else
 			message_stream << max_alignments << endl;
-
-		if(mode_very_sensitive)
-			set_option(chunk_size, 0.4);
 	}
 
 	Translator::init(query_gencode);
diff --git a/src/basic/config.h b/src/basic/config.h
index c6fe486..8398234 100644
--- a/src/basic/config.h
+++ b/src/basic/config.h
@@ -121,10 +121,17 @@ struct Config
 	bool mode_very_sensitive;
 	unsigned max_hsps;
 	bool no_self_hits;
+	unsigned id_left, id_right, id_n;
+	int bmatch, bmismatch, bcutoff;
+	unsigned query_bins;
+	uint64_t n_ants;
+	double rho;
+	double p_best;
+	double d_exp, d_new;
 
 	enum {
 		makedb = 0, blastp = 1, blastx = 2, view = 3, help = 4, version = 5, getseq = 6, benchmark = 7, random_seqs = 8, compare = 9, sort = 10, roc = 11, db_stat = 12, model_sim = 13,
-		match_file_stat = 14, model_seqs = 15
+		match_file_stat = 14, model_seqs = 15, opt = 16
 	};
 	unsigned	command;
 
diff --git a/src/basic/const.h b/src/basic/const.h
index ef72f05..386460a 100644
--- a/src/basic/const.h
+++ b/src/basic/const.h
@@ -23,7 +23,7 @@ struct Const
 {
 
 	enum {
-		build_version = 93,
+		build_version = 96,
 		daa_version = 0,
 		seedp_bits = 10,
 		seedp = 1<<seedp_bits,
diff --git a/src/basic/hssp.cpp b/src/basic/hssp.cpp
index 518a526..4030b14 100644
--- a/src/basic/hssp.cpp
+++ b/src/basic/hssp.cpp
@@ -92,6 +92,8 @@ Hsp_context& Hsp_context::parse()
 
 	for (; i.good(); ++i) {
 		++hsp_.length;
+		if (i.query_pos >= query.length())
+			throw std::runtime_error("Query sequence index out of bounds.");
 		switch (i.op()) {
 		case op_match:
 			++hsp_.identities;
diff --git a/src/basic/reduction.h b/src/basic/reduction.h
index 41cc45a..7bf6420 100644
--- a/src/basic/reduction.h
+++ b/src/basic/reduction.h
@@ -79,6 +79,7 @@ struct Reduction
 
 	static void reduce_seq(const sequence &seq, vector<char> &dst)
 	{
+		dst.clear();
 		dst.resize(seq.length());
 		for (unsigned i = 0; i < seq.length(); ++i)
 			dst[i] = reduction(seq[i]);
diff --git a/src/basic/shape.h b/src/basic/shape.h
index ec5ceb6..21187f4 100644
--- a/src/basic/shape.h
+++ b/src/basic/shape.h
@@ -43,6 +43,46 @@ bool include_partition<Filter_partition>(unsigned p)
 	return current_range.contains(p);
 }*/
 
+
+struct Letter_trail
+{
+	Letter_trail()
+	{
+		bucket[0] = 0;
+		for (int i = 1; i < 20; ++i)
+			bucket[i] = -1;
+	}
+	Letter_trail(const Reduction &reduction)
+	{
+		for (int i = 0; i < 20; ++i)
+			bucket[i] = reduction(i);
+	}
+	int operator()(char l) const
+	{
+		return bucket[(long)l];
+	}
+	int next() const
+	{
+		for (int i = 0; i < 20; ++i)
+			if (bucket[i] == -1)
+				return i;
+		return -1;
+	}
+	int buckets() const
+	{
+		int m = 0;
+		for (int i = 0; i < 20; ++i)
+			m = std::max(m, bucket[i]);
+		return m + 1;
+	}
+	double background_p() const;
+	friend std::ostream& operator<<(std::ostream &s, const Letter_trail &t);
+	int bucket[20];
+};
+
+#define OPT_W 7
+typedef Letter_trail Trail[OPT_W];
+
 struct shape
 {
 
@@ -141,8 +181,21 @@ struct shape
 		return score;
 	}
 
+	bool hit(const Letter *x, const Letter *y, const Trail &trail) const
+	{
+		for (unsigned i = 0; i < weight_; ++i)
+#if OPT_W==1
+			if (trail[0](x[positions_[i]]) != trail[0](y[positions_[i]]))
+				return false;
+#else
+			if (trail[i](x[positions_[i]]) != trail[i](y[positions_[i]]))
+				return false;
+#endif
+		return true;
+	}
+
 	uint32_t length_, weight_, positions_[Const::max_seed_weight], d_, mask_, rev_mask_, id_;
 
 };
 
-#endif /* SHAPE_H_ */
+#endif /* SHAPE_H_ */
\ No newline at end of file
diff --git a/src/basic/statistics.h b/src/basic/statistics.h
index 2326107..7d6ad92 100644
--- a/src/basic/statistics.h
+++ b/src/basic/statistics.h
@@ -32,7 +32,7 @@ typedef uint64_t stat_type;
 struct Statistics
 {
 
-	enum value { SEED_HITS, TENTATIVE_MATCHES0, TENTATIVE_MATCHES1, TENTATIVE_MATCHES2, TENTATIVE_MATCHES3, TENTATIVE_MATCHES4, MATCHES, ALIGNED, GAPPED, DUPLICATES,
+	enum value { SEED_HITS, TENTATIVE_MATCHES0, TENTATIVE_MATCHES1, TENTATIVE_MATCHES2, TENTATIVE_MATCHES3, TENTATIVE_MATCHES4, TENTATIVE_MATCHESX, MATCHES, ALIGNED, GAPPED, DUPLICATES,
 		GAPPED_HITS, QUERY_SEEDS, QUERY_SEEDS_HIT, REF_SEEDS, REF_SEEDS_HIT, QUERY_SIZE, REF_SIZE, OUT_HITS, OUT_MATCHES, COLLISION_LOOKUPS, QCOV, BIAS_ERRORS, SCORE_TOTAL, ALIGNED_QLEN, PAIRWISE, HIGH_SIM,
 		TEMP_SPACE, SECONDARY_HITS, ERASED_HITS, COUNT };
 
@@ -65,7 +65,8 @@ struct Statistics
 		log_stream << "Traceback errors = " << data_[BIAS_ERRORS] << endl;
 		verbose_stream << "Hits (filter stage 0) = " << data_[SEED_HITS] << endl;
 		verbose_stream << "Hits (filter stage 1) = " << data_[TENTATIVE_MATCHES1] << " (" << data_[TENTATIVE_MATCHES1]*100.0/ data_[SEED_HITS] << " %)" << endl;
-		verbose_stream << "Hits (filter stage 2) = " << data_[TENTATIVE_MATCHES2] << " (" << data_[TENTATIVE_MATCHES2] * 100.0 / data_[TENTATIVE_MATCHES1] << " %)" << endl;
+		verbose_stream << "Hits (filter stage x) = " << data_[TENTATIVE_MATCHESX] << " (" << data_[TENTATIVE_MATCHESX] * 100.0 / data_[TENTATIVE_MATCHES1] << " %)" << endl;
+		verbose_stream << "Hits (filter stage 2) = " << data_[TENTATIVE_MATCHES2] << " (" << data_[TENTATIVE_MATCHES2] * 100.0 / data_[TENTATIVE_MATCHESX] << " %)" << endl;
 		verbose_stream << "Hits (filter stage 3) = " << data_[TENTATIVE_MATCHES3] << " (" << data_[TENTATIVE_MATCHES3] * 100.0 / data_[TENTATIVE_MATCHES2] << " %)" << endl;
 		verbose_stream << "Hits (filter stage 4) = " << data_[TENTATIVE_MATCHES4] << " (" << data_[TENTATIVE_MATCHES4] * 100.0 / data_[TENTATIVE_MATCHES3] << " %)" << endl;
 		log_stream << "Gapped hits = " << data_[GAPPED_HITS] << endl;
diff --git a/src/basic/value.h b/src/basic/value.h
index 7ee3867..d814ac3 100644
--- a/src/basic/value.h
+++ b/src/basic/value.h
@@ -22,6 +22,7 @@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR P
 #include <assert.h>
 #include <string.h>
 #include <sstream>
+#include <stdexcept>
 #include "const.h"
 #include "config.h"
 #include "../util/util.h"
@@ -96,6 +97,12 @@ struct Align_mode
 {
 	Align_mode(unsigned mode);
 	static unsigned from_command(unsigned command);
+	unsigned check_context(unsigned i) const
+	{
+		if (i >= query_contexts)
+			throw std::runtime_error("Sequence context is out of bounds.");
+		return i;
+	}
 	enum { blastp = 2, blastx = 3, blastn = 4 };
 	Sequence_type sequence_type, input_sequence_type;
 	unsigned mode, query_contexts;
diff --git a/src/data/frequent_seeds.h b/src/data/frequent_seeds.h
index 522651f..f9de6d4 100644
--- a/src/data/frequent_seeds.h
+++ b/src/data/frequent_seeds.h
@@ -38,7 +38,7 @@ struct Frequent_seeds
 
 private:
 
-	static const double hash_table_factor;
+	static const double hash_table_factor;   
 
 	struct Build_context;
 
diff --git a/src/data/load_seqs.h b/src/data/load_seqs.h
index 29be206..c9ded80 100644
--- a/src/data/load_seqs.h
+++ b/src/data/load_seqs.h
@@ -66,15 +66,13 @@ inline size_t load_seqs(Input_stream &file,
 	size_t letters = 0, n = 0;
 	vector<Letter> seq;
 	vector<char> id;
-	try {
-		while(letters < max_letters && format.get_seq(id, seq, file)) {
-			ids->push_back(id);
-			letters += push_seq(**seqs, *source_seqs, seq);
-			++n;
-		}
-	} catch(invalid_sequence_char_exception &e) {
-		std::cerr << n << endl;
-		throw e;
+	
+	while (letters < max_letters && format.get_seq(id, seq, file)) {
+		ids->push_back(id);
+		letters += push_seq(**seqs, *source_seqs, seq);
+		++n;
+		if ((*seqs)->get_length() > std::numeric_limits<int>::max())
+			throw std::runtime_error("Number of sequences in file exceeds supported maximum.");
 	}
 	ids->finish_reserve();
 	(*seqs)->finish_reserve();
diff --git a/src/data/reference.h b/src/data/reference.h
index e215ba7..c12344f 100644
--- a/src/data/reference.h
+++ b/src/data/reference.h
@@ -137,13 +137,13 @@ struct Ref_map
 		const unsigned block = current_ref_block;
 		if(data_.size() < block+1) {
 			data_.resize(block+1);
-			data_[block].insert(data_[block].end(), ref_count, std::numeric_limits<unsigned>::max());
+			data_[block].insert(data_[block].end(), ref_count, std::numeric_limits<uint32_t>::max());
 		}
 	}
 	uint32_t get(unsigned block, unsigned i)
 	{
 		uint32_t n = data_[block][i];
-		if(n != std::numeric_limits<unsigned>::max())
+		if(n != std::numeric_limits<uint32_t>::max())
 			return n;
 		{
 			mtx_.lock();
@@ -187,6 +187,12 @@ struct Ref_map
 	{
 		return rev_map_[i];
 	}
+	uint32_t check_id(uint32_t i) const
+	{
+		if (i >= next_)
+			throw std::runtime_error("Dictionary reference id out of bounds.");
+		return i;
+	}
 private:
 	tthread::mutex mtx_;
 	vector<vector<uint32_t> > data_;
diff --git a/src/data/string_set.h b/src/data/string_set.h
index 62ab8f3..d66d7e9 100644
--- a/src/data/string_set.h
+++ b/src/data/string_set.h
@@ -72,6 +72,13 @@ struct String_set
 	const _t* ptr(size_t i) const
 	{ return &data_[limits_[i]]; }
 
+	size_t check_idx(size_t i) const
+	{
+		if (limits_.size() < i + 2)
+			throw std::runtime_error("Sequence set index out of bounds.");
+		return i;
+	}
+
 	size_t length(size_t i) const
 	{ return limits_[i+1] - limits_[i] - _padding; }
 
diff --git a/src/dp/dp.h b/src/dp/dp.h
index f01ca80..94454b5 100644
--- a/src/dp/dp.h
+++ b/src/dp/dp.h
@@ -63,6 +63,7 @@ struct Fixed_score_buffer
 	inline void init(size_t col_size, size_t cols, _t init)
 	{
 		col_size_ = col_size;
+		data_.clear();
 		data_.reserve(col_size * cols);
 		data_.resize(col_size);
 		for (size_t i = 0; i<col_size; ++i)
diff --git a/src/dp/dp_matrix.h b/src/dp/dp_matrix.h
index 1d7ea0e..6f1f760 100644
--- a/src/dp/dp_matrix.h
+++ b/src/dp/dp_matrix.h
@@ -73,7 +73,9 @@ struct DP_matrix
 		scores_ (TLS::get(scores_ptr)),
 		hgap_ (TLS::get(hgap_ptr))
 	{
+		scores_.clear();
 		scores_.resize(2*band+1);
+		hgap_.clear();
 		hgap_.resize(2*band+2);
 		hgap_front_ = &hgap_.front();
 		score_front_ = &scores_.front();
diff --git a/src/dp/floating_sw.cpp b/src/dp/floating_sw.cpp
index 045da25..00f5a1a 100644
--- a/src/dp/floating_sw.cpp
+++ b/src/dp/floating_sw.cpp
@@ -26,8 +26,8 @@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR P
 #include "../align/align.h"
 #include "dp.h"
 
-template<typename _score, typename _traceback> TLS_PTR typename Score_buffer<_score,_traceback>::Type* Scalar_dp_matrix<_score,_traceback>::score_ptr = 0;
-template<typename _score, typename _traceback> TLS_PTR Double_buffer<_score>* Scalar_dp_matrix<_score,_traceback>::hgap_ptr = 0;
+template<typename _score, typename _traceback> TLS_PTR typename Score_buffer<_score, _traceback>::Type* Scalar_dp_matrix<_score, _traceback>::score_ptr = 0;
+template<typename _score, typename _traceback> TLS_PTR Double_buffer<_score>* Scalar_dp_matrix<_score, _traceback>::hgap_ptr = 0;
 
 template struct Scalar_dp_matrix<int, Traceback>;
 
@@ -120,7 +120,7 @@ Hsp_data floating_sw_dir(const Letter *query, const Letter* subject, int band, _
 template<typename _score, typename _traceback, typename _score_correction>
 void floating_sw(const Letter *query, const Letter *subject, Hsp_data &segment, int band, _score xdrop, _score gap_open, _score gap_extend, uint64_t &cell_updates, unsigned query_anchor, unsigned subject_anchor, const _score_correction &score_correction, const _traceback&, const _score&)
 {
-	segment.merge(floating_sw_dir<Right, _score, _traceback, _score_correction>(query + 1, subject + 1, band, xdrop, gap_open, gap_extend, cell_updates, score_correction, query_anchor+1),
+	segment.merge(floating_sw_dir<Right, _score, _traceback, _score_correction>(query + 1, subject + 1, band, xdrop, gap_open, gap_extend, cell_updates, score_correction, query_anchor + 1),
 		floating_sw_dir<Left, _score, _traceback, _score_correction>(query, subject, band, xdrop, gap_open, gap_extend, cell_updates, score_correction, query_anchor), query_anchor, subject_anchor);
 }
 
diff --git a/src/dp/growing_buffer.h b/src/dp/growing_buffer.h
index 7efc748..f405dcb 100644
--- a/src/dp/growing_buffer.h
+++ b/src/dp/growing_buffer.h
@@ -32,6 +32,7 @@ struct Growing_buffer
 	inline void init(size_t size, size_t padding, size_t padding_front, _t init)
 	{
 		const size_t total = size + padding;
+		data_.clear();
 		data_.resize(total);
 		col_size_ = total;
 		for(size_t i=0;i<total;++i)
diff --git a/src/dp/padded_banded_sw.cpp b/src/dp/padded_banded_sw.cpp
index 3c67607..3e01ebc 100644
--- a/src/dp/padded_banded_sw.cpp
+++ b/src/dp/padded_banded_sw.cpp
@@ -66,7 +66,9 @@ struct Padded_banded_DP_matrix
 		scores_(TLS::get(scores_ptr)),
 		hgap_(TLS::get(hgap_ptr))
 	{
+		scores_.clear();
 		scores_.resize(2 * band + 1);
+		hgap_.clear();
 		hgap_.resize(2 * band + 2);
 		hgap_front_ = &hgap_.front();
 		score_front_ = &scores_.front();
diff --git a/src/basic/const.h b/src/extra/extra.h
similarity index 75%
copy from src/basic/const.h
copy to src/extra/extra.h
index ef72f05..69f4428 100644
--- a/src/basic/const.h
+++ b/src/extra/extra.h
@@ -1,5 +1,5 @@
 /****
-Copyright (c) 2016, Benjamin Buchfink
+Copyright (c) 2017, Benjamin Buchfink
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
@@ -16,31 +16,12 @@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR P
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ****/
 
-#ifndef CONST_H_
-#define CONST_H_
+#include <vector>
+#include "../basic/sequence.h"
 
-struct Const
-{
+using std::vector;
 
-	enum {
-		build_version = 93,
-		daa_version = 0,
-		seedp_bits = 10,
-		seedp = 1<<seedp_bits,
-		max_seed_weight = 32,
-		max_shapes = 16,
-		max_shape_len = 32
-	};
+void get_random_seq(vector<Letter> &seq);
+void get_related_seq(const sequence &seq, vector<Letter> &out, double id);
 
-	static const char* version_string;
-	static const char* program_name;
-	static const char* id_delimiters;
-
-};
-
-#define SIMPLE_SEARCH
-// #define FREQUENCY_MASKING
-// #define ST_JOIN
-// #define NO_COLLISION_FILTER
-
-#endif /* CONST_H_ */
+extern const double subst_freq[20][20];
\ No newline at end of file
diff --git a/src/extra/model_sim.cpp b/src/extra/model_sim.cpp
index 06a1453..345ad09 100644
--- a/src/extra/model_sim.cpp
+++ b/src/extra/model_sim.cpp
@@ -20,6 +20,7 @@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR P
 #include <fstream>
 #include "../basic/sequence.h"
 #include "../basic/shape_config.h"
+#include "extra.h"
 
 struct MSS_model
 {
@@ -269,10 +270,8 @@ void get_random_seq(vector<Letter> &seq)
 		*i = get_distribution<20>(background_freq);
 }
 
-void get_related_seq(const sequence &seq, vector<Letter> &out, double id)
-{
-	static const double p[20][20] = {
-	{0, 0.051599, 0.0319706, 0.0472072, 0.0259005, 0.042533, 0.0794012, 0.104447, 0.0190464, 0.0529913, 0.0806505, 0.0561607, 0.023709, 0.0243562, 0.0447981, 0.119439, 0.0710634, 0.00618423, 0.019181, 0.099362, },
+const double subst_freq[20][20] = {
+	{ 0, 0.051599, 0.0319706, 0.0472072, 0.0259005, 0.042533, 0.0794012, 0.104447, 0.0190464, 0.0529913, 0.0806505, 0.0561607, 0.023709, 0.0243562, 0.0447981, 0.119439, 0.0710634, 0.00618423, 0.019181, 0.099362, },
 	{ 0.0836991,0,0.0557221,0.0526752,0.00570959,0.0834397,0.0992451,0.0474531,0.0360983,0.0290852,0.0591073,0.18726,0.0187483,0.0177066,0.03043,0.062954,0.0596622,0.00896391,0.0252332,0.036807, },
 	{ 0.0688047,0.0737048,0,0.134642,0.0104452,0.0556231,0.0811654,0.0970697,0.0503946,0.0214787,0.0370703,0.069479,0.0150052,0.0171298,0.0255345,0.114536,0.0729503,0.00573275,0.021037,0.0281966, },
 	{ 0.0734774,0.0538258,0.10774,0,0.00541203,0.0638292,0.220103,0.0852837,0.0313663,0.0145824,0.0242017,0.0687326,0.00866069,0.0125848,0.0451219,0.0885509,0.0562425,0.0041692,0.0139378,0.0221784, },
@@ -293,11 +292,13 @@ void get_related_seq(const sequence &seq, vector<Letter> &out, double id)
 	{ 0.0629104,0.0481246,0.0279043,0.0248727,0.0132862,0.0319342,0.0336743,0.0255073,0.0636279,0.0693312,0.123129,0.03326,0.0336581,0.1923,0.0183125,0.0405417,0.039996,0.0424192,0,0.0752103, },
 	{ 0.122866,0.0265131,0.0140538,0.0140485,0.020662,0.0200589,0.0307184,0.022685,0.010223,0.253759,0.190633,0.0272914,0.0436694,0.0420276,0.0241076,0.034372,0.0705984,0.00696795,0.0247446,0, } };
 
+void get_related_seq(const sequence &seq, vector<Letter> &out, double id)
+{
 	for (unsigned i = 0; i < seq.length(); ++i)
 		if ((double)rand() / RAND_MAX < id)
 			out[i] = seq[i];
 		else
-			out[i] = get_distribution<20>(p[(size_t)seq[i]]);
+			out[i] = get_distribution<20>(subst_freq[(size_t)seq[i]]);
 }
 
 template<typename _model>
diff --git a/src/extra/opt.cpp b/src/extra/opt.cpp
new file mode 100644
index 0000000..5f8366e
--- /dev/null
+++ b/src/extra/opt.cpp
@@ -0,0 +1,307 @@
+/****
+Copyright (c) 2017, Benjamin Buchfink
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+****/
+
+#include <string.h>
+#include <time.h>
+#include "../util/util.h"
+#include "../basic/shape_config.h"
+#include "extra.h"
+#include "../util/log_stream.h"
+#include "../util/util.h"
+#include "../util/thread.h"
+
+double Letter_trail::background_p() const
+{
+	double p = 0;
+	for (int i = 0; i < 20; ++i)
+		for (int j = 0; j < 20; ++j)
+			if (bucket[i] == bucket[j])
+				p += background_freq[i] * background_freq[j];
+	return p;
+}
+
+double background_p(const Trail& t)
+{
+	double p = 1.0;
+	for (int pos = 0; pos < OPT_W; ++pos)
+		p *= t[pos].background_p();
+	return p;
+}
+
+std::ostream& operator<<(std::ostream &s, const Letter_trail &t)
+{
+	const int buckets = t.buckets();
+	for (int i = 0; i < buckets; ++i) {
+		for (int j = 0; j < 20; ++j)
+			if (t.bucket[j] == i)
+				s << value_traits.alphabet[j];
+		s << ' ';
+	}
+	return s;
+}
+
+double tau_min, tau_max;
+
+void set_limits(double &x)
+{
+	x = std::min(std::max(x, tau_min), tau_max);
+	//x = std::max(x, tau_min);
+}
+
+const size_t max_ants = 10000;
+Trail ants[max_ants];
+double sens[max_ants];
+
+struct Trails
+{
+
+	Trails()
+	{
+		for (int i = 0; i < 20; ++i)
+			for (int j = 0; j < 20; ++j)
+				for (int p = 0; p < OPT_W; ++p)
+					pheromone[p][i][j] = 100.0;
+	}
+
+	double delta_tau(int pos, int i, int j) const
+	{
+		//return pheromone[pos][i][j] * pow(subst_freq[i][j], config.d_exp);
+		return pheromone[pos][i][j] * pow(std::max(score_matrix(i, j) + 1, 0), config.d_exp);
+	}
+
+	double delta_tau0(int pos, int i) const
+	{
+		return pheromone[pos][i][i] * pow(config.d_new, config.d_exp);
+	}
+
+	Letter_trail get(int pos) const
+	{
+		Letter_trail t;
+		int next = 0;
+		double p[20];
+		while (true) {
+
+			double sum = delta_tau0(pos, next);
+			for (int i = next + 1; i < 20; ++i)
+				if (t.bucket[i] == -1)
+					sum += delta_tau(pos, next, i);
+
+			memset(p, 0, sizeof(p));
+			p[next] = delta_tau0(pos, next) / sum;
+			for (int i = next + 1; i < 20; ++i)
+				if (t.bucket[i] == -1)
+					p[i] = delta_tau(pos, next, i) / sum;
+
+			int i = get_distribution<20>(p);
+
+			if (i == next) {
+				const int bucket = t.bucket[next];
+				next = t.next();
+				if (next != -1)
+					t.bucket[next] = bucket + 1;
+				else
+					break;
+			}
+			else {
+				t.bucket[i] = t.bucket[next];
+				next = i;
+			}
+
+		}
+		return t;
+	}
+
+	void get(Trail &out) const
+	{
+		for (int pos = 0; pos < OPT_W; ++pos)
+			out[pos] = get(pos);
+	}
+
+	void update(const Letter_trail &t, int pos, double sens)
+	{
+		const int buckets = t.buckets();
+		vector<vector<int> > v(buckets);
+		for (int i = 0; i < 20; ++i)
+			v[t.bucket[i]].push_back(i);
+		for (int i = 0; i < buckets; ++i) {
+			int j;
+			for (j = 0; j < (int)v[i].size() - 1; ++j) {
+				pheromone[pos][v[i][j]][v[i][j + 1]] += sens;
+				set_limits(pheromone[pos][v[i][j]][v[i][j + 1]]);
+			}
+			pheromone[pos][v[i][j]][v[i][j]] += sens;
+			set_limits(pheromone[pos][v[i][j]][v[i][j]]);
+		}
+	}
+
+	void evaporate()
+	{
+		for (int pos = 0; pos < OPT_W; ++pos)
+			for (int i = 0; i < 20; ++i)
+				for (int j = 0; j < 20; ++j) {
+					pheromone[pos][i][j] *= config.rho;
+					set_limits(pheromone[pos][i][j]);
+				}
+	}
+
+	void update(const Trail &t, double sens)
+	{		
+		for (int pos = 0; pos < OPT_W; ++pos)
+			update(t[pos], pos, sens);
+	}
+
+	void update_all()
+	{
+		for (size_t i = 0; i < config.n_ants; ++i)
+			update(ants[i], sens[i]);
+	}
+
+	double pheromone[OPT_W][20][20];
+};
+
+Trails trails;
+
+void get_sens_worker(vector<char>::const_iterator query, vector<char>::const_iterator query_end, vector<char>::const_iterator subject, vector<double> *sens)
+{
+	vector<bool> hit;
+
+	for (; query < query_end; query += 70, subject += 70) {
+		hit.insert(hit.begin(), config.n_ants, false);
+		for (size_t j = 0; j <= 70 - shapes[0].length_; ++j) {
+			for (size_t k = 0; k < config.n_ants; ++k)
+				if (shapes[1].hit(&query[j], &subject[j], ants[k]) && !hit[k]) {
+					++((*sens)[k]);
+					hit[k] = true;
+				}
+		}
+		hit.clear();
+	}
+}
+
+void get_sens(const vector<char> &query, const vector<char> &subject)
+{
+	const size_t n_seqs = query.size() / 70;
+	partition<size_t> p(n_seqs, config.threads_);
+	vector<vector<double> > v(config.threads_);
+	Thread_pool threads;
+	for (unsigned i = 0; i < config.threads_; ++i) {
+		v[i].resize(config.n_ants);
+		threads.push_back(launch_thread(get_sens_worker, query.begin() + p.getMin(i) * 70, query.begin() + p.getMax(i) * 70, subject.begin() + p.getMin(i) * 70, &v[i]));
+	}
+	threads.join_all();
+	memset(sens, 0, sizeof(sens));
+	for (unsigned i = 0; i < config.threads_; ++i)
+		for (unsigned j = 0; j < config.n_ants; ++j)
+			sens[j] += v[i][j];
+
+	for (size_t k = 0; k < config.n_ants; ++k)
+		sens[k] /= n_seqs;
+}
+
+bool hit(const sequence &seq, const vector<Letter> &v, const Trail &t)
+{
+	for (size_t j = 0; j <= 70 - shapes[0].length_; ++j)
+		if (shapes[0].hit(&seq[j], &v[j], t))
+			return true;
+	return false;
+}
+
+void get_related_seq(const sequence &seq, vector<Letter> &out, double id, size_t region, const Trail &previous)
+{
+	vector<Letter> r(region);
+	out.clear();
+	for (size_t i = 0; i < seq.length(); i += region) {
+		const sequence query(&seq[i], region);
+		do {
+			get_related_seq(query, r, id);
+		} while (hit(query, r, previous));
+		out.insert(out.end(), r.begin(), r.end());
+	}
+}
+
+void opt()
+{
+	static const size_t region = 70;
+	static const size_t count = (size_t)1e6;
+	static const double id = 0.25;
+	
+	srand((unsigned)time(0));
+
+	Trail previous;
+	previous[0] = Letter_trail(Reduction("A KR EDNQ C G H ILVM FYW P ST"));
+	previous[1] = Letter_trail(Reduction("A KR EDNQ C G H ILVM FYW P ST"));
+	previous[2] = Letter_trail(Reduction("A KR EDNQ C G H ILVM FYW P ST"));
+	previous[3] = Letter_trail(Reduction("A KR EDNQ C G H ILVM FYW P ST"));
+	previous[4] = Letter_trail(Reduction("A KR EDNQ C G H ILVM FYW P ST"));
+	previous[5] = Letter_trail(Reduction("A KR EDNQ C G H ILVM FYW P ST"));
+	previous[6] = Letter_trail(Reduction("A KR EDNQ C G H ILVM FYW P ST"));
+	
+	task_timer timer("Init");
+	vector<char> query(count*region);
+	get_random_seq(query);
+
+	vector<char> subject(count*region);
+	get_related_seq(sequence(query), subject, id, region, previous);
+
+	timer.go("Calculating sensitivity");
+	for (int pos = 0; pos < OPT_W; ++pos)
+		ants[0][pos] = Letter_trail(Reduction("A KR EDNQ C G H ILVM FYW P ST"));
+	get_sens(query, subject);
+	timer.finish();
+	
+	double p_bg = background_p(ants[0]);
+	cout << "Sensitivity = " << sens[0] << endl;
+	cout << "P(background) = " << p_bg << endl;	
+	double global_best = 0;
+
+	while (true) {
+		timer.go("Setting ants");
+		for (size_t i = 0; i < config.n_ants; ++i)
+			trails.get(ants[i]);
+		
+		timer.go("Getting sensitivity");
+		get_sens(query, subject);
+		
+		double max_sens_eff = 0;
+		size_t max_ant;
+		for (size_t i = 0; i < config.n_ants; ++i) {
+			const double e = sens[i] * std::min(p_bg / background_p(ants[i]), 1.0);
+			sens[i] = e;
+			if (e > max_sens_eff) {
+				max_sens_eff = e;
+				max_ant = i;
+			}
+		}
+		timer.finish();
+		global_best = std::max(global_best, max_sens_eff);
+		tau_max = 1 / (1 - config.rho)*global_best;
+		tau_min = tau_max*(1 - pow(config.p_best, 0.05)) / 9 / pow(config.p_best, 0.05);
+		cout << "Effective sensitivity = " << max_sens_eff << ", global = " << global_best << endl;
+		cout << "Sensitivity = " << sens[max_ant] << endl;
+		cout << "P(background) = " << background_p(ants[max_ant]) << endl;
+		cout << "tau_max = " << tau_max << " tau_min = " << tau_min << endl;
+		for (int pos = 0; pos < OPT_W; ++pos)
+			cout << ants[max_ant][pos] << endl;
+		cout << endl;
+
+		trails.evaporate();
+		trails.update(ants[max_ant], max_sens_eff);
+		//trails.update_all();
+	}
+
+}
\ No newline at end of file
diff --git a/src/output/daa_file.h b/src/output/daa_file.h
index 9333582..41c683e 100644
--- a/src/output/daa_file.h
+++ b/src/output/daa_file.h
@@ -196,6 +196,7 @@ struct DAA_file
 		f_.read(&size, 1);
 		if(size == 0)
 			return false;
+		buf.clear();
 		buf.resize(size);
 		f_.read(buf.data(), size);
 		query_num = query_count_++;
diff --git a/src/output/join_blocks.cpp b/src/output/join_blocks.cpp
index b3c593c..31a4387 100644
--- a/src/output/join_blocks.cpp
+++ b/src/output/join_blocks.cpp
@@ -22,8 +22,6 @@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR P
 #include "../output/daa_write.h"
 #include "output_format.h"
 
-#ifndef ST_JOIN
-
 struct Join_fetcher
 {
 	static void init(const vector<Temp_file> &tmp_file)
@@ -50,6 +48,7 @@ struct Join_fetcher
 	{
 		unsigned size;
 		files[b].read(&size, 1);
+		buf[b].clear();
 		buf[b].resize(size);
 		files[b].read(buf[b].data(), size);
 		files[b].read(&query_ids[b], 1);
@@ -63,7 +62,7 @@ struct Join_fetcher
 		unaligned_from = query_last + 1;
 		query_last = query_id;
 		for (unsigned i = 0; i < buf.size(); ++i)
-			if (query_ids[i] == query_id)
+			if (query_ids[i] == query_id && query_id != Intermediate_record::finished)
 				fetch(i);
 			else
 				buf[i].clear();
@@ -108,7 +107,7 @@ struct Join_record
 		same_subject_ = info_.subject_id == subject;
 	}
 
-	static bool push_next(unsigned block, unsigned subject, Binary_buffer::Iterator &it, vector<Join_record> &v)	
+	static bool push_next(unsigned block, unsigned subject, Binary_buffer::Iterator &it, vector<Join_record> &v)
 	{
 		if (it.good()) {
 			v.push_back(Join_record(block, subject, it));
@@ -132,7 +131,7 @@ void join_query(vector<Binary_buffer> &buf, Text_buffer &out, Statistics &statis
 
 	vector<Join_record> records;
 	vector<Binary_buffer::Iterator> it;
-	for (unsigned i = 0; i<current_ref_block; ++i) {
+	for (unsigned i = 0; i < current_ref_block; ++i) {
 		it.push_back(buf[i].begin());
 		Join_record::push_next(i, std::numeric_limits<unsigned>::max(), it.back(), records);
 	}
@@ -155,10 +154,10 @@ void join_query(vector<Binary_buffer> &buf, Text_buffer &out, Statistics &statis
 				Hsp_data hsp(next.info_, query_source_len);
 				output_format->print_match(Hsp_context(hsp,
 					query,
-					context[hsp.frame],
+					context[align_mode.check_context(hsp.frame)],
 					align_mode.query_translated ? query_source_seqs::get()[query] : context[0],
 					query_name,
-					next.info_.subject_id,
+					ref_map.check_id(next.info_.subject_id),
 					ref_map.original_id(next.info_.subject_id),
 					ref_map.name(next.info_.subject_id),
 					ref_map.length(next.info_.subject_id),
@@ -194,11 +193,13 @@ void join_worker(Task_queue<Text_buffer,Join_writer> *queue)
 	size_t n;
 	Text_buffer *out;
 	Statistics stat;
+	const String_set<0>& qids = query_ids::get();
 
-	while (queue->get(n, out, fetcher)) {
+	while (queue->get(n, out, fetcher) && fetcher.query_id != Intermediate_record::finished) {
 		stat.inc(Statistics::ALIGNED);
 		size_t seek_pos;
-		const char * query_name = query_ids::get()[fetcher.query_id].c_str();
+
+		const char * query_name = qids[qids.check_idx(fetcher.query_id)].c_str();
 		const sequence query_seq = align_mode.query_translated ? query_source_seqs::get()[fetcher.query_id] : query_seqs::get()[fetcher.query_id];
 
 		if (*output_format != Output_format::daa && config.report_unaligned != 0) {
@@ -231,7 +232,7 @@ void join_blocks(unsigned ref_blocks, Output_stream &master_out, const vector<Te
 	ref_map.init_rev_map();
 	Join_fetcher::init(tmp_file);
 	Join_writer writer(master_out);
-	Task_queue<Text_buffer, Join_writer> queue(3*config.threads_, writer);
+	Task_queue<Text_buffer, Join_writer> queue(3 * config.threads_, writer);
 	Thread_pool threads;
 	for (unsigned i = 0; i < config.threads_; ++i)
 		threads.push_back(launch_thread(join_worker, &queue));
@@ -245,6 +246,4 @@ void join_blocks(unsigned ref_blocks, Output_stream &master_out, const vector<Te
 		}
 		writer(out);
 	}
-}
-
-#endif
\ No newline at end of file
+}
\ No newline at end of file
diff --git a/src/output/output.h b/src/output/output.h
index 62b5afb..da1eb83 100644
--- a/src/output/output.h
+++ b/src/output/output.h
@@ -80,25 +80,16 @@ struct Intermediate_record
 	}
 	static size_t write_query_intro(Text_buffer &buf, unsigned query_id)
 	{
-#ifndef ST_JOIN
 		size_t seek_pos = buf.size();
 		buf.write(query_id).write(0u);
 		return seek_pos;
-#else
-		return 0;
-#endif
 	}
 	static void finish_query(Text_buffer &buf, size_t seek_pos)
 	{
-#ifndef ST_JOIN
-		*(unsigned*)(&buf[seek_pos + sizeof(unsigned)]) = (unsigned)(buf.size() - seek_pos - sizeof(unsigned) * 2);
-#endif
+		*(unsigned*)(&buf[seek_pos + sizeof(unsigned)]) = safe_cast<unsigned>(buf.size() - seek_pos - sizeof(unsigned) * 2);
 	}
 	static void write(Text_buffer &buf, const Hsp_data &match, unsigned query_id, unsigned subject_id)
 	{
-#ifdef ST_JOIN
-		buf.write(query_id);
-#endif
 		buf.write(ref_map.get(current_ref_block, subject_id))
 			.write(get_segment_flag(match))
 			.write_packed(match.score)
diff --git a/src/run/benchmark.cpp b/src/run/benchmark.cpp
index aef448c..7c057f9 100644
--- a/src/run/benchmark.cpp
+++ b/src/run/benchmark.cpp
@@ -57,12 +57,12 @@ int xdrop_ungapped2(const Letter *query, const Letter *subject)
 		&& *s != '\xff')
 	{
 		st += score_matrix(*q, *s);
-		//score = std::max(score, st);
+		score = std::max(score, st);
 		++q;
 		++s;
 
 		st += score_matrix(*q, *s);
-		//score = std::max(score, st);
+		score = std::max(score, st);
 		++q;
 		++s;
 
@@ -74,25 +74,66 @@ int xdrop_ungapped2(const Letter *query, const Letter *subject)
 	return score;
 }
 
+int xdrop_window(const Letter *query, const Letter *subject)
+{
+	static const int window = 40;
+	int score(0), st(0), n = 0;
+
+	const Letter *q(query), *s(subject);
+
+	st = score;
+	while (n < window
+		&& *q != '\xff'
+		&& *s != '\xff')
+	{
+		st += score_matrix(*q, *s);
+		//score = std::max(score, st);
+		++q;
+		++s;
+		++n;
+
+		st += score_matrix(*q, *s);
+		//score = std::max(score, st);
+		++q;
+		++s;
+		++n;
+
+		st += score_matrix(*q, *s);
+		//score = std::max(score, st);
+		++q;
+		++s;
+		++n;
+	}
+	return st;
+}
+
 void benchmark_ungapped(const Sequence_set &ss, unsigned qa, unsigned sa)
 {
-	static const unsigned n = 10000000;
+	static const size_t n = 100000000llu;
 	Timer t;
 	t.start();
 
 	const Letter *q = &ss[0][qa], *s = &ss[1][sa];
 	int score=0;
 
-	for (unsigned i = 0; i < n; ++i) {
+	uint64_t mask = 0;
+	for (unsigned i = 0; i < 64; ++i) {
+		if (q[i] == '\xff' || s[i] == '\xff')
+			break;
+		if (q[i] == s[i])
+			mask |= 1llu << i;
+	}
+
+	for (size_t i = 0; i < n; ++i) {
 
-		score += xdrop_ungapped2(q, s);
+		//score += xdrop_window(q, s);
+		//score += binary_ungapped(mask);
 
 	}
 	t.stop();
 
 	cout << score << endl;
-	cout << " n/sec=" << (double)n / t.getElapsedTimeInSec() << endl;
-	cout << "t=" << t.getElapsedTimeInMicroSec() << endl;
+	cout << "t=" << t.getElapsedTimeInMicroSec() * 1000 / n << " ns" << endl;
 }
 
 void benchmark_greedy(const Sequence_set &ss, unsigned qa, unsigned sa)
@@ -182,6 +223,7 @@ Query  80  TVIGHL  85
            T+I  L
 Sbjct  76  TIINGL  81	*/
 
+aln1:
 
 	s1 = sequence::from_string("SLFEQLGGQAAVQAVTAQFYANIQADATVATFFNGIDMPNQTNKTAAFLCAALGGPNAWTGRNLKEVHANMGVSNAQFTTVIGHLRSALTGAGVAAALVEQTVAVAETVRGDVVTV");
 	s2 = sequence::from_string("RKQRIVIKISGACLKQNDSSIIDFIKINDLAEQIEKISKKYIVSIVLGGGNIWRGSIAKELDMDRNLADNMGMMATIINGLALENALNHLNVNTIVLSAIKCDKLVHESSANNIKKAIEKEQVMIFVAGTGFPYFTTDSCAAIRAAETESSIILMGKNGVDGVYDSDPKINPNAQFYEHITFNMALTQNLKVMDATALALCQENNINLLVFNIDKPNAIVDVLEKKNKYTIVSK");
@@ -283,8 +325,6 @@ KCLEHLFFFKLIGDTPIDTFLMEMLEAPHQIT");
 	*/
 
 
-
-aln1:
 	s1 = sequence::from_string("tspmtpditgkpfvaadasndyikrevmipmrdgvklhtvivlpkgaknapivltrtpyd\
 asgrterlasphmkdllsagddvfveggyirvfqdvrgkygsegdyvmtrplrgplnpse\
 vdhatdawdtidwlvknvsesngkvgmigssyegftvvmaltnphpalkvavpespmidg\
@@ -313,8 +353,8 @@ VIAREQLEEMCTAVNRIHRGPEHPSHIVLPIIKR");
 	ss.finish_reserve();
 
 	//benchmark_floating(ss, qa, sa);
-	benchmark_greedy(ss, qa, sa);
+	//benchmark_greedy(ss, qa, sa);
 	//benchmark_cmp();
-	//benchmark_ungapped(ss, qa, sa);
+	benchmark_ungapped(ss, qa, sa);
 
 }
\ No newline at end of file
diff --git a/src/run/double_indexed.cpp b/src/run/double_indexed.cpp
index 4349b0f..b01cd8a 100644
--- a/src/run/double_indexed.cpp
+++ b/src/run/double_indexed.cpp
@@ -111,7 +111,7 @@ void run_ref_chunk(Database_file &db_file,
 	const pair<size_t, size_t> len_bounds = ref_seqs::data_->len_bounds(shapes[0].length_);
 	ref_hst = Partitioned_histogram(*ref_seqs::data_, (unsigned)len_bounds.second);
 
-	ref_map.init((unsigned)ref_seqs::get().get_length());
+	ref_map.init(safe_cast<unsigned>(ref_seqs::get().get_length()));
 
 	timer.go("Allocating buffers");
 	char *ref_buffer = sorted_list::alloc_buffer(ref_hst);
@@ -120,16 +120,13 @@ void run_ref_chunk(Database_file &db_file,
 	timer_mapping.resume();
 	Trace_pt_buffer::instance = new Trace_pt_buffer(query_seqs::data_->get_length() / align_mode.query_contexts,
 		config.tmpdir,
-		config.mem_buffered());
+		config.query_bins);
 	timer.finish();
 	timer_mapping.stop();
 
-	for (unsigned i = 0; i<shapes.count(); ++i)
+	for (unsigned i = 0; i < shapes.count(); ++i)
 		process_shape(i, timer_mapping, query_chunk, query_buffer, ref_buffer);
 
-	/*timer.go("Closing temporary storage");
-	Trace_pt_buffer::instance->close();*/
-
 	timer.go("Deallocating buffers");
 	delete[] ref_buffer;
 
@@ -301,5 +298,7 @@ void master_thread_di()
 	verbose_stream << "Block size = " << (size_t)(config.chunk_size * 1e9) << endl;
 	Config::set_option(config.db_size, (uint64_t)ref_header.letters);
 
+	set_max_open_files(config.query_bins * config.threads_ + unsigned(ref_header.letters / (size_t)(config.chunk_size * 1e9)) + 16);
+
 	master_thread(db_file, timer_mapping, timer2);
 }
\ No newline at end of file
diff --git a/src/run/main.cpp b/src/run/main.cpp
index 5b8df8e..af4be63 100644
--- a/src/run/main.cpp
+++ b/src/run/main.cpp
@@ -29,6 +29,7 @@ using std::endl;
 void run_mapper();
 void master_thread_di();
 void model_seqs();
+void opt();
 
 int main(int ac, const char* av[])
 {
@@ -86,6 +87,9 @@ int main(int ac, const char* av[])
 		case Config::model_seqs:
 			model_seqs();
 			break;
+		case Config::opt:
+			opt();
+			break;
 		default:
 			return 1;
 		}
diff --git a/src/search/align_range.h b/src/search/align_range.h
index 1868ba8..efd76b9 100644
--- a/src/search/align_range.h
+++ b/src/search/align_range.h
@@ -52,6 +52,72 @@ inline int stage2_ungapped(const Letter *query, const Letter *subject, unsigned
 	return xdrop_ungapped(query, subject, shapes[sid].length_, delta, len);
 }
 
+
+inline unsigned popcount32(unsigned x)
+{
+#ifdef _MSC_VER
+	return __popcnt(x);
+#else
+	return __builtin_popcount(x);
+#endif
+}
+
+inline unsigned popcount64(unsigned long long x)
+{
+#ifdef _MSC_VER
+	return (unsigned)__popcnt64(x);
+#else
+	return __builtin_popcountll(x);
+#endif
+}
+
+#ifdef __SSE2__
+
+struct Byte_finger_print_48
+{
+	Byte_finger_print_48(const Letter *q) :
+		r1(_mm_loadu_si128((__m128i const*)(q - 16))),
+		r2(_mm_loadu_si128((__m128i const*)(q))),
+		r3(_mm_loadu_si128((__m128i const*)(q + 16)))
+	{}
+	static uint64_t match_block(__m128i x, __m128i y)
+	{
+		return (uint64_t)_mm_movemask_epi8(_mm_cmpeq_epi8(x, y));
+	}
+	unsigned match(const Byte_finger_print_48 &rhs) const
+	{
+		return popcount64(match_block(r3, rhs.r3) << 32 | match_block(r1, rhs.r1) << 16 | match_block(r2, rhs.r2));
+	}
+	__m128i r1, r2, r3;
+};
+
+#else
+
+struct Byte_finger_print_48
+{
+	Byte_finger_print_48()
+	{}
+	Byte_finger_print_48(const Letter *q)
+	{
+		//printf("%llx\n", q);
+		memcpy(r, q - 16, 48);
+	}
+	unsigned match(const Byte_finger_print_48 &rhs) const
+	{
+		unsigned n = 0;
+		for (unsigned i = 0; i < 48; ++i)
+			if (r[i] == rhs.r[i])
+				++n;
+		return n;
+	}
+	Letter r[48];
+	//char r[32];
+};
+
+#endif
+
+typedef Byte_finger_print_48 Finger_print;
+
 struct Range_ref
 {
 	Range_ref(vector<Finger_print>::const_iterator q_begin, vector<Finger_print>::const_iterator s_begin) :
diff --git a/src/search/search.cpp b/src/search/search.cpp
index 3b947e4..5506ab0 100644
--- a/src/search/search.cpp
+++ b/src/search/search.cpp
@@ -21,9 +21,6 @@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR P
 #include "sse_dist.h"
 
 Trace_pt_buffer* Trace_pt_buffer::instance;
-#ifdef __SSE2__
-TLS_PTR vector<sequence>* hit_filter::subjects_ptr;
-#endif
 
 TLS_PTR vector<Finger_print> *Seed_filter::vq_ptr, *Seed_filter::vs_ptr;
 TLS_PTR vector<Stage1_hit> *Seed_filter::hits_ptr;
diff --git a/src/search/setup.cpp b/src/search/setup.cpp
index 796122d..9381940 100644
--- a/src/search/setup.cpp
+++ b/src/search/setup.cpp
@@ -29,7 +29,7 @@ void setup_search_params(pair<size_t, size_t> query_len_bounds, size_t chunk_db_
 		Config::set_option(config.index_mode, 4u);
 		::shapes = shape_config(config.index_mode, config.shapes, config.shape_mask);
 		config.seed_anchor = std::min(::shapes[0].length_ - 1, 8u);
-		Config::set_option(config.min_identities, 6u);
+		Config::set_option(config.min_identities, 9u);
 		Config::set_option(config.min_ungapped_score, 19.0);
 		Config::set_option(config.window, 60u);
 		Config::set_option(config.hit_band, 8);
@@ -37,12 +37,13 @@ void setup_search_params(pair<size_t, size_t> query_len_bounds, size_t chunk_db_
 	}
 	else {
 
+		Config::set_option(config.min_identities, 11u);
 		if (query_len_bounds.second <= 40) {
-			Config::set_option(config.min_identities, 10u);
+			//Config::set_option(config.min_identities, 10u);
 			Config::set_option(config.min_ungapped_score, std::min(27.0, b));
 		}
 		else {
-			Config::set_option(config.min_identities, 9u);
+			//Config::set_option(config.min_identities, 9u);
 			Config::set_option(config.min_ungapped_score, std::min(23.0, b));
 		}
 
diff --git a/src/search/sse_dist.h b/src/search/sse_dist.h
index 3ae80b6..8e351f5 100644
--- a/src/search/sse_dist.h
+++ b/src/search/sse_dist.h
@@ -23,139 +23,6 @@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR P
 #include "../basic/value.h"
 #include "../util/simd.h"
 
-inline unsigned popcount32(unsigned x)
-{
-#ifdef _MSC_VER
-	return __popcnt(x);
-	//return (unsigned)popcount_3(x);
-#else
-	return __builtin_popcount(x);
-#endif
-}
-
-inline unsigned popcount64(unsigned long long x)
-{
-#ifdef _MSC_VER
-	return (unsigned)__popcnt64(x);
-#else
-	return __builtin_popcountll(x);
-#endif
-}
-
-struct Masked {};
-
-struct Int_finger_print
-{
-	Int_finger_print(const Letter *q) :
-		r1(*(uint64_t*)(q-8)),
-		r2(*(uint64_t*)(q)),
-		r3(*(uint64_t*)(q + 8)),
-		r4(*(uint64_t*)(q +16))
-	{ }
-	Int_finger_print(const Letter *q, Masked) :
-		r1(*(uint64_t*)(q - 8)),
-		r2(*(uint64_t*)(q)),
-		r3(*(uint64_t*)(q + 8)),
-		r4(*(uint64_t*)(q + 16))
-	{ }
-	unsigned match(const Int_finger_print &rhs) const
-	{
-		return popcount64(r1 & rhs.r1) + popcount64(r2 & rhs.r2) + popcount64(r3 & rhs.r3) + popcount64(r4 & rhs.r4);
-	}
-	uint64_t r1, r2,r3,r4;
-};
-
-#ifdef __SSE2__
-
-struct Byte_finger_print
-{
-	Byte_finger_print(const Letter *q) :
-		r1(_mm_loadu_si128((__m128i const*)(q - 8))),
-		r2(_mm_loadu_si128((__m128i const*)(q + 8)))
-	{ }
-	Byte_finger_print(const Letter *q, Masked):
-		r1 (_mm_and_si128(_mm_loadu_si128((__m128i const*)(q - 8)), _mm_set1_epi8('\x7f'))),
-		r2 (_mm_and_si128(_mm_loadu_si128((__m128i const*)(q + 8)), _mm_set1_epi8('\x7f')))
-	{ }
-	static unsigned match_block(__m128i x, __m128i y)
-	{
-		return _mm_movemask_epi8(_mm_cmpeq_epi8(x, y));
-	}
-	unsigned match(const Byte_finger_print &rhs) const
-	{
-		/*for (unsigned i = 0; i < 16; ++i)
-			cout << value_traits.alphabet[r1.m128i_u8[i]];
-		for (unsigned i = 0; i < 16; ++i)
-			cout << value_traits.alphabet[r2.m128i_u8[i]];
-		cout << endl;
-		for (unsigned i = 0; i < 16; ++i)
-			cout << value_traits.alphabet[rhs.r1.m128i_u8[i]];
-		for (unsigned i = 0; i < 16; ++i)
-			cout << value_traits.alphabet[rhs.r2.m128i_u8[i]];
-		cout << endl;*/
-		return popcount32(match_block(r1, rhs.r1) << 16 | match_block(r2, rhs.r2));
-	}
-	__m128i r1, r2;
-};
-
-#else
-
-struct Byte_finger_print
-{
-	Byte_finger_print()
-	{}
-	Byte_finger_print(const Letter *q)
-	{
-		//printf("%llx\n", q);
-		memcpy(r, q - 8, 32);
-	}
-	unsigned match(const Byte_finger_print &rhs) const
-	{
-		unsigned n = 0;
-		for (unsigned i = 0; i < 32; ++i)
-			if (r[i] == rhs.r[i])
-				++n;
-		return n;
-	}
-	Letter r[32];
-	//char r[32];
-};
-
-#endif
-
-struct Halfbyte_finger_print_naive
-{
-	Halfbyte_finger_print_naive(const Letter *q) :
-		r1(reduce(q-8)),
-		r2(reduce(q+8))
-	{ }
-	static unsigned match_block(uint64_t x, uint64_t y)
-	{
-		uint64_t v = ~(x ^ y);
-		v &= v >> 1;
-		v &= 0x5555555555555555LL;
-		v &= v >> 2;
-		v &= 0x1111111111111111LL;
-		return (unsigned)popcount64(v);
-	}
-	unsigned match(const Halfbyte_finger_print_naive &rhs) const
-	{
-		return match_block(r1, rhs.r1) + match_block(r2, rhs.r2);
-	}
-	static uint64_t reduce(const Letter *q)
-	{
-		uint64_t x;
-		for (unsigned i = 0; i < 16; ++i)
-			x = (x << 4) | reduction(q[i]);
-		return x;
-	}
-	static const Reduction reduction;
-	uint64_t r1, r2;
-};
-
-
-typedef Byte_finger_print Finger_print;
-
 #ifdef __SSSE3__
 inline __m128i reduce_seq_ssse3(const __m128i &seq)
 {
diff --git a/src/search/stage2.cpp b/src/search/stage2.cpp
index 99cbcbd..338742d 100644
--- a/src/search/stage2.cpp
+++ b/src/search/stage2.cpp
@@ -25,6 +25,39 @@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR P
 #include "collision.h"
 
 #ifdef __SSE2__
+TLS_PTR vector<sequence>* hit_filter::subjects_ptr;
+#endif
+
+unsigned count_id(const Letter *query, const Letter *subject)
+{
+	static const Reduction reduction("KR E D Q N C G H LM FY VI W P S T A"); // murphy.10
+	unsigned n = 0;
+	for (int i = -1; i >= -((int)config.id_left) && query[i] != '\xff' && subject[i] != '\xff'; --i)
+		if (reduction(query[i]) == reduction(subject[i]))
+			++n;
+	for (unsigned i = 0; i < config.id_right && query[i] != '\xff' && subject[i] != '\xff'; ++i)
+		if (reduction(query[i]) == reduction(subject[i]))
+			++n;
+	return n;
+}
+
+int extend_binary(const Letter *query, const Letter *subject)
+{
+	int s = 0, t = 0;
+	for (int i = -1; i >= -60 && query[i] != '\xff' && subject[i] != '\xff'; --i) {
+		t += query[i] == subject[i] ? config.bmatch : -config.bmismatch;
+		s = std::max(s, t);
+	}
+	int sr = 0;
+	t = 0;
+	for (unsigned i = 0; i < 60 && query[i] != '\xff' && subject[i] != '\xff'; ++i) {
+		t += query[i] == subject[i] ? config.bmatch : -config.bmismatch;
+		sr = std::max(sr, t);
+	}
+	return s + sr;
+}
+
+#ifdef __SSE2__
 
 void search_query_offset(Loc q,
 	const sorted_list::const_iterator &s,
@@ -40,8 +73,14 @@ void search_query_offset(Loc q,
 	for (vector<Stage1_hit>::const_iterator i = hits; i < hits_end; ++i) {
 		const Loc s_pos = s[i->s];
 		const Letter* subject = ref_seqs::data_->data(s_pos);
-		/*cout << sequence(query, 32) << endl;
-		cout << sequence(subject, 32) << endl;*/
+
+		/*if (count_id(query, subject) < config.id_n)
+			continue;*/
+
+		/*if (extend_binary(query, subject) < config.bcutoff)
+			continue;*/
+
+		stats.inc(Statistics::TENTATIVE_MATCHESX);
 
 		unsigned delta, len;
 		int score;
diff --git a/src/search/trace_pt_buffer.h b/src/search/trace_pt_buffer.h
index 3908f07..a73b527 100644
--- a/src/search/trace_pt_buffer.h
+++ b/src/search/trace_pt_buffer.h
@@ -97,10 +97,9 @@ struct hit
 
 struct Trace_pt_buffer : public Async_buffer<hit>
 {
-	Trace_pt_buffer(size_t input_size, const string &tmpdir, bool mem_buffered):
-		Async_buffer<hit> (input_size, tmpdir, mem_buffered ? mem_bins : file_bins)
-	{ }
-	enum { mem_bins = 1, file_bins = 4 };
+	Trace_pt_buffer(size_t input_size, const string &tmpdir, unsigned query_bins):
+		Async_buffer<hit>(input_size, tmpdir, query_bins)
+	{}
 	static Trace_pt_buffer *instance;
 };
 
diff --git a/src/util/async_buffer.h b/src/util/async_buffer.h
index afc0956..38a97e4 100644
--- a/src/util/async_buffer.h
+++ b/src/util/async_buffer.h
@@ -1,5 +1,5 @@
 /****
-Copyright (c) 2014, University of Tuebingen
+Copyright (c) 2013-2017, Benjamin Buchfink, University of Tuebingen
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
@@ -14,8 +14,6 @@ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-****
-Author: Benjamin Buchfink
 ****/
 
 #ifndef ASYNC_BUFFER_H_
@@ -30,61 +28,57 @@ using std::vector;
 using std::string;
 using std::endl;
 
-const unsigned async_buffer_max_bins = 4;
-
 template<typename _t>
 struct Async_buffer
 {
 
 	typedef vector<_t> Vector;
 
-	Async_buffer(size_t input_count, const string &tmpdir, unsigned bins):
-		bins_ (bins),
-		bin_size_ ((input_count + bins_ - 1) / bins_)
+	Async_buffer(size_t input_count, const string &tmpdir, unsigned bins) :
+		bins_(bins),
+		bin_size_((input_count + bins_ - 1) / bins_)
 	{
 		log_stream << "Async_buffer() " << input_count << ',' << bin_size_ << endl;
-		for(unsigned j=0;j<config.threads_;++j)
-			for(unsigned i=0;i<bins;++i) {
-				tmp_file_.push_back(Temp_file ());
+		for (unsigned j = 0; j < config.threads_; ++j)
+			for (unsigned i = 0; i < bins; ++i) {
+				tmp_file_.push_back(Temp_file());
 				size_.push_back(0);
 			}
 	}
 
 	struct Iterator
 	{
-		Iterator(Async_buffer &parent, unsigned thread_num):
-			parent_ (parent),
-			thread_num_ (thread_num)
+		Iterator(Async_buffer &parent, unsigned thread_num) :
+			buffer_(parent.bins()),
+			parent_(parent),
+			thread_num_(thread_num)
 		{
-			for(unsigned i=0;i<parent.bins_;++i) {
-				size_[i] = 0;
-				out_[i] = parent.get_out(thread_num_, i);
-			}
+			for (unsigned i = 0; i < parent.bins_; ++i)
+				out_.push_back(parent.get_out(thread_num_, i));
 		}
 		void push(const _t &x)
 		{
 			const unsigned bin = (unsigned)(x / parent_.bin_size_);
 			assert(bin < parent_.bins());
-			buffer_[bin*buffer_size+(size_[bin]++)] = x;
-			if(size_[bin] == buffer_size)
+			buffer_[bin].push_back(x);
+			if (buffer_[bin].size() == buffer_size)
 				flush(bin);
 		}
 		void flush(unsigned bin)
 		{
-			out_[bin]->typed_write(&buffer_[bin*buffer_size], size_[bin]);
-			parent_.add_size(thread_num_, bin, size_[bin]);
-			size_[bin] = 0;
+			out_[bin]->typed_write(buffer_[bin].data(), buffer_[bin].size());
+			parent_.add_size(thread_num_, bin, buffer_[bin].size());
+			buffer_[bin].clear();
 		}
 		~Iterator()
 		{
-			for(unsigned bin=0;bin<parent_.bins_;++bin)
+			for (unsigned bin = 0; bin < parent_.bins_; ++bin)
 				flush(bin);
 		}
 	private:
 		enum { buffer_size = 65536 };
-		_t buffer_[async_buffer_max_bins*buffer_size];
-		size_t size_[async_buffer_max_bins];
-		Temp_file* out_[async_buffer_max_bins];
+		vector<vector<_t> > buffer_;
+		vector<Temp_file*> out_;
 		Async_buffer &parent_;
 		const unsigned thread_num_;
 	};
@@ -97,13 +91,13 @@ struct Async_buffer
 		size_t size = 0;
 		for (unsigned i = 0; i < config.threads_; ++i)
 			size += size_[i*bins_ + bin];
-		log_stream << "Async_buffer.load() " << size << "(" << (double)size*sizeof(_t)/(1<<30) << " GB)" << endl;
+		log_stream << "Async_buffer.load() " << size << "(" << (double)size*sizeof(_t) / (1 << 30) << " GB)" << endl;
 		total_size += size;
 		data.resize(size);
 		_t* ptr = data.data();
-		for(unsigned i=0;i<config.threads_;++i) {
-			Input_stream f (tmp_file_[i*bins_+bin]);
-			const size_t s = size_[i*bins_+bin];
+		for (unsigned i = 0; i < config.threads_; ++i) {
+			Input_stream f(tmp_file_[i*bins_ + bin]);
+			const size_t s = size_[i*bins_ + bin];
 			const size_t n = f.read(ptr, s);
 			ptr += s;
 			f.close_and_delete();
@@ -114,15 +108,21 @@ struct Async_buffer
 	}
 
 	unsigned bins() const
-	{ return bins_; }
+	{
+		return bins_;
+	}
 
 private:
 
 	Temp_file* get_out(unsigned threadid, unsigned bin)
-	{ return &tmp_file_[threadid*bins_+bin]; }
+	{
+		return &tmp_file_[threadid*bins_ + bin];
+	}
 
 	void add_size(unsigned thread_id, unsigned bin, size_t n)
-	{ size_[thread_id*bins_+bin] += n; }
+	{
+		size_[thread_id*bins_ + bin] += n;
+	}
 
 	const unsigned bins_;
 	const size_t bin_size_;
diff --git a/src/util/binary_file.h b/src/util/binary_file.h
index 4769f27..16e9ada 100644
--- a/src/util/binary_file.h
+++ b/src/util/binary_file.h
@@ -191,7 +191,7 @@ struct Input_stream
 	template<class _t>
 	size_t read(_t *ptr, size_t count)
 	{
-		return read_bytes((char*)ptr, count*sizeof(_t))/sizeof(_t);
+		return read_bytes((char*)ptr, count*sizeof(_t)) / sizeof(_t);
 	}
 
 	template<class _t>
@@ -200,6 +200,7 @@ struct Input_stream
 		size_t size;
 		if (read(&size, 1) != 1)
 			throw File_read_exception(file_name);
+		v.clear();
 		v.resize(size);
 		if (read(v.data(), size) != size)
 			throw File_read_exception(file_name);
diff --git a/src/util/double_buffer.h b/src/util/double_buffer.h
index 21009df..4090a8d 100644
--- a/src/util/double_buffer.h
+++ b/src/util/double_buffer.h
@@ -33,6 +33,7 @@ struct Double_buffer
 	inline void init(size_t size, size_t padding, size_t padding_front, _t init)
 	{
 		const size_t total = size + padding + padding_front;
+		data_.clear();
 		data_.resize(total*2);
 		ptr1 = &data_[padding_front];
 		ptr2 = &data_[total+padding_front];
diff --git a/src/util/log_stream.h b/src/util/log_stream.h
index 9675726..22525dc 100644
--- a/src/util/log_stream.h
+++ b/src/util/log_stream.h
@@ -22,20 +22,28 @@ Author: Benjamin Buchfink
 #define LOG_STREAM_H_
 
 #include <iostream>
+#include <fstream>
 #include "Timer.h"
+#include "tinythread.h"
 
 using std::endl;
 
 struct Message_stream
 {
-	Message_stream(bool to_cout=true):
-		to_cout_ (to_cout)
+	Message_stream(bool to_cout = true, bool to_file = false) :
+		to_cout_(to_cout),
+		to_file_(to_file)
 	{}
 	template<typename _t>
 	Message_stream& operator<<(const _t& x)
 	{
 		if(to_cout_)
 			std::cout << x;
+		if (to_file_) {
+			std::ofstream f("diamond.log", std::ios_base::out | std::ios_base::app);
+			f << x;
+			f.close();
+		}
 		return *this;
 	}
 	//Message_stream& operator<<(std::ostream & (__cdecl *_Pfn)(std::ostream&))
@@ -43,10 +51,16 @@ struct Message_stream
 	{
 		if(to_cout_)
 			((*_Pfn)(std::cout));
+		if (to_file_) {
+			std::ofstream f("diamond.log", std::ios_base::out | std::ios_base::app);
+			((*_Pfn)(f));
+			f.close();
+		}
 		return *this;
 	}
+	static tthread::mutex mtx;
 private:
-	bool to_cout_;
+	bool to_cout_, to_file_;
 };
 
 extern Message_stream message_stream;
diff --git a/src/util/system.h b/src/util/system.h
index 94651ca..6d507ad 100644
--- a/src/util/system.h
+++ b/src/util/system.h
@@ -21,6 +21,8 @@ Author: Benjamin Buchfink
 #ifndef SYSTEM_H_
 #define SYSTEM_H_
 
+#include <stdexcept>
+
 #ifdef _WIN32
 #define cpuid(info,x)    __cpuidex(info,x,0)
 #else
@@ -50,4 +52,30 @@ inline void cpuid(int CPUInfo[4],int InfoType) {
 
 #endif
 
+#ifndef _MSC_VER
+
+#include <sys/resource.h>
+
+inline void set_max_open_files(unsigned n)
+{
+	rlimit rlp;
+	if (getrlimit(RLIMIT_NOFILE, &rlp) != 0)
+		throw std::runtime_error("Error executing getrlimit.");
+	if (rlp.rlim_max < n)
+		throw std::runtime_error("Open files hard limit is too low. Set lower value for --bin parameter.");
+	if (rlp.rlim_cur < n) {
+		rlp.rlim_cur = n;
+		if (setrlimit(RLIMIT_NOFILE, &rlp) != 0)
+			throw std::runtime_error("Error executing setrlimit.");
+	}
+	//std::cout << "Soft limit = " << rlp.rlim_cur << " Hard limit = " << rlp.rlim_max << std::endl;
+}
+
+#else
+
+inline void set_max_open_files(unsigned n)
+{}
+
+#endif
+
 #endif /* SYSTEM_H_ */
diff --git a/src/util/tinythread.cpp b/src/util/tinythread.cpp
index 5ce1319..813a3bc 100644
--- a/src/util/tinythread.cpp
+++ b/src/util/tinythread.cpp
@@ -24,6 +24,7 @@ freely, subject to the following restrictions:
 #include <exception>
 #include <iostream>
 #include "tinythread.h"
+#include "log_stream.h"
 
 #if defined(_TTHREAD_POSIX_)
   #include <unistd.h>
@@ -171,6 +172,7 @@ void * thread::wrapper_function(void * aArg)
   }
   catch (std::exception &e) {
 	  std::cerr << "Error: " << e.what() << std::endl;
+	  log_stream << "Error: " << e.what() << std::endl;
 	  std::terminate();
   }
   catch(...)
diff --git a/src/util/util.cpp b/src/util/util.cpp
index b8ecc36..1c4463c 100644
--- a/src/util/util.cpp
+++ b/src/util/util.cpp
@@ -32,6 +32,7 @@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR P
 Message_stream message_stream;
 Message_stream verbose_stream (false);
 Message_stream log_stream (false);
+tthread::mutex Message_stream::mtx;
 
 const Complexity_filter Complexity_filter::instance;
 TLS_PTR vector<Ptr_wrapper_base*> *TLS::ptr_;
diff --git a/src/util/util.h b/src/util/util.h
index 2407bd9..6a4c654 100644
--- a/src/util/util.h
+++ b/src/util/util.h
@@ -28,6 +28,8 @@ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR P
 #include <math.h>
 #include <ctype.h>
 #include <string>
+#include <limits>
+#include <stdexcept>
 #include "simd.h"
 #include "../basic/const.h"
 #include "../basic/packed_loc.h"
@@ -392,8 +394,10 @@ struct Matrix
 	void init(int rows, int cols, bool reset = false)
 	{
 		cols_ = cols;
-		if (!reset)
+		if (!reset) {
+			data_.clear();
 			data_.resize(rows*cols);
+		}
 		else {
 			data_.clear();
 			data_.insert(data_.begin(), rows*cols, _t());
@@ -539,4 +543,12 @@ inline void print_binary(uint64_t x)
 	}
 }
 
+template<typename _t>
+inline _t safe_cast(size_t x)
+{
+	if (x > (size_t)std::numeric_limits<_t>::max())
+		throw std::runtime_error("Integer value out of bounds.");
+	return (_t)x;
+}
+
 #endif /* UTIL_H_ */

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/diamond-aligner.git



More information about the debian-med-commit mailing list