[med-svn] [last-align] 01/04: New upstream version 869

Sascha Steinbiss satta at debian.org
Fri Aug 11 19:32:40 UTC 2017


This is an automated email from the git hooks/post-receive script.

satta pushed a commit to branch master
in repository last-align.

commit 00a898dda28bae53f453bbe0aaefd78c48ac13c2
Author: Sascha Steinbiss <satta at debian.org>
Date:   Fri Aug 11 15:16:56 2017 -0400

    New upstream version 869
---
 ChangeLog.txt                          | 198 +++++++++++++-
 doc/last-dotplot.html                  | 126 ++++++++-
 doc/last-dotplot.txt                   |  99 +++++--
 doc/last-papers.html                   |   5 +
 doc/last-papers.txt                    |   8 +
 doc/last-train.html                    |  24 +-
 doc/last-train.txt                     |  21 +-
 doc/last-tutorial.html                 |  28 +-
 doc/last-tutorial.txt                  |  27 +-
 doc/lastal.html                        |  12 +-
 doc/lastal.txt                         |  12 +-
 doc/lastdb.html                        |   2 +-
 doc/lastdb.txt                         |   2 +-
 scripts/last-dotplot                   | 463 ++++++++++++++++++++++++---------
 scripts/last-postmask                  |   8 +-
 scripts/last-train                     | 129 ++++++---
 scripts/maf-convert                    |  52 ++--
 src/Alignment.cc                       |  25 +-
 src/AlignmentWrite.cc                  |   5 +-
 src/Centroid.cc                        |  52 ++--
 src/Centroid.hh                        |  13 +-
 src/LastEvaluer.cc                     |  17 +-
 src/LastalArguments.cc                 |  25 +-
 src/LastalArguments.hh                 |   1 +
 src/LastdbArguments.cc                 |  18 +-
 src/alp/sls_alignment_evaluer.cpp      |  14 +-
 src/alp/sls_alignment_evaluer.hpp      |   4 +-
 src/alp/sls_alp_data.cpp               |   9 +-
 src/alp/sls_alp_data.hpp               |   4 +
 src/alp/sls_basic.hpp                  |   8 +-
 src/alp/sls_falp_alignment_evaluer.cpp |  10 -
 src/alp/sls_fsa1_pvalues.cpp           |  54 +---
 src/alp/sls_fsa1_pvalues.hpp           |  21 --
 src/alp/sls_pvalues.cpp                |  54 +---
 src/alp/sls_pvalues.hpp                |  20 --
 src/getoptUtil.hh                      |  17 ++
 src/lastal.cc                          |  13 +-
 src/makefile                           |   4 +-
 src/split/cbrc_split_aligner.hh        |   2 +-
 src/split/last-split.cc                |  15 +-
 src/version.hh                         |   2 +-
 41 files changed, 1113 insertions(+), 510 deletions(-)

diff --git a/ChangeLog.txt b/ChangeLog.txt
index 4b51ca3..2203b24 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -1,9 +1,205 @@
+2017-06-20  Martin C. Frith  <Martin C. Frith>
+
+	* doc/last-train.txt, scripts/last-train, test/last-train-test.out,
+	test/last-train-test.sh:
+	Make it easier to feed stdin/pipe to last-train
+	[b73f1146e688] [tip]
+
+	* doc/lastal.txt, src/AlignmentWrite.cc, test/last-test.out:
+	Add raw score to BlastTab+ format
+	[56f960a740a3]
+
+2017-06-15  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/last-postmask, scripts/last-train:
+	Make last-train read sequence file "-" from stdin
+	[f9ec9c71d72e]
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	Change last-dotplot's verbosity
+	[5182d8528ce9]
+
+	* src/LastEvaluer.cc, src/alp/sls_alignment_evaluer.cpp,
+	src/alp/sls_alignment_evaluer.hpp, src/alp/sls_alp_data.cpp,
+	src/alp/sls_alp_data.hpp:
+	Enable E-values for some unusual scoring schemes
+	[230d1abe4c5d]
+
+2017-06-02  Martin C. Frith  <Martin C. Frith>
+
+	* doc/last-papers.txt, doc/last-tutorial.txt:
+	Update docs
+	[c15cd2ae062d]
+
+	* scripts/last-dotplot:
+	last-dotplot: get bp-per-pixel faster
+	[6a4915d5b5cb]
+
+	* src/LastalArguments.cc, src/LastdbArguments.cc, src/getoptUtil.hh,
+	src/makefile:
+	Fix parsing of command-line options
+	[9afba231262d]
+
+2017-05-18  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/last-dotplot:
+	last-dotplot: do e.g. chr7 annotations -> hg19.chr7 only if no
+	ambiguity
+	[a75b439f0cf2]
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	last-dotplot: add genePred and rmsk options
+	[6e8ff424ce2b]
+
+	* scripts/last-dotplot:
+	last-dotplot: refactor
+	[1ecdbf2ddb78]
+
+	* scripts/last-dotplot:
+	last-dotplot: get layer from BED score
+	[93d47bb8527f]
+
+	* scripts/last-dotplot:
+	last-dotplot: start implementing "layers"
+	[5e4e3f2cf68b]
+
+	* scripts/last-dotplot:
+	last-dotplot: parse BED more liberally
+	[d086361eabec]
+
+2017-05-17  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/last-dotplot:
+	Refactor last-dotplot
+	[09a9d7ef13ae]
+
+2017-05-15  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/maf-convert:
+	Make maf-convert to tab faster
+	[0d11825c43b2]
+
+	* doc/last-train.txt, scripts/last-train:
+	Add lastal's -C option to last-train
+	[791ec91ac03c]
+
+2017-05-02  Martin C. Frith  <Martin C. Frith>
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	Add border options to last-dotplot
+	[f3b6c666afad]
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	Add sequence order options to last-dotplot
+	[5b2acb7fdb3e]
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	Tweak last-dotplot's documentation
+	[db0ddc92f2ec]
+
+2017-04-18  Martin C. Frith  <Martin C. Frith>
+
+	* src/Alignment.cc, src/Centroid.cc, src/Centroid.hh:
+	Refactor
+	[cb934e010567]
+
+	* doc/last-train.txt, scripts/last-train, test/last-train-test.out,
+	test/last-train-test.sh:
+	Improve last-train sampling for long queries, e.g. chromosomes
+	[7c09e0d848f2]
+
+2017-04-04  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/last-dotplot:
+	Made last-dotplot print width & height.
+	[3ca7aa4b27a8]
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	Added last-dotplot options to show sequence lengths.
+	[1f46ab956351]
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	Added last-dotplot options to show BED features.
+	[16060c00b129]
+
+	* scripts/last-dotplot:
+	Made last-dotplot faster, sometimes.
+	[23de4eb3be1d]
+
+2017-03-09  Martin C. Frith  <Martin C. Frith>
+
+	* src/alp/sls_basic.hpp, src/split/cbrc_split_aligner.hh:
+	Fixed compile errors with old compilers.
+	[7872fdade23a]
+
+2017-03-07  Martin C. Frith  <Martin C. Frith>
+
+	* doc/lastal.txt, src/LastalArguments.cc, src/LastalArguments.hh,
+	src/lastal.cc, test/last-test.out, test/last-test.sh:
+	Added lastal -N option to quickly get the first hits per query.
+	[b69999b5cd1b]
+
+	* src/alp/sls_alignment_evaluer.cpp, src/alp/sls_basic.hpp,
+	src/alp/sls_falp_alignment_evaluer.cpp,
+	src/alp/sls_fsa1_pvalues.cpp, src/alp/sls_fsa1_pvalues.hpp,
+	src/alp/sls_pvalues.cpp, src/alp/sls_pvalues.hpp:
+	Bugfix: E-values were occasionally negative.
+	[681c5acdf5ca]
+
+2017-03-02  Martin C. Frith  <Martin C. Frith>
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	Enabled sequence ranges for last-dotplot.
+	[85a72978fb7d]
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	Added trim options to last-dotplot.
+	[bbc6f00e683b]
+
+	* scripts/last-dotplot:
+	Refactoring.
+	[7b972338fe04]
+
+2017-02-28  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/last-dotplot:
+	Refactoring.
+	[2c3511fa1734]
+
+	* scripts/last-dotplot:
+	Refactoring.
+	[1b4db1600728]
+
+	* scripts/last-dotplot:
+	Refactoring.
+	[cdb1e40c28e4]
+
+	* doc/last-papers.txt, scripts/last-dotplot:
+	Bugfix: dotplot left border misplaced (for some versions of
+	python/PIL).
+	[1514199bb31b]
+
+2017-02-03  Martin C. Frith  <Martin C. Frith>
+
+	* doc/lastdb.txt, src/LastdbArguments.cc:
+	Tiny doc/help edits.
+	[ec636cb2fe64]
+
+	* src/split/last-split.cc:
+	Bugfix: rarely, last-split output end-gaps with wrong score/don/acc.
+	[ff7752714cef]
+
+	* scripts/maf-convert, test/90089.maf, test/maf-convert-test.out, test
+	/maf-convert-test.sh:
+	Bugfix(?): made maf-convert to psl trim end-gaps.
+	[c5591feba7a2]
+
 2017-01-19  Martin C. Frith  <Martin C. Frith>
 
 	* src/split/cbrc_split_aligner.cc, src/split/cbrc_split_aligner.hh,
 	src/split/last-split.cc:
 	Added don and acc to spliced alignment output.
-	[138b43db3812] [tip]
+	[138b43db3812]
 
 	* src/split/cbrc_split_aligner.cc, src/split/last-split.cc:
 	Refactoring.
diff --git a/doc/last-dotplot.html b/doc/last-dotplot.html
index 7f03a03..e1717a0 100644
--- a/doc/last-dotplot.html
+++ b/doc/last-dotplot.html
@@ -342,10 +342,10 @@ sequences with names like "chrUn_random522" like this:</p>
 <pre class="literal-block">
 last-dotplot -1 'chr[!U]*' -2 'chr[!U]*' alns alns.png
 </pre>
-<p>Option "-1" selects sequences from the 1st genome, and "-2" selects
-sequences from the 2nd genome.  'chr[!U]*' is a <em>pattern</em> that
-specifies names starting with "chr", followed by any character except
-U, followed by anything.</p>
+<p>Option "-1" selects sequences from the 1st (horizontal) genome, and
+"-2" selects sequences from the 2nd (vertical) genome.  'chr[!U]*' is
+a <em>pattern</em> that specifies names starting with "chr", followed by any
+character except U, followed by anything.</p>
 <table border="1" class="docutils">
 <colgroup>
 <col width="26%" />
@@ -376,6 +376,11 @@ names starting in "chr" followed by one or two characters:</p>
 <pre class="literal-block">
 last-dotplot -1 'chr?' -1 'chr??' alns alns.png
 </pre>
+<p>You can also specify a sequence range; for example this gets the first
+1000 bases of chr9:</p>
+<pre class="literal-block">
+last-dotplot -1 chr9:0-1000 alns alns.png
+</pre>
 </div>
 <div class="section" id="options">
 <h2>Options</h2>
@@ -388,11 +393,14 @@ last-dotplot -1 'chr?' -1 'chr??' alns alns.png
 <kbd><span class="option">-h</span>, <span class="option">--help</span></kbd></td>
 <td>Show a help message, with default option values, and exit.</td></tr>
 <tr><td class="option-group">
+<kbd><span class="option">-v</span>, <span class="option">--verbose</span></kbd></td>
+<td>Show progress messages & data about the plot.</td></tr>
+<tr><td class="option-group">
 <kbd><span class="option">-1 <var>PATTERN</var></span>, <span class="option">--seq1=<var>PATTERN</var></span></kbd></td>
-<td>Which sequences to show from the 1st genome.</td></tr>
+<td>Which sequences to show from the 1st (horizontal) genome.</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-2 <var>PATTERN</var></span>, <span class="option">--seq2=<var>PATTERN</var></span></kbd></td>
-<td>Which sequences to show from the 2nd genome.</td></tr>
+<td>Which sequences to show from the 2nd (vertical) genome.</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-x <var>WIDTH</var></span>, <span class="option">--width=<var>WIDTH</var></span></kbd></td>
 <td>Maximum width in pixels.</td></tr>
@@ -400,20 +408,114 @@ last-dotplot -1 'chr?' -1 'chr??' alns alns.png
 <kbd><span class="option">-y <var>HEIGHT</var></span>, <span class="option">--height=<var>HEIGHT</var></span></kbd></td>
 <td>Maximum height in pixels.</td></tr>
 <tr><td class="option-group">
+<kbd><span class="option">-c <var>COLOR</var></span>, <span class="option">--forwardcolor=<var>COLOR</var></span></kbd></td>
+<td>Color for forward alignments.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">-r <var>COLOR</var></span>, <span class="option">--reversecolor=<var>COLOR</var></span></kbd></td>
+<td>Color for reverse alignments.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--sort1=<var>N</var></span></kbd></td>
+<td>Put the 1st genome's sequences left-to-right in order of: their
+appearance in the input (0), their names (1), their lengths (2).</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--sort2=<var>N</var></span></kbd></td>
+<td>Put the 2nd genome's sequences top-to-bottom in order of: their
+appearance in the input (0), their names (1), their lengths (2).</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--trim1</span></kbd></td>
+<td>Trim unaligned sequence flanks from the 1st (horizontal) genome.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--trim2</span></kbd></td>
+<td>Trim unaligned sequence flanks from the 2nd (vertical) genome.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--border-pixels=<var>INT</var></span></kbd></td>
+<td>Number of pixels between sequences.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--border-color=<var>COLOR</var></span></kbd></td>
+<td>Color for pixels between sequences.</td></tr>
+</tbody>
+</table>
+</blockquote>
+<div class="section" id="text-options">
+<h3>Text options</h3>
+<blockquote>
+<table class="docutils option-list" frame="void" rules="none">
+<col class="option" />
+<col class="description" />
+<tbody valign="top">
+<tr><td class="option-group">
 <kbd><span class="option">-f <var>FILE</var></span>, <span class="option">--fontfile=<var>FILE</var></span></kbd></td>
 <td>TrueType or OpenType font file.</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-s <var>SIZE</var></span>, <span class="option">--fontsize=<var>SIZE</var></span></kbd></td>
 <td>TrueType or OpenType font size.</td></tr>
 <tr><td class="option-group">
-<kbd><span class="option">-c <var>COLOR</var></span>, <span class="option">--forwardcolor=<var>COLOR</var></span></kbd></td>
-<td>Color for forward alignments.</td></tr>
+<kbd><span class="option">--lengths1</span></kbd></td>
+<td>Show sequence lengths for the 1st (horizontal) genome.</td></tr>
 <tr><td class="option-group">
-<kbd><span class="option">-r <var>COLOR</var></span>, <span class="option">--reversecolor=<var>COLOR</var></span></kbd></td>
-<td>Color for reverse alignments.</td></tr>
+<kbd><span class="option">--lengths2</span></kbd></td>
+<td>Show sequence lengths for the 2nd (vertical) genome.</td></tr>
 </tbody>
 </table>
 </blockquote>
+</div>
+<div class="section" id="annotation-options">
+<h3>Annotation options</h3>
+<p>These options read annotations of sequence segments, and draw them as
+colored horizontal or vertical stripes.  This looks good only if the
+annotations are reasonably sparse: e.g. you can't sensibly view 20000
+gene annotations in one small dotplot.</p>
+<blockquote>
+<table class="docutils option-list" frame="void" rules="none">
+<col class="option" />
+<col class="description" />
+<tbody valign="top">
+<tr><td class="option-group">
+<kbd><span class="option">--bed1=<var>FILE</var></span></kbd></td>
+<td>Read <a class="reference external" href="https://genome.ucsc.edu/FAQ/FAQformat.html#format1">BED-format</a>
+annotations for the 1st genome.  They are drawn as stripes, with
+coordinates given by the first three BED fields.  The color is
+specified by the RGB field if present, else pale red if the
+strand is "+", pale blue if "-", or pale purple.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--bed2=<var>FILE</var></span></kbd></td>
+<td>Read BED-format annotations for the 2nd genome.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--rmsk1=<var>FILE</var></span></kbd></td>
+<td>Read repeat annotations for the 1st genome, in RepeatMasker .out
+or rmsk.txt format.  The color is pale purple for "low
+complexity" and "simple repeats", else pale red for "+" strand
+and pale blue for "-" strand.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--rmsk2=<var>FILE</var></span></kbd></td>
+<td>Read repeat annotations for the 2nd genome.</td></tr>
+</tbody>
+</table>
+</blockquote>
+</div>
+<div class="section" id="gene-options">
+<h3>Gene options</h3>
+<blockquote>
+<table class="docutils option-list" frame="void" rules="none">
+<col class="option" />
+<col class="description" />
+<tbody valign="top">
+<tr><td class="option-group">
+<kbd><span class="option">--genePred1=<var>FILE</var></span></kbd></td>
+<td>Read gene annotations for the 1st genome in <a class="reference external" href="https://genome.ucsc.edu/FAQ/FAQformat.html#format9">genePred format</a>.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--genePred2=<var>FILE</var></span></kbd></td>
+<td>Read gene annotations for the 2nd genome.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--exon-color=<var>COLOR</var></span></kbd></td>
+<td>Color for exons.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--cds-color=<var>COLOR</var></span></kbd></td>
+<td>Color for protein-coding regions.</td></tr>
+</tbody>
+</table>
+</blockquote>
+</div>
 <div class="section" id="unsequenced-gap-options">
 <h3>Unsequenced gap options</h3>
 <p>Note: these "gaps" are <em>not</em> alignment gaps (indels): they are regions
@@ -441,6 +543,10 @@ of unknown sequence.</p>
 <p>An unsequenced gap will be shown only if it covers at least one whole
 pixel.</p>
 </div>
+<div class="section" id="colors">
+<h3>Colors</h3>
+<p>Colors can be specified in <a class="reference external" href="http://effbot.org/imagingbook/imagecolor.htm">various ways described here</a>.</p>
+</div>
 </div>
 </div>
 </body>
diff --git a/doc/last-dotplot.txt b/doc/last-dotplot.txt
index 6b093a2..8642e59 100644
--- a/doc/last-dotplot.txt
+++ b/doc/last-dotplot.txt
@@ -27,10 +27,10 @@ sequences with names like "chrUn_random522" like this::
 
   last-dotplot -1 'chr[!U]*' -2 'chr[!U]*' alns alns.png
 
-Option "-1" selects sequences from the 1st genome, and "-2" selects
-sequences from the 2nd genome.  'chr[!U]*' is a *pattern* that
-specifies names starting with "chr", followed by any character except
-U, followed by anything.
+Option "-1" selects sequences from the 1st (horizontal) genome, and
+"-2" selects sequences from the 2nd (vertical) genome.  'chr[!U]*' is
+a *pattern* that specifies names starting with "chr", followed by any
+character except U, followed by anything.
 
 ==========  =============================
 Pattern     Meaning
@@ -49,35 +49,94 @@ names starting in "chr" followed by one or two characters::
 
   last-dotplot -1 'chr?' -1 'chr??' alns alns.png
 
+You can also specify a sequence range; for example this gets the first
+1000 bases of chr9::
+
+  last-dotplot -1 chr9:0-1000 alns alns.png
+
 Options
 -------
 
   -h, --help
       Show a help message, with default option values, and exit.
-
+  -v, --verbose
+      Show progress messages & data about the plot.
   -1 PATTERN, --seq1=PATTERN
-      Which sequences to show from the 1st genome.
-
+      Which sequences to show from the 1st (horizontal) genome.
   -2 PATTERN, --seq2=PATTERN
-      Which sequences to show from the 2nd genome.
-
+      Which sequences to show from the 2nd (vertical) genome.
   -x WIDTH, --width=WIDTH
       Maximum width in pixels.
-
   -y HEIGHT, --height=HEIGHT
       Maximum height in pixels.
+  -c COLOR, --forwardcolor=COLOR
+      Color for forward alignments.
+  -r COLOR, --reversecolor=COLOR
+      Color for reverse alignments.
+  --sort1=N
+      Put the 1st genome's sequences left-to-right in order of: their
+      appearance in the input (0), their names (1), their lengths (2).
+  --sort2=N
+      Put the 2nd genome's sequences top-to-bottom in order of: their
+      appearance in the input (0), their names (1), their lengths (2).
+  --trim1
+      Trim unaligned sequence flanks from the 1st (horizontal) genome.
+  --trim2
+      Trim unaligned sequence flanks from the 2nd (vertical) genome.
+  --border-pixels=INT
+      Number of pixels between sequences.
+  --border-color=COLOR
+      Color for pixels between sequences.
+
+Text options
+~~~~~~~~~~~~
 
   -f FILE, --fontfile=FILE
       TrueType or OpenType font file.
-
   -s SIZE, --fontsize=SIZE
       TrueType or OpenType font size.
-
-  -c COLOR, --forwardcolor=COLOR
-      Color for forward alignments.
-
-  -r COLOR, --reversecolor=COLOR
-      Color for reverse alignments.
+  --lengths1
+      Show sequence lengths for the 1st (horizontal) genome.
+  --lengths2
+      Show sequence lengths for the 2nd (vertical) genome.
+
+Annotation options
+~~~~~~~~~~~~~~~~~~
+
+These options read annotations of sequence segments, and draw them as
+colored horizontal or vertical stripes.  This looks good only if the
+annotations are reasonably sparse: e.g. you can't sensibly view 20000
+gene annotations in one small dotplot.
+
+  --bed1=FILE
+      Read `BED-format
+      <https://genome.ucsc.edu/FAQ/FAQformat.html#format1>`_
+      annotations for the 1st genome.  They are drawn as stripes, with
+      coordinates given by the first three BED fields.  The color is
+      specified by the RGB field if present, else pale red if the
+      strand is "+", pale blue if "-", or pale purple.
+  --bed2=FILE
+      Read BED-format annotations for the 2nd genome.
+  --rmsk1=FILE
+      Read repeat annotations for the 1st genome, in RepeatMasker .out
+      or rmsk.txt format.  The color is pale purple for "low
+      complexity" and "simple repeats", else pale red for "+" strand
+      and pale blue for "-" strand.
+  --rmsk2=FILE
+      Read repeat annotations for the 2nd genome.
+
+Gene options
+~~~~~~~~~~~~
+
+  --genePred1=FILE
+      Read gene annotations for the 1st genome in `genePred format
+      <https://genome.ucsc.edu/FAQ/FAQformat.html#format9>`_.
+  --genePred2=FILE
+      Read gene annotations for the 2nd genome.
+  --exon-color=COLOR
+      Color for exons.
+  --cds-color=COLOR
+      Color for protein-coding regions.
 
 Unsequenced gap options
 ~~~~~~~~~~~~~~~~~~~~~~~
@@ -96,3 +155,9 @@ of unknown sequence.
 
 An unsequenced gap will be shown only if it covers at least one whole
 pixel.
+
+Colors
+~~~~~~
+
+Colors can be specified in `various ways described here
+<http://effbot.org/imagingbook/imagecolor.htm>`_.
diff --git a/doc/last-papers.html b/doc/last-papers.html
index c33a5b4..995bed4 100644
--- a/doc/last-papers.html
+++ b/doc/last-papers.html
@@ -384,6 +384,11 @@ Frith MC, Kawaguchi R.  Genome Biology. 2015 16:106.</p>
 <p>Describes the split alignment algorithm, and its application to
 whole genome alignment.</p>
 </li>
+<li><p class="first"><a class="reference external" href="https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btw742">Training alignment parameters for arbitrary sequencers with
+LAST-TRAIN</a>.  Hamada M, Ono Y, Asai K Frith MC.
+Bioinformatics. 2017 33(6):926-928.</p>
+<p>Describes last-train.</p>
+</li>
 </ol>
 <div class="section" id="external-methods">
 <h2>External methods</h2>
diff --git a/doc/last-papers.txt b/doc/last-papers.txt
index f89b20a..4a9bf08 100644
--- a/doc/last-papers.txt
+++ b/doc/last-papers.txt
@@ -100,6 +100,14 @@ research to society.
     Describes the split alignment algorithm, and its application to
     whole genome alignment.
 
+12. `Training alignment parameters for arbitrary sequencers with
+    LAST-TRAIN`__.  Hamada M, Ono Y, Asai K Frith MC.
+    Bioinformatics. 2017 33(6):926-928.
+
+    __ https://academic.oup.com/bioinformatics/article-lookup/doi/10.1093/bioinformatics/btw742
+
+    Describes last-train.
+
 External methods
 ----------------
 
diff --git a/doc/last-train.html b/doc/last-train.html
index a8aa00f..a4889bd 100644
--- a/doc/last-train.html
+++ b/doc/last-train.html
@@ -330,8 +330,12 @@ lastdb mydb reference.fasta
 last-train mydb queries.fasta
 </pre>
 <p>last-train prints a summary of each alignment step, followed by the
-final score parameters in a format that can be read by <a class="reference external" href="lastal.html#score-options">lastal's -p
+final score parameters, in a format that can be read by <a class="reference external" href="lastal.html#score-options">lastal's -p
 option</a>.</p>
+<p>You can also pipe sequences into last-train, for example:</p>
+<pre class="literal-block">
+zcat queries.fasta.gz | last-train mydb
+</pre>
 <div class="section" id="options">
 <h2>Options</h2>
 <blockquote>
@@ -370,9 +374,15 @@ e.g. score(A→G) = score(G→A).</td></tr>
 optimize the parameters for low-similarity alignments,
 similarly to the BLOSUM matrices.</td></tr>
 <tr><td class="option-group">
-<kbd><span class="option">--sample=<var>N</var></span></kbd></td>
-<td>Use only the first N query letters, after randomizing the
-order of sequences.  0 means use all query letters.</td></tr>
+<kbd><span class="option">--sample-number=<var>N</var></span></kbd></td>
+<td>Use N randomly-chosen chunks of the query sequences.  The
+queries are chopped into fixed-length chunks (as if they were
+first concatenated into one long sequence).  If there are ≤ N
+chunks, all are picked.  Otherwise, if the final chunk is
+shorter, it is never picked.  0 means use everything.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--sample-length=<var>L</var></span></kbd></td>
+<td>Use randomly-chosen chunks of length L.</td></tr>
 </tbody>
 </table>
 </blockquote>
@@ -433,6 +443,12 @@ alignments: they are described in more detail at <a class="reference external" h
 1=query.  This matters only if the scores lack
 reverse-complement symmetry.</td></tr>
 <tr><td class="option-group">
+<kbd><span class="option">-C <var>COUNT</var></span></kbd></td>
+<td>Before extending gapped alignments, discard any gapless
+alignment whose query range lies in COUNT other gapless
+alignments with higher score-per-length.  This aims to
+reduce run time.</td></tr>
+<tr><td class="option-group">
 <kbd><span class="option">-T <var>NUMBER</var></span></kbd></td>
 <td>Type of alignment: 0=local, 1=overlap.</td></tr>
 <tr><td class="option-group">
diff --git a/doc/last-train.txt b/doc/last-train.txt
index 8ee7885..c9fcd93 100644
--- a/doc/last-train.txt
+++ b/doc/last-train.txt
@@ -15,9 +15,13 @@ The usage is like this::
   last-train mydb queries.fasta
 
 last-train prints a summary of each alignment step, followed by the
-final score parameters in a format that can be read by `lastal's -p
+final score parameters, in a format that can be read by `lastal's -p
 option <lastal.html#score-options>`_.
 
+You can also pipe sequences into last-train, for example::
+
+  zcat queries.fasta.gz | last-train mydb
+
 Options
 -------
 
@@ -40,9 +44,14 @@ Training options
          Ignore alignments with > PID% identity.  This aims to
          optimize the parameters for low-similarity alignments,
          similarly to the BLOSUM matrices.
-  --sample=N
-         Use only the first N query letters, after randomizing the
-         order of sequences.  0 means use all query letters.
+  --sample-number=N
+         Use N randomly-chosen chunks of the query sequences.  The
+         queries are chopped into fixed-length chunks (as if they were
+         first concatenated into one long sequence).  If there are ≤ N
+         chunks, all are picked.  Otherwise, if the final chunk is
+         shorter, it is never picked.  0 means use everything.
+  --sample-length=L
+         Use randomly-chosen chunks of length L.
 
 All options below this point are passed to lastal to do the
 alignments: they are described in more detail at `<lastal.html>`_.
@@ -69,6 +78,10 @@ Alignment options
   -S NUMBER  Score matrix applies to forward strand of: 0=reference,
              1=query.  This matters only if the scores lack
              reverse-complement symmetry.
+  -C COUNT   Before extending gapped alignments, discard any gapless
+             alignment whose query range lies in COUNT other gapless
+             alignments with higher score-per-length.  This aims to
+             reduce run time.
   -T NUMBER  Type of alignment: 0=local, 1=overlap.
   -m COUNT   Maximum number of initial matches per query position.
   -P COUNT   Number of parallel threads.
diff --git a/doc/last-tutorial.html b/doc/last-tutorial.html
index 62f419a..19c9ed9 100644
--- a/doc/last-tutorial.html
+++ b/doc/last-tutorial.html
@@ -484,33 +484,13 @@ the two reads in a pair to match (e.g.) different chromosomes.</p>
 </li>
 </ul>
 </div>
-<div class="section" id="example-8-compare-the-cat-and-rat-genomes">
-<h2>Example 8: Compare the cat and rat genomes</h2>
-<p>If you have ~50 GB of memory and don't mind waiting a few days, this
-is a good way to compare such genomes:</p>
-<pre class="literal-block">
-lastdb -uMAM8 -cR11 catdb cat.fa
-lastal -m100 -E0.05 catdb rat.fa | last-split -m1 > out.maf
-</pre>
-<p>This looks for a unique best alignment for each part of each rat
-chromosome.  Omitting -m100 makes it faster but somewhat less
-sensitive.  Omitting -uMAM8 reduces the memory use to ~10 GB and makes
-it faster but considerably less sensitive.</p>
-<p>This recipe aligns each rat base-pair to at most one cat base-pair,
-but not necessarily vice-versa.  You can get strictly 1-to-1
-alignments by swapping the sequences and running last-split again:</p>
-<pre class="literal-block">
-maf-swap out.maf | last-split -m1 > out2.maf
-</pre>
+<div class="section" id="example-8-compare-the-human-and-mouse-genomes">
+<h2>Example 8: Compare the human and mouse genomes</h2>
+<p>See <a class="reference external" href="https://github.com/mcfrith/last-genome-alignments">here</a>.</p>
 </div>
 <div class="section" id="example-9-compare-the-human-and-chimp-genomes">
 <h2>Example 9: Compare the human and chimp genomes</h2>
-<p>For strongly similar genomes (e.g. 99% identity), something like this
-is more appropriate:</p>
-<pre class="literal-block">
-lastdb -uNEAR -cR11 human human.fa
-lastal -m50 -E0.05 human chimp.fa | last-split -m1 > out.maf
-</pre>
+<p>See <a class="reference external" href="https://github.com/mcfrith/last-genome-alignments">here</a>.</p>
 </div>
 <div class="section" id="example-10-ambiguity-of-alignment-columns">
 <h2>Example 10: Ambiguity of alignment columns</h2>
diff --git a/doc/last-tutorial.txt b/doc/last-tutorial.txt
index cbff52a..32ede6e 100644
--- a/doc/last-tutorial.txt
+++ b/doc/last-tutorial.txt
@@ -175,34 +175,15 @@ Tuning speed, sensitivity, memory and disk usage
 * You can `trade off speed, sensitivity, memory and disk usage
   <last-tuning.html>`_.
 
-Example 8: Compare the cat and rat genomes
-------------------------------------------
-
-If you have ~50 GB of memory and don't mind waiting a few days, this
-is a good way to compare such genomes::
-
-  lastdb -uMAM8 -cR11 catdb cat.fa
-  lastal -m100 -E0.05 catdb rat.fa | last-split -m1 > out.maf
-
-This looks for a unique best alignment for each part of each rat
-chromosome.  Omitting -m100 makes it faster but somewhat less
-sensitive.  Omitting -uMAM8 reduces the memory use to ~10 GB and makes
-it faster but considerably less sensitive.
-
-This recipe aligns each rat base-pair to at most one cat base-pair,
-but not necessarily vice-versa.  You can get strictly 1-to-1
-alignments by swapping the sequences and running last-split again::
+Example 8: Compare the human and mouse genomes
+----------------------------------------------
 
-  maf-swap out.maf | last-split -m1 > out2.maf
+See `here <https://github.com/mcfrith/last-genome-alignments>`_.
 
 Example 9: Compare the human and chimp genomes
 ----------------------------------------------
 
-For strongly similar genomes (e.g. 99% identity), something like this
-is more appropriate::
-
-  lastdb -uNEAR -cR11 human human.fa
-  lastal -m50 -E0.05 human chimp.fa | last-split -m1 > out.maf
+See `here <https://github.com/mcfrith/last-genome-alignments>`_.
 
 Example 10: Ambiguity of alignment columns
 ------------------------------------------
diff --git a/doc/lastal.html b/doc/lastal.html
index 94f9541..191dde2 100644
--- a/doc/lastal.html
+++ b/doc/lastal.html
@@ -412,9 +412,10 @@ coordinates are one-based.  <em>Warning:</em> this is a lossy format,
 because it does not show gap positions.  <em>Warning:</em> the other
 LAST programs cannot read this format.  <em>Warning:</em> <a class="reference external" href="last-evalues.html">"bit score"
 is not the same as "score"</a>.</p>
-<p><strong>BlastTab+</strong> format is the same as BlastTab, with 2 extra
-columns at the end: length of query sequence and length of
-reference sequence.  More columns might be added in future.</p>
+<p><strong>BlastTab+</strong> format is the same as BlastTab, with 3 extra
+columns at the end: length of query sequence, length of
+reference sequence, and (raw) score.  More columns might be
+added in future.</p>
 <p class="last">For backwards compatibility, a NAME of 0 means TAB and 1 means
 MAF.</p>
 </td></tr>
@@ -669,6 +670,11 @@ start at one query position, if it gets COUNT successful
 extensions, it skips any remaining initial matches starting at
 that position.</td></tr>
 <tr><td class="option-group">
+<kbd><span class="option">-N <var>COUNT</var></span></kbd></td>
+<td>Stop after finding COUNT alignments per query strand.  This
+makes lastal faster: it quits gapless and gapped extensions as
+soon as it finds COUNT alignments with score ≥ e.</td></tr>
+<tr><td class="option-group">
 <kbd><span class="option">-R <var>DIGITS</var></span></kbd></td>
 <td><p class="first">Specify lowercase-marking of repeats, by two digits (e.g. "-R 01"),
 with the following meanings.</p>
diff --git a/doc/lastal.txt b/doc/lastal.txt
index d8b2690..db7eeb1 100644
--- a/doc/lastal.txt
+++ b/doc/lastal.txt
@@ -93,9 +93,10 @@ Cosmetic options
       LAST programs cannot read this format.  *Warning:* `"bit score"
       is not the same as "score" <last-evalues.html>`_.
 
-      **BlastTab+** format is the same as BlastTab, with 2 extra
-      columns at the end: length of query sequence and length of
-      reference sequence.  More columns might be added in future.
+      **BlastTab+** format is the same as BlastTab, with 3 extra
+      columns at the end: length of query sequence, length of
+      reference sequence, and (raw) score.  More columns might be
+      added in future.
 
       For backwards compatibility, a NAME of 0 means TAB and 1 means
       MAF.
@@ -311,6 +312,11 @@ Miscellaneous options
       extensions, it skips any remaining initial matches starting at
       that position.
 
+  -N COUNT
+      Stop after finding COUNT alignments per query strand.  This
+      makes lastal faster: it quits gapless and gapped extensions as
+      soon as it finds COUNT alignments with score ≥ e.
+
   -R DIGITS
       Specify lowercase-marking of repeats, by two digits (e.g. "-R 01"),
       with the following meanings.
diff --git a/doc/lastdb.html b/doc/lastdb.html
index 1e6fafb..974c588 100644
--- a/doc/lastdb.html
+++ b/doc/lastdb.html
@@ -387,7 +387,7 @@ for ~80% AT-rich genomes.</p>
 <td>Soft-mask lowercase letters.  This means that, when we compare
 these sequences to some other sequences using lastal, lowercase
 letters will be excluded from initial matches.  This will apply
-to lowercase letters in both sets of sequences.</td></tr>
+to lowercase letters in <em>both</em> sets of sequences.</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-u <var>NAME</var></span></kbd></td>
 <td><p class="first">Specify a seeding scheme.  The -m option will then be ignored.
diff --git a/doc/lastdb.txt b/doc/lastdb.txt
index 0d2d017..c0b5fdb 100644
--- a/doc/lastdb.txt
+++ b/doc/lastdb.txt
@@ -59,7 +59,7 @@ Main Options
   -c  Soft-mask lowercase letters.  This means that, when we compare
       these sequences to some other sequences using lastal, lowercase
       letters will be excluded from initial matches.  This will apply
-      to lowercase letters in both sets of sequences.
+      to lowercase letters in *both* sets of sequences.
 
   -u NAME
       Specify a seeding scheme.  The -m option will then be ignored.
diff --git a/scripts/last-dotplot b/scripts/last-dotplot
index 52becbf..6d4f1d2 100755
--- a/scripts/last-dotplot
+++ b/scripts/last-dotplot
@@ -9,15 +9,36 @@
 # according to the number of aligned nt-pairs within it, but the
 # result is too faint.  How can this be done better?
 
-import fileinput, fnmatch, itertools, optparse, os, re, sys
+import fnmatch, itertools, optparse, os, re, sys
 
 # Try to make PIL/PILLOW work:
 try: from PIL import Image, ImageDraw, ImageFont, ImageColor
 except ImportError: import Image, ImageDraw, ImageFont, ImageColor
 
+def myOpen(fileName):  # faster than fileinput
+    if fileName == "-":
+        return sys.stdin
+    return open(fileName)
+
 def warn(message):
-    prog = os.path.basename(sys.argv[0])
-    sys.stderr.write(prog + ": " + message + "\n")
+    if opts.verbose:
+        prog = os.path.basename(sys.argv[0])
+        sys.stderr.write(prog + ": " + message + "\n")
+
+def croppedBlocks(blocks, range1, range2):
+    cropBeg1, cropEnd1 = range1
+    cropBeg2, cropEnd2 = range2
+    if blocks[0][0] < 0: cropBeg1, cropEnd1 = -cropEnd1, -cropBeg1
+    if blocks[0][1] < 0: cropBeg2, cropEnd2 = -cropEnd2, -cropBeg2
+    for beg1, beg2, size in blocks:
+        b1 = max(cropBeg1, beg1)
+        e1 = min(cropEnd1, beg1 + size)
+        if b1 >= e1: continue
+        offset = beg2 - beg1
+        b2 = max(cropBeg2, b1 + offset)
+        e2 = min(cropEnd2, e1 + offset)
+        if b2 >= e2: continue
+        yield b2 - offset, b2, e2 - b2
 
 def tabBlocks(beg1, beg2, blocks):
     '''Get the gapless blocks of an alignment, from LAST tabular format.'''
@@ -78,28 +99,60 @@ def alignmentInput(lines):
                 yield chr1, seqlen1, chr2, seqlen2, blocks
                 mafCount = 0
 
-def isWantedSequenceName(name, patterns):
-    if not patterns: return True
+def seqRangeFromText(text):
+    if ":" in text:
+        pattern, interval = text.rsplit(":", 1)
+        if "-" in interval:
+            beg, end = interval.rsplit("-", 1)
+            return pattern, int(beg), int(end)  # beg may be negative
+    return text, 0, sys.maxsize
+
+def rangeFromSeqName(seqRanges, name, seqLen):
+    if not seqRanges: return 0, seqLen
     base = name.split(".")[-1]  # allow for names like hg19.chr7
-    for i in patterns:
-        if fnmatch.fnmatchcase(name, i): return True
-        if fnmatch.fnmatchcase(base, i): return True
-    return False
+    for pat, beg, end in seqRanges:
+        if fnmatch.fnmatchcase(name, pat) or fnmatch.fnmatchcase(base, pat):
+            return max(beg, 0), min(end, seqLen)
+    return None
+
+def updateSeqs(isTrim, seqNames, seqLimits, seqName, seqRange, blocks, index):
+    if seqName not in seqLimits:
+        seqNames.append(seqName)
+    if isTrim:
+        beg = blocks[0][index]
+        end = blocks[-1][index] + blocks[-1][2]
+        if beg < 0: beg, end = -end, -beg
+        if seqName in seqLimits:
+            b, e = seqLimits[seqName]
+            seqLimits[seqName] = min(b, beg), max(e, end)
+        else:
+            seqLimits[seqName] = beg, end
+    else:
+        seqLimits[seqName] = seqRange
 
 def readAlignments(fileName, opts):
-    '''Get alignments and sequence lengths, from MAF or tabular format.'''
+    '''Get alignments and sequence limits, from MAF or tabular format.'''
+    seqRanges1 = map(seqRangeFromText, opts.seq1)
+    seqRanges2 = map(seqRangeFromText, opts.seq2)
+
     alignments = []
-    seqLengths1 = {}
-    seqLengths2 = {}
-    lines = fileinput.input(fileName)
-    for chr1, seqlen1, chr2, seqlen2, blocks in alignmentInput(lines):
-        if not isWantedSequenceName(chr1, opts.seq1): continue
-        if not isWantedSequenceName(chr2, opts.seq2): continue
-        aln = chr1, chr2, blocks
+    seqNames1 = []
+    seqNames2 = []
+    seqLimits1 = {}
+    seqLimits2 = {}
+    lines = myOpen(fileName)
+    for seqName1, seqLen1, seqName2, seqLen2, blocks in alignmentInput(lines):
+        range1 = rangeFromSeqName(seqRanges1, seqName1, seqLen1)
+        if not range1: continue
+        range2 = rangeFromSeqName(seqRanges2, seqName2, seqLen2)
+        if not range2: continue
+        b = list(croppedBlocks(list(blocks), range1, range2))
+        if not b: continue
+        aln = seqName1, seqName2, b
         alignments.append(aln)
-        seqLengths1[chr1] = seqlen1
-        seqLengths2[chr2] = seqlen2
-    return alignments, seqLengths1, seqLengths2
+        updateSeqs(opts.trim1, seqNames1, seqLimits1, seqName1, range1, b, 0)
+        updateSeqs(opts.trim2, seqNames2, seqLimits2, seqName2, range2, b, 1)
+    return alignments, seqNames1, seqNames2, seqLimits1, seqLimits2
 
 def natural_sort_key(my_string):
     '''Return a sort key for "natural" ordering, e.g. chr9 < chr10.'''
@@ -115,34 +168,58 @@ def get_text_sizes(my_strings, font, fontsize, image_mode):
     draw = ImageDraw.Draw(im)
     return [draw.textsize(i, font=font) for i in my_strings]
 
-def get_seq_info(seq_size_dic, font, fontsize, image_mode):
+def sizeText(size):
+    suffixes = "bp", "kb", "Mb", "Gb"
+    for i, x in enumerate(suffixes):
+        j = 10 ** (i * 3)
+        if size < j * 10:
+            return "%.2g" % (1.0 * size / j) + x
+        if size < j * 1000 or i == len(suffixes) - 1:
+            return "%.0f" % (1.0 * size / j) + x
+
+def seqNameAndSizeText(seqName, seqSize):
+    return seqName + ": " + sizeText(seqSize)
+
+def getSeqInfo(sortOpt, seqNames, seqLimits,
+               font, fontsize, image_mode, isShowSize):
     '''Return miscellaneous information about the sequences.'''
-    seq_names = seq_size_dic.keys()
-    seq_names.sort(key=natural_sort_key)
-    seq_sizes = [seq_size_dic[i] for i in seq_names]
-    name_sizes = get_text_sizes(seq_names, font, fontsize, image_mode)
-    margin = max(zip(*name_sizes)[1])  # maximum text height
-    return seq_names, seq_sizes, name_sizes, margin
+    if sortOpt == 1:
+        seqNames.sort(key=natural_sort_key)
+    seqSizes = [seqLimits[i][1] - seqLimits[i][0] for i in seqNames]
+    for i in seqNames:
+        r = seqLimits[i]
+        out = i, str(r[0]), str(r[1])
+        warn("\t".join(out))
+    warn("")
+    if sortOpt == 2:
+        seqRecords = sorted(zip(seqSizes, seqNames), reverse=True)
+        seqSizes = [i[0] for i in seqRecords]
+        seqNames = [i[1] for i in seqRecords]
+    if isShowSize:
+        seqLabels = map(seqNameAndSizeText, seqNames, seqSizes)
+    else:
+        seqLabels = seqNames
+    labelSizes = get_text_sizes(seqLabels, font, fontsize, image_mode)
+    margin = max(zip(*labelSizes)[1])  # maximum text height
+    return seqNames, seqSizes, seqLabels, labelSizes, margin
 
 def div_ceil(x, y):
     '''Return x / y rounded up.'''
     q, r = divmod(x, y)
     return q + (r != 0)
 
-def tot_seq_pix(seq_sizes, bp_per_pix):
-    '''Return the total pixels needed for sequences of the given sizes.'''
-    return sum([div_ceil(i, bp_per_pix) for i in seq_sizes])
-
 def get_bp_per_pix(seq_sizes, pix_tween_seqs, pix_limit):
     '''Get the minimum bp-per-pixel that fits in the size limit.'''
     seq_num = len(seq_sizes)
     seq_pix_limit = pix_limit - pix_tween_seqs * (seq_num - 1)
     if seq_pix_limit < seq_num:
         raise Exception("can't fit the image: too many sequences?")
-    lower_bound = div_ceil(sum(seq_sizes), seq_pix_limit)
-    for bp_per_pix in itertools.count(lower_bound):  # slow linear search
-        if tot_seq_pix(seq_sizes, bp_per_pix) <= seq_pix_limit: break
-    return bp_per_pix
+    negLimit = -seq_pix_limit
+    negBpPerPix = sum(seq_sizes) // negLimit
+    while True:
+        if sum(i // negBpPerPix for i in seq_sizes) >= negLimit:
+            return -negBpPerPix
+        negBpPerPix -= 1
 
 def get_seq_starts(seq_pix, pix_tween_seqs, margin):
     '''Get the start pixel for each sequence.'''
@@ -161,86 +238,174 @@ def get_pix_info(seq_sizes, bp_per_pix, pix_tween_seqs, margin):
     tot_pix = seq_starts[-1] + seq_pix[-1]
     return seq_pix, seq_starts, tot_pix
 
-def drawLineForward(hits, width, bp_per_pix, origin, beg1, beg2, size):
+def drawLineForward(hits, width, bp_per_pix, beg1, beg2, size):
     while True:
         q1, r1 = divmod(beg1, bp_per_pix)
         q2, r2 = divmod(beg2, bp_per_pix)
-        hits[origin + q2 * width + q1] |= 1
+        hits[q2 * width + q1] |= 1
         next_pix = min(bp_per_pix - r1, bp_per_pix - r2)
         if next_pix >= size: break
         beg1 += next_pix
         beg2 += next_pix
         size -= next_pix
 
-def drawLineReverse(hits, width, bp_per_pix, origin, beg1, beg2, size):
+def drawLineReverse(hits, width, bp_per_pix, beg1, beg2, size):
     beg2 = -1 - beg2
     while True:
         q1, r1 = divmod(beg1, bp_per_pix)
         q2, r2 = divmod(beg2, bp_per_pix)
-        hits[origin + q2 * width + q1] |= 2
+        hits[q2 * width + q1] |= 2
         next_pix = min(bp_per_pix - r1, r2 + 1)
         if next_pix >= size: break
         beg1 += next_pix
         beg2 -= next_pix
         size -= next_pix
 
-def alignmentPixels(width, height, alignments, bp_per_pix,
-                    seq_start_dic1, seq_start_dic2):
+def alignmentPixels(width, height, alignments, bp_per_pix, origins1, origins2):
     hits = [0] * (width * height)  # the image data
     for seq1, seq2, blocks in alignments:
-        seq_start1 = seq_start_dic1[seq1]
-        seq_start2 = seq_start_dic2[seq2]
-        origin = seq_start2 * width + seq_start1
+        ori1 = origins1[seq1]
+        ori2 = origins2[seq2]
         for beg1, beg2, size in blocks:
             if beg1 < 0:
                 beg1 = -(beg1 + size)
                 beg2 = -(beg2 + size)
             if beg2 >= 0:
-                drawLineForward(hits, width, bp_per_pix, origin,
-                                beg1, beg2, size)
+                drawLineForward(hits, width, bp_per_pix,
+                                beg1 + ori1, beg2 + ori2, size)
             else:
-                drawLineReverse(hits, width, bp_per_pix, origin,
-                                beg1, beg2, size)
+                drawLineReverse(hits, width, bp_per_pix,
+                                beg1 + ori1, beg2 - ori2, size)
     return hits
 
 def expandedSeqDict(seqDict):
     '''Allow lookup by short sequence names, e.g. chr7 as well as hg19.chr7.'''
-    newDict = {}
+    newDict = seqDict.copy()
     for name, x in seqDict.items():
-        base = name.split(".")[-1]
-        newDict[name] = x
-        newDict[base] = x
+        if "." in name:
+            base = name.split(".")[-1]
+            if base in newDict:  # an ambiguous case was found:
+                return seqDict   # so give up completely
+            newDict[base] = x
     return newDict
 
+def readBed(fileName, seqLimits):
+    if not fileName: return
+    for line in myOpen(fileName):
+        w = line.split()
+        if not w: continue
+        seqName = w[0]
+        if seqName not in seqLimits: continue
+        beg = int(w[1])
+        end = int(w[2])
+        layer = 900
+        color = "#ffe4ff"
+        if len(w) > 4:
+            if w[4] != ".":
+                layer = float(w[4])
+            if len(w) > 5:
+                if len(w) > 8 and w[8].count(",") == 2:
+                    color = "rgb(" + w[8] + ")"
+                elif w[5] == "+":
+                    color = "#fff4f4"
+                elif w[5] == "-":
+                    color = "#f4f4ff"
+        yield layer, color, seqName, beg, end
+
+def commaSeparatedInts(text):
+    return map(int, text.rstrip(",").split(","))
+
+def readGenePred(opts, fileName, seqLimits):
+    if not fileName: return
+    for line in myOpen(fileName):
+        fields = line.split()
+        if not fields: continue
+        if fields[2] not in "+-": fields = fields[1:]
+        seqName = fields[1]
+        if seqName not in seqLimits: continue
+        #strand = fields[2]
+        cdsBeg = int(fields[5])
+        cdsEnd = int(fields[6])
+        exonBegs = commaSeparatedInts(fields[8])
+        exonEnds = commaSeparatedInts(fields[9])
+        for beg, end in zip(exonBegs, exonEnds):
+            yield 300, opts.exon_color, seqName, beg, end
+            b = max(beg, cdsBeg)
+            e = min(end, cdsEnd)
+            if b < e: yield 400, opts.cds_color, seqName, b, e
+
+def readRmsk(fileName, seqLimits):
+    if not fileName: return
+    for line in myOpen(fileName):
+        fields = line.split()
+        if len(fields) == 17:  # rmsk.txt
+            seqName = fields[5]
+            if seqName not in seqLimits: continue  # do this ASAP for speed
+            beg = int(fields[6])
+            end = int(fields[7])
+            strand = fields[9]
+            repeatClass = fields[11]
+        elif len(fields) == 15:  # .out
+            seqName = fields[4]
+            if seqName not in seqLimits: continue
+            beg = int(fields[5]) - 1
+            end = int(fields[6])
+            strand = fields[8]
+            repeatClass = fields[10]
+        else:
+            continue
+        if repeatClass in ("Low_complexity", "Simple_repeat"):
+            yield 200, "#ffe4ff", seqName, beg, end
+        elif strand == "+":
+            yield 100, "#fff4f4", seqName, beg, end
+        else:
+            yield 100, "#f4f4ff", seqName, beg, end
+
 def isExtraFirstGapField(fields):
     return fields[4].isdigit()
 
-def readGaps(fileName):
+def readGaps(opts, fileName, seqLimits):
     '''Read locations of unsequenced gaps, from an agp or gap file.'''
     if not fileName: return
-    for line in fileinput.input(fileName):
+    for line in myOpen(fileName):
         w = line.split()
         if not w or w[0][0] == "#": continue
         if isExtraFirstGapField(w): w = w[1:]
         if w[4] not in "NU": continue
         seqName = w[0]
+        if seqName not in seqLimits: continue
         end = int(w[2])
         beg = end - int(w[5])  # zero-based coordinate
-        bridgedText = w[7]
-        yield seqName, beg, end, bridgedText
-
-def drawUnsequencedGaps(im, gaps, start_dic, margin, limit, isTop, bridgedText,
-                        bp_per_pix, color):
-    '''Draw rectangles representing unsequenced gaps.'''
-    for seqName, beg, end, b in gaps:
-        if b != bridgedText: continue
-        if seqName not in start_dic: continue
-        origin = start_dic[seqName]
-        b = div_ceil(beg, bp_per_pix)  # use fully-covered pixels only
-        e = end // bp_per_pix
-        if e <= b: continue
-        if isTop: box = origin + b, margin, origin + e, limit
-        else:     box = margin, origin + b, limit, origin + e
+        if w[7] == "yes":
+            yield 3000, opts.bridged_color, seqName, beg, end
+        else:
+            yield 2000, opts.unbridged_color, seqName, beg, end
+
+def bedBoxes(beds, seqLimits, origins, margin, edge, isTop, bpPerPix):
+    for layer, color, seqName, beg, end in beds:
+        cropBeg, cropEnd = seqLimits[seqName]
+        beg = max(beg, cropBeg)
+        end = min(end, cropEnd)
+        if beg >= end: continue
+        ori = origins[seqName]
+        if layer <= 1000:
+            # include partly-covered pixels
+            b = (ori + beg) // bpPerPix
+            e = div_ceil(ori + end, bpPerPix)
+        else:
+            # exclude partly-covered pixels
+            b = div_ceil(ori + beg, bpPerPix)
+            e = (ori + end) // bpPerPix
+            if e <= b: continue
+        if isTop:
+            box = b, margin, e, edge
+        else:
+            box = margin, b, edge, e
+        yield layer, color, box
+
+def drawAnnotations(im, boxes):
+    # xxx use partial transparency for different-color overlaps?
+    for layer, color, box in boxes:
         im.paste(color, box)
 
 def make_label(text, text_size, range_start, range_size):
@@ -260,25 +425,28 @@ def get_nonoverlapping_labels(labels, label_space):
             nonoverlapping_labels.append(i)
     return nonoverlapping_labels
 
-def get_axis_image(seq_names, name_sizes, seq_starts, seq_pix,
+def get_axis_image(seqNames, name_sizes, seq_starts, seq_pix,
                    font, image_mode, opts):
     '''Make an image of axis labels.'''
     min_pos = seq_starts[0]
     max_pos = seq_starts[-1] + seq_pix[-1]
     height = max(zip(*name_sizes)[1])
-    labels = [make_label(i, j, k, l) for i, j, k, l in
-              zip(seq_names, name_sizes, seq_starts, seq_pix)]
+    labels = map(make_label, seqNames, name_sizes, seq_starts, seq_pix)
     labels = [i for i in labels if i[1] >= min_pos and i[2] <= max_pos]
     labels.sort()
     labels = get_nonoverlapping_labels(labels, opts.label_space)
     image_size = max_pos, height
-    im = Image.new(image_mode, image_size, opts.border_shade)
+    im = Image.new(image_mode, image_size, opts.border_color)
     draw = ImageDraw.Draw(im)
     for i in labels:
         position = i[1], 0
         draw.text(position, i[3], font=font, fill=opts.text_color)
     return im
 
+def seqOrigins(seqNames, seq_starts, seqLimits, bp_per_pix):
+    for i, j in zip(seqNames, seq_starts):
+        yield i, bp_per_pix * j - seqLimits[i][0]
+
 def lastDotplot(opts, args):
     if opts.fontfile:  font = ImageFont.truetype(opts.fontfile, opts.fontsize)
     else:              font = ImageFont.load_default()
@@ -290,52 +458,64 @@ def lastDotplot(opts, args):
     overlap_color = tuple([(i + j) // 2 for i, j in zipped_colors])
 
     warn("reading alignments...")
-    alignments, seq_size_dic1, seq_size_dic2 = readAlignments(args[0], opts)
+    alignmentInfo = readAlignments(args[0], opts)
+    alignments, seqNames1, seqNames2, seqLimits1, seqLimits2 = alignmentInfo
     warn("done")
-
     if not alignments: raise Exception("there are no alignments")
 
-    seq_info1 = get_seq_info(seq_size_dic1, font, opts.fontsize, image_mode)
-    seq_info2 = get_seq_info(seq_size_dic2, font, opts.fontsize, image_mode)
-    seq_names1, seq_sizes1, name_sizes1, margin1 = seq_info1
-    seq_names2, seq_sizes2, name_sizes2, margin2 = seq_info2
+    i1 = getSeqInfo(opts.sort1, seqNames1, seqLimits1,
+                    font, opts.fontsize, image_mode, opts.lengths1)
+    seqNames1, seqSizes1, seqLabels1, labelSizes1, margin1 = i1
+
+    i2 = getSeqInfo(opts.sort2, seqNames2, seqLimits2,
+                    font, opts.fontsize, image_mode, opts.lengths2)
+    seqNames2, seqSizes2, seqLabels2, labelSizes2, margin2 = i2
 
     warn("choosing bp per pixel...")
     pix_limit1 = opts.width  - margin1
     pix_limit2 = opts.height - margin2
-    bp_per_pix1 = get_bp_per_pix(seq_sizes1, opts.pix_tween_seqs, pix_limit1)
-    bp_per_pix2 = get_bp_per_pix(seq_sizes2, opts.pix_tween_seqs, pix_limit2)
-    bp_per_pix = max(bp_per_pix1, bp_per_pix2)
-    warn("bp per pixel = " + str(bp_per_pix))
-
-    seq_pix1, seq_starts1, width  = get_pix_info(seq_sizes1, bp_per_pix,
-                                                 opts.pix_tween_seqs, margin1)
-    seq_pix2, seq_starts2, height = get_pix_info(seq_sizes2, bp_per_pix,
-                                                 opts.pix_tween_seqs, margin2)
-    seq_start_dic1 = dict(zip(seq_names1, seq_starts1))
-    seq_start_dic2 = dict(zip(seq_names2, seq_starts2))
+    bpPerPix1 = get_bp_per_pix(seqSizes1, opts.border_pixels, pix_limit1)
+    bpPerPix2 = get_bp_per_pix(seqSizes2, opts.border_pixels, pix_limit2)
+    bpPerPix = max(bpPerPix1, bpPerPix2)
+    warn("bp per pixel = " + str(bpPerPix))
+
+    seq_pix1, seq_starts1, width  = get_pix_info(seqSizes1, bpPerPix,
+                                                 opts.border_pixels, margin1)
+    seq_pix2, seq_starts2, height = get_pix_info(seqSizes2, bpPerPix,
+                                                 opts.border_pixels, margin2)
+    warn("width:  " + str(width))
+    warn("height: " + str(height))
+
+    origins1 = dict(seqOrigins(seqNames1, seq_starts1, seqLimits1, bpPerPix))
+    origins2 = dict(seqOrigins(seqNames2, seq_starts2, seqLimits2, bpPerPix))
 
     warn("processing alignments...")
-    hits = alignmentPixels(width, height, alignments, bp_per_pix,
-                           seq_start_dic1, seq_start_dic2)
+    hits = alignmentPixels(width, height, alignments, bpPerPix,
+                           origins1, origins2)
     warn("done")
 
     image_size = width, height
     im = Image.new(image_mode, image_size, opts.background_color)
 
-    start_dic1 = expandedSeqDict(seq_start_dic1)
-    start_dic2 = expandedSeqDict(seq_start_dic2)
-    gaps1 = list(readGaps(opts.gap1))
-    gaps2 = list(readGaps(opts.gap2))
-    # draw bridged gaps first, then unbridged gaps on top:
-    drawUnsequencedGaps(im, gaps1, start_dic1, margin2, height, True, "yes",
-                        bp_per_pix, opts.bridged_color)
-    drawUnsequencedGaps(im, gaps2, start_dic2, margin1, width, False, "yes",
-                        bp_per_pix, opts.bridged_color)
-    drawUnsequencedGaps(im, gaps1, start_dic1, margin2, height, True, "no",
-                        bp_per_pix, opts.unbridged_color)
-    drawUnsequencedGaps(im, gaps2, start_dic2, margin1, width, False, "no",
-                        bp_per_pix, opts.unbridged_color)
+    seqLimits1 = expandedSeqDict(seqLimits1)
+    seqLimits2 = expandedSeqDict(seqLimits2)
+    origins1 = expandedSeqDict(origins1)
+    origins2 = expandedSeqDict(origins2)
+
+    beds1 = itertools.chain(readBed(opts.bed1, seqLimits1),
+                            readRmsk(opts.rmsk1, seqLimits1),
+                            readGenePred(opts, opts.genePred1, seqLimits1),
+                            readGaps(opts, opts.gap1, seqLimits1))
+    b1 = bedBoxes(beds1, seqLimits1, origins1, margin2, height, True, bpPerPix)
+
+    beds2 = itertools.chain(readBed(opts.bed2, seqLimits2),
+                            readRmsk(opts.rmsk2, seqLimits2),
+                            readGenePred(opts, opts.genePred2, seqLimits2),
+                            readGaps(opts, opts.gap2, seqLimits2))
+    b2 = bedBoxes(beds2, seqLimits2, origins2, margin1, width, False, bpPerPix)
+
+    boxes = sorted(itertools.chain(b1, b2))
+    drawAnnotations(im, boxes)
 
     for i in range(height):
         for j in range(width):
@@ -346,21 +526,21 @@ def lastDotplot(opts, args):
             elif store_value == 3: im.putpixel(xy, overlap_color)
 
     if opts.fontsize != 0:
-        axis1 = get_axis_image(seq_names1, name_sizes1, seq_starts1, seq_pix1,
+        axis1 = get_axis_image(seqLabels1, labelSizes1, seq_starts1, seq_pix1,
                                font, image_mode, opts)
-        axis2 = get_axis_image(seq_names2, name_sizes2, seq_starts2, seq_pix2,
+        axis2 = get_axis_image(seqLabels2, labelSizes2, seq_starts2, seq_pix2,
                                font, image_mode, opts)
-        axis2 = axis2.rotate(270, expand=1)
+        axis2 = axis2.transpose(Image.ROTATE_270)  # !!! bug hotspot
         im.paste(axis1, (0, 0))
         im.paste(axis2, (0, 0))
 
     for i in seq_starts1[1:]:
-        box = i - opts.pix_tween_seqs, margin2, i, height
-        im.paste(opts.border_shade, box)
+        box = i - opts.border_pixels, margin2, i, height
+        im.paste(opts.border_color, box)
 
     for i in seq_starts2[1:]:
-        box = margin1, i - opts.pix_tween_seqs, width, i
-        im.paste(opts.border_shade, box)
+        box = margin1, i - opts.border_pixels, width, i
+        im.paste(opts.border_color, box)
 
     im.save(args[1])
 
@@ -371,23 +551,72 @@ if __name__ == "__main__":
    or: ..."""
     description = "Draw a dotplot of pair-wise sequence alignments in MAF or tabular format."
     op = optparse.OptionParser(usage=usage, description=description)
+    op.add_option("-v", "--verbose", action="count",
+                  help="show progress messages & data about the plot")
     op.add_option("-1", "--seq1", metavar="PATTERN", action="append",
+                  default=[],
                   help="which sequences to show from the 1st genome")
     op.add_option("-2", "--seq2", metavar="PATTERN", action="append",
+                  default=[],
                   help="which sequences to show from the 2nd genome")
     # Replace "width" & "height" with a single "length" option?
     op.add_option("-x", "--width", type="int", default=1000,
                   help="maximum width in pixels (default: %default)")
     op.add_option("-y", "--height", type="int", default=1000,
                   help="maximum height in pixels (default: %default)")
-    op.add_option("-f", "--fontfile", metavar="FILE",
-                  help="TrueType or OpenType font file")
-    op.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=11,
-                  help="TrueType or OpenType font size (default: %default)")
     op.add_option("-c", "--forwardcolor", metavar="COLOR", default="red",
                   help="color for forward alignments (default: %default)")
     op.add_option("-r", "--reversecolor", metavar="COLOR", default="blue",
                   help="color for reverse alignments (default: %default)")
+    op.add_option("--sort1", type="int", default=1, metavar="N",
+                  help="genome1 sequence order: 0=input order, 1=name order, "
+                  "2=length order (default=%default)")
+    op.add_option("--sort2", type="int", default=1, metavar="N",
+                  help="genome2 sequence order: 0=input order, 1=name order, "
+                  "2=length order (default=%default)")
+    op.add_option("--trim1", action="store_true",
+                  help="trim unaligned sequence flanks from the 1st genome")
+    op.add_option("--trim2", action="store_true",
+                  help="trim unaligned sequence flanks from the 2nd genome")
+    op.add_option("--border-pixels", metavar="INT", type="int", default=1,
+                  help="number of pixels between sequences (default=%default)")
+    op.add_option("--border-color", metavar="COLOR", default="#dcdcdc",
+                  help="color for pixels between sequences (default=%default)")
+    # xxx --margin-color?
+
+    og = optparse.OptionGroup(op, "Text options")
+    og.add_option("-f", "--fontfile", metavar="FILE",
+                  help="TrueType or OpenType font file")
+    og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=11,
+                  help="TrueType or OpenType font size (default: %default)")
+    og.add_option("--lengths1", action="store_true",
+                  help="show sequence lengths for the 1st (horizontal) genome")
+    og.add_option("--lengths2", action="store_true",
+                  help="show sequence lengths for the 2nd (vertical) genome")
+    op.add_option_group(og)
+
+    og = optparse.OptionGroup(op, "Annotation options")
+    og.add_option("--bed1", metavar="FILE",
+                  help="read genome1 annotations from BED file")
+    og.add_option("--bed2", metavar="FILE",
+                  help="read genome2 annotations from BED file")
+    og.add_option("--rmsk1", metavar="FILE", help="read genome1 repeats from "
+                  "RepeatMasker .out or rmsk.txt file")
+    og.add_option("--rmsk2", metavar="FILE", help="read genome2 repeats from "
+                  "RepeatMasker .out or rmsk.txt file")
+    op.add_option_group(og)
+
+    og = optparse.OptionGroup(op, "Gene options")
+    og.add_option("--genePred1", metavar="FILE",
+                  help="read genome1 genes from genePred file")
+    og.add_option("--genePred2", metavar="FILE",
+                  help="read genome2 genes from genePred file")
+    og.add_option("--exon-color", metavar="COLOR", default="#dfd",
+                  help="color for exons (default=%default)")
+    og.add_option("--cds-color", metavar="COLOR", default="#bdb",
+                  help="color for protein-coding regions (default=%default)")
+    op.add_option_group(og)
+
     og = optparse.OptionGroup(op, "Unsequenced gap options")
     og.add_option("--gap1", metavar="FILE",
                   help="read genome1 unsequenced gaps from agp or gap file")
@@ -403,8 +632,6 @@ if __name__ == "__main__":
 
     opts.text_color = "black"
     opts.background_color = "white"
-    opts.pix_tween_seqs = 2  # number of border pixels between sequences
-    opts.border_shade = 239, 239, 239  # the shade of grey for border pixels
     opts.label_space = 5     # minimum number of pixels between axis labels
 
     try: lastDotplot(opts, args)
diff --git a/scripts/last-postmask b/scripts/last-postmask
index 6e7b9d4..669be10 100755
--- a/scripts/last-postmask
+++ b/scripts/last-postmask
@@ -56,12 +56,12 @@ def printIfGood(maf, seqs, scoreMatrix, delOpenCost, insOpenCost, minScore):
             print line,
         print
 
-def doOneFile(file):
+def doOneFile(lines):
     scoreMatrix = []
     maf = []
     seqs = []
 
-    for line in file:
+    for line in lines:
         if line[0] == "#":
             print line,
             w = line.split()
@@ -96,8 +96,8 @@ def lastPostmask(args):
             if i == "-":
                 doOneFile(sys.stdin)
             else:
-                with open(i) as file:
-                    doOneFile(file)
+                with open(i) as f:
+                    doOneFile(f)
     else:
         doOneFile(sys.stdin)
 
diff --git a/scripts/last-train b/scripts/last-train
index de39072..7fe5947 100755
--- a/scripts/last-train
+++ b/scripts/last-train
@@ -3,14 +3,33 @@
 
 import math, optparse, os, random, signal, subprocess, sys, tempfile
 
+def myOpen(fileName):  # faster than fileinput
+    if fileName == "-":
+        return sys.stdin
+    return open(fileName)
+
+def randomSample(things, sampleSize):
+    """Randomly get sampleSize things (or all if fewer)."""
+    reservoir = []  # "reservoir sampling" algorithm
+    for i, x in enumerate(things):
+        if i < sampleSize:
+            reservoir.append(x)
+        else:
+            r = random.randrange(i + 1)
+            if r < sampleSize:
+                reservoir[r] = x
+    return reservoir
+
 def writeWords(outFile, words):
     outFile.write(" ".join(words) + "\n")
 
 def seqInput(fileNames):
+    if not fileNames:
+        fileNames = ["-"]
     for name in fileNames:
-        with open(name) as file:
+        with myOpen(name) as f:
             seqType = 0
-            for line in file:
+            for line in f:
                 if seqType == 0:
                     if line[0] == ">":
                         seqType = 1
@@ -32,40 +51,66 @@ def seqInput(fileNames):
                     lineType = (lineType + 1) % 4
             if seqType == 1: yield "".join(seq), ""
 
-def randomNumbersAndLengths(lengths, sampleSize):
-    numbersAndLengths = list(enumerate(lengths))
-    random.shuffle(numbersAndLengths)
-    for i, x in numbersAndLengths:
-        if x < sampleSize:
-            yield i, x
-            sampleSize -= x
-        else:
-            yield i, sampleSize
-            break
-
-def writeSeqSample(numbersAndLengths, seqInput, outfile):
-    j = 0
-    for i, x in enumerate(seqInput):
-        if j < len(numbersAndLengths) and numbersAndLengths[j][0] == i:
-            seqlen = numbersAndLengths[j][1]
-            seq, qual = x
-            if qual:
-                outfile.write("@" + str(i) + "\n")
-                outfile.write(seq[:seqlen])
-                outfile.write("\n+\n")
-                outfile.write(qual[:seqlen])
-            else:
-                outfile.write(">" + str(i) + "\n")
-                outfile.write(seq[:seqlen])
-            outfile.write("\n")
-            j += 1
+def isGoodChunk(chunk):
+    for i in chunk:
+        for j in i[3]:
+            if j not in "Nn":
+                return True
+    return False
+
+def chunkInput(opts, sequences):
+    chunkCount = 0
+    chunk = []
+    wantedLength = opts.sample_length
+    for i, x in enumerate(sequences):
+        seq, qual = x
+        if all(i in "Nn" for i in seq): continue
+        seqLength = len(seq)
+        beg = 0
+        while beg < seqLength:
+            length = min(wantedLength, seqLength - beg)
+            end = beg + length
+            segment = i, beg, end, seq[beg:end], qual[beg:end]
+            chunk.append(segment)
+            wantedLength -= length
+            if not wantedLength:
+                if isGoodChunk(chunk):
+                    yield chunk
+                    chunkCount += 1
+                chunk = []
+                wantedLength = opts.sample_length
+            beg = end
+    if chunk and chunkCount < opts.sample_number:
+        yield chunk
+
+def writeSegment(outfile, segment):
+    if not segment: return
+    i, beg, end, seq, qual = segment
+    name = str(i) + ":" + str(beg)
+    if qual:
+        outfile.write("@" + name + "\n")
+        outfile.write(seq)
+        outfile.write("\n+\n")
+        outfile.write(qual)
+    else:
+        outfile.write(">" + name + "\n")
+        outfile.write(seq)
+    outfile.write("\n")
 
 def getSeqSample(opts, queryFiles, outfile):
-    lengths = (len(i[0]) for i in seqInput(queryFiles))
-    sampleSize = int(round(opts.sample))
-    numbersAndLengths = sorted(randomNumbersAndLengths(lengths, sampleSize))
-    writeSeqSample(numbersAndLengths, seqInput(queryFiles), outfile)
-    print "# sampled sequences:", len(numbersAndLengths)
+    sequences = seqInput(queryFiles)
+    chunks = chunkInput(opts, sequences)
+    sample = randomSample(chunks, opts.sample_number)
+    sample.sort()
+    x = None
+    for chunk in sample:
+        for y in chunk:
+            if x and y[0] == x[0] and y[1] == x[2]:
+                x = x[0], x[1], y[2], x[3] + y[3], x[4] + y[4]
+            else:
+                writeSegment(outfile, x)
+                x = y
+    writeSegment(outfile, x)
 
 def scaleFromHeader(lines):
     for line in lines:
@@ -290,6 +335,7 @@ def fixedLastalArgs(opts):
     if opts.E: x.append("-E" + opts.E)
     if opts.s: x.append("-s" + opts.s)
     if opts.S: x.append("-S" + opts.S)
+    if opts.C: x.append("-C" + opts.C)
     if opts.T: x.append("-T" + opts.T)
     if opts.m: x.append("-m" + opts.m)
     if opts.P: x.append("-P" + opts.P)
@@ -365,7 +411,7 @@ def doTraining(opts, args):
     writeMatrixWithLetters(sys.stdout, externalMatrix, "")
 
 def lastTrain(opts, args):
-    if opts.sample:
+    if opts.sample_number:
         random.seed(math.pi)
         refName = args[0]
         queryFiles = args[1:]
@@ -392,8 +438,10 @@ if __name__ == "__main__":
                   help="force insertion/deletion symmetry")
     og.add_option("--pid", type="float", default=100, help=
                   "skip alignments with > PID% identity (default: %default)")
-    og.add_option("--sample", type="float", default=1000000, metavar="N",
-                  help="use a random sample of N letters (default: %default)")
+    og.add_option("--sample-number", type="int", default=500, metavar="N",
+                  help="number of random sequence samples (default: %default)")
+    og.add_option("--sample-length", type="int", default=2000, metavar="L",
+                  help="length of each sample (default: %default)")
     op.add_option_group(og)
     og = optparse.OptionGroup(op, "Initial parameter options")
     og.add_option("-r", metavar="SCORE",
@@ -418,6 +466,8 @@ if __name__ == "__main__":
     og.add_option("-S", metavar="NUMBER", default="1", help=
                   "score matrix applies to forward strand of: " +
                   "0=reference, 1=query (default: %default)")
+    og.add_option("-C", metavar="COUNT", help=
+                  "omit gapless alignments in COUNT others with > score-per-length")
     og.add_option("-T", metavar="NUMBER",
                   help="type of alignment: 0=local, 1=overlap (default: 0)")
     og.add_option("-m", metavar="COUNT", help=
@@ -428,7 +478,10 @@ if __name__ == "__main__":
                   help="input format: 0=fasta, 1=fastq-sanger")
     op.add_option_group(og)
     (opts, args) = op.parse_args()
-    if len(args) < 2: op.error("I need a lastdb index and query sequences")
+    if len(args) < 1:
+        op.error("I need a lastdb index and query sequences")
+    if not opts.sample_number and (len(args) < 2 or "-" in args):
+        op.error("sorry, can't use stdin when --sample-number=0")
     if not opts.p and (not opts.Q or opts.Q == "0"):
         if not opts.r: opts.r = "5"
         if not opts.q: opts.q = "5"
diff --git a/scripts/maf-convert b/scripts/maf-convert
index 7b1d92c..850f3b1 100755
--- a/scripts/maf-convert
+++ b/scripts/maf-convert
@@ -25,9 +25,6 @@ def isMatch(alignmentColumn):
         if i.upper() != first: return False
     return True
 
-def isGapless(alignmentColumn):
-    return "-" not in alignmentColumn
-
 def gapRunCount(row):
     """Get the number of runs of gap characters."""
     return sum(k == "-" for k, v in groupby(row))
@@ -44,17 +41,27 @@ def insertSize(row, letterSize):
     """Get the length of sequence included in the row."""
     return (len(row) - row.count("-")) * letterSize - 4 * row.count("/") - 2 * row.count("\\")
 
-def insertSizeText(row, letterSize):
-    return str(insertSize(row, letterSize))
-
 def matchAndInsertSizes(alignmentColumns, letterSizes):
     """Get sizes of gapless blocks, and of the inserts between them."""
-    for k, v in groupby(alignmentColumns, isGapless):
-        if k:
-            yield str(sum(1 for i in v))
+    letterSizeA, letterSizeB = letterSizes
+    delSize = insSize = subSize = 0
+    for x, y in alignmentColumns:
+        if x == "-":
+            if subSize:
+                if delSize or insSize: yield str(delSize) + ":" + str(insSize)
+                yield str(subSize)
+                delSize = insSize = subSize = 0
+            insSize += symbolSize(y, letterSizeB)
+        elif y == "-":
+            if subSize:
+                if delSize or insSize: yield str(delSize) + ":" + str(insSize)
+                yield str(subSize)
+                delSize = insSize = subSize = 0
+            delSize += symbolSize(x, letterSizeA)
         else:
-            yield ":".join(imap(insertSizeText,
-                                alignmentRowsFromColumns(v), letterSizes))
+            subSize += 1
+    if delSize or insSize: yield str(delSize) + ":" + str(insSize)
+    if subSize: yield str(subSize)
 
 ##### Routines for reading MAF format: #####
 
@@ -265,12 +272,10 @@ def pslCommaString(things):
     # UCSC software seems to prefer a trailing comma
     return ",".join(map(str, things)) + ","
 
-def pslEnds(headMafFields, tailMafFields):
-    seqName, seqLen, strand, letterSize, beg = headMafFields[0:5]
-    end = tailMafFields[5]
+def pslEnds(seqLen, strand, beg, end):
     if strand == "-":
-        beg, end = seqLen - end, seqLen - beg
-    return seqName, seqLen, strand, letterSize, beg, end
+        return seqLen - end, seqLen - beg
+    return beg, end
 
 def writePsl(opts, mafs):
     matchCounts = [0] * 4
@@ -278,13 +283,19 @@ def writePsl(opts, mafs):
     matches, mismatches, repMatches, nCount = matchCounts
     numGaplessColumns = sum(matchCounts)
 
-    headA, headB = mafs[0][1]
-    tailA, tailB = mafs[-1][1]
+    if not blocks:
+        return
+
+    fieldsA, fieldsB = mafs[0][1]
+    headSize, headBegA, headBegB = blocks[0]
+    tailSize, tailBegA, tailBegB = blocks[-1]
 
-    seqNameA, seqLenA, strandA, letterSizeA, begA, endA = pslEnds(headA, tailA)
+    seqNameA, seqLenA, strandA, letterSizeA = fieldsA[0:4]
+    begA, endA = pslEnds(seqLenA, strandA, headBegA, tailBegA + tailSize)
     baseInsertA = endA - begA - numGaplessColumns * letterSizeA
 
-    seqNameB, seqLenB, strandB, letterSizeB, begB, endB = pslEnds(headB, tailB)
+    seqNameB, seqLenB, strandB, letterSizeB = fieldsB[0:4]
+    begB, endB = pslEnds(seqLenB, strandB, headBegB, tailBegB + tailSize)
     baseInsertB = endB - begB - numGaplessColumns * letterSizeB
 
     numInsertA, numInsertB = pslNumInserts(blocks, letterSizeA, letterSizeB)
@@ -828,7 +839,6 @@ if __name__ == "__main__":
         op.error("need file (not pipe) with option -d")
 
     try: mafConvert(opts, args)
-    except KeyboardInterrupt: pass  # avoid silly error message
     except Exception, e:
         prog = os.path.basename(sys.argv[0])
         sys.exit(prog + ": error: " + str(e))
diff --git a/src/Alignment.cc b/src/Alignment.cc
index ec2db83..0ef6c6c 100644
--- a/src/Alignment.cc
+++ b/src/Alignment.cc
@@ -262,6 +262,29 @@ bool Alignment::hasGoodSegment(const uchar *seq1, const uchar *seq2,
   return false;
 }
 
+static void getColumnAmbiguities(const Centroid& centroid,
+				 std::vector<uchar>& ambiguityCodes,
+				 const std::vector<SegmentPair>& chunks,
+				 bool isForward) {
+  for (size_t i = 0; i < chunks.size(); ++i) {
+    const SegmentPair& x = chunks[i];
+    centroid.getMatchAmbiguities(ambiguityCodes, x.end1(), x.end2(), x.size);
+    size_t j = i + 1;
+    bool isNext = (j < chunks.size());
+    size_t end1 = isNext ? chunks[j].end1() : 0;
+    size_t end2 = isNext ? chunks[j].end2() : 0;
+    // ASSUMPTION: if there is an insertion adjacent to a deletion,
+    // the deletion will get printed first.
+    if (isForward) {
+      centroid.getInsertAmbiguities(ambiguityCodes, x.beg2(), end2);
+      centroid.getDeleteAmbiguities(ambiguityCodes, x.beg1(), end1);
+    } else {
+      centroid.getDeleteAmbiguities(ambiguityCodes, x.beg1(), end1);
+      centroid.getInsertAmbiguities(ambiguityCodes, x.beg2(), end2);
+    }
+  }
+}
+
 void Alignment::extend( std::vector< SegmentPair >& chunks,
 			std::vector< uchar >& ambiguityCodes,
 			Centroid& centroid,
@@ -361,7 +384,7 @@ void Alignment::extend( std::vector< SegmentPair >& chunks,
       centroid.traceback( chunks, gamma );
     }
 
-    centroid.getColumnAmbiguities( ambiguityCodes, chunks, isForward );
+    getColumnAmbiguities( centroid, ambiguityCodes, chunks, isForward );
     extras.fullScore += centroid.logPartitionFunction();
 
     if( outputType == 7 ){
diff --git a/src/AlignmentWrite.cc b/src/AlignmentWrite.cc
index bfbd53e..51b7a32 100644
--- a/src/AlignmentWrite.cc
+++ b/src/AlignmentWrite.cc
@@ -457,12 +457,13 @@ AlignmentText Alignment::writeBlastTab(const MultiSequence& seq1,
   }
   IntText s1(seqLen1);
   IntText s2(seqLen2);
+  IntText sc(score);
 
   size_t s =
     n2.size() + n1.size() + mp.size() + as.size() + mm.size() + go.size() +
     b2.size() + e2.size() + b1.size() + e1.size() + 10;
   if (evaluer.isGood()) s += ev.size() + bs.size() + 2;
-  if (isExtraColumns)   s += s1.size() + s2.size() + 2;
+  if (isExtraColumns)   s += s1.size() + s2.size() + sc.size() + 3;
 
   char *text = new char[s + 1];
   Writer w(text);
@@ -470,7 +471,7 @@ AlignmentText Alignment::writeBlastTab(const MultiSequence& seq1,
   w << n2 << t << n1 << t << mp << t << as << t << mm << t << go << t
     << b2 << t << e2 << t << b1 << t << e1;
   if (evaluer.isGood()) w << t << ev << t << bs;
-  if (isExtraColumns)   w << t << s2 << t << s1;
+  if (isExtraColumns)   w << t << s2 << t << s1 << t << sc;
   w << '\n' << '\0';
 
   return AlignmentText(seqNum2, alnBeg2, alnEnd2, strand, score,
diff --git a/src/Centroid.cc b/src/Centroid.cc
index 42c5286..36edd35 100644
--- a/src/Centroid.cc
+++ b/src/Centroid.cc
@@ -696,45 +696,27 @@ namespace cbrc{
     return static_cast<uchar>(k);
   }
 
-  static void getGapAmbiguities( std::vector<uchar>& ambiguityCodes,
-                                    const std::vector<double>& probs,
-                                    size_t rbeg, size_t rend ){
-    for( size_t i = rbeg; i > rend; --i ){
-      ambiguityCodes.push_back( asciiProbability( probs[ i ] ) );
+  void Centroid::getMatchAmbiguities(std::vector<uchar>& ambiguityCodes,
+				     size_t seq1end, size_t seq2end,
+				     size_t length) const {
+    while (length) {
+      size_t d = xa.diag(seq1end + seq2end, seq1end);
+      double p = fM[d] * bM[d];
+      ambiguityCodes.push_back(asciiProbability(p));
+      --seq1end;  --seq2end;  --length;
     }
   }
 
-  // Added by MCF:
-  void Centroid::getColumnAmbiguities( std::vector<uchar>& ambiguityCodes,
-                                       const std::vector<SegmentPair>& chunks,
-                                       bool isForward ){
-    for( CI(SegmentPair) i = chunks.begin(); i < chunks.end(); ++i ){
-      size_t seq1pos = i->end1();
-      size_t seq2pos = i->end2();
-
-      for( size_t j = 0; j < i->size; ++j ){
-	size_t d = xa.diag( seq1pos + seq2pos, seq1pos );
-	double p = fM[d] * bM[d];
-	ambiguityCodes.push_back( asciiProbability(p) );
-	--seq1pos;
-	--seq2pos;
-      }
-
-      CI(SegmentPair) j = i + 1;
-      size_t end1 = (j < chunks.end()) ? j->end1() : 0;
-      size_t end2 = (j < chunks.end()) ? j->end2() : 0;
+  void Centroid::getDeleteAmbiguities(std::vector<uchar>& ambiguityCodes,
+				      size_t seq1end, size_t seq1beg) const {
+    for (size_t i = seq1end; i > seq1beg; --i)
+      ambiguityCodes.push_back(asciiProbability(mD[i]));
+  }
 
-      // ASSUMPTION: if there is an insertion adjacent to a deletion,
-      // the deletion will get printed first.
-      if( isForward ){
-        getGapAmbiguities( ambiguityCodes, mI, seq2pos, end2 );
-        getGapAmbiguities( ambiguityCodes, mD, seq1pos, end1 );
-      }
-      else{
-        getGapAmbiguities( ambiguityCodes, mD, seq1pos, end1 );
-        getGapAmbiguities( ambiguityCodes, mI, seq2pos, end2 );
-      }
-    }
+  void Centroid::getInsertAmbiguities(std::vector<uchar>& ambiguityCodes,
+				      size_t seq2end, size_t seq2beg) const {
+    for (size_t i = seq2end; i > seq2beg; --i)
+      ambiguityCodes.push_back(asciiProbability(mI[i]));
   }
 
   double Centroid::logPartitionFunction() const{
diff --git a/src/Centroid.hh b/src/Centroid.hh
index 516bbb3..9c3b8f5 100644
--- a/src/Centroid.hh
+++ b/src/Centroid.hh
@@ -64,10 +64,15 @@ namespace cbrc{
     double dp_ama( double gamma );
     void traceback_ama( std::vector< SegmentPair >& chunks, double gamma ) const;
 
-    // Added by MCF: get the probability of each column in the alignment:
-    void getColumnAmbiguities( std::vector< uchar >& ambiguityCodes,
-                               const std::vector< SegmentPair >& chunks,
-                               bool isForward );
+    void getMatchAmbiguities(std::vector<uchar>& ambiguityCodes,
+			     size_t seq1end, size_t seq2end,
+			     size_t length) const;
+
+    void getDeleteAmbiguities(std::vector<uchar>& ambiguityCodes,
+			      size_t seq1end, size_t seq1beg) const;
+
+    void getInsertAmbiguities(std::vector<uchar>& ambiguityCodes,
+			      size_t seq2end, size_t seq2beg) const;
 
     double logPartitionFunction() const;  // a.k.a. full score, forward score
 
diff --git a/src/LastEvaluer.cc b/src/LastEvaluer.cc
index 2c7ac4c..b1951ca 100644
--- a/src/LastEvaluer.cc
+++ b/src/LastEvaluer.cc
@@ -357,10 +357,19 @@ void LastEvaluer::init(const char *matrixName,
 
     if (isGapped) {
       evaluer.set_gapped_computation_parameters_simplified(maxSeconds);
-      evaluer.initGapped(alphabetSize, &matrix[0], letterFreqs2, letterFreqs1,
-			 delOpen, delEpen, insOpen, insEpen,
-			 true, lambdaTolerance, kTolerance,
-			 0, maxMegabytes, randomSeed);
+      for (int i = 0; ; ++i) {
+	double t = Sls::default_importance_sampling_temperature + 0.01 * i;
+	try {
+	  evaluer.initGapped(alphabetSize, &matrix[0],
+			     letterFreqs2, letterFreqs1,
+			     delOpen, delEpen, insOpen, insEpen,
+			     true, lambdaTolerance, kTolerance,
+			     0, maxMegabytes, randomSeed, t);
+	  break;
+	} catch (const Sls::error& e) {
+	  if (i == 10) throw;
+	}
+      }
     } else {
       evaluer.initGapless(alphabetSize, &matrix[0],
 			  letterFreqs2, letterFreqs1);
diff --git a/src/LastalArguments.cc b/src/LastalArguments.cc
index 709ce9d..8d389ff 100644
--- a/src/LastalArguments.cc
+++ b/src/LastalArguments.cc
@@ -2,7 +2,7 @@
 
 #include "LastalArguments.hh"
 #include "stringify.hh"
-#include <unistd.h>  // getopt
+#include "getoptUtil.hh"
 #include <algorithm>  // max
 #include <iostream>
 #include <sstream>
@@ -74,6 +74,7 @@ LastalArguments::LastalArguments() :
   maxHitDepth(-1),
   oneHitMultiplicity(10),
   maxGaplessAlignmentsPerQueryPosition(0),  // depends on oneHitMultiplicity
+  maxAlignmentsPerQueryStrand(-1),
   cullingLimitForGaplessAlignments(0),
   cullingLimitForFinalAlignments(0),
   queryStep(1),
@@ -144,6 +145,7 @@ Miscellaneous options (default settings):\n\
 -T: type of alignment: 0=local, 1=overlap ("
     + stringify(globality) + ")\n\
 -n: maximum gapless alignments per query position (infinity if m=0, else m)\n\
+-N: stop after the first N alignments per query strand\n\
 -R: repeat-marking options (the same as was used for lastdb)\n\
 -u: mask lowercase during extensions: 0=never, 1=gapless,\n\
     2=gapless+postmask, 3=always (2 if lastdb -c and Q<5, else 0)\n\
@@ -165,10 +167,9 @@ Report bugs to: last-align (ATmark) googlegroups (dot) com\n\
 LAST home page: http://last.cbrc.jp/\n\
 ";
 
-  optind = 1;  // allows us to scan arguments more than once(???)
   int c;
   const char optionString[] = "hVvf:" "r:q:p:a:b:A:B:c:F:x:y:z:d:e:" "D:E:"
-    "s:S:MT:m:l:L:n:C:K:k:W:i:P:R:u:w:t:g:G:j:Q:";
+    "s:S:MT:m:l:L:n:N:C:K:k:W:i:P:R:u:w:t:g:G:j:Q:";
   while( (c = myGetopt(argc, argv, optionString)) != -1 ){
     switch(c){
     case 'h':
@@ -285,6 +286,9 @@ LAST home page: http://last.cbrc.jp/\n\
       unstringify( maxGaplessAlignmentsPerQueryPosition, optarg );
       if( maxGaplessAlignmentsPerQueryPosition <= 0 ) badopt( c, optarg );
       break;
+    case 'N':
+      unstringify( maxAlignmentsPerQueryStrand, optarg );
+      break;
     case 'C':
       unstringify( cullingLimitForGaplessAlignments, optarg );
       break;
@@ -363,11 +367,14 @@ LAST home page: http://last.cbrc.jp/\n\
   if( isGreedy && maskLowercase == 3 )
     ERR( "can't combine option -M with option -u 3" );
 
-  if( optionsOnly ) return;
-  if( optind >= argc )
-    ERR( "please give me a database name and sequence file(s)\n\n" + usage );
-  lastdbName = argv[optind++];
-  inputStart = optind;
+  if( !optionsOnly ){
+    if( optind >= argc )
+      ERR( "please give me a database name and sequence file(s)\n\n" + usage );
+    lastdbName = argv[optind++];
+    inputStart = optind;
+  }
+
+  resetGetopt();
 }
 
 void LastalArguments::fromLine( const std::string& line ){
@@ -595,6 +602,8 @@ void LastalArguments::writeCommented( std::ostream& stream ) const{
   if( maxHitDepth + 1 > 0 )
     stream << " L=" << maxHitDepth;
   stream << " n=" << maxGaplessAlignmentsPerQueryPosition;
+  if( maxAlignmentsPerQueryStrand + 1 > 0 )
+    stream << " N=" << maxAlignmentsPerQueryStrand;
   if( cullingLimitForGaplessAlignments )
     stream << " C=" << cullingLimitForGaplessAlignments;
   if( cullingLimitForFinalAlignments )
diff --git a/src/LastalArguments.hh b/src/LastalArguments.hh
index 708f999..f4cc87c 100644
--- a/src/LastalArguments.hh
+++ b/src/LastalArguments.hh
@@ -87,6 +87,7 @@ struct LastalArguments{
   size_t maxHitDepth;
   size_t oneHitMultiplicity;
   size_t maxGaplessAlignmentsPerQueryPosition;
+  size_t maxAlignmentsPerQueryStrand;
   size_t cullingLimitForGaplessAlignments;
   size_t cullingLimitForFinalAlignments;
   size_t queryStep;
diff --git a/src/LastdbArguments.cc b/src/LastdbArguments.cc
index 7393685..95060ec 100644
--- a/src/LastdbArguments.cc
+++ b/src/LastdbArguments.cc
@@ -2,7 +2,7 @@
 
 #include "LastdbArguments.hh"
 #include "stringify.hh"
-#include <unistd.h>  // getopt
+#include "getoptUtil.hh"
 #include <iostream>
 #include <stdexcept>
 #include <cstdlib>  // EXIT_SUCCESS
@@ -56,7 +56,7 @@ Main Options:\n\
 -p: interpret the sequences as proteins\n\
 -R: repeat-marking options (default="
     + stringify(isKeepLowercase) + stringify(tantanSetting) + ")\n\
--c: soft-mask lowercase letters\n\
+-c: soft-mask lowercase letters (in reference *and* query sequences)\n\
 -u: seeding scheme (default: YASS for DNA, else exact-match seeds)";
 
   std::string help = usage + "\n\
@@ -86,7 +86,6 @@ Report bugs to: last-align (ATmark) googlegroups (dot) com\n\
 LAST home page: http://last.cbrc.jp/\n\
 ";
 
-  optind = 1;  // allows us to scan arguments more than once(???)
   int c;
   while( (c = myGetopt(argc, argv, "hVpR:cm:s:w:W:P:u:a:i:b:C:xvQ:")) != -1 ) {
     switch(c){
@@ -159,11 +158,14 @@ LAST home page: http://last.cbrc.jp/\n\
     }
   }
 
-  if( isOptionsOnly ) return;
-  if( optind >= argc )
-    ERR( "please give me an output name and sequence file(s)\n\n" + usage );
-  lastdbName = argv[optind++];
-  inputStart = optind;
+  if( !isOptionsOnly ){
+    if( optind >= argc )
+      ERR( "please give me an output name and sequence file(s)\n\n" + usage );
+    lastdbName = argv[optind++];
+    inputStart = optind;
+  }
+
+  resetGetopt();
 }
 
 void LastdbArguments::fromLine( const std::string& line ){
diff --git a/src/alp/sls_alignment_evaluer.cpp b/src/alp/sls_alignment_evaluer.cpp
index 8fd6f1a..aa7adc1 100644
--- a/src/alp/sls_alignment_evaluer.cpp
+++ b/src/alp/sls_alignment_evaluer.cpp
@@ -365,7 +365,8 @@ double eps_lambda_,//relative error for the parameter lambda
 double eps_K_,//relative error for the parameter K
 double max_time_,//maximum allowed calculation time in seconds
 double max_mem_,//maximum allowed memory usage in Mb
-long randomSeed_)//randomizaton seed
+long randomSeed_,//randomizaton seed
+double temperature_)
 {
 
 	struct_for_randomization *randomization_parameters=NULL;
@@ -512,6 +513,7 @@ long randomSeed_)//randomizaton seed
 		letterFreqs1_normalized,
 		letterFreqs2_normalized,
 
+		temperature_,
 		max_time_,//maximum allowed calculation time in seconds
 		max_mem_,//maximum allowed memory usage in MB
 		eps_lambda_,//relative error for lambda calculation
@@ -1014,11 +1016,6 @@ double seqlen2_) const//length of sequence #2
 	E,
 
 	area_res,
-	pvalues_obj.a_normal,
-	pvalues_obj.b_normal,
-	pvalues_obj.h_normal,
-	pvalues_obj.N_normal,
-	pvalues_obj.p_normal,
 	area_is_1_flag,
 	compute_only_area);
 
@@ -1093,11 +1090,6 @@ double &evalue_) const//resulted E-value
 	evalue_,
 
 	area,
-	pvalues_obj.a_normal,
-	pvalues_obj.b_normal,
-	pvalues_obj.h_normal,
-	pvalues_obj.N_normal,
-	pvalues_obj.p_normal,
 	area_is_1_flag);
 
 	
diff --git a/src/alp/sls_alignment_evaluer.hpp b/src/alp/sls_alignment_evaluer.hpp
index 4a398ae..a113f4e 100644
--- a/src/alp/sls_alignment_evaluer.hpp
+++ b/src/alp/sls_alignment_evaluer.hpp
@@ -40,6 +40,7 @@ Contents: library functions of main routines
 #include <math.h>
 
 namespace Sls {
+	const double default_importance_sampling_temperature = 1.07;
 
 	class AlignmentEvaluer {
 
@@ -100,7 +101,8 @@ namespace Sls {
 			double eps_K_,//relative error for the parameter K
 			double max_time_,//maximum allowed calculation time in seconds; 
 			double max_mem_,//maximum allowed memory usage in Mb
-			long randomSeed_);//randomizaton seed
+			long randomSeed_,//randomizaton seed
+			double temperature_=default_importance_sampling_temperature);
 
 
 	//Initializes Gumbel parameters using precalculated values:
diff --git a/src/alp/sls_alp_data.cpp b/src/alp/sls_alp_data.cpp
index 6d32520..73f5676 100644
--- a/src/alp/sls_alp_data.cpp
+++ b/src/alp/sls_alp_data.cpp
@@ -216,6 +216,7 @@ long int epen_,//gap extension penalty
 long int epen1_,//gap extension penalty for a gap in the sequence #1
 long int epen2_,//gap extension penalty for a gap in the sequence #2
 
+double temperature_,
 double max_time_,//maximum allowed calculation time in seconds
 double max_mem_,//maximum allowed memory usage in MB
 double eps_lambda_,//relative error for lambda calculation
@@ -294,6 +295,7 @@ bool insertions_after_deletions_)//if true, then insertions after deletions are
 
 			d_epen,
 
+			temperature_,
 			d_number_of_AA,
 			d_smatr,
 			d_RR1,
@@ -382,6 +384,7 @@ long int epen2_,//gap extension penalty for a gap in the sequence #2
 string smatr_file_name_,//scoring matrix file name
 string RR1_file_name_,//probabilities1 file name
 string RR2_file_name_,//probabilities2 file name
+double temperature_,
 double max_time_,//maximum allowed calculation time in seconds
 double max_mem_,//maximum allowed memory usage in MB
 double eps_lambda_,//relative error for lambda calculation
@@ -443,6 +446,7 @@ bool insertions_after_deletions_)//if true, then insertions after deletions are
 		epen1_,//gap extension penalty for a gap in the sequence #1
 		epen2_,//gap extension penalty for a gap in the sequence #2
 
+		temperature_,
 		max_time_,//maximum allowed calculation time in seconds
 		max_mem_,//maximum allowed memory usage in MB
 		eps_lambda_,//relative error for lambda calculation
@@ -488,6 +492,7 @@ const long *const *substitutionScoreMatrix_,
 const double *letterFreqs1_,
 const double *letterFreqs2_,
 
+double temperature_,
 double max_time_,//maximum allowed calculation time in seconds
 double max_mem_,//maximum allowed memory usage in MB
 double eps_lambda_,//relative error for lambda calculation
@@ -581,6 +586,7 @@ double max_time_with_computation_parameters_)//maximum allowed time in seconds f
 		epen1_,//gap extension penalty for a gap in the sequence #1
 		epen2_,//gap extension penalty for a gap in the sequence #2
 
+		temperature_,
 		max_time_,//maximum allowed calculation time in seconds
 		max_mem_,//maximum allowed memory usage in MB
 		eps_lambda_,//relative error for lambda calculation
@@ -1128,6 +1134,7 @@ long int open_,
 
 long int epen_,
 
+double temperature_,
 long int number_of_AA_,
 long int **smatr_,
 double *RR1_,
@@ -1272,7 +1279,7 @@ double *RR2_)
 
 			//cout<<"\nUngapped lambda is "<<d_ungap_lambda<<endl;
 
-			d_lambda*=1.07;
+			d_lambda*=temperature_;
 		};
 
 
diff --git a/src/alp/sls_alp_data.hpp b/src/alp/sls_alp_data.hpp
index 39a81de..2fcd777 100644
--- a/src/alp/sls_alp_data.hpp
+++ b/src/alp/sls_alp_data.hpp
@@ -245,6 +245,7 @@ namespace Sls {
 
 		long int epen_,
 
+		double temperature_,
 		long int number_of_AA_,
 		long int **smatr_,
 		double *RR1_,
@@ -309,6 +310,7 @@ namespace Sls {
 			std::string smatr_file_name_,//scoring matrix file name
 			std::string RR1_file_name_,//probabilities1 file name
 			std::string RR2_file_name_,//probabilities2 file name
+			double temperature_,
 			double max_time_,//maximum allowed calculation time in seconds
 			double max_mem_,//maximum allowed memory usage in MB
 			double eps_lambda_,//relative error for lambda calculation
@@ -332,6 +334,7 @@ namespace Sls {
 			const double *letterFreqs1_,
 			const double *letterFreqs2_,
 
+			double temperature_,
 			double max_time_,//maximum allowed calculation time in seconds
 			double max_mem_,//maximum allowed memory usage in MB
 			double eps_lambda_,//relative error for lambda calculation
@@ -368,6 +371,7 @@ namespace Sls {
 			long int epen1_,//gap extension penalty for a gap in the sequence #1
 			long int epen2_,//gap extension penalty for a gap in the sequence #2
 
+			double temperature_,
 			double max_time_,//maximum allowed calculation time in seconds
 			double max_mem_,//maximum allowed memory usage in MB
 			double eps_lambda_,//relative error for lambda calculation
diff --git a/src/alp/sls_basic.hpp b/src/alp/sls_basic.hpp
index d289ffd..f6e4353 100644
--- a/src/alp/sls_basic.hpp
+++ b/src/alp/sls_basic.hpp
@@ -48,7 +48,8 @@ Contents: Some basic functions and types
 #endif
 
 #include <iomanip>
-#include <cmath>
+#include <cmath>  // ?
+#include <math.h>
 #include <string>
 
 namespace Sls { 
@@ -189,6 +190,11 @@ namespace Sls {
 		static double one_minus_exp_function(
 		double y_);
 
+		static double normal_probability(double x_)
+		{
+			return 0.5*erfc(-sqrt(0.5)*x_);
+		}
+
 		static double normal_probability(
 		double x_,
 		double eps_);
diff --git a/src/alp/sls_falp_alignment_evaluer.cpp b/src/alp/sls_falp_alignment_evaluer.cpp
index a51f84d..113f485 100644
--- a/src/alp/sls_falp_alignment_evaluer.cpp
+++ b/src/alp/sls_falp_alignment_evaluer.cpp
@@ -977,11 +977,6 @@ double seqlen2_) const//length of sequence #2
 	E,
 
 	area_res,
-	pvalues_obj.a_normal,
-	pvalues_obj.b_normal,
-	pvalues_obj.h_normal,
-	pvalues_obj.N_normal,
-	pvalues_obj.p_normal,
 	area_is_1_flag,
 	compute_only_area);
 
@@ -1057,11 +1052,6 @@ double &evalue_) const//resulted E-value
 	evalue_,
 
 	area,
-	pvalues_obj.a_normal,
-	pvalues_obj.b_normal,
-	pvalues_obj.h_normal,
-	pvalues_obj.N_normal,
-	pvalues_obj.p_normal,
 	area_is_1_flag);
 }
 
diff --git a/src/alp/sls_fsa1_pvalues.cpp b/src/alp/sls_fsa1_pvalues.cpp
index 8b1dcc5..0ba0770 100644
--- a/src/alp/sls_fsa1_pvalues.cpp
+++ b/src/alp/sls_fsa1_pvalues.cpp
@@ -106,13 +106,6 @@ double &E_,
 double &E_error_,
 
 double &area_,
-
-double a_normal_,
-double b_normal_,
-double h_normal_,
-long int N_normal_,
-double *p_normal_,
-
 bool &area_is_1_flag_)
 {
 
@@ -181,8 +174,6 @@ bool &area_is_1_flag_)
 		tau_hat_error_=0;
 	};
 
-	double eps=0.000001;
-
 	double m_li_y_error=0;
 	double m_li_y=0;
 
@@ -218,7 +209,7 @@ bool &area_is_1_flag_)
 	};
 
 
-	double P_m_F=sls_basic::normal_probability(a_normal_,b_normal_,h_normal_,N_normal_,p_normal_,m_F,eps);
+	double P_m_F=sls_basic::normal_probability(m_F);
 	double P_m_F_error=const_val*exp(-0.5*m_F*m_F)*m_F_error;
 
 	double E_m_F=-const_val*exp(-0.5*m_F*m_F);
@@ -270,7 +261,7 @@ bool &area_is_1_flag_)
 		n_F=n_lj_y/sqrt_vj_y;
 	};
 
-	double P_n_F=sls_basic::normal_probability(a_normal_,b_normal_,h_normal_,N_normal_,p_normal_,n_F,eps);
+	double P_n_F=sls_basic::normal_probability(n_F);
 	double P_n_F_error=const_val*exp(-0.5*n_F*n_F)*n_F_error;
 
 	double E_n_F=-const_val*exp(-0.5*n_F*n_F);
@@ -385,13 +376,6 @@ double &P_,
 double &E_,
 
 double &area_,
-
-double a_normal_,
-double b_normal_,
-double h_normal_,
-long int N_normal_,
-double *p_normal_,
-
 bool &area_is_1_flag_,
 bool compute_only_area_)
 {
@@ -438,8 +422,6 @@ bool compute_only_area_)
 		tau_hat_=0;
 	};
 
-	double eps=0.000001;
-
 	double m_li_y=0;
 
 	double tmp=ai_hat_*y_+bi_hat_;
@@ -463,7 +445,7 @@ bool compute_only_area_)
 	};
 
 
-	double P_m_F=sls_basic::normal_probability(a_normal_,b_normal_,h_normal_,N_normal_,p_normal_,m_F,eps);
+	double P_m_F=sls_basic::normal_probability(m_F);
 
 	double E_m_F=-const_val*exp(-0.5*m_F*m_F);
 
@@ -498,7 +480,7 @@ bool compute_only_area_)
 		n_F=n_lj_y/sqrt_vj_y;
 	};
 
-	double P_n_F=sls_basic::normal_probability(a_normal_,b_normal_,h_normal_,N_normal_,p_normal_,n_F,eps);
+	double P_n_F=sls_basic::normal_probability(n_F);
 
 	double E_n_F=-const_val*exp(-0.5*n_F*n_F);
 
@@ -576,12 +558,6 @@ double &P_error_,
 double &E_,
 double &E_error_,
 
-double a_normal_,
-double b_normal_,
-double h_normal_,
-long int N_normal_,
-double *p_normal_,
-
 bool &area_is_1_flag_)
 {
 	long int dim=par_.m_LambdaSbs.size();
@@ -696,13 +672,6 @@ bool &area_is_1_flag_)
 		E_tmp,
 
 		area_tmp,
-
-		a_normal_,
-		b_normal_,
-		h_normal_,
-		N_normal_,
-		p_normal_,
-
 		area_is_1_flag_);
 
 		P_values[i]=P_tmp;
@@ -880,11 +849,6 @@ bool read_Sbs_par_flag)
 		E,
 
 		area,
-		a_normal,
-		b_normal,
-		h_normal,
-		N_normal,
-		p_normal,
 		area_is_1_flag);
 
 
@@ -906,11 +870,6 @@ bool read_Sbs_par_flag)
 			E_tmp,
 			E_error,
 
-			a_normal,
-			b_normal,
-			h_normal,
-			N_normal,
-			p_normal,
 			area_is_1_flag);
 
 
@@ -954,11 +913,6 @@ bool read_Sbs_par_flag)
 		E_error,
 
 		area,
-		a_normal,
-		b_normal,
-		h_normal,
-		N_normal,
-		p_normal,
 		area_is_1_flag);
 
 		P_value_error=P_error;
diff --git a/src/alp/sls_fsa1_pvalues.hpp b/src/alp/sls_fsa1_pvalues.hpp
index c32cac0..77fe8ff 100644
--- a/src/alp/sls_fsa1_pvalues.hpp
+++ b/src/alp/sls_fsa1_pvalues.hpp
@@ -206,13 +206,6 @@ namespace Sls {
 		double &E_error_,
 
 		double &area_,
-
-		double a_normal_,
-		double b_normal_,
-		double h_normal_,
-		long int N_normal_,
-		double *p_normal_,
-
 		bool &area_is_1_flag_);
 
 		static void compute_tmp_values(FALP_set_of_parameters &par_);
@@ -229,13 +222,6 @@ namespace Sls {
 		double &E_,
 
 		double &area_,
-
-		double a_normal_,
-		double b_normal_,
-		double h_normal_,
-		long int N_normal_,
-		double *p_normal_,
-
 		bool &area_is_1_flag_,
 		bool compute_only_area_=false);
 
@@ -253,13 +239,6 @@ namespace Sls {
 		double &E_,
 		double &E_error_,
 
-
-		double a_normal_,
-		double b_normal_,
-		double h_normal_,
-		long int N_normal_,
-		double *p_normal_,
-
 		bool &area_is_1_flag_);
 
 
diff --git a/src/alp/sls_pvalues.cpp b/src/alp/sls_pvalues.cpp
index 194d696..515a41c 100644
--- a/src/alp/sls_pvalues.cpp
+++ b/src/alp/sls_pvalues.cpp
@@ -60,13 +60,6 @@ double &E_,
 double &E_error_,
 
 double &area_,
-
-double a_normal_,
-double b_normal_,
-double h_normal_,
-long int N_normal_,
-double *p_normal_,
-
 bool &area_is_1_flag_)
 {
 
@@ -135,8 +128,6 @@ bool &area_is_1_flag_)
 		tau_hat_error_=0;
 	};
 
-	double eps=0.000001;
-
 	double m_li_y_error=0;
 	double m_li_y=0;
 
@@ -171,7 +162,7 @@ bool &area_is_1_flag_)
 	};
 
 
-	double P_m_F=sls_basic::normal_probability(a_normal_,b_normal_,h_normal_,N_normal_,p_normal_,m_F,eps);
+	double P_m_F=sls_basic::normal_probability(m_F);
 	double P_m_F_error=const_val*exp(-0.5*m_F*m_F)*m_F_error;
 
 	double E_m_F=-const_val*exp(-0.5*m_F*m_F);
@@ -222,7 +213,7 @@ bool &area_is_1_flag_)
 		n_F=n_lj_y/sqrt_vj_y;
 	};
 
-	double P_n_F=sls_basic::normal_probability(a_normal_,b_normal_,h_normal_,N_normal_,p_normal_,n_F,eps);
+	double P_n_F=sls_basic::normal_probability(n_F);
 	double P_n_F_error=const_val*exp(-0.5*n_F*n_F)*n_F_error;
 
 	double E_n_F=-const_val*exp(-0.5*n_F*n_F);
@@ -384,13 +375,6 @@ double &P_,
 double &E_,
 
 double &area_,
-
-double a_normal_,
-double b_normal_,
-double h_normal_,
-long int N_normal_,
-double *p_normal_,
-
 bool &area_is_1_flag_,
 bool compute_only_area_)
 {
@@ -436,8 +420,6 @@ bool compute_only_area_)
 		tau_hat_=0;
 	};
 
-	double eps=0.000001;
-
 	double m_li_y=0;
 
 	double tmp=ai_hat_*y_+bi_hat_;
@@ -463,7 +445,7 @@ bool compute_only_area_)
 	};
 
 
-	double P_m_F=sls_basic::normal_probability(a_normal_,b_normal_,h_normal_,N_normal_,p_normal_,m_F,eps);
+	double P_m_F=sls_basic::normal_probability(m_F);
 
 	double E_m_F=-const_val*exp(-0.5*m_F*m_F);
 
@@ -497,7 +479,7 @@ bool compute_only_area_)
 		n_F=n_lj_y/sqrt_vj_y;
 	};
 
-	double P_n_F=sls_basic::normal_probability(a_normal_,b_normal_,h_normal_,N_normal_,p_normal_,n_F,eps);
+	double P_n_F=sls_basic::normal_probability(n_F);
 
 	double E_n_F=-const_val*exp(-0.5*n_F*n_F);
 
@@ -575,12 +557,6 @@ double &P_error_,
 double &E_,
 double &E_error_,
 
-double a_normal_,
-double b_normal_,
-double h_normal_,
-long int N_normal_,
-double *p_normal_,
-
 bool &area_is_1_flag_)
 {
 	long int dim=par_.m_LambdaSbs.size();
@@ -693,13 +669,6 @@ bool &area_is_1_flag_)
 		E_tmp,
 
 		area_tmp,
-
-		a_normal_,
-		b_normal_,
-		h_normal_,
-		N_normal_,
-		p_normal_,
-
 		area_is_1_flag_);
 
 		P_values[i]=P_tmp;
@@ -888,11 +857,6 @@ bool read_Sbs_par_flag)
 		E,
 
 		area,
-		a_normal,
-		b_normal,
-		h_normal,
-		N_normal,
-		p_normal,
 		area_is_1_flag);
 
 
@@ -914,11 +878,6 @@ bool read_Sbs_par_flag)
 			E_tmp,
 			E_error,
 
-			a_normal,
-			b_normal,
-			h_normal,
-			N_normal,
-			p_normal,
 			area_is_1_flag);
 
 
@@ -962,11 +921,6 @@ bool read_Sbs_par_flag)
 		E_error,
 
 		area,
-		a_normal,
-		b_normal,
-		h_normal,
-		N_normal,
-		p_normal,
 		area_is_1_flag);
 
 		P_value_error=P_error;
diff --git a/src/alp/sls_pvalues.hpp b/src/alp/sls_pvalues.hpp
index c47dd37..023cd47 100644
--- a/src/alp/sls_pvalues.hpp
+++ b/src/alp/sls_pvalues.hpp
@@ -190,13 +190,6 @@ namespace Sls {
 		double &E_error_,
 
 		double &area_,
-
-		double a_normal_,
-		double b_normal_,
-		double h_normal_,
-		long int N_normal_,
-		double *p_normal_,
-
 		bool &area_is_1_flag_);
 
 		static void compute_tmp_values(ALP_set_of_parameters &par_);
@@ -213,13 +206,6 @@ namespace Sls {
 		double &E_,
 
 		double &area_,
-
-		double a_normal_,
-		double b_normal_,
-		double h_normal_,
-		long int N_normal_,
-		double *p_normal_,
-
 		bool &area_is_1_flag_,
 		bool compute_only_area_=false);
 
@@ -236,12 +222,6 @@ namespace Sls {
 		double &E_,
 		double &E_error_,
 
-		double a_normal_,
-		double b_normal_,
-		double h_normal_,
-		long int N_normal_,
-		double *p_normal_,
-
 		bool &area_is_1_flag_);
 
 
diff --git a/src/getoptUtil.hh b/src/getoptUtil.hh
new file mode 100644
index 0000000..25bd328
--- /dev/null
+++ b/src/getoptUtil.hh
@@ -0,0 +1,17 @@
+// Copyright 2017 Martin C. Frith
+
+#ifndef GETOPT_UTIL_HH
+#define GETOPT_UTIL_HH
+
+#include <unistd.h>
+
+inline void resetGetopt() {  // XXX fragile voodoo
+#ifdef __GLIBC__
+  optind = 0;
+#else
+  optind = 1;
+  //optreset = 1;  // XXX ???
+#endif
+}
+
+#endif
diff --git a/src/lastal.cc b/src/lastal.cc
index 467fad7..0e90c61 100644
--- a/src/lastal.cc
+++ b/src/lastal.cc
@@ -552,6 +552,7 @@ void alignGapless( LastAligner& aligner, SegmentPairPot& gaplessAlns,
   Dispatcher dis( Phase::gapless, aligner, queryNum, strand, querySeq );
   DiagonalTable dt;  // record already-covered positions on each diagonal
   countT matchCount = 0, gaplessExtensionCount = 0, gaplessAlignmentCount = 0;
+  size_t significantAlignmentCount = 0;
 
   size_t loopBeg = query.seqBeg(queryNum) - query.padBeg(queryNum);
   size_t loopEnd = query.seqEnd(queryNum) - query.padBeg(queryNum);
@@ -588,10 +589,11 @@ void alignGapless( LastAligner& aligner, SegmentPairPot& gaplessAlns,
 	indexT j = *beg;  // coordinate in the reference sequence
 	if( dt.isCovered( i, j ) ) continue;
 	++gaplessExtensionCount;
+	int score;
 
 	if( isOverlap ){
 	  size_t revLen, fwdLen;
-	  int score = dis.gaplessOverlap( j, i, revLen, fwdLen );
+	  score = dis.gaplessOverlap( j, i, revLen, fwdLen );
 	  if( score < minScoreGapless ) continue;
 	  SegmentPair sp( j - revLen, i - revLen, revLen + fwdLen, score );
 	  dt.addEndpoint( sp.end2(), sp.end1() );
@@ -599,7 +601,7 @@ void alignGapless( LastAligner& aligner, SegmentPairPot& gaplessAlns,
 	}else{
 	  int fs = dis.forwardGaplessScore( j, i );
 	  int rs = dis.reverseGaplessScore( j, i );
-	  int score = fs + rs;
+	  score = fs + rs;
 
 	  // Tried checking the score after isOptimal & addEndpoint,
 	  // but the number of extensions decreased by < 10%, and it
@@ -623,6 +625,12 @@ void alignGapless( LastAligner& aligner, SegmentPairPot& gaplessAlns,
 
 	++gaplessAlignmentsPerQueryPosition;
 	++gaplessAlignmentCount;
+
+	if( score >= args.minScoreGapped &&
+	    ++significantAlignmentCount >= args.maxAlignmentsPerQueryStrand ) {
+	  i = loopEnd;
+	  break;
+	}
       }
     }
   }
@@ -710,6 +718,7 @@ void alignGapped( LastAligner& aligner,
     else SegmentPairPot::markAsGood(sp);
 
     ++gappedAlignmentCount;
+    if( gappedAlignmentCount >= args.maxAlignmentsPerQueryStrand ) break;
   }
 
   LOG2( "gapped extensions=" << gappedExtensionCount );
diff --git a/src/makefile b/src/makefile
index cddf3b7..df3581a 100644
--- a/src/makefile
+++ b/src/makefile
@@ -175,9 +175,9 @@ LastEvaluer.o LastEvaluer.o8: LastEvaluer.cc LastEvaluer.hh ScoreMatrixRow.hh \
  alp/sls_falp_alignment_evaluer.hpp alp/sls_fsa1_pvalues.hpp \
  GeneticCode.hh
 LastalArguments.o LastalArguments.o8: LastalArguments.cc LastalArguments.hh \
- SequenceFormat.hh stringify.hh version.hh
+ SequenceFormat.hh stringify.hh getoptUtil.hh version.hh
 LastdbArguments.o LastdbArguments.o8: LastdbArguments.cc LastdbArguments.hh \
- SequenceFormat.hh stringify.hh version.hh
+ SequenceFormat.hh stringify.hh getoptUtil.hh version.hh
 MultiSequence.o MultiSequence.o8: MultiSequence.cc MultiSequence.hh ScoreMatrixRow.hh \
  VectorOrMmap.hh Mmap.hh fileMap.hh stringify.hh io.hh
 MultiSequenceQual.o MultiSequenceQual.o8: MultiSequenceQual.cc MultiSequence.hh \
diff --git a/src/split/cbrc_split_aligner.hh b/src/split/cbrc_split_aligner.hh
index 6e6885b..ff4bdb7 100644
--- a/src/split/cbrc_split_aligner.hh
+++ b/src/split/cbrc_split_aligner.hh
@@ -212,7 +212,7 @@ private:
     void initSpliceSignals(unsigned i);
     void initRnameAndStrandIds();
     void initRbegsAndEnds();
-    static size_t maxGenomeVolumes() { return sizeof genome / sizeof *genome; }
+    size_t maxGenomeVolumes() const { return sizeof genome / sizeof *genome; }
     void readGenomeVolume(const std::string& baseName,
 			  size_t seqCount, size_t volumeNumber);
 
diff --git a/src/split/last-split.cc b/src/split/last-split.cc
index 1dd714e..619f54c 100644
--- a/src/split/last-split.cc
+++ b/src/split/last-split.cc
@@ -83,21 +83,26 @@ static void doOneAlignmentPart(cbrc::SplitAligner& sa,
 			       bool isSenseStrand, double senseStrandLogOdds,
 			       const LastSplitOptions& opts,
 			       bool isAlreadySplit) {
+  unsigned qSliceBegTrimmed = qSliceBeg;
+  unsigned qSliceEndTrimmed = qSliceEnd;
   unsigned alnBeg, alnEnd;
-  cbrc::mafSliceBeg(a.ralign, a.qalign, a.qstart, qSliceBeg, alnBeg);
-  cbrc::mafSliceEnd(a.ralign, a.qalign, a.qend,   qSliceEnd, alnEnd);
+  cbrc::mafSliceBeg(a.ralign, a.qalign, a.qstart, qSliceBegTrimmed, alnBeg);
+  cbrc::mafSliceEnd(a.ralign, a.qalign, a.qend,   qSliceEndTrimmed, alnEnd);
 
-  int score = sa.segmentScore(alnNum, qSliceBeg, qSliceEnd);
+  int score =
+    sa.segmentScore(alnNum, qSliceBeg, qSliceEnd) -
+    sa.segmentScore(alnNum, qSliceBeg, qSliceBegTrimmed) -
+    sa.segmentScore(alnNum, qSliceEndTrimmed, qSliceEnd);
   if (score < opts.score) return;
 
   std::vector<double> p;
   if (opts.direction != 0) {
-    p = sa.marginalProbs(qSliceBeg, alnNum, alnBeg, alnEnd);
+    p = sa.marginalProbs(qSliceBegTrimmed, alnNum, alnBeg, alnEnd);
   }
   std::vector<double> pRev;
   if (opts.direction != 1) {
     sa.flipSpliceSignals();
-    pRev = sa.marginalProbs(qSliceBeg, alnNum, alnBeg, alnEnd);
+    pRev = sa.marginalProbs(qSliceBegTrimmed, alnNum, alnBeg, alnEnd);
     sa.flipSpliceSignals();
   }
   if (opts.direction == 0) p.swap(pRev);
diff --git a/src/version.hh b/src/version.hh
index d87fb52..7104b42 100644
--- a/src/version.hh
+++ b/src/version.hh
@@ -1 +1 @@
-"830"
+"869"

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/last-align.git



More information about the debian-med-commit mailing list