[med-svn] [last-align] 01/06: New upstream version 885

Sun Oct 15 13:22:58 UTC 2017

This is an automated email from the git hooks/post-receive script.

plessy pushed a commit to branch master
in repository last-align.

commit 899a68165f8864d3cfb67aca39ac97a2add9c8ac
Author: Charles Plessy <plessy at debian.org>
Date:   Sun Oct 15 22:05:37 2017 +0900

    New upstream version 885
---
 ChangeLog.txt            | 87 +++++++++++++++++++++++++++++++++++++++++++-
 doc/last-dotplot.html    | 17 +++++++--
 doc/last-dotplot.txt     | 15 ++++++--
 doc/last-parallel.html   |  4 +--
 doc/last-parallel.txt    |  4 +--
 doc/last-train.html      |  4 +++
 doc/last-train.txt       |  2 ++
 doc/last-tuning.html     |  7 ++--
 doc/last-tuning.txt      |  8 ++---
 doc/last.html            |  4 +--
 doc/last.txt             |  4 +--
 doc/lastal.html          | 13 ++++---
 doc/lastal.txt           |  9 +++--
 doc/lastdb.html          |  5 +--
 doc/lastdb.txt           |  5 +--
 doc/maf-convert.html     | 12 +++----
 doc/maf-convert.txt      | 18 +++++-----
 scripts/fastq-interleave | 10 ++----
 scripts/last-dotplot     | 75 +++++++++++++++++++++++---------------
 scripts/last-postmask    | 23 +++++++-----
 scripts/last-train       | 79 ++++++++++++++++++++++++++--------------
 scripts/maf-convert      | 93 +++++++++++++++++++++++++++++++++++-------------
 src/Alphabet.cc          |  8 -----
 src/Alphabet.hh          |  3 --
 src/LastEvaluer.cc       |  2 +-
 src/LastalArguments.cc   | 33 ++++++++++++-----
 src/MultiSequence.cc     |  7 ----
 src/MultiSequence.hh     | 66 +++++++++++++++++++++++++++++++---
 src/MultiSequenceQual.cc | 22 ++++--------
 src/io.cc                | 11 ++----
 src/io.hh                |  7 ++--
 src/last-pair-probs.cc   |  4 +--
 src/lastal.cc            | 32 ++---------------
 src/lastdb.cc            | 43 ++++++++++------------
 src/makefile             | 32 ++++++++---------
 src/mcf_zstream.hh       | 82 ++++++++++++++++++++++++++++++++++++++++++
 src/version.hh           |  2 +-
 37 files changed, 569 insertions(+), 283 deletions(-)

diff --git a/ChangeLog.txt b/ChangeLog.txt
index 2203b24..7c46466 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -1,9 +1,94 @@
+2017-10-10  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/last-train:
+	Make last-train print the lastal version
+	[c2ce08b5a76a] [tip]
+
+	* src/Alphabet.cc, src/Alphabet.hh, src/MultiSequence.hh,
+	src/lastal.cc, src/lastdb.cc:
+	Refactor
+	[52565bc1c7d7]
+
+	* src/lastdb.cc:
+	Refactor (I think)
+	[02a9cb451bc7]
+
+	* src/MultiSequence.cc, src/MultiSequence.hh,
+	src/MultiSequenceQual.cc, src/lastal.cc:
+	Refactor some code
+	[88fa35342a52]
+
+	* scripts/last-train, test/last-train-test.out, test/last-train-
+	test.sh:
+	Make last-train work with lastdb8
+	[a7ef7848ad1e]
+
+2017-10-06  Martin C. Frith  <Martin C. Frith>
+
+	* doc/maf-convert.txt, scripts/maf-convert, test/maf-convert-test.out,
+	test/maf-convert-test.sh:
+	Add maf-convert -> chain
+	[6635b482e6e2]
+
+	* doc/last.txt, src/LastEvaluer.cc:
+	Avoid some "can't get E-value parameters" errors
+	[2c0cb7ef8842]
+
+2017-10-03  Martin C. Frith  <Martin C. Frith>
+
+	* doc/last-dotplot.txt, scripts/last-dotplot:
+	Add text-rotation options to last-dotplot
+	[20f5c97a3cfd]
+
+	* scripts/last-dotplot:
+	Fix mixed-up margin sizes in last-dotplot
+	[f5f93e51ef53]
+
+2017-09-27  Martin C. Frith  <Martin C. Frith>
+
+	* src/makefile:
+	Try to make the gz compilation work more portably
+	[bf8efe69f160]
+
+2017-09-26  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/last-dotplot, scripts/last-postmask, scripts/last-train,
+	scripts/maf-convert, test/last-postmask-test.out, test/last-
+	postmask-test.sh, test/last-test.sh:
+	Add gz reading to: dotplot postmask train convert
+	[592295375eb1]
+
+2017-09-22  Martin C. Frith  <Martin C. Frith>
+
+	* doc/last-parallel.txt, src/mcf_zstream.hh:
+	Make the gz stuff compile on old computers
+	[0602563ef4e2]
+
+	* src/makefile:
+	Try to make compilation more robust & portable
+	[8a2e4af79c99]
+
+	* doc/lastal.txt, doc/lastdb.txt, scripts/fastq-interleave, src/io.cc,
+	src/io.hh, src/last-pair-probs.cc, src/lastal.cc, src/lastdb.cc,
+	src/makefile, src/mcf_zstream.hh:
+	Enable lastal, lastdb, etc to read .gz files
+	[a530b1ee48f4]
+
+	* doc/last-train.txt, scripts/last-train:
+	Add option -k to last-train
+	[80a016b17089]
+
+	* doc/last-tuning.txt, doc/lastal.txt, src/LastalArguments.cc, test
+	/last-test.out, test/last-test.sh:
+	Add -x % option to lastal
+	[ad7f5f953412]
+
 2017-06-20  Martin C. Frith  <Martin C. Frith>
 
 	* doc/last-train.txt, scripts/last-train, test/last-train-test.out,
 	test/last-train-test.sh:
 	Make it easier to feed stdin/pipe to last-train
-	[b73f1146e688] [tip]
+	[b73f1146e688]
 
 	* doc/lastal.txt, src/AlignmentWrite.cc, test/last-test.out:
 	Add raw score to BlastTab+ format
diff --git a/doc/last-dotplot.html b/doc/last-dotplot.html
index e1717a0..21cc1d9 100644
--- a/doc/last-dotplot.html
+++ b/doc/last-dotplot.html
@@ -319,8 +319,9 @@ table.field-list { border: thin solid green }
 <h1 class="title">last-dotplot</h1>
 
 <p>This script makes a dotplot, a.k.a. Oxford Grid, of pair-wise sequence
-alignments in MAF or LAST tabular format.  It requires the Python
-Imaging Library to be installed.  It can be used like this:</p>
+alignments in MAF or LAST tabular format.  It requires the <a class="reference external" href="https://pillow.readthedocs.io/">Python
+Imaging Library</a> to be installed.
+It can be used like this:</p>
 <pre class="literal-block">
 last-dotplot my-alignments my-plot.png
 </pre>
@@ -330,7 +331,11 @@ last-dotplot alns alns.gif
 </pre>
 <p>To get a nicer font, try something like:</p>
 <pre class="literal-block">
-last-dotplot -f /usr/share/fonts/truetype/freefont/FreeSans.ttf alns alns.png
+last-dotplot -f /usr/share/fonts/liberation/LiberationSans-Regular.ttf alns alns.png
+</pre>
+<p>or:</p>
+<pre class="literal-block">
+last-dotplot -f /Library/Fonts/Arial.ttf alns alns.png
 </pre>
 <p>If the fonts are located somewhere different on your computer, change
 this as appropriate.</p>
@@ -450,6 +455,12 @@ appearance in the input (0), their names (1), their lengths (2).</td></tr>
 <kbd><span class="option">-s <var>SIZE</var></span>, <span class="option">--fontsize=<var>SIZE</var></span></kbd></td>
 <td>TrueType or OpenType font size.</td></tr>
 <tr><td class="option-group">
+<kbd><span class="option">--rot1=<var>ROT</var></span></kbd></td>
+<td>Text rotation for the 1st genome: h(orizontal) or v(ertical).</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--rot2=<var>ROT</var></span></kbd></td>
+<td>Text rotation for the 2nd genome: h(orizontal) or v(ertical).</td></tr>
+<tr><td class="option-group">
 <kbd><span class="option">--lengths1</span></kbd></td>
 <td>Show sequence lengths for the 1st (horizontal) genome.</td></tr>
 <tr><td class="option-group">
diff --git a/doc/last-dotplot.txt b/doc/last-dotplot.txt
index 8642e59..2c1ff46 100644
--- a/doc/last-dotplot.txt
+++ b/doc/last-dotplot.txt
@@ -2,8 +2,9 @@ last-dotplot
 ============
 
 This script makes a dotplot, a.k.a. Oxford Grid, of pair-wise sequence
-alignments in MAF or LAST tabular format.  It requires the Python
-Imaging Library to be installed.  It can be used like this::
+alignments in MAF or LAST tabular format.  It requires the `Python
+Imaging Library <https://pillow.readthedocs.io/>`_ to be installed.
+It can be used like this::
 
   last-dotplot my-alignments my-plot.png
 
@@ -13,7 +14,11 @@ The output can be in any format supported by the Imaging Library::
 
 To get a nicer font, try something like::
 
-  last-dotplot -f /usr/share/fonts/truetype/freefont/FreeSans.ttf alns alns.png
+  last-dotplot -f /usr/share/fonts/liberation/LiberationSans-Regular.ttf alns alns.png
+
+or::
+
+  last-dotplot -f /Library/Fonts/Arial.ttf alns alns.png
 
 If the fonts are located somewhere different on your computer, change
 this as appropriate.
@@ -95,6 +100,10 @@ Text options
       TrueType or OpenType font file.
   -s SIZE, --fontsize=SIZE
       TrueType or OpenType font size.
+  --rot1=ROT
+      Text rotation for the 1st genome: h(orizontal) or v(ertical).
+  --rot2=ROT
+      Text rotation for the 2nd genome: h(orizontal) or v(ertical).
   --lengths1
       Show sequence lengths for the 1st (horizontal) genome.
   --lengths2
diff --git a/doc/last-parallel.html b/doc/last-parallel.html
index 5d2ad15..1cfe214 100644
--- a/doc/last-parallel.html
+++ b/doc/last-parallel.html
@@ -372,11 +372,11 @@ parallel-fastq "lastal -Q1 -D100 db | last-split" < q.fastq > ou
 </pre>
 <p>Instead of this:</p>
 <pre class="literal-block">
-zcat queries.fa.gz | lastal mydb > myalns.maf
+bzcat queries.fa.bz2 | lastal mydb > myalns.maf
 </pre>
 <p>try this:</p>
 <pre class="literal-block">
-zcat queries.fa.gz | parallel-fasta "lastal mydb" > myalns.maf
+bzcat queries.fa.bz2 | parallel-fasta "lastal mydb" > myalns.maf
 </pre>
 <p>Notes:</p>
 <ul>
diff --git a/doc/last-parallel.txt b/doc/last-parallel.txt
index 730157f..c98a451 100644
--- a/doc/last-parallel.txt
+++ b/doc/last-parallel.txt
@@ -61,11 +61,11 @@ try this::
 
 Instead of this::
 
-  zcat queries.fa.gz | lastal mydb > myalns.maf
+  bzcat queries.fa.bz2 | lastal mydb > myalns.maf
 
 try this::
 
-  zcat queries.fa.gz | parallel-fasta "lastal mydb" > myalns.maf
+  bzcat queries.fa.bz2 | parallel-fasta "lastal mydb" > myalns.maf
 
 Notes:
 
diff --git a/doc/last-train.html b/doc/last-train.html
index a4889bd..f510a51 100644
--- a/doc/last-train.html
+++ b/doc/last-train.html
@@ -455,6 +455,10 @@ reduce run time.</td></tr>
 <kbd><span class="option">-m <var>COUNT</var></span></kbd></td>
 <td>Maximum number of initial matches per query position.</td></tr>
 <tr><td class="option-group">
+<kbd><span class="option">-k <var>STEP</var></span></kbd></td>
+<td>Look for initial matches starting only at every STEP-th
+position in each query.</td></tr>
+<tr><td class="option-group">
 <kbd><span class="option">-P <var>COUNT</var></span></kbd></td>
 <td>Number of parallel threads.</td></tr>
 <tr><td class="option-group">
diff --git a/doc/last-train.txt b/doc/last-train.txt
index c9fcd93..c965ff3 100644
--- a/doc/last-train.txt
+++ b/doc/last-train.txt
@@ -84,6 +84,8 @@ Alignment options
              reduce run time.
   -T NUMBER  Type of alignment: 0=local, 1=overlap.
   -m COUNT   Maximum number of initial matches per query position.
+  -k STEP    Look for initial matches starting only at every STEP-th
+             position in each query.
   -P COUNT   Number of parallel threads.
   -Q NUMBER  Query sequence format: 0=fasta, 1=fastq-sanger.
              **Important:** if you use option -Q, last-train will only
diff --git a/doc/last-tuning.html b/doc/last-tuning.html
index a273b3b..a7c04de 100644
--- a/doc/last-tuning.html
+++ b/doc/last-tuning.html
@@ -408,11 +408,8 @@ whose query coordinates lie in those of 2 or more stronger alignments.</p>
 <p>This option can make lastal <strong>faster</strong> but <strong>less sensitive</strong>.  It
 sets the maximum score drop in alignments, in the gapped extension
 phase.  Lower values make it faster, by quitting unpromising
-extensions sooner.  The default aims at best accuracy.</p>
-<p>Unfortunately, the default is a complex function of the other
-parameters and the database size.  You can see it in the lastal header
-after "x=", e.g. by running lastal with no queries.  Then try, say,
-halving it.</p>
+extensions sooner.  The default aims at best accuracy.  For example,
+use -x50% to specify 50% of the default value.</p>
 </div>
 <div class="section" id="id2">
 <h3>lastal -M</h3>
diff --git a/doc/last-tuning.txt b/doc/last-tuning.txt
index 6493e62..9fc4c9b 100644
--- a/doc/last-tuning.txt
+++ b/doc/last-tuning.txt
@@ -109,12 +109,8 @@ lastal -x
 This option can make lastal **faster** but **less sensitive**.  It
 sets the maximum score drop in alignments, in the gapped extension
 phase.  Lower values make it faster, by quitting unpromising
-extensions sooner.  The default aims at best accuracy.
-
-Unfortunately, the default is a complex function of the other
-parameters and the database size.  You can see it in the lastal header
-after "x=", e.g. by running lastal with no queries.  Then try, say,
-halving it.
+extensions sooner.  The default aims at best accuracy.  For example,
+use -x50% to specify 50% of the default value.
 
 lastal -M
 ---------
diff --git a/doc/last.html b/doc/last.html
index 7c0f7f7..ef87985 100644
--- a/doc/last.html
+++ b/doc/last.html
@@ -354,8 +354,8 @@ compile the programs, type:</p>
 <pre class="literal-block">
 make
 </pre>
-<p>If your compiler is really old, you might get an error message like
-this:</p>
+<p>You might get some harmless warning messages.  If your compiler is
+really old, you might get an error message like this:</p>
 <pre class="literal-block">
 unrecognized command line option "-std=c++11"
 </pre>
diff --git a/doc/last.txt b/doc/last.txt
index 70123f1..20d2102 100644
--- a/doc/last.txt
+++ b/doc/last.txt
@@ -33,8 +33,8 @@ compile the programs, type::
 
   make
 
-If your compiler is really old, you might get an error message like
-this::
+You might get some harmless warning messages.  If your compiler is
+really old, you might get an error message like this::
 
   unrecognized command line option "-std=c++11"
 
diff --git a/doc/lastal.html b/doc/lastal.html
index 191dde2..22e63d2 100644
--- a/doc/lastal.html
+++ b/doc/lastal.html
@@ -329,9 +329,10 @@ lastal humanDb dna*.fasta > myalns.maf
 writes several files whose names begin with <tt class="docutils literal">humanDb</tt>.  The lastal
 command reads files called <tt class="docutils literal"><span class="pre">dna*.fasta</span></tt>, compares them to
 <tt class="docutils literal">humanDb</tt>, and writes alignments to a file called <tt class="docutils literal">myalns.maf</tt>.</p>
-<p>You can also pipe query sequences into lastal, for example:</p>
+<p>You can use gzip (.gz) compressed query files.  You can also pipe
+query sequences into lastal, for example:</p>
 <pre class="literal-block">
-zcat seqs.fasta.gz | lastal humanDb > myalns.maf
+bzcat seqs.fasta.bz2 | lastal humanDb > myalns.maf
 </pre>
 <div class="section" id="steps-in-lastal">
 <h2>Steps in lastal</h2>
@@ -516,10 +517,14 @@ looks like this:</p>
 </td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-x <var>DROP</var></span></kbd></td>
-<td>Maximum score drop for gapped alignments.  Gapped alignments are
+<td><p class="first">Maximum score drop for gapped alignments.  Gapped alignments are
 forbidden from having any internal region with score < -DROP.
 This serves two purposes: accuracy (avoid spurious internal
-regions in alignments) and speed (the smaller the faster).</td></tr>
+regions in alignments) and speed (the smaller the faster).</p>
+<p class="last">You can specify either a score (e.g. -x20), or a percentage; for
+example, -x50% specifies 50% of the default value (rounded down
+to the nearest integer).</p>
+</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-y <var>DROP</var></span></kbd></td>
 <td>Maximum score drop for gapless alignments.</td></tr>
diff --git a/doc/lastal.txt b/doc/lastal.txt
index db7eeb1..476d9f8 100644
--- a/doc/lastal.txt
+++ b/doc/lastal.txt
@@ -13,9 +13,10 @@ writes several files whose names begin with ``humanDb``.  The lastal
 command reads files called ``dna*.fasta``, compares them to
 ``humanDb``, and writes alignments to a file called ``myalns.maf``.
 
-You can also pipe query sequences into lastal, for example::
+You can use gzip (.gz) compressed query files.  You can also pipe
+query sequences into lastal, for example::
 
-  zcat seqs.fasta.gz | lastal humanDb > myalns.maf
+  bzcat seqs.fasta.bz2 | lastal humanDb > myalns.maf
 
 Steps in lastal
 ---------------
@@ -186,6 +187,10 @@ Score options
       This serves two purposes: accuracy (avoid spurious internal
       regions in alignments) and speed (the smaller the faster).
 
+      You can specify either a score (e.g. -x20), or a percentage; for
+      example, -x50% specifies 50% of the default value (rounded down
+      to the nearest integer).
+
   -y DROP
       Maximum score drop for gapless alignments.
 
diff --git a/doc/lastdb.html b/doc/lastdb.html
index 974c588..4b54522 100644
--- a/doc/lastdb.html
+++ b/doc/lastdb.html
@@ -336,9 +336,10 @@ TTTGGATATG
 >My2ndSequence
 TTTAGAGGGTTCTTCGGGATT
 </pre>
-<p>You can also pipe sequences into lastdb, for example:</p>
+<p>These files may be compressed in gzip (.gz) format.  You can also pipe
+sequences into lastdb, for example:</p>
 <pre class="literal-block">
-zcat humanChromosome*.fasta.gz | lastdb humanDb
+bzcat humanChromosome*.fasta.bz2 | lastdb humanDb
 </pre>
 </div>
 <div class="section" id="options">
diff --git a/doc/lastdb.txt b/doc/lastdb.txt
index c0b5fdb..38e5fe8 100644
--- a/doc/lastdb.txt
+++ b/doc/lastdb.txt
@@ -21,9 +21,10 @@ like this::
   >My2ndSequence
   TTTAGAGGGTTCTTCGGGATT
 
-You can also pipe sequences into lastdb, for example::
+These files may be compressed in gzip (.gz) format.  You can also pipe
+sequences into lastdb, for example::
 
-  zcat humanChromosome*.fasta.gz | lastdb humanDb
+  bzcat humanChromosome*.fasta.bz2 | lastdb humanDb
 
 Options
 -------
diff --git a/doc/maf-convert.html b/doc/maf-convert.html
index a98040e..295e74e 100644
--- a/doc/maf-convert.html
+++ b/doc/maf-convert.html
@@ -318,9 +318,9 @@ table.field-list { border: thin solid green }
 <div class="document" id="maf-convert">
 <h1 class="title">maf-convert</h1>
 
-<p>This script reads alignments in maf format, and writes them in another
-format.  It can write them in these formats: axt, blast, blasttab,
-html, psl, sam, tab.  You can use it like this:</p>
+<p>This script reads alignments in <a class="reference external" href="http://genome.ucsc.edu/FAQ/FAQformat.html#format5">maf</a> format, and writes them in
+another format.  It can write them in these formats: <a class="reference external" href="https://genome.ucsc.edu/goldenPath/help/axt.html">axt</a>, blast,
+blasttab, <a class="reference external" href="https://genome.ucsc.edu/goldenPath/help/chain.html">chain</a>, html, <a class="reference external" href="https://genome.ucsc.edu/FAQ/FAQformat.html#format2">psl</a>, sam, tab.  You can use it like this:</p>
 <pre class="literal-block">
 maf-convert psl my-alignments.maf > my-alignments.psl
 </pre>
@@ -328,9 +328,7 @@ maf-convert psl my-alignments.maf > my-alignments.psl
 <pre class="literal-block">
 ... | maf-convert psl > my-alignments.psl
 </pre>
-<p>The input should be "multiple alignment format" as described in the
-UCSC Genome FAQ (not "MIRA assembly format" or any other maf).</p>
-<p>This script takes the first (topmost) MAF sequence as the "reference"
+<p>This script takes the first (topmost) maf sequence as the "reference"
 / "subject" / "target", and the second sequence as the "query".</p>
 <p>For html: if the input includes probability lines starting with 'p',
 then the output will be coloured by column probability.  (To get lines
@@ -351,7 +349,7 @@ starting with 'p', run lastal with option -j set to 4 or higher.)</p>
 nucleotides.  This affects psl format only (the first 4
 columns).</td></tr>
 <tr><td class="option-group">
-<kbd><span class="option">-j</span>, <span class="option">--join=<var>N</var></span></kbd></td>
+<kbd><span class="option">-j <var>N</var></span>, <span class="option">--join=<var>N</var></span></kbd></td>
 <td>Join neighboring alignments if they are co-linear and
 separated by at most N letters.  This affects psl format
 only.</td></tr>
diff --git a/doc/maf-convert.txt b/doc/maf-convert.txt
index f78acee..d2d3f4d 100644
--- a/doc/maf-convert.txt
+++ b/doc/maf-convert.txt
@@ -1,9 +1,9 @@
 maf-convert
 ===========
 
-This script reads alignments in maf format, and writes them in another
-format.  It can write them in these formats: axt, blast, blasttab,
-html, psl, sam, tab.  You can use it like this::
+This script reads alignments in maf_ format, and writes them in
+another format.  It can write them in these formats: axt_, blast,
+blasttab, chain_, html, psl_, sam, tab.  You can use it like this::
 
   maf-convert psl my-alignments.maf > my-alignments.psl
 
@@ -11,16 +11,18 @@ It's often convenient to pipe in the input, like this::
 
   ... | maf-convert psl > my-alignments.psl
 
-The input should be "multiple alignment format" as described in the
-UCSC Genome FAQ (not "MIRA assembly format" or any other maf).
-
-This script takes the first (topmost) MAF sequence as the "reference"
+This script takes the first (topmost) maf sequence as the "reference"
 / "subject" / "target", and the second sequence as the "query".
 
 For html: if the input includes probability lines starting with 'p',
 then the output will be coloured by column probability.  (To get lines
 starting with 'p', run lastal with option -j set to 4 or higher.)
 
+.. _maf: http://genome.ucsc.edu/FAQ/FAQformat.html#format5
+.. _axt: https://genome.ucsc.edu/goldenPath/help/axt.html
+.. _chain: https://genome.ucsc.edu/goldenPath/help/chain.html
+.. _psl: https://genome.ucsc.edu/FAQ/FAQformat.html#format2
+
 Options
 -------
 
@@ -32,7 +34,7 @@ Options
          nucleotides.  This affects psl format only (the first 4
          columns).
 
-  -j, --join=N
+  -j N, --join=N
          Join neighboring alignments if they are co-linear and
          separated by at most N letters.  This affects psl format
          only.
diff --git a/scripts/fastq-interleave b/scripts/fastq-interleave
index ca33604..dd9174f 100755
--- a/scripts/fastq-interleave
+++ b/scripts/fastq-interleave
@@ -3,6 +3,7 @@
 test $# = 2 || {
     cat <<EOF
 Usage: $0 x.fastq y.fastq
+or:    $0 x.fastq.gz y.fastq.gz
 
 Read 2 fastq files, and write them interleaved.
 Assumes 1 fastq per 4 lines, i.e. no line wrapping.
@@ -10,10 +11,5 @@ EOF
     exit
 }
 
-paste <(cat "$1" | paste - - - -) <(cat "$2" | paste - - - -) | tr '\t' '\n'
-
-# Is this better?
-#paste <(zcat -f "$1"|paste - - - -) <(zcat -f "$2"|paste - - - -)|tr '\t' '\n'
-
-# This does not interpret "-" as stdin:
-#paste <(paste - - - - < "$1") <(paste - - - - < "$2") | tr '\t' '\n'
+paste <(gzip -cdf "$1" | paste - - - -) <(gzip -cdf "$2" | paste - - - -) |
+    tr '\t' '\n'
diff --git a/scripts/last-dotplot b/scripts/last-dotplot
index 6d4f1d2..f29fed2 100755
--- a/scripts/last-dotplot
+++ b/scripts/last-dotplot
@@ -9,6 +9,7 @@
 # according to the number of aligned nt-pairs within it, but the
 # result is too faint.  How can this be done better?
 
+import gzip
 import fnmatch, itertools, optparse, os, re, sys
 
 # Try to make PIL/PILLOW work:
@@ -18,6 +19,8 @@ except ImportError: import Image, ImageDraw, ImageFont, ImageColor
 def myOpen(fileName):  # faster than fileinput
     if fileName == "-":
         return sys.stdin
+    if fileName.endswith(".gz"):
+        return gzip.open(fileName)
     return open(fileName)
 
 def warn(message):
@@ -160,13 +163,17 @@ def natural_sort_key(my_string):
     parts[1::2] = map(int, parts[1::2])
     return parts
 
-def get_text_sizes(my_strings, font, fontsize, image_mode):
+def textDimensions(imageDraw, font, textRot, text):
+    x, y = imageDraw.textsize(text, font=font)
+    return (y, x) if textRot else (x, y)
+
+def get_text_sizes(my_strings, font, fontsize, image_mode, textRot):
     '''Get widths & heights, in pixels, of some strings.'''
     if fontsize == 0: return [(0, 0) for i in my_strings]
     image_size = 1, 1
     im = Image.new(image_mode, image_size)
     draw = ImageDraw.Draw(im)
-    return [draw.textsize(i, font=font) for i in my_strings]
+    return [textDimensions(draw, font, textRot, i) for i in my_strings]
 
 def sizeText(size):
     suffixes = "bp", "kb", "Mb", "Gb"
@@ -181,7 +188,7 @@ def seqNameAndSizeText(seqName, seqSize):
     return seqName + ": " + sizeText(seqSize)
 
 def getSeqInfo(sortOpt, seqNames, seqLimits,
-               font, fontsize, image_mode, isShowSize):
+               font, fontsize, image_mode, isShowSize, textRot):
     '''Return miscellaneous information about the sequences.'''
     if sortOpt == 1:
         seqNames.sort(key=natural_sort_key)
@@ -199,8 +206,9 @@ def getSeqInfo(sortOpt, seqNames, seqLimits,
         seqLabels = map(seqNameAndSizeText, seqNames, seqSizes)
     else:
         seqLabels = seqNames
-    labelSizes = get_text_sizes(seqLabels, font, fontsize, image_mode)
-    margin = max(zip(*labelSizes)[1])  # maximum text height
+    labelSizes = get_text_sizes(seqLabels, font, fontsize, image_mode, textRot)
+    margin = max(i[1] for i in labelSizes)
+    # xxx the margin may be too big, because some labels may get omitted
     return seqNames, seqSizes, seqLabels, labelSizes, margin
 
 def div_ceil(x, y):
@@ -410,11 +418,11 @@ def drawAnnotations(im, boxes):
 
 def make_label(text, text_size, range_start, range_size):
     '''Return an axis label with endpoint & sort-order information.'''
-    text_width  = text_size[0]
+    text_width, text_height = text_size
     label_start = range_start + (range_size - text_width) // 2
     label_end   = label_start + text_width
     sort_key    = text_width - range_size
-    return sort_key, label_start, label_end, text
+    return sort_key, label_start, label_end, text, text_height
 
 def get_nonoverlapping_labels(labels, label_space):
     '''Get a subset of non-overlapping axis labels, greedily.'''
@@ -425,21 +433,23 @@ def get_nonoverlapping_labels(labels, label_space):
             nonoverlapping_labels.append(i)
     return nonoverlapping_labels
 
-def get_axis_image(seqNames, name_sizes, seq_starts, seq_pix,
-                   font, image_mode, opts):
+def get_axis_image(seqNames, name_sizes, seq_starts, seq_pix, textRot,
+                   textAln, font, image_mode, opts):
     '''Make an image of axis labels.'''
     min_pos = seq_starts[0]
     max_pos = seq_starts[-1] + seq_pix[-1]
-    height = max(zip(*name_sizes)[1])
+    margin = max(i[1] for i in name_sizes)
     labels = map(make_label, seqNames, name_sizes, seq_starts, seq_pix)
     labels = [i for i in labels if i[1] >= min_pos and i[2] <= max_pos]
     labels.sort()
-    labels = get_nonoverlapping_labels(labels, opts.label_space)
-    image_size = max_pos, height
+    minPixTweenLabels = 0 if textRot else opts.label_space
+    labels = get_nonoverlapping_labels(labels, minPixTweenLabels)
+    image_size = (margin, max_pos) if textRot else (max_pos, margin)
     im = Image.new(image_mode, image_size, opts.border_color)
     draw = ImageDraw.Draw(im)
     for i in labels:
-        position = i[1], 0
+        base = margin - i[4] if textAln else 0
+        position = (base, i[1]) if textRot else (i[1], base)
         draw.text(position, i[3], font=font, fill=opts.text_color)
     return im
 
@@ -463,26 +473,28 @@ def lastDotplot(opts, args):
     warn("done")
     if not alignments: raise Exception("there are no alignments")
 
+    textRot1 = "vertical".startswith(opts.rot1)
     i1 = getSeqInfo(opts.sort1, seqNames1, seqLimits1,
-                    font, opts.fontsize, image_mode, opts.lengths1)
-    seqNames1, seqSizes1, seqLabels1, labelSizes1, margin1 = i1
+                    font, opts.fontsize, image_mode, opts.lengths1, textRot1)
+    seqNames1, seqSizes1, seqLabels1, labelSizes1, tMargin = i1
 
+    textRot2 = "horizontal".startswith(opts.rot2)
     i2 = getSeqInfo(opts.sort2, seqNames2, seqLimits2,
-                    font, opts.fontsize, image_mode, opts.lengths2)
-    seqNames2, seqSizes2, seqLabels2, labelSizes2, margin2 = i2
+                    font, opts.fontsize, image_mode, opts.lengths2, textRot2)
+    seqNames2, seqSizes2, seqLabels2, labelSizes2, lMargin = i2
 
     warn("choosing bp per pixel...")
-    pix_limit1 = opts.width  - margin1
-    pix_limit2 = opts.height - margin2
+    pix_limit1 = opts.width  - lMargin
+    pix_limit2 = opts.height - tMargin
     bpPerPix1 = get_bp_per_pix(seqSizes1, opts.border_pixels, pix_limit1)
     bpPerPix2 = get_bp_per_pix(seqSizes2, opts.border_pixels, pix_limit2)
     bpPerPix = max(bpPerPix1, bpPerPix2)
     warn("bp per pixel = " + str(bpPerPix))
 
     seq_pix1, seq_starts1, width  = get_pix_info(seqSizes1, bpPerPix,
-                                                 opts.border_pixels, margin1)
+                                                 opts.border_pixels, lMargin)
     seq_pix2, seq_starts2, height = get_pix_info(seqSizes2, bpPerPix,
-                                                 opts.border_pixels, margin2)
+                                                 opts.border_pixels, tMargin)
     warn("width:  " + str(width))
     warn("height: " + str(height))
 
@@ -506,13 +518,13 @@ def lastDotplot(opts, args):
                             readRmsk(opts.rmsk1, seqLimits1),
                             readGenePred(opts, opts.genePred1, seqLimits1),
                             readGaps(opts, opts.gap1, seqLimits1))
-    b1 = bedBoxes(beds1, seqLimits1, origins1, margin2, height, True, bpPerPix)
+    b1 = bedBoxes(beds1, seqLimits1, origins1, tMargin, height, True, bpPerPix)
 
     beds2 = itertools.chain(readBed(opts.bed2, seqLimits2),
                             readRmsk(opts.rmsk2, seqLimits2),
                             readGenePred(opts, opts.genePred2, seqLimits2),
                             readGaps(opts, opts.gap2, seqLimits2))
-    b2 = bedBoxes(beds2, seqLimits2, origins2, margin1, width, False, bpPerPix)
+    b2 = bedBoxes(beds2, seqLimits2, origins2, lMargin, width, False, bpPerPix)
 
     boxes = sorted(itertools.chain(b1, b2))
     drawAnnotations(im, boxes)
@@ -527,19 +539,22 @@ def lastDotplot(opts, args):
 
     if opts.fontsize != 0:
         axis1 = get_axis_image(seqLabels1, labelSizes1, seq_starts1, seq_pix1,
-                               font, image_mode, opts)
+                               textRot1, False, font, image_mode, opts)
+        if textRot1:
+            axis1 = axis1.transpose(Image.ROTATE_90)
         axis2 = get_axis_image(seqLabels2, labelSizes2, seq_starts2, seq_pix2,
-                               font, image_mode, opts)
-        axis2 = axis2.transpose(Image.ROTATE_270)  # !!! bug hotspot
+                               textRot2, textRot2, font, image_mode, opts)
+        if not textRot2:
+            axis2 = axis2.transpose(Image.ROTATE_270)
         im.paste(axis1, (0, 0))
         im.paste(axis2, (0, 0))
 
     for i in seq_starts1[1:]:
-        box = i - opts.border_pixels, margin2, i, height
+        box = i - opts.border_pixels, tMargin, i, height
         im.paste(opts.border_color, box)
 
     for i in seq_starts2[1:]:
-        box = margin1, i - opts.border_pixels, width, i
+        box = lMargin, i - opts.border_pixels, width, i
         im.paste(opts.border_color, box)
 
     im.save(args[1])
@@ -589,6 +604,10 @@ if __name__ == "__main__":
                   help="TrueType or OpenType font file")
     og.add_option("-s", "--fontsize", metavar="SIZE", type="int", default=11,
                   help="TrueType or OpenType font size (default: %default)")
+    og.add_option("--rot1", metavar="ROT", default="h",
+                  help="text rotation for the 1st genome (default=%default)")
+    og.add_option("--rot2", metavar="ROT", default="v",
+                  help="text rotation for the 2nd genome (default=%default)")
     og.add_option("--lengths1", action="store_true",
                   help="show sequence lengths for the 1st (horizontal) genome")
     og.add_option("--lengths2", action="store_true",
diff --git a/scripts/last-postmask b/scripts/last-postmask
index 669be10..b17f376 100755
--- a/scripts/last-postmask
+++ b/scripts/last-postmask
@@ -12,8 +12,16 @@
 # Limitations: doesn't (yet) handle sequence quality data,
 # frameshifts, or generalized affine gaps.
 
+import gzip
 import itertools, optparse, os, signal, sys
 
+def myOpen(fileName):
+    if fileName == "-":
+        return sys.stdin
+    if fileName.endswith(".gz"):
+        return gzip.open(fileName)
+    return open(fileName)
+
 def getScoreMatrix(rowHeads, colHeads, matrix, deleteCost, insertCost):
     defaultScore = min(map(min, matrix))
     scoreMatrix = [[defaultScore for i in range(128)] for j in range(128)]
@@ -91,15 +99,12 @@ def doOneFile(lines):
     if seqs: printIfGood(maf, seqs, scoreMatrix, aDel, aIns, minScore)
 
 def lastPostmask(args):
-    if args:
-        for i in args:
-            if i == "-":
-                doOneFile(sys.stdin)
-            else:
-                with open(i) as f:
-                    doOneFile(f)
-    else:
-        doOneFile(sys.stdin)
+    if not args:
+        args = ["-"]
+    for i in args:
+        f = myOpen(i)
+        doOneFile(f)
+        f.close()
 
 if __name__ == "__main__":
     signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # avoid silly error message
diff --git a/scripts/last-train b/scripts/last-train
index 7fe5947..4b360c7 100755
--- a/scripts/last-train
+++ b/scripts/last-train
@@ -1,11 +1,14 @@
 #! /usr/bin/env python
 # Copyright 2015 Martin C. Frith
 
+import gzip
 import math, optparse, os, random, signal, subprocess, sys, tempfile
 
 def myOpen(fileName):  # faster than fileinput
     if fileName == "-":
         return sys.stdin
+    if fileName.endswith(".gz"):
+        return gzip.open(fileName)
     return open(fileName)
 
 def randomSample(things, sampleSize):
@@ -27,29 +30,30 @@ def seqInput(fileNames):
     if not fileNames:
         fileNames = ["-"]
     for name in fileNames:
-        with myOpen(name) as f:
-            seqType = 0
-            for line in f:
-                if seqType == 0:
-                    if line[0] == ">":
-                        seqType = 1
-                        seq = []
-                    elif line[0] == "@":
-                        seqType = 2
-                        lineType = 1
-                elif seqType == 1:  # fasta
-                    if line[0] == ">":
-                        yield "".join(seq), ""
-                        seq = []
-                    else:
-                        seq.append(line.rstrip())
-                elif seqType == 2:  # fastq
-                    if lineType == 1:
-                        seq = line.rstrip()
-                    elif lineType == 3:
-                        yield seq, line.rstrip()
-                    lineType = (lineType + 1) % 4
-            if seqType == 1: yield "".join(seq), ""
+        f = myOpen(name)
+        seqType = 0
+        for line in f:
+            if seqType == 0:
+                if line[0] == ">":
+                    seqType = 1
+                    seq = []
+                elif line[0] == "@":
+                    seqType = 2
+                    lineType = 1
+            elif seqType == 1:  # fasta
+                if line[0] == ">":
+                    yield "".join(seq), ""
+                    seq = []
+                else:
+                    seq.append(line.rstrip())
+            elif seqType == 2:  # fastq
+                if lineType == 1:
+                    seq = line.rstrip()
+                elif lineType == 3:
+                    yield seq, line.rstrip()
+                lineType = (lineType + 1) % 4
+        if seqType == 1: yield "".join(seq), ""
+        f.close()
 
 def isGoodChunk(chunk):
     for i in chunk:
@@ -112,6 +116,13 @@ def getSeqSample(opts, queryFiles, outfile):
                 x = y
     writeSegment(outfile, x)
 
+def versionFromHeader(lines):
+    for line in lines:
+        words = line.split()
+        if "version" in words:
+            return words[-1]
+    raise Exception("couldn't read the version")
+
 def scaleFromHeader(lines):
     for line in lines:
         for i in line.split():
@@ -329,8 +340,16 @@ def tryToMakeChildProgramsFindable():
     # put srcDir first, to avoid getting older versions of LAST:
     os.environ["PATH"] = srcDir + os.pathsep + os.environ["PATH"]
 
-def fixedLastalArgs(opts):
-    x = ["lastal", "-j7"]
+def readLastalProgName(lastdbIndexName):
+    bitsPerInt = "32"
+    with open(lastdbIndexName + ".prj") as f:
+        for line in f:
+            if line.startswith("integersize="):
+                bitsPerInt = line.split("=")[1].strip()
+    return "lastal8" if bitsPerInt == "64" else "lastal"
+
+def fixedLastalArgs(opts, lastalProgName):
+    x = [lastalProgName, "-j7"]
     if opts.D: x.append("-D" + opts.D)
     if opts.E: x.append("-E" + opts.E)
     if opts.s: x.append("-s" + opts.s)
@@ -338,14 +357,16 @@ def fixedLastalArgs(opts):
     if opts.C: x.append("-C" + opts.C)
     if opts.T: x.append("-T" + opts.T)
     if opts.m: x.append("-m" + opts.m)
+    if opts.k: x.append("-k" + opts.k)
     if opts.P: x.append("-P" + opts.P)
     if opts.Q: x.append("-Q" + opts.Q)
     return x
 
 def doTraining(opts, args):
     tryToMakeChildProgramsFindable()
+    lastalProgName = readLastalProgName(args[0])
     scaleIncrease = 20  # while training, up-scale the scores by this amount
-    x = fixedLastalArgs(opts)
+    x = fixedLastalArgs(opts, lastalProgName)
     if opts.r: x.append("-r" + opts.r)
     if opts.q: x.append("-q" + opts.q)
     if opts.p: x.append("-p" + opts.p)
@@ -357,6 +378,7 @@ def doTraining(opts, args):
     y = ["last-split", "-n"]
     p = subprocess.Popen(x, stdout=subprocess.PIPE)
     q = subprocess.Popen(y, stdin=p.stdout, stdout=subprocess.PIPE)
+    lastalVersion = versionFromHeader(q.stdout)
     externalScale = scaleFromHeader(q.stdout)
     internalScale = externalScale * scaleIncrease
     if opts.Q:
@@ -364,6 +386,7 @@ def doTraining(opts, args):
         internalMatrix = scaledMatrix(externalMatrix, scaleIncrease)
     oldParameters = []
 
+    print "# lastal version:", lastalVersion
     print "# maximum percent identity:", opts.pid
     print "# scale of score parameters:", externalScale
     print "# scale used while training:", internalScale
@@ -390,7 +413,7 @@ def doTraining(opts, args):
             if parameters in oldParameters: break
             oldParameters.append(parameters)
             internalMatrix = matrixWithLetters(matScores)
-        x = fixedLastalArgs(opts)
+        x = fixedLastalArgs(opts, lastalProgName)
         x.append("-p-")
         x += args
         p = subprocess.Popen(x, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
@@ -472,6 +495,8 @@ if __name__ == "__main__":
                   help="type of alignment: 0=local, 1=overlap (default: 0)")
     og.add_option("-m", metavar="COUNT", help=
                   "maximum initial matches per query position (default: 10)")
+    og.add_option("-k", metavar="STEP", help="use initial matches starting at "
+                  "every STEP-th position in each query (default: 1)")
     og.add_option("-P", metavar="THREADS",
                   help="number of parallel threads")
     og.add_option("-Q", metavar="NUMBER",
diff --git a/scripts/maf-convert b/scripts/maf-convert
index 850f3b1..97e78b3 100755
--- a/scripts/maf-convert
+++ b/scripts/maf-convert
@@ -7,8 +7,16 @@
 # Genome FAQ, not e.g. "MIRA assembly format".
 
 from itertools import *
+import gzip
 import math, optparse, os, signal, sys
 
+def myOpen(fileName):
+    if fileName == "-":
+        return sys.stdin
+    if fileName.endswith(".gz"):
+        return gzip.open(fileName)
+    return open(fileName)
+
 def maxlen(s):
     return max(map(len, s))
 
@@ -161,7 +169,7 @@ def writeAxt(maf):
 
     head, body = ranges[0], ranges[1:]
 
-    outWords = [str(axtCounter.next())]
+    outWords = [str(next(axtCounter))]
     outWords.extend(head[:3])
     for i in body:
         outWords.extend(i)
@@ -210,6 +218,42 @@ def mafConvertToTab(opts, lines):
     for maf in mafInput(opts, lines):
         writeTab(maf)
 
+##### Routines for converting to chain format: #####
+
+def writeChain(maf):
+    aLine, sLines, qLines, pLines = maf
+
+    score = "0"
+    for i in aLine.split():
+        if i.startswith("score="):
+            score = i[6:]
+
+    outWords = ["chain", score]
+
+    for seqName, seqLen, strand, letterSize, beg, end, row in sLines:
+        x = seqName, str(seqLen), strand, str(beg), str(end)
+        outWords.extend(x)
+
+    outWords.append(str(next(axtCounter) + 1))
+
+    print " ".join(outWords)
+
+    letterSizes = [i[3] for i in sLines]
+    rows = [i[6] for i in sLines]
+    alignmentColumns = izip(*rows)
+    size = "0"
+    for i in matchAndInsertSizes(alignmentColumns, letterSizes):
+        if ":" in i:
+            print size + "\t" + i.replace(":", "\t")
+            size = "0"
+        else:
+            size = i
+    print size
+
+def mafConvertToChain(opts, lines):
+    for maf in mafInput(opts, lines):
+        writeChain(maf)
+
 ##### Routines for converting to PSL format: #####
 
 def pslBlocks(opts, mafs, outCounts):
@@ -335,16 +379,17 @@ def readGroupId(readGroupItems):
 def readSequenceLengths(fileNames):
     """Read name & length of topmost sequence in each maf block."""
     for i in fileNames:
-        with open(i) as f:
-            fields = None
-            for line in f:
-                if fields:
-                    if line.isspace():
-                        fields = None
-                else:
-                    if line[0] == "s":
-                        fields = line.split()
-                        yield fields[1], fields[5]
+        f = myOpen(i)
+        fields = None
+        for line in f:
+            if fields:
+                if line.isspace():
+                    fields = None
+            else:
+                if line[0] == "s":
+                    fields = line.split()
+                    yield fields[1], fields[5]
+        f.close()
 
 def naturalSortKey(s):
     """Return a key that sorts strings in "natural" order."""
@@ -372,11 +417,9 @@ def writeSamHeader(opts, fileNames):
             print "@SQ\tSN:%s\tLN:%s" % (k, sequenceLengths[k])
 
     if opts.dictfile:
-        if opts.dictfile == "-":
-            copyDictFile(sys.stdin)
-        else:
-            with open(opts.dictfile) as f:
-                copyDictFile(f)
+        f = myOpen(opts.dictfile)
+        copyDictFile(f)
+        f.close()
 
     if opts.readgroup:
         print "@RG\t" + "\t".join(opts.readgroup.split())
@@ -760,6 +803,8 @@ def mafConvertOneFile(opts, formatName, lines):
         mafConvertToBlast(opts, lines)
     elif isFormat(formatName, "blasttab"):
         mafConvertToBlastTab(opts, lines)
+    elif isFormat(formatName, "chain"):
+        mafConvertToChain(opts, lines)
     elif isFormat(formatName, "html"):
         mafConvertToHtml(opts, lines)
     elif isFormat(formatName, "psl"):
@@ -787,15 +832,12 @@ def mafConvert(opts, args):
         if isFormat(formatName, "tabular"):
             opts.isKeepComments = True
 
-    if fileNames:
-        for i in fileNames:
-            if i == "-":
-                mafConvertOneFile(opts, formatName, sys.stdin)
-            else:
-                with open(i) as f:
-                    mafConvertOneFile(opts, formatName, f)
-    else:
-        mafConvertOneFile(opts, formatName, sys.stdin)
+    if not fileNames:
+        fileNames = ["-"]
+    for i in fileNames:
+        f = myOpen(i)
+        mafConvertOneFile(opts, formatName, f)
+        f.close()
 
     if not opts.noheader:
         if isFormat(formatName, "html"):
@@ -809,6 +851,7 @@ if __name__ == "__main__":
   %prog axt mafFile(s)
   %prog blast mafFile(s)
   %prog blasttab mafFile(s)
+  %prog chain mafFile(s)
   %prog html mafFile(s)
   %prog psl mafFile(s)
   %prog sam mafFile(s)
diff --git a/src/Alphabet.cc b/src/Alphabet.cc
index 961b1f6..d0d2d2b 100644
--- a/src/Alphabet.cc
+++ b/src/Alphabet.cc
@@ -51,14 +51,6 @@ char* Alphabet::rtCopy( const uchar* beg, const uchar* end, char* dest ) const{
   return dest;
 }
 
-void Alphabet::rc( uchar* beg, uchar* end ) const{
-  std::reverse( beg, end );
-
-  for( /* noop */; beg < end; ++beg ){
-    *beg = complement[ *beg ];
-  }
-}
-
 void Alphabet::init(){
   for( std::string::iterator i = letters.begin(); i < letters.end(); ++i )
     *i = std::toupper( *i );
diff --git a/src/Alphabet.hh b/src/Alphabet.hh
index 0ec118c..540c884 100644
--- a/src/Alphabet.hh
+++ b/src/Alphabet.hh
@@ -42,9 +42,6 @@ struct Alphabet{
   // return the position after the last written position in dest
   char* rtCopy( const uchar* beg, const uchar* end, char* dest ) const;
 
-  // reverse and complement a sequence of numbers, in place
-  void rc( uchar* beg, uchar* end ) const;
-
   std::string letters;    // the "proper" letters, e.g. ACGT for DNA
   unsigned size;          // same as letters.size(): excludes delimiters
   uchar encode[capacity];  // translate ASCII letters to codes (small integers)
diff --git a/src/LastEvaluer.cc b/src/LastEvaluer.cc
index b1951ca..72b1dae 100644
--- a/src/LastEvaluer.cc
+++ b/src/LastEvaluer.cc
@@ -367,7 +367,7 @@ void LastEvaluer::init(const char *matrixName,
 			     0, maxMegabytes, randomSeed, t);
 	  break;
 	} catch (const Sls::error& e) {
-	  if (i == 10) throw;
+	  if (i == 20) throw;
 	}
       }
     } else {
diff --git a/src/LastalArguments.cc b/src/LastalArguments.cc
index 8d389ff..995f2e1 100644
--- a/src/LastalArguments.cc
+++ b/src/LastalArguments.cc
@@ -41,6 +41,18 @@ static char parseOutputFormat( const char* text ){
   return 0;
 }
 
+static int parseScoreOrPercent(const std::string &s) {
+  int x;
+  std::istringstream iss(s);
+  if(!(iss >> x) || x < 0) ERR("bad value: " + s);
+  char c;
+  if (iss >> c) {
+    if (c != '%' || iss >> c) ERR("bad value: " + s);
+    x = -x;  // negative value indicates percentage (kludge)
+  }
+  return x;
+}
+
 namespace cbrc{
 
 LastalArguments::LastalArguments() :
@@ -66,7 +78,7 @@ LastalArguments::LastalArguments() :
   gapPairCost(-1),  // this means: OFF
   frameshiftCost(-1),  // this means: ordinary, non-translated alignment
   matrixFile(""),
-  maxDropGapped(-1),  // depends on minScoreGapped & maxDropGapless
+  maxDropGapped(-100),  // depends on minScoreGapped & maxDropGapless
   maxDropGapless(-1),  // depends on the score matrix
   maxDropFinal(-1),  // depends on maxDropGapped
   inputFormat(sequenceFormat::fasta),
@@ -116,8 +128,8 @@ Score options (default settings):\n\
 -B: insertion extension cost (b)\n\
 -c: unaligned residue pair cost (off)\n\
 -F: frameshift cost (off)\n\
--x: maximum score drop for gapped alignments (max[y, e-1])\n\
--y: maximum score drop for gapless alignments (t*10)\n\
+-x: maximum score drop for gapped alignments (e-1)\n\
+-y: maximum score drop for gapless alignments (min[t*10, x])\n\
 -z: maximum score drop for final gapped alignments (x)\n\
 -d: minimum score for gapless alignments (min[e, t*ln(1000*refSize/n)])\n\
 -e: minimum score for gapped alignments\n\
@@ -222,8 +234,7 @@ LAST home page: http://last.cbrc.jp/\n\
       if( frameshiftCost < 0 ) badopt( c, optarg );
       break;
     case 'x':
-      unstringify( maxDropGapped, optarg );
-      if( maxDropGapped < 0 ) badopt( c, optarg );
+      maxDropGapped = parseScoreOrPercent(optarg);
       break;
     case 'y':
       unstringify( maxDropGapless, optarg );
@@ -525,13 +536,17 @@ To proceed without E-values, set a score threshold with option -e.");
 
   if( temperature < 0 ) temperature = 1 / lambda;
 
+  if( maxDropGapped < 0 ){
+    int percent = -maxDropGapped;
+    maxDropGapped = std::max( minScoreGapped - 1, 0 );
+    maxDropGapped = std::max( maxDropGapped, maxDropGapless );
+    if (percent != 100) maxDropGapped = maxDropGapped * percent / 100;
+  }
+
   if( maxDropGapless < 0 ){  // should it depend on temperature or lambda?
     if( temperature < 0 ) maxDropGapless = 0;  // shouldn't happen
     else                  maxDropGapless = int( 10.0 * temperature + 0.5 );
-  }
-
-  if( maxDropGapped < 0 ){
-    maxDropGapped = std::max( minScoreGapped - 1, maxDropGapless );
+    maxDropGapless = std::min( maxDropGapless, maxDropGapped );
   }
 
   if( maxDropFinal < 0 ) maxDropFinal = maxDropGapped;
diff --git a/src/MultiSequence.cc b/src/MultiSequence.cc
index f9b0077..1f1d05a 100644
--- a/src/MultiSequence.cc
+++ b/src/MultiSequence.cc
@@ -58,13 +58,6 @@ void MultiSequence::toFiles( const std::string& baseName ) const{
                       baseName + ".qua" );
 }
 
-void MultiSequence::addName( std::string& name ){
-  names.v.insert( names.v.end(), name.begin(), name.end() );
-  nameEnds.v.push_back( names.v.size() );
-  if( nameEnds.v.back() < names.v.size() )
-    throw std::runtime_error("the sequence names are too long");
-}
-
 std::istream& MultiSequence::readFastaName( std::istream& stream ){
   std::string line, word;
   getline( stream, line );
diff --git a/src/MultiSequence.hh b/src/MultiSequence.hh
index df388dc..3876058 100644
--- a/src/MultiSequence.hh
+++ b/src/MultiSequence.hh
@@ -17,10 +17,11 @@
 
 namespace cbrc{
 
+typedef unsigned char uchar;
+
 class MultiSequence{
  public:
   typedef LAST_INT_TYPE indexT;
-  typedef unsigned char uchar;
 
   // initialize with leftmost delimiter pad, ready for appending sequences
   void initForAppending( indexT padSizeIn );
@@ -130,13 +131,67 @@ class MultiSequence{
   // read the letters above PSSM columns, so we know which column is which
   std::istream& readPssmHeader( std::istream& stream );
 
-  // add a new sequence name
-  void addName( std::string& name );
+  void finishName() {  // finish adding a sequence name: store its end coord
+    nameEnds.v.push_back(names.v.size());
+    if (nameEnds.v.back() < names.v.size()) {
+      throw std::runtime_error("the sequence names are too long");
+    }
+  }
+
+  void addName(const std::string &name) {  // add a new sequence name
+    names.v.insert(names.v.end(), name.begin(), name.end());
+    finishName();
+  }
+
+  void finishQual() {  // add delimiter to the end of the quality scores
+    uchar padQualityScore = 64;  // should never be used, but a valid value
+    size_t s = padSize * qualityScoresPerLetter;
+    qualityScores.v.insert(qualityScores.v.end(), s, padQualityScore);
+  }
+
+  void finishPssm() {  // add delimiter to the end of the PSSM
+    pssm.insert(pssm.end(), padSize * scoreMatrixRowSize, -INF);
+  }
 
   // can we finish the last sequence and stay within the memory limit?
   bool isFinishable( indexT maxSeqLen ) const;
 };
 
+inline void reverseComplementPssm(ScoreMatrixRow *beg, ScoreMatrixRow *end,
+				  const uchar *complement) {
+  while (beg < end) {
+    --end;
+    for (unsigned i = 0; i < scoreMatrixRowSize; ++i) {
+      unsigned j = complement[i];
+      if (beg < end || i < j) std::swap((*beg)[i], (*end)[j]);
+    }
+    ++beg;
+  }
+}
+
+inline void reverseComplementOneSequence(MultiSequence &m,
+					 const uchar *complement,
+					 size_t seqNum) {
+  size_t b = m.seqBeg(seqNum);
+  size_t e = m.seqEnd(seqNum);
+
+  uchar *s = m.seqWriter();
+  std::reverse(s + b, s + e);
+  for (size_t i = b; i < e; ++i) {
+    s[i] = complement[s[i]];
+  }
+
+  size_t qpl = m.qualsPerLetter();
+  if (qpl) {
+    std::reverse(m.qualityWriter() + b * qpl, m.qualityWriter() + e * qpl);
+  }
+
+  ScoreMatrixRow *p = m.pssmWriter();
+  if (p) {
+    reverseComplementPssm(p + b, p + e, complement);
+  }
+}
+
 // Divide the sequences into a given number of roughly-equally-sized
 // chunks, and return the first sequence in the Nth chunk.
 inline size_t firstSequenceInChunk(const MultiSequence &m,
@@ -152,5 +207,6 @@ inline size_t firstSequenceInChunk(const MultiSequence &m,
   return (begDistance < endDistance) ? seqNum : seqNum + 1;
 }
 
-}  // end namespace cbrc
-#endif  // MULTISEQUENCE_HH
+}
+
+#endif
diff --git a/src/MultiSequenceQual.cc b/src/MultiSequenceQual.cc
index 8e33f84..415fa14 100644
--- a/src/MultiSequenceQual.cc
+++ b/src/MultiSequenceQual.cc
@@ -15,12 +15,9 @@ using namespace cbrc;
 
 std::istream&
 MultiSequence::appendFromFastq( std::istream& stream, indexT maxSeqLen ){
-  const uchar padQualityScore = 64;  // should never be used, but a valid value
-
   // initForAppending:
   qualityScoresPerLetter = 1;
-  if( qualityScores.v.empty() )
-    qualityScores.v.insert( qualityScores.v.end(), padSize, padQualityScore );
+  if( qualityScores.v.empty() ) finishQual();
 
   // reinitForAppending:
   if( qualityScores.v.size() > seq.v.size() )
@@ -51,7 +48,7 @@ MultiSequence::appendFromFastq( std::istream& stream, indexT maxSeqLen ){
 
   if( isFinishable(maxSeqLen) ){
     finish();
-    qualityScores.v.insert( qualityScores.v.end(), padSize, padQualityScore );
+    finishQual();
   }
 
   return stream;
@@ -60,15 +57,11 @@ MultiSequence::appendFromFastq( std::istream& stream, indexT maxSeqLen ){
 std::istream&
 MultiSequence::appendFromPrb( std::istream& stream, indexT maxSeqLen,
 			      unsigned alphSize, const uchar decode[] ){
-  const uchar padQualityScore = 64;  // should never be used, but a valid value
-  size_t qualPadSize = padSize * alphSize;
   size_t qualSize = seq.v.size() * alphSize;
 
   // initForAppending:
   qualityScoresPerLetter = alphSize;
-  if( qualityScores.v.empty() )
-    qualityScores.v.insert( qualityScores.v.end(), qualPadSize,
-                            padQualityScore );
+  if( qualityScores.v.empty() ) finishQual();
 
   // reinitForAppending:
   if( qualityScores.v.size() > qualSize )
@@ -104,8 +97,7 @@ MultiSequence::appendFromPrb( std::istream& stream, indexT maxSeqLen,
 
   if( isFinishable(maxSeqLen) ){
     finish();
-    qualityScores.v.insert( qualityScores.v.end(), qualPadSize,
-                            padQualityScore );
+    finishQual();
   }
 
   return stream;
@@ -148,12 +140,10 @@ std::istream&
 MultiSequence::appendFromPssm( std::istream& stream, indexT maxSeqLen,
                                const uchar* lettersToNumbers,
                                bool isMaskLowercase ){
-  size_t pssmPadSize = padSize * scoreMatrixRowSize;
   size_t pssmSize = seq.v.size() * scoreMatrixRowSize;
 
   // initForAppending:
-  if( pssm.empty() )
-    pssm.insert( pssm.end(), pssmPadSize, -INF );
+  if( pssm.empty() ) finishPssm();
 
   // reinitForAppending:
   if( pssm.size() > pssmSize )
@@ -200,7 +190,7 @@ MultiSequence::appendFromPssm( std::istream& stream, indexT maxSeqLen,
 
   if( isFinishable(maxSeqLen) ){
     finish();
-    pssm.insert( pssm.end(), pssmPadSize, -INF );
+    finishPssm();
   }
 
   if( !stream.bad() ) stream.clear();
diff --git a/src/io.cc b/src/io.cc
index 0c8d3d3..f1abcce 100644
--- a/src/io.cc
+++ b/src/io.cc
@@ -6,22 +6,15 @@
 
 namespace cbrc{
 
-std::istream& openIn( const std::string& fileName, std::ifstream& ifs ){
+std::istream& openIn( const std::string& fileName, mcf::izstream& ifs ){
   if( fileName == "-" ) return std::cin;
   ifs.open( fileName.c_str() );
   if( !ifs ) throw std::runtime_error("can't open file: " + fileName);
   return ifs;
 }
 
-std::ostream& openOut( const std::string& fileName, std::ofstream& ofs ){
-  if( fileName == "-" ) return std::cout;
-  ofs.open( fileName.c_str() );
-  if( !ofs ) throw std::runtime_error("can't open file: " + fileName);
-  return ofs;
-}
-
 std::string slurp( const std::string& fileName ){
-  std::ifstream inFileStream;
+  mcf::izstream inFileStream;
   std::istream& in = openIn( fileName, inFileStream );
   std::istreambuf_iterator<char> beg(in), end; 
   return std::string( beg, end );
diff --git a/src/io.hh b/src/io.hh
index 9162898..c08cd8c 100644
--- a/src/io.hh
+++ b/src/io.hh
@@ -6,6 +6,8 @@
 #ifndef IO_H
 #define IO_H
 
+#include "mcf_zstream.hh"
+
 #include <vector>
 #include <string>
 #include <stdexcept>
@@ -15,10 +17,7 @@
 namespace cbrc{
 
 // open an input file, but if the name is "-", just return cin
-std::istream& openIn( const std::string& fileName, std::ifstream& ifs );
-
-// open an output file, but if the name is "-", just return cout
-std::ostream& openOut( const std::string& fileName, std::ofstream& ofs );
+std::istream& openIn( const std::string& fileName, mcf::izstream& ifs );
 
 // read a file into a string, but if the name is "-", read cin
 std::string slurp( const std::string& fileName );
diff --git a/src/last-pair-probs.cc b/src/last-pair-probs.cc
index 5d4436d..21adc33 100644
--- a/src/last-pair-probs.cc
+++ b/src/last-pair-probs.cc
@@ -616,7 +616,7 @@ void lastPairProbs(LastPairProbsOptions& opts) {
   size_t n = inputs.size();
 
   if (!opts.isFraglen || !opts.isSdev) {
-    std::ifstream inFile1, inFile2;
+    mcf::izstream inFile1, inFile2;
     std::istream& in1 = (n > 0) ? cbrc::openIn(inputs[0], inFile1) : std::cin;
     std::istream& in2 = (n > 1) ? cbrc::openIn(inputs[1], inFile2) : in1;
     if (n < 2) skipOneBatchMarker(in1);
@@ -626,7 +626,7 @@ void lastPairProbs(LastPairProbsOptions& opts) {
   }
 
   if (!opts.estdist) {
-    std::ifstream inFile1, inFile2;
+    mcf::izstream inFile1, inFile2;
     std::istream& in1 = (n > 0) ? cbrc::openIn(inputs[0], inFile1) : std::cin;
     std::istream& in2 = (n > 1) ? cbrc::openIn(inputs[1], inFile2) : in1;
     AlignmentParameters params1 = readHeaderOrDie(in1);
diff --git a/src/lastal.cc b/src/lastal.cc
index 0e90c61..47bff6e 100644
--- a/src/lastal.cc
+++ b/src/lastal.cc
@@ -950,42 +950,16 @@ void translateAndScan( LastAligner& aligner, size_t queryNum, char strand ){
   cullFinalAlignments( aligner.textAlns, oldNumOfAlns );
 }
 
-static void reverseComplementPssm( size_t queryNum ){
-  ScoreMatrixRow* beg = query.pssmWriter() + query.seqBeg(queryNum);
-  ScoreMatrixRow* end = query.pssmWriter() + query.seqEnd(queryNum);
-
-  while( beg < end ){
-    --end;
-    for( unsigned i = 0; i < scoreMatrixRowSize; ++i ){
-      unsigned j = queryAlph.complement[i];
-      if( beg < end || i < j ) std::swap( (*beg)[i], (*end)[j] );
-    }
-    ++beg;
-  }
-}
-
-static void reverseComplementQuery( size_t queryNum ){
-  size_t b = query.seqBeg(queryNum);
-  size_t e = query.seqEnd(queryNum);
-  queryAlph.rc( query.seqWriter() + b, query.seqWriter() + e );
-  if( isQuality( args.inputFormat ) ){
-    std::reverse( query.qualityWriter() + b * query.qualsPerLetter(),
-		  query.qualityWriter() + e * query.qualsPerLetter() );
-  }else if( args.inputFormat == sequenceFormat::pssm ){
-    reverseComplementPssm(queryNum);
-  }
-}
-
 static void alignOneQuery(LastAligner &aligner,
 			  size_t queryNum, bool isFirstVolume) {
   if (args.strand == 2 && !isFirstVolume)
-    reverseComplementQuery(queryNum);
+    reverseComplementOneSequence(query, queryAlph.complement, queryNum);
 
   if (args.strand != 0)
     translateAndScan(aligner, queryNum, '+');
 
   if (args.strand == 2 || (args.strand == 0 && isFirstVolume))
-    reverseComplementQuery(queryNum);
+    reverseComplementOneSequence(query, queryAlph.complement, queryNum);
 
   if (args.strand != 1)
     translateAndScan(aligner, queryNum, '-');
@@ -1251,7 +1225,7 @@ void lastal( int argc, char** argv ){
   char** inputBegin = argv + args.inputStart;
 
   for( char** i = *inputBegin ? inputBegin : defaultInput; *i; ++i ){
-    std::ifstream inFileStream;
+    mcf::izstream inFileStream;
     std::istream& in = openIn( *i, inFileStream );
     LOG( "reading " << *i << "..." );
 
diff --git a/src/lastdb.cc b/src/lastdb.cc
index 085d9d3..daea418 100644
--- a/src/lastdb.cc
+++ b/src/lastdb.cc
@@ -180,12 +180,21 @@ static void preprocessSeqs(MultiSequence &multi,
 // Make one database volume, from one batch of sequences
 void makeVolume( std::vector< CyclicSubsetSeed >& seeds,
 		 MultiSequence& multi, const LastdbArguments& args,
-		 const Alphabet& alph, const std::vector<countT>& letterCounts,
+		 const Alphabet& alph, std::vector<countT>& letterCounts,
 		 const TantanMasker& masker, unsigned numOfThreads,
 		 const std::string& seedText, const std::string& baseName ){
   size_t numOfIndexes = seeds.size();
   size_t numOfSequences = multi.finishedSequences();
   size_t textLength = multi.finishedSize();
+  const uchar* seq = multi.seqReader();
+
+  std::vector<countT> letterCountsInThisVolume(alph.size);
+  alph.count(seq, seq + textLength, &letterCountsInThisVolume[0]);
+  for (unsigned c = 0; c < alph.size; ++c) {
+    letterCounts[c] += letterCountsInThisVolume[c];
+  }
+
+  if (args.isCountsOnly) return;
 
   if( args.tantanSetting ){
     LOG( "masking..." );
@@ -194,9 +203,8 @@ void makeVolume( std::vector< CyclicSubsetSeed >& seeds,
 
   LOG( "writing..." );
   writePrjFile( baseName + ".prj", args, alph, numOfSequences,
-		letterCounts, -1, numOfIndexes, seedText );
+		letterCountsInThisVolume, -1, numOfIndexes, seedText );
   multi.toFiles( baseName );
-  const uchar* seq = multi.seqReader();
 
   for( unsigned x = 0; x < numOfIndexes; ++x ){
     SubsetSuffixArray myIndex;
@@ -244,12 +252,11 @@ static indexT maxLettersPerVolume( const LastdbArguments& args,
   return z;
 }
 
-// Read the next sequence, adding it to the MultiSequence and the SuffixArray
+// Read the next sequence, adding it to the MultiSequence
 std::istream&
-appendFromFasta( MultiSequence& multi, unsigned numOfIndexes,
+appendFromFasta( MultiSequence& multi, indexT maxSeqLen,
 		 const LastdbArguments& args, const Alphabet& alph,
 		 std::istream& in ){
-  indexT maxSeqLen = maxLettersPerVolume( args, numOfIndexes );
   if( multi.finishedSequences() == 0 ) maxSeqLen = indexT(-1);
 
   size_t oldSize = multi.unfinishedSize();
@@ -303,18 +310,18 @@ void lastdb( int argc, char** argv ){
   unsigned volumeNumber = 0;
   countT sequenceCount = 0;
   std::vector<countT> letterCounts( alph.size );
-  std::vector<countT> letterTotals( alph.size );
+  indexT maxSeqLen = maxLettersPerVolume( args, seeds.size() );
 
   char defaultInputName[] = "-";
   char* defaultInput[] = { defaultInputName, 0 };
   char** inputBegin = argv + args.inputStart;
 
   for( char** i = *inputBegin ? inputBegin : defaultInput; *i; ++i ){
-    std::ifstream inFileStream;
+    mcf::izstream inFileStream;
     std::istream& in = openIn( *i, inFileStream );
     LOG( "reading " << *i << "..." );
 
-    while( appendFromFasta( multi, seeds.size(), args, alph, in ) ){
+    while( appendFromFasta( multi, maxSeqLen, args, alph, in ) ){
       if( !args.isProtein && args.userAlphabet.empty() &&
           sequenceCount == 0 && isDubiousDna( alph, multi ) ){
         std::cerr << args.programName << ": that's some funny-lookin DNA\n";
@@ -322,30 +329,18 @@ void lastdb( int argc, char** argv ){
 
       if( multi.isFinished() ){
         ++sequenceCount;
-	const uchar* seq = multi.seqReader();
-	size_t lastSeq = multi.finishedSequences() - 1;
-	size_t beg = multi.seqBeg( lastSeq );
-	size_t end = multi.seqEnd( lastSeq );
-	alph.count( seq + beg, seq + end, &letterCounts[0] );
-	if( args.isCountsOnly ){
-	  // memory-saving, which seems to be important on 32-bit systems:
-	  multi.reinitForAppending();
-	}
       }
       else{
 	std::string baseName = args.lastdbName + stringify(volumeNumber++);
 	makeVolume( seeds, multi, args, alph, letterCounts,
 		    tantanMasker, numOfThreads, seedText, baseName );
-	for( unsigned c = 0; c < alph.size; ++c )
-	  letterTotals[c] += letterCounts[c];
-	letterCounts.assign( alph.size, 0 );
 	multi.reinitForAppending();
       }
     }
   }
 
   if( multi.finishedSequences() > 0 ){
-    if( volumeNumber == 0 ){
+    if( volumeNumber == 0 && !args.isCountsOnly ){
       makeVolume( seeds, multi, args, alph, letterCounts,
 		  tantanMasker, numOfThreads, seedText, args.lastdbName );
       return;
@@ -355,10 +350,8 @@ void lastdb( int argc, char** argv ){
 		tantanMasker, numOfThreads, seedText, baseName );
   }
 
-  for( unsigned c = 0; c < alph.size; ++c ) letterTotals[c] += letterCounts[c];
-
   writePrjFile( args.lastdbName + ".prj", args, alph, sequenceCount,
-		letterTotals, volumeNumber, seeds.size(), seedText );
+		letterCounts, volumeNumber, seeds.size(), seedText );
 }
 
 int main( int argc, char** argv )
diff --git a/src/makefile b/src/makefile
index df3581a..de49694 100644
--- a/src/makefile
+++ b/src/makefile
@@ -1,6 +1,3 @@
-CXX = g++
-CC  = gcc
-
 CXXFLAGS = -O3 -Wall -Wextra -Wcast-qual -Wswitch-enum -Wundef	\
 -Wcast-align -pedantic -g -std=c++11 -pthread -DHAS_CXX_THREADS
 # -Wconversion
@@ -59,19 +56,19 @@ all: $(ALL)
 
 indexAllObj4 = $(indexObj0) $(indexObj4)
 lastdb: $(indexAllObj4)
-	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(indexAllObj4)
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(indexAllObj4) -lz
 
 indexAllObj8 = $(indexObj0) $(indexObj8)
 lastdb8: $(indexAllObj8)
-	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(indexAllObj8)
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(indexAllObj8) -lz
 
 alignAllObj4 = $(alignObj0) $(alignObj4)
 lastal: $(alignAllObj4)
-	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(alignAllObj4)
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(alignAllObj4) -lz
 
 alignAllObj8 = $(alignObj0) $(alignObj8)
 lastal8: $(alignAllObj8)
-	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(alignAllObj8)
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(alignAllObj8) -lz
 
 splitAllObj4 = $(splitObj0) $(splitObj4)
 last-split: $(splitAllObj4)
@@ -82,7 +79,7 @@ last-split8: $(splitAllObj8)
 	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(splitAllObj8)
 
 last-pair-probs: $(PPOBJ)
-	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(PPOBJ)
+	$(CXX) $(CXXFLAGS) $(LDFLAGS) -o $@ $(PPOBJ) -lz
 
 last-merge-batches: $(MBOBJ)
 	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(MBOBJ)
@@ -151,7 +148,7 @@ Centroid.o Centroid.o8: Centroid.cc Centroid.hh GappedXdropAligner.hh \
  ScoreMatrixRow.hh GeneralizedAffineGapCosts.hh SegmentPair.hh \
  OneQualityScoreMatrix.hh GappedXdropAlignerInl.hh
 CyclicSubsetSeed.o CyclicSubsetSeed.o8: CyclicSubsetSeed.cc CyclicSubsetSeed.hh \
- CyclicSubsetSeedData.hh io.hh stringify.hh
+ CyclicSubsetSeedData.hh io.hh mcf_zstream.hh stringify.hh
 DiagonalTable.o DiagonalTable.o8: DiagonalTable.cc DiagonalTable.hh
 GappedXdropAligner.o GappedXdropAligner.o8: GappedXdropAligner.cc GappedXdropAligner.hh \
  ScoreMatrixRow.hh GappedXdropAlignerInl.hh
@@ -179,7 +176,7 @@ LastalArguments.o LastalArguments.o8: LastalArguments.cc LastalArguments.hh \
 LastdbArguments.o LastdbArguments.o8: LastdbArguments.cc LastdbArguments.hh \
  SequenceFormat.hh stringify.hh getoptUtil.hh version.hh
 MultiSequence.o MultiSequence.o8: MultiSequence.cc MultiSequence.hh ScoreMatrixRow.hh \
- VectorOrMmap.hh Mmap.hh fileMap.hh stringify.hh io.hh
+ VectorOrMmap.hh Mmap.hh fileMap.hh stringify.hh io.hh mcf_zstream.hh
 MultiSequenceQual.o MultiSequenceQual.o8: MultiSequenceQual.cc MultiSequence.hh \
  ScoreMatrixRow.hh VectorOrMmap.hh Mmap.hh fileMap.hh stringify.hh
 OneQualityScoreMatrix.o OneQualityScoreMatrix.o8: OneQualityScoreMatrix.cc \
@@ -187,14 +184,15 @@ OneQualityScoreMatrix.o OneQualityScoreMatrix.o8: OneQualityScoreMatrix.cc \
  stringify.hh
 QualityPssmMaker.o QualityPssmMaker.o8: QualityPssmMaker.cc QualityPssmMaker.hh \
  ScoreMatrixRow.hh qualityScoreUtil.hh stringify.hh
-ScoreMatrix.o ScoreMatrix.o8: ScoreMatrix.cc ScoreMatrix.hh ScoreMatrixData.hh io.hh
+ScoreMatrix.o ScoreMatrix.o8: ScoreMatrix.cc ScoreMatrix.hh ScoreMatrixData.hh io.hh \
+ mcf_zstream.hh
 SegmentPair.o SegmentPair.o8: SegmentPair.cc SegmentPair.hh
 SegmentPairPot.o SegmentPairPot.o8: SegmentPairPot.cc SegmentPairPot.hh SegmentPair.hh
 SubsetMinimizerFinder.o SubsetMinimizerFinder.o8: SubsetMinimizerFinder.cc \
  SubsetMinimizerFinder.hh CyclicSubsetSeed.hh
 SubsetSuffixArray.o SubsetSuffixArray.o8: SubsetSuffixArray.cc SubsetSuffixArray.hh \
  CyclicSubsetSeed.hh VectorOrMmap.hh Mmap.hh fileMap.hh stringify.hh \
- SubsetMinimizerFinder.hh io.hh
+ SubsetMinimizerFinder.hh io.hh mcf_zstream.hh
 SubsetSuffixArraySearch.o SubsetSuffixArraySearch.o8: SubsetSuffixArraySearch.cc \
  SubsetSuffixArray.hh CyclicSubsetSeed.hh VectorOrMmap.hh Mmap.hh \
  fileMap.hh stringify.hh
@@ -211,11 +209,11 @@ gaplessPssmXdrop.o gaplessPssmXdrop.o8: gaplessPssmXdrop.cc gaplessPssmXdrop.hh
 gaplessTwoQualityXdrop.o gaplessTwoQualityXdrop.o8: gaplessTwoQualityXdrop.cc \
  gaplessTwoQualityXdrop.hh TwoQualityScoreMatrix.hh ScoreMatrixRow.hh
 gaplessXdrop.o gaplessXdrop.o8: gaplessXdrop.cc gaplessXdrop.hh ScoreMatrixRow.hh
-io.o io.o8: io.cc io.hh
+io.o io.o8: io.cc io.hh mcf_zstream.hh
 last-pair-probs-main.o last-pair-probs-main.o8: last-pair-probs-main.cc last-pair-probs.hh \
  stringify.hh version.hh
 last-pair-probs.o last-pair-probs.o8: last-pair-probs.cc last-pair-probs.hh io.hh \
- stringify.hh
+ mcf_zstream.hh stringify.hh
 lastal.o lastal.o8: lastal.cc LastalArguments.hh SequenceFormat.hh \
  QualityPssmMaker.hh ScoreMatrixRow.hh OneQualityScoreMatrix.hh \
  TwoQualityScoreMatrix.hh qualityScoreUtil.hh stringify.hh \
@@ -228,12 +226,12 @@ lastal.o lastal.o8: lastal.cc LastalArguments.hh SequenceFormat.hh \
  SegmentPairPot.hh ScoreMatrix.hh Alphabet.hh MultiSequence.hh \
  TantanMasker.hh tantan.hh DiagonalTable.hh GreedyXdropAligner.hh \
  gaplessXdrop.hh gaplessPssmXdrop.hh gaplessTwoQualityXdrop.hh io.hh \
- threadUtil.hh version.hh
+ mcf_zstream.hh threadUtil.hh version.hh
 lastdb.o lastdb.o8: lastdb.cc LastdbArguments.hh SequenceFormat.hh \
  SubsetSuffixArray.hh CyclicSubsetSeed.hh VectorOrMmap.hh Mmap.hh \
  fileMap.hh stringify.hh Alphabet.hh MultiSequence.hh ScoreMatrixRow.hh \
- TantanMasker.hh tantan.hh io.hh qualityScoreUtil.hh threadUtil.hh \
- version.hh
+ TantanMasker.hh tantan.hh io.hh mcf_zstream.hh qualityScoreUtil.hh \
+ threadUtil.hh version.hh
 tantan.o tantan.o8: tantan.cc tantan.hh
 last-merge-batches.o: last-merge-batches.c version.hh
 alp/njn_dynprogprob.o: alp/njn_dynprogprob.cpp alp/njn_dynprogprob.hpp \
diff --git a/src/mcf_zstream.hh b/src/mcf_zstream.hh
new file mode 100644
index 0000000..fd527c5
--- /dev/null
+++ b/src/mcf_zstream.hh
@@ -0,0 +1,82 @@
+// Copyright 2017 Martin C. Frith
+
+// mcf::izstream is similar to std::ifstream.  The difference is, if
+// you give it a gzip-compressed file, it will decompress what it
+// reads.
+
+#ifndef MCF_ZSTREAM
+#define MCF_ZSTREAM
+
+#include <zlib.h>
+
+#include <cstdio>  // BUFSIZ
+#include <istream>
+#include <streambuf>
+
+namespace mcf {
+
+class zbuf : public std::streambuf {
+public:
+  zbuf() : input(0) {}
+
+  ~zbuf() { close(); }
+
+  bool is_open() const { return input; }
+
+  zbuf *open(const char *fileName) {
+    if (is_open()) return 0;
+    input = gzopen(fileName, "rb");
+    if (!is_open()) return 0;
+    return this;
+  }
+
+  zbuf *close() {
+    if (!is_open()) return 0;
+    int e = gzclose(input);
+    input = 0;
+    return (e == Z_OK || e == Z_BUF_ERROR) ? this : 0;
+  }
+
+protected:
+  int underflow() {
+    if (gptr() == egptr()) {
+      int size = gzread(input, buffer, BUFSIZ);
+      if (size < 0) throw std::ios_base::failure("gzread error");
+      setg(buffer, buffer, buffer + size);
+    }
+    return (gptr() == egptr()) ?
+      traits_type::eof() : traits_type::to_int_type(*gptr());
+  }
+
+private:
+  gzFile input;
+  char buffer[BUFSIZ];
+};
+
+class izstream : public std::istream {
+public:
+  izstream() : std::istream(&buf) {}
+
+  izstream(const char *fileName) : std::istream(&buf) {
+    open(fileName);
+  }
+
+  bool is_open() const { return buf.is_open(); }
+
+  void open(const char *fileName) {
+    // do something special if fileName is "-"?
+    if (!buf.open(fileName)) setstate(failbit);
+    else clear();
+  }
+
+  void close() {
+    if (!buf.close()) setstate(failbit);
+  }
+
+private:
+  zbuf buf;
+};
+
+}
+
+#endif
diff --git a/src/version.hh b/src/version.hh
index 7104b42..0fe2c52 100644
--- a/src/version.hh
+++ b/src/version.hh
@@ -1 +1 @@
-"869"
+"885"

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/last-align.git