[med-svn] [last-align] 01/06: New upstream version 809

Wed Nov 30 16:43:45 UTC 2016

This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository last-align.

commit fe2831f574fda25dad1cda3b6da2a834d8bbd9d5
Author: Andreas Tille <tille at debian.org>
Date:   Wed Nov 30 16:02:08 2016 +0100

    New upstream version 809
---
 ChangeLog.txt                   | 225 ++++++++++-
 doc/last-split.html             |   8 +
 doc/last-split.txt              |   8 +
 doc/last-train.html             |  18 +-
 doc/last-train.txt              |  21 +-
 doc/maf-convert.html            |   5 +
 doc/maf-convert.txt             |   5 +
 scripts/last-postmask           |  17 +-
 scripts/last-train              | 166 ++++++--
 scripts/maf-convert             | 867 ++++++++++++++++++++++------------------
 src/LastalArguments.cc          |   2 +
 src/split/cbrc_split_aligner.cc | 558 ++++++++++++++------------
 src/split/cbrc_split_aligner.hh |  62 +--
 src/split/last-split-main.cc    |  20 +-
 src/split/last-split.cc         |  11 +-
 src/split/last-split.hh         |   2 +
 src/stringify.hh                |  24 +-
 src/version.hh                  |   2 +-
 18 files changed, 1286 insertions(+), 735 deletions(-)

diff --git a/ChangeLog.txt b/ChangeLog.txt
index 00416a1..c286315 100644
--- a/ChangeLog.txt
+++ b/ChangeLog.txt
@@ -1,8 +1,231 @@
+2016-11-24  Martin C. Frith  <Martin C. Frith>
+
+	* src/split/cbrc_split_aligner.cc, src/split/cbrc_split_aligner.hh:
+	Made last-split a bit faster.
+	[604434e30b9b] [tip]
+
+	* src/split/cbrc_split_aligner.cc:
+	Enabled last-split -g to read multi-volume genomes.
+	[39d60e63649c]
+
+	* src/split/cbrc_split_aligner.cc, src/split/cbrc_split_aligner.hh:
+	Refactoring.
+	[e390c8a6b953]
+
+	* src/split/cbrc_split_aligner.cc, src/split/cbrc_split_aligner.hh:
+	Refactoring.
+	[a651f673eccd]
+
+	* src/split/cbrc_split_aligner.cc:
+	Refactoring.
+	[3cf4563e1e06]
+
+	* src/split/cbrc_split_aligner.cc:
+	Made last-split -g check for alignment coordinates beyond ends of
+	sequences.
+	[271148f2fd22]
+
+	* src/split/cbrc_split_aligner.cc, src/split/cbrc_split_aligner.hh,
+	test/last-test.sh:
+	Refactoring.
+	[3bcd06103556]
+
+2016-11-22  Martin C. Frith  <Martin C. Frith>
+
+	* doc/last-split.txt, src/split/cbrc_split_aligner.cc,
+	src/split/cbrc_split_aligner.hh, src/split/last-split-main.cc,
+	src/split/last-split.cc, src/split/last-split.hh, src/stringify.hh,
+	test/last-split-test.out:
+	Added last-split -b option, to skip troublesome query sequences.
+	[99208596e2df]
+
+2016-11-15  Martin C. Frith  <Martin C. Frith>
+
+	* src/split/cbrc_split_aligner.cc:
+	Made last-split's spliced alignment a bit faster.
+	[7a7418e343ec]
+
+	* src/split/cbrc_split_aligner.hh, src/split/last-split.cc:
+	Put memory info in last-split verbose output.
+	[a9b828b51430]
+
+	* test/maf-convert-test.out, test/maf-convert-test.sh:
+	Added tests for maf-convert.
+	[b9fb391b111f]
+
+	* doc/maf-convert.txt, scripts/maf-convert:
+	Added maf-convert -j option!
+	[e80789dbe5a9]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[60a355a2060c]
+
+2016-11-11  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/maf-convert:
+	Refactoring.
+	[640ec6e760a4]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[2b817a03775e]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[2e56e3884ce1]
+
+	* scripts/maf-convert:
+	Minor modifications.
+	[3a530469efd4]
+
+	* scripts/maf-convert:
+	Minor modifications.
+	[a6f7e46b53e8]
+
+	* scripts/maf-convert:
+	Minor modifications.
+	[4b680ca58d11]
+
+	* scripts/maf-convert:
+	Minor modifications.
+	[ed5ab3b3b096]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[49af9e116654]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[90cdd16c533d]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[5a56c7267682]
+
+	* scripts/maf-convert:
+	Minor modifications.
+	[5718d170c521]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[5e862e6f4593]
+
+	* scripts/maf-convert:
+	Minor modifications.
+	[1b0f793a2df3]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[fd3de758e253]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[17d5f147e3e8]
+
+	* scripts/maf-convert:
+	Minor refactoring.
+	[9137dbc02893]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[b6a9b0636a43]
+
+	* scripts/maf-convert:
+	Made maf-convert a bit faster.
+	[6c8368485b73]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[21080ba28e06]
+
+2016-11-10  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/maf-convert:
+	Refactoring.
+	[e289a5a60161]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[08dff31a448d]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[a6e60fafb4c7]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[3e274b1d6e63]
+
+	* scripts/maf-convert:
+	Refactoring.
+	[3fdccdc68124]
+
+	* src/LastalArguments.cc:
+	Reduced memory use of lastal with multi-threads.
+	[3d55bdf04914]
+
+	* doc/last-train.txt, src/split/cbrc_split_aligner.cc, test/102.maf,
+	test/last-split-test.out, test/last-split-test.sh:
+	Reverted an unintended minor change in last-split output.
+	[41a1f195a956]
+
+2016-11-04  Martin C. Frith  <Martin C. Frith>
+
+	* src/split/cbrc_split_aligner.cc, src/split/cbrc_split_aligner.hh:
+	Refactoring.
+	[ebfe8b16e512]
+
+	* scripts/last-train:
+	Reduced last-train's default sample size to 1 million.
+	[3fa8121533e6]
+
+	* src/split/cbrc_split_aligner.cc, src/split/cbrc_split_aligner.hh:
+	Made last-split's spliced alignment a bit faster, maybe.
+	[9e43ca208315]
+
+	* src/split/cbrc_split_aligner.cc, src/split/cbrc_split_aligner.hh:
+	Refactoring.
+	[15807aae76c1]
+
+	* src/split/cbrc_split_aligner.cc, src/split/cbrc_split_aligner.hh:
+	Refactoring.
+	[a08e93976d22]
+
+	* src/split/cbrc_split_aligner.cc, src/split/cbrc_split_aligner.hh:
+	Refactoring.
+	[5a2487a536ff]
+
+	* doc/last-train.txt, scripts/last-train,
+	src/split/cbrc_split_aligner.cc, src/split/cbrc_split_aligner.hh:
+	Documented rows/columns of last-train matrices.
+	[ef684382d11c]
+
+2016-11-01  Martin C. Frith  <Martin C. Frith>
+
+	* scripts/last-train:
+	Made last-train print matrices more prettily.
+	[ea0e36883c48]
+
+	* doc/last-train.txt, scripts/last-train:
+	Improved last-train doc.
+	[567a4a35cd4b]
+
+2016-10-25  Martin C. Frith  <Martin C. Frith>
+
+	* doc/last-train.txt, scripts/last-train:
+	Made last-train use a random sample of the query sequences.
+	[ae037299752f]
+
+	* scripts/last-postmask:
+	Made last-postmask a bit faster.
+	[5e99b6e235e0]
+
 2016-09-29  Martin C. Frith  <Martin C. Frith>
 
 	* test/last-test.out, test/last-test.sh:
 	Added (incomplete) tests for gapless overlap alignment.
-	[19b58d988059] [tip]
+	[19b58d988059]
 
 	* src/LastalArguments.cc, src/gaplessPssmXdrop.cc,
 	src/gaplessPssmXdrop.hh, src/gaplessTwoQualityXdrop.cc,
diff --git a/doc/last-split.html b/doc/last-split.html
index c54d670..32bad5c 100644
--- a/doc/last-split.html
+++ b/doc/last-split.html
@@ -623,6 +623,14 @@ above INT will not necessarily get high mismap probabilities.</p>
 probabilities.  Note that the mismap and score limits still
 apply.</td></tr>
 <tr><td class="option-group">
+<kbd><span class="option">-b</span>, <span class="option">--bytes=<var>B</var></span></kbd></td>
+<td>Skip any query sequence that would require more than B bytes
+of memory to process.  (This only limits the size of some
+core data-structures: the total memory use will be greater.)
+A warning is written for each skipped sequence.  You can use
+suffixes such as K (KibiBytes), M (MebiBytes), G (GibiBytes),
+T (TebiBytes), e.g. -b20G.</td></tr>
+<tr><td class="option-group">
 <kbd><span class="option">-v</span>, <span class="option">--verbose</span></kbd></td>
 <td>Show progress information on the screen.</td></tr>
 <tr><td class="option-group">
diff --git a/doc/last-split.txt b/doc/last-split.txt
index 609f3ad..0bda7ff 100644
--- a/doc/last-split.txt
+++ b/doc/last-split.txt
@@ -231,6 +231,14 @@ Options
          probabilities.  Note that the mismap and score limits still
          apply.
 
+  -b, --bytes=B
+         Skip any query sequence that would require more than B bytes
+         of memory to process.  (This only limits the size of some
+         core data-structures: the total memory use will be greater.)
+         A warning is written for each skipped sequence.  You can use
+         suffixes such as K (KibiBytes), M (MebiBytes), G (GibiBytes),
+         T (TebiBytes), e.g. -b20G.
+
   -v, --verbose
          Show progress information on the screen.
 
diff --git a/doc/last-train.html b/doc/last-train.html
index 9e0202a..a8aa00f 100644
--- a/doc/last-train.html
+++ b/doc/last-train.html
@@ -330,8 +330,8 @@ lastdb mydb reference.fasta
 last-train mydb queries.fasta
 </pre>
 <p>last-train prints a summary of each alignment step, followed by the
-final score parameters in a format that can be read by lastal's -p
-option.</p>
+final score parameters in a format that can be read by <a class="reference external" href="lastal.html#score-options">lastal's -p
+option</a>.</p>
 <div class="section" id="options">
 <h2>Options</h2>
 <blockquote>
@@ -369,9 +369,15 @@ e.g. score(A→G) = score(G→A).</td></tr>
 <td>Ignore alignments with > PID% identity.  This aims to
 optimize the parameters for low-similarity alignments,
 similarly to the BLOSUM matrices.</td></tr>
+<tr><td class="option-group">
+<kbd><span class="option">--sample=<var>N</var></span></kbd></td>
+<td>Use only the first N query letters, after randomizing the
+order of sequences.  0 means use all query letters.</td></tr>
 </tbody>
 </table>
 </blockquote>
+<p>All options below this point are passed to lastal to do the
+alignments: they are described in more detail at <a class="reference external" href="lastal.html">lastal.html</a>.</p>
 </div>
 <div class="section" id="initial-parameter-options">
 <h3>Initial parameter options</h3>
@@ -414,10 +420,10 @@ similarly to the BLOSUM matrices.</td></tr>
 <tbody valign="top">
 <tr><td class="option-group">
 <kbd><span class="option">-D <var>LENGTH</var></span></kbd></td>
-<td>Query letters per random alignment.</td></tr>
+<td>Query letters per random alignment.  (See <a class="reference external" href="last-evalues.html">here</a>.)</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-E <var>EG2</var></span></kbd></td>
-<td>Maximum expected alignments per square giga.</td></tr>
+<td>Maximum expected alignments per square giga.  (See <a class="reference external" href="last-evalues.html">here</a>.)</td></tr>
 <tr><td class="option-group">
 <kbd><span class="option">-s <var>NUMBER</var></span></kbd></td>
 <td>Which query strand to use: 0=reverse, 1=forward, 2=both.</td></tr>
@@ -454,7 +460,9 @@ distribution.  If they do not (which is often the case), the results
 may be poor.</p>
 </li>
 <li><p class="first">last-train can fail for various reasons, e.g. if the sequences are
-too dissimilar.</p>
+too dissimilar.  If it fails to find any alignments, you could try
+<a class="reference external" href="last-tutorial.html#example-6-find-very-short-dna-alignments">reducing the alignment significance threshold</a> with
+option -D.</p>
 </li>
 </ul>
 </div>
diff --git a/doc/last-train.txt b/doc/last-train.txt
index 97cf0f9..8ee7885 100644
--- a/doc/last-train.txt
+++ b/doc/last-train.txt
@@ -15,8 +15,8 @@ The usage is like this::
   last-train mydb queries.fasta
 
 last-train prints a summary of each alignment step, followed by the
-final score parameters in a format that can be read by lastal's -p
-option.
+final score parameters in a format that can be read by `lastal's -p
+option <lastal.html#score-options>`_.
 
 Options
 -------
@@ -40,6 +40,12 @@ Training options
          Ignore alignments with > PID% identity.  This aims to
          optimize the parameters for low-similarity alignments,
          similarly to the BLOSUM matrices.
+  --sample=N
+         Use only the first N query letters, after randomizing the
+         order of sequences.  0 means use all query letters.
+
+All options below this point are passed to lastal to do the
+alignments: they are described in more detail at `<lastal.html>`_.
 
 Initial parameter options
 ~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -55,8 +61,10 @@ Initial parameter options
 Alignment options
 ~~~~~~~~~~~~~~~~~
 
-  -D LENGTH  Query letters per random alignment.
-  -E EG2     Maximum expected alignments per square giga.
+  -D LENGTH  Query letters per random alignment.  (See `here
+             <last-evalues.html>`_.)
+  -E EG2     Maximum expected alignments per square giga.  (See `here
+             <last-evalues.html>`_.)
   -s NUMBER  Which query strand to use: 0=reverse, 1=forward, 2=both.
   -S NUMBER  Score matrix applies to forward strand of: 0=reference,
              1=query.  This matters only if the scores lack
@@ -77,4 +85,7 @@ Bugs
   may be poor.
 
 * last-train can fail for various reasons, e.g. if the sequences are
-  too dissimilar.
+  too dissimilar.  If it fails to find any alignments, you could try
+  `reducing the alignment significance threshold
+  <last-tutorial.html#example-6-find-very-short-dna-alignments>`_ with
+  option -D.
diff --git a/doc/maf-convert.html b/doc/maf-convert.html
index 7590a60..a98040e 100644
--- a/doc/maf-convert.html
+++ b/doc/maf-convert.html
@@ -351,6 +351,11 @@ starting with 'p', run lastal with option -j set to 4 or higher.)</p>
 nucleotides.  This affects psl format only (the first 4
 columns).</td></tr>
 <tr><td class="option-group">
+<kbd><span class="option">-j</span>, <span class="option">--join=<var>N</var></span></kbd></td>
+<td>Join neighboring alignments if they are co-linear and
+separated by at most N letters.  This affects psl format
+only.</td></tr>
+<tr><td class="option-group">
 <kbd><span class="option">-n</span>, <span class="option">--noheader</span></kbd></td>
 <td>Omit any header lines from the output.  This may be useful if
 you concatenate outputs, e.g. from parallel jobs.</td></tr>
diff --git a/doc/maf-convert.txt b/doc/maf-convert.txt
index a4ffcd9..f78acee 100644
--- a/doc/maf-convert.txt
+++ b/doc/maf-convert.txt
@@ -32,6 +32,11 @@ Options
          nucleotides.  This affects psl format only (the first 4
          columns).
 
+  -j, --join=N
+         Join neighboring alignments if they are co-linear and
+         separated by at most N letters.  This affects psl format
+         only.
+
   -n, --noheader
          Omit any header lines from the output.  This may be useful if
          you concatenate outputs, e.g. from parallel jobs.
diff --git a/scripts/last-postmask b/scripts/last-postmask
index 750ee8e..6e7b9d4 100755
--- a/scripts/last-postmask
+++ b/scripts/last-postmask
@@ -12,7 +12,7 @@
 # Limitations: doesn't (yet) handle sequence quality data,
 # frameshifts, or generalized affine gaps.
 
-import fileinput, itertools, optparse, os, signal, sys
+import itertools, optparse, os, signal, sys
 
 def getScoreMatrix(rowHeads, colHeads, matrix, deleteCost, insertCost):
     defaultScore = min(map(min, matrix))
@@ -56,12 +56,12 @@ def printIfGood(maf, seqs, scoreMatrix, delOpenCost, insOpenCost, minScore):
             print line,
         print
 
-def lastPostmask(args):
+def doOneFile(file):
     scoreMatrix = []
     maf = []
     seqs = []
 
-    for line in fileinput.input(args):
+    for line in file:
         if line[0] == "#":
             print line,
             w = line.split()
@@ -90,6 +90,17 @@ def lastPostmask(args):
             if line[0] == "s": seqs.append(line.split()[6])
     if seqs: printIfGood(maf, seqs, scoreMatrix, aDel, aIns, minScore)
 
+def lastPostmask(args):
+    if args:
+        for i in args:
+            if i == "-":
+                doOneFile(sys.stdin)
+            else:
+                with open(i) as file:
+                    doOneFile(file)
+    else:
+        doOneFile(sys.stdin)
+
 if __name__ == "__main__":
     signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # avoid silly error message
 
diff --git a/scripts/last-train b/scripts/last-train
index a96532a..de39072 100755
--- a/scripts/last-train
+++ b/scripts/last-train
@@ -1,10 +1,71 @@
 #! /usr/bin/env python
 # Copyright 2015 Martin C. Frith
 
-import fileinput, math, optparse, os, signal, subprocess, sys
+import math, optparse, os, random, signal, subprocess, sys, tempfile
+
+def writeWords(outFile, words):
+    outFile.write(" ".join(words) + "\n")
+
+def seqInput(fileNames):
+    for name in fileNames:
+        with open(name) as file:
+            seqType = 0
+            for line in file:
+                if seqType == 0:
+                    if line[0] == ">":
+                        seqType = 1
+                        seq = []
+                    elif line[0] == "@":
+                        seqType = 2
+                        lineType = 1
+                elif seqType == 1:  # fasta
+                    if line[0] == ">":
+                        yield "".join(seq), ""
+                        seq = []
+                    else:
+                        seq.append(line.rstrip())
+                elif seqType == 2:  # fastq
+                    if lineType == 1:
+                        seq = line.rstrip()
+                    elif lineType == 3:
+                        yield seq, line.rstrip()
+                    lineType = (lineType + 1) % 4
+            if seqType == 1: yield "".join(seq), ""
+
+def randomNumbersAndLengths(lengths, sampleSize):
+    numbersAndLengths = list(enumerate(lengths))
+    random.shuffle(numbersAndLengths)
+    for i, x in numbersAndLengths:
+        if x < sampleSize:
+            yield i, x
+            sampleSize -= x
+        else:
+            yield i, sampleSize
+            break
 
-def tabSeparatedString(things):
-    return "\t".join(map(str, things))
+def writeSeqSample(numbersAndLengths, seqInput, outfile):
+    j = 0
+    for i, x in enumerate(seqInput):
+        if j < len(numbersAndLengths) and numbersAndLengths[j][0] == i:
+            seqlen = numbersAndLengths[j][1]
+            seq, qual = x
+            if qual:
+                outfile.write("@" + str(i) + "\n")
+                outfile.write(seq[:seqlen])
+                outfile.write("\n+\n")
+                outfile.write(qual[:seqlen])
+            else:
+                outfile.write(">" + str(i) + "\n")
+                outfile.write(seq[:seqlen])
+            outfile.write("\n")
+            j += 1
+
+def getSeqSample(opts, queryFiles, outfile):
+    lengths = (len(i[0]) for i in seqInput(queryFiles))
+    sampleSize = int(round(opts.sample))
+    numbersAndLengths = sorted(randomNumbersAndLengths(lengths, sampleSize))
+    writeSeqSample(numbersAndLengths, seqInput(queryFiles), outfile)
+    print "# sampled sequences:", len(numbersAndLengths)
 
 def scaleFromHeader(lines):
     for line in lines:
@@ -70,6 +131,45 @@ def scoreFromProb(scale, prob):
 def costFromProb(scale, prob):
     return -scoreFromProb(scale, prob)
 
+def guessAlphabet(matrixSize):
+    if matrixSize ==  4: return "ACGT"
+    if matrixSize == 20: return "ACDEFGHIKLMNPQRSTVWY"
+    raise Exception("can't handle unusual alphabets")
+
+def matrixWithLetters(matrix):
+    alphabet = guessAlphabet(len(matrix))
+    return [alphabet] + [[a] + i for a, i in zip(alphabet, matrix)]
+
+def writeMatrixHead(outFile, prefix, alphabet, formatString):
+    writeWords(outFile, [prefix + " "] + [formatString % k for k in alphabet])
+
+def writeMatrixBody(outFile, prefix, alphabet, matrix, formatString):
+    for i, j in zip(alphabet, matrix):
+        writeWords(outFile, [prefix + i] + [formatString % k for k in j])
+
+def writeCountMatrix(outFile, matrix, prefix):
+    alphabet = guessAlphabet(len(matrix))
+    writeMatrixHead(outFile, prefix, alphabet, "%-14s")
+    writeMatrixBody(outFile, prefix, alphabet, matrix, "%-14s")
+
+def writeProbMatrix(outFile, matrix, prefix):
+    alphabet = guessAlphabet(len(matrix))
+    writeMatrixHead(outFile, prefix, alphabet, "%-14s")
+    writeMatrixBody(outFile, prefix, alphabet, matrix, "%-14g")
+
+def writeScoreMatrix(outFile, matrix, prefix):
+    alphabet = guessAlphabet(len(matrix))
+    writeMatrixHead(outFile, prefix, alphabet, "%6s")
+    writeMatrixBody(outFile, prefix, alphabet, matrix, "%6s")
+
+def writeMatrixWithLetters(outFile, matrix, prefix):
+    head = matrix[0]
+    tail = matrix[1:]
+    left = [i[0] for i in tail]
+    body = [i[1:] for i in tail]
+    writeMatrixHead(outFile, prefix, head, "%6s")
+    writeMatrixBody(outFile, prefix, left, body, "%6s")
+
 def matProbsFromCounts(counts, opts):
     r = range(len(counts))
     if opts.revsym:  # add complement (reverse strand) substitutions
@@ -82,23 +182,15 @@ def matProbsFromCounts(counts, opts):
 
     print "# substitution percent identity: %g" % (100 * identities / total)
     print
-    print "# count matrix:"
-    for i in counts:
-        print "#", tabSeparatedString(i)
+    print "# count matrix (query letters = columns, reference letters = rows):"
+    writeCountMatrix(sys.stdout, counts, "# ")
     print
-    print "# probability matrix:"
-    for i in probs:
-        print "#", tabSeparatedString("%g" % j for j in i)
+    print "# probability matrix (query letters = columns, reference letters = rows):"
+    writeProbMatrix(sys.stdout, probs, "# ")
     print
 
     return probs
 
-def printMatScores(scores):
-    print "# score matrix:"
-    for i in scores:
-        print "#", tabSeparatedString(i)
-    print
-
 def gapProbsFromCounts(counts, opts):
     matches, deletes, inserts, delOpens, insOpens, alignments = counts
     if not alignments: raise Exception("no alignments")
@@ -168,23 +260,9 @@ def gapCostsFromProbs(scale, probs):
     if insExtendCost == 0: insExtendCost = 1
     return delExistCost, insExistCost, delExtendCost, insExtendCost
 
-def guessAlphabet(matrixSize):
-    if matrixSize ==  4: return "ACGT"
-    if matrixSize == 20: return "ACDEFGHIKLMNPQRSTVWY"
-    raise Exception("can't handle unusual alphabets")
-
-def matrixWithLetters(matrix):
-    alphabet = guessAlphabet(len(matrix))
-    return [alphabet] + [[a] + i for a, i in zip(alphabet, matrix)]
-
 def writeLine(out, *things):
     out.write(" ".join(map(str, things)) + "\n")
 
-def writeMatrixWithLetters(matrix, out):
-    writeLine(out, " ", tabSeparatedString(matrix[0]))
-    for i in matrix[1:]:
-        writeLine(out, i[0], tabSeparatedString(i[1:]))
-
 def writeGapCosts(gapCosts, out):
     delExistCost, insExistCost, delExtendCost, insExtendCost = gapCosts
     writeLine(out, "#last -a", delExistCost)
@@ -218,7 +296,7 @@ def fixedLastalArgs(opts):
     if opts.Q: x.append("-Q" + opts.Q)
     return x
 
-def lastTrain(opts, args):
+def doTraining(opts, args):
     tryToMakeChildProgramsFindable()
     scaleIncrease = 20  # while training, up-scale the scores by this amount
     x = fixedLastalArgs(opts)
@@ -248,6 +326,7 @@ def lastTrain(opts, args):
     while True:
         print "#", " ".join(x)
         print
+        sys.stdout.flush()
         matCounts, gapCounts = countsFromLastOutput(q.stdout, opts)
         gapProbs = gapProbsFromCounts(gapCounts, opts)
         gapCosts = gapCostsFromProbs(internalScale, gapProbs)
@@ -258,7 +337,9 @@ def lastTrain(opts, args):
         else:
             matProbs = matProbsFromCounts(matCounts, opts)
             matScores = matScoresFromProbs(internalScale, matProbs)
-            printMatScores(matScores)
+            print "# score matrix (query letters = columns, reference letters = rows):"
+            writeScoreMatrix(sys.stdout, matScores, "# ")
+            print
             parameters = gapCosts, matScores
             if parameters in oldParameters: break
             oldParameters.append(parameters)
@@ -268,7 +349,7 @@ def lastTrain(opts, args):
         x += args
         p = subprocess.Popen(x, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
         writeGapCosts(gapCosts, p.stdin)
-        writeMatrixWithLetters(internalMatrix, p.stdin)
+        writeMatrixWithLetters(p.stdin, internalMatrix, "")
         p.stdin.close()
         # in python2.6, the next line must come after p.stdin.close()
         q = subprocess.Popen(y, stdin=p.stdout, stdout=subprocess.PIPE)
@@ -280,11 +361,26 @@ def lastTrain(opts, args):
     if not opts.Q:
         matScores = matScoresFromProbs(externalScale, matProbs)
         externalMatrix = matrixWithLetters(matScores)
-    writeMatrixWithLetters(externalMatrix, sys.stdout)
+    print "# score matrix (query letters = columns, reference letters = rows):"
+    writeMatrixWithLetters(sys.stdout, externalMatrix, "")
+
+def lastTrain(opts, args):
+    if opts.sample:
+        random.seed(math.pi)
+        refName = args[0]
+        queryFiles = args[1:]
+        try:
+            with tempfile.NamedTemporaryFile(delete=False) as f:
+                getSeqSample(opts, queryFiles, f)
+            doTraining(opts, [refName, f.name])
+        finally:
+            os.remove(f.name)
+    else:
+        doTraining(opts, args)
 
 if __name__ == "__main__":
     signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # avoid silly error message
-    usage = "%prog lastdb-name sequence-file(s)"
+    usage = "%prog [options] lastdb-name sequence-file(s)"
     description = "Try to find suitable score parameters for aligning the given sequences."
     op = optparse.OptionParser(usage=usage, description=description)
     og = optparse.OptionGroup(op, "Training options")
@@ -296,6 +392,8 @@ if __name__ == "__main__":
                   help="force insertion/deletion symmetry")
     og.add_option("--pid", type="float", default=100, help=
                   "skip alignments with > PID% identity (default: %default)")
+    og.add_option("--sample", type="float", default=1000000, metavar="N",
+                  help="use a random sample of N letters (default: %default)")
     op.add_option_group(og)
     og = optparse.OptionGroup(op, "Initial parameter options")
     og.add_option("-r", metavar="SCORE",
diff --git a/scripts/maf-convert b/scripts/maf-convert
index 8a67408..7b1d92c 100755
--- a/scripts/maf-convert
+++ b/scripts/maf-convert
@@ -1,131 +1,177 @@
 #! /usr/bin/env python
 # Copyright 2010, 2011, 2013, 2014 Martin C. Frith
 # Read MAF-format alignments: write them in other formats.
-# Seems to work with Python 2.x, x>=4
+# Seems to work with Python 2.x, x>=6
 
 # By "MAF" we mean "multiple alignment format" described in the UCSC
 # Genome FAQ, not e.g. "MIRA assembly format".
 
 from itertools import *
-import sys, os, fileinput, math, operator, optparse, signal, string
+import math, optparse, os, signal, sys
 
 def maxlen(s):
     return max(map(len, s))
 
-def joined(things, delimiter):
-    return delimiter.join(map(str, things))
-
-identityTranslation = string.maketrans("", "")
-def deleted(myString, deleteChars):
-    return myString.translate(identityTranslation, deleteChars)
-
-def quantify(iterable, pred=bool):
-    """Count how many times the predicate is true."""
-    return sum(map(pred, iterable))
-
-class Maf:
-    def __init__(self, aLine, sLines, qLines, pLines):
-        self.namesAndValues = dict(i.split("=") for i in aLine[1:])
-
-        if not sLines: raise Exception("empty alignment")
-        cols = zip(*sLines)
-        self.seqNames = cols[1]
-        self.alnStarts = map(int, cols[2])
-        self.alnSizes = map(int, cols[3])
-        self.strands = cols[4]
-        self.seqSizes = map(int, cols[5])
-        self.alnStrings = cols[6]
-
-        self.qLines = qLines
-        self.pLines = pLines
-
-def dieUnlessPairwise(maf):
-    if len(maf.alnStrings) != 2:
-        raise Exception("pairwise alignments only, please")
-
-def insertionCounts(alnString):
-    gaps = alnString.count("-")
-    forwardFrameshifts = alnString.count("\\")
-    reverseFrameshifts = alnString.count("/")
-    letters = len(alnString) - gaps - forwardFrameshifts - reverseFrameshifts
-    return letters, forwardFrameshifts, reverseFrameshifts
-
-def coordinatesPerLetter(alnString, alnSize):
-    letters, forwardShifts, reverseShifts = insertionCounts(alnString)
-    if forwardShifts or reverseShifts or letters < alnSize: return 3
-    else: return 1
+def pairOrDie(sLines, formatName):
+    if len(sLines) != 2:
+        e = "for %s, each alignment must have 2 sequences" % formatName
+        raise Exception(e)
+    return sLines
 
-def mafLetterSizes(maf):
-    return map(coordinatesPerLetter, maf.alnStrings, maf.alnSizes)
+def isMatch(alignmentColumn):
+    # No special treatment of ambiguous bases/residues: same as NCBI BLAST.
+    first = alignmentColumn[0].upper()
+    for i in alignmentColumn[1:]:
+        if i.upper() != first: return False
+    return True
 
-def alnLetterSizes(sLines):
-    return [coordinatesPerLetter(i[6], int(i[3])) for i in sLines]
+def isGapless(alignmentColumn):
+    return "-" not in alignmentColumn
 
-def isTranslated(sLines):
-    for i in sLines:
-        alnString = i[6]
-        if "\\" in alnString or "/" in alnString: return True
-        if len(alnString) - alnString.count("-") < int(i[3]): return True
-    return False
+def gapRunCount(row):
+    """Get the number of runs of gap characters."""
+    return sum(k == "-" for k, v in groupby(row))
 
-def insertionSize(alnString, letterSize):
-    """Get the length of sequence included in the alnString."""
-    letters, forwardShifts, reverseShifts = insertionCounts(alnString)
-    return letters * letterSize + forwardShifts - reverseShifts
+def alignmentRowsFromColumns(columns):
+    return imap(''.join, izip(*columns))
 
 def symbolSize(symbol, letterSize):
-    if symbol == "-": return 0
     if symbol == "\\": return 1
     if symbol == "/": return -1
     return letterSize
 
-def isMatch(alignmentColumn):
-    # No special treatment of ambiguous bases/residues: same as NCBI BLAST.
-    first = alignmentColumn[0].upper()
-    for i in alignmentColumn[1:]:
-        if i.upper() != first: return False
-    return True
+def insertSize(row, letterSize):
+    """Get the length of sequence included in the row."""
+    return (len(row) - row.count("-")) * letterSize - 4 * row.count("/") - 2 * row.count("\\")
 
-def isGapless(alignmentColumn):
-    return "-" not in alignmentColumn
+def insertSizeText(row, letterSize):
+    return str(insertSize(row, letterSize))
 
 def matchAndInsertSizes(alignmentColumns, letterSizes):
     """Get sizes of gapless blocks, and of the inserts between them."""
     for k, v in groupby(alignmentColumns, isGapless):
         if k:
-            sizeOfGaplessBlock = len(list(v))
-            yield str(sizeOfGaplessBlock)
+            yield str(sum(1 for i in v))
         else:
-            blockRows = izip(*v)
-            blockRows = imap(''.join, blockRows)
-            insertSizes = imap(insertionSize, blockRows, letterSizes)
-            yield joined(insertSizes, ":")
+            yield ":".join(imap(insertSizeText,
+                                alignmentRowsFromColumns(v), letterSizes))
+
+##### Routines for reading MAF format: #####
+
+def updateEvalueParameters(opts, line):
+    for field in line.split():
+        try:
+            k, v = field.split("=")
+            x = float(v)
+            if k == "lambda":
+                opts.bitScoreA = x / math.log(2)
+            if k == "K":
+                opts.bitScoreB = math.log(x, 2)
+        except ValueError:
+            pass
+
+def scoreAndEvalue(aLine):
+    score = evalue = None
+    for i in aLine.split():
+        if i.startswith("score="):
+            score = i[6:]
+        elif i.startswith("E="):
+            evalue = i[2:]
+    return score, evalue
+
+def mafInput(opts, lines):
+    aLine = ""
+    sLines = []
+    qLines = []
+    pLines = []
+    for line in lines:
+        if line[0] == "s":
+            junk, seqName, beg, span, strand, seqLen, row = line.split()
+            beg = int(beg)
+            span = int(span)
+            seqLen = int(seqLen)
+            if "\\" in row or "/" in row or len(row) - row.count("-") < span:
+                letterSize = 3
+            else:
+                letterSize = 1
+            fields = seqName, seqLen, strand, letterSize, beg, beg + span, row
+            sLines.append(fields)
+        elif line[0] == "a":
+            aLine = line
+        elif line[0] == "q":
+            qLines.append(line)
+        elif line[0] == "p":
+            pLines.append(line)
+        elif line.isspace():
+            if sLines: yield aLine, sLines, qLines, pLines
+            aLine = ""
+            sLines = []
+            qLines = []
+            pLines = []
+        elif line[0] == "#":
+            updateEvalueParameters(opts, line)
+            if opts.isKeepComments:
+                print line,
+    if sLines: yield aLine, sLines, qLines, pLines
+
+def isJoinable(opts, oldMaf, newMaf):
+    x = oldMaf[1]
+    y = newMaf[1]
+    if x[-1][2] == "-":
+        x, y = y, x
+    return all(i[:4] == j[:4] and i[5] <= j[4] and j[4] - i[5] <= opts.join
+               for i, j in zip(x, y))
+
+def fixOrder(mafs):
+    sLines = mafs[0][1]
+    if sLines[-1][2] == "-":
+        mafs.reverse()
+
+def mafGroupInput(opts, lines):
+    x = []
+    for i in mafInput(opts, lines):
+        if x and not isJoinable(opts, x[-1], i):
+            fixOrder(x)
+            yield x
+            x = []
+        x.append(i)
+    if x:
+        fixOrder(x)
+        yield x
 
 ##### Routines for converting to AXT format: #####
 
 axtCounter = count()
 
 def writeAxt(maf):
-    if maf.strands[0] != "+":
+    aLine, sLines, qLines, pLines = maf
+
+    if sLines[0][2] != "+":
         raise Exception("for AXT, the 1st strand in each alignment must be +")
 
     # Convert to AXT's 1-based coordinates:
-    alnStarts = imap(operator.add, maf.alnStarts, repeat(1))
-    alnEnds = imap(operator.add, maf.alnStarts, maf.alnSizes)
+    ranges = [(i[0], str(i[4] + 1), str(i[5]), i[2]) for i in sLines]
 
-    rows = zip(maf.seqNames, alnStarts, alnEnds, maf.strands)
-    head, tail = rows[0], rows[1:]
+    head, body = ranges[0], ranges[1:]
 
-    outWords = []
-    outWords.append(axtCounter.next())
+    outWords = [str(axtCounter.next())]
     outWords.extend(head[:3])
-    outWords.extend(chain(*tail))
-    outWords.append(maf.namesAndValues["score"])
+    for i in body:
+        outWords.extend(i)
+
+    score, evalue = scoreAndEvalue(aLine)
+    if score:
+        outWords.append(score)
 
-    print joined(outWords, " ")
-    for i in maf.alnStrings: print i
+    print " ".join(outWords)
+    for i in sLines:
+        print i[6]
     print  # print a blank line at the end
 
+def mafConvertToAxt(opts, lines):
+    for maf in mafInput(opts, lines):
+        writeAxt(maf)
+
 ##### Routines for converting to tabular format: #####
 
 def writeTab(maf):
@@ -133,117 +179,139 @@ def writeTab(maf):
 
     score = "0"
     endWords = []
-    for i in aLine:
-        if   i.startswith("score="): score = i[6:]
-        elif len(i) > 1: endWords.append(i)
-
-    outWords = []
-    outWords.append(score)
-
-    for i in sLines: outWords.extend(i[1:6])
-
-    alnStrings = [i[6] for i in sLines]
-    alignmentColumns = zip(*alnStrings)
-    letterSizes = alnLetterSizes(sLines)
-    gapStrings = matchAndInsertSizes(alignmentColumns, letterSizes)
-    gapWord = ",".join(gapStrings)
+    for i in aLine.split():
+        if   i.startswith("score="):
+            score = i[6:]
+        elif len(i) > 1:
+            endWords.append(i)
+
+    outWords = [score]
+
+    for seqName, seqLen, strand, letterSize, beg, end, row in sLines:
+        x = seqName, str(beg), str(end - beg), strand, str(seqLen)
+        outWords.extend(x)
+
+    letterSizes = [i[3] for i in sLines]
+    rows = [i[6] for i in sLines]
+    alignmentColumns = izip(*rows)
+    gapWord = ",".join(matchAndInsertSizes(alignmentColumns, letterSizes))
     outWords.append(gapWord)
 
     print "\t".join(outWords + endWords)
 
+def mafConvertToTab(opts, lines):
+    for maf in mafInput(opts, lines):
+        writeTab(maf)
+
 ##### Routines for converting to PSL format: #####
 
-def pslBlocks(alignmentColumns, alnStarts, letterSizes):
+def pslBlocks(opts, mafs, outCounts):
     """Get sizes and start coordinates of gapless blocks in an alignment."""
-    start1, start2 = alnStarts
-    letterSize1, letterSize2 = letterSizes
-    size = 0
-    for x, y in alignmentColumns:
-        if x == "-":
-            if size:
-                yield size, start1, start2
-                start1 += size * letterSize1
-                start2 += size * letterSize2
-                size = 0
-            start2 += symbolSize(y, letterSize2)
-        elif y == "-":
-            if size:
-                yield size, start1, start2
-                start1 += size * letterSize1
-                start2 += size * letterSize2
-                size = 0
-            start1 += symbolSize(x, letterSize1)
-        else:
-            size += 1
-    if size: yield size, start1, start2
+    # repMatches is always zero
+    # for proteins, nCount is always zero, because that's what BLATv34 does
+    normalBases = "ACGTU"
+    matches = mismatches = repMatches = nCount = 0
+
+    for maf in mafs:
+        sLines = maf[1]
+        fieldsA, fieldsB = pairOrDie(sLines, "PSL")
+        letterSizeA, begA, endA, rowA = fieldsA[3:7]
+        letterSizeB, begB, endB, rowB = fieldsB[3:7]
+
+        size = 0
+        for x, y in izip(rowA.upper(), rowB.upper()):
+            if x == "-":
+                if size:
+                    yield size, begA, begB
+                    begA += size * letterSizeA
+                    begB += size * letterSizeB
+                    size = 0
+                begB += symbolSize(y, letterSizeB)
+            elif y == "-":
+                if size:
+                    yield size, begA, begB
+                    begA += size * letterSizeA
+                    begB += size * letterSizeB
+                    size = 0
+                begA += symbolSize(x, letterSizeA)
+            else:
+                size += 1
+                if x in normalBases and y in normalBases or opts.protein:
+                    if x == y:
+                        matches += 1
+                    else:
+                        mismatches += 1
+                else:
+                    nCount += 1
+        if size:
+            yield size, begA, begB
+
+    outCounts[0:4] = matches, mismatches, repMatches, nCount
+
+def pslNumInserts(blocks, letterSizeA, letterSizeB):
+    numInsertA = numInsertB = 0
+    for i, x in enumerate(blocks):
+        size, begA, begB = x
+        if i:
+            if begA > endA:
+                numInsertA += 1
+            if begB > endB:
+                numInsertB += 1
+        endA = begA + size * letterSizeA
+        endB = begB + size * letterSizeB
+    return numInsertA, numInsertB
 
 def pslCommaString(things):
     # UCSC software seems to prefer a trailing comma
-    return joined(things, ",") + ","
-
-def gapRunCount(letters):
-    """Get the number of runs of gap characters."""
-    uniqLetters = map(operator.itemgetter(0), groupby(letters))
-    return uniqLetters.count("-")
+    return ",".join(map(str, things)) + ","
 
-def pslEndpoints(alnStart, alnSize, strand, seqSize):
-    alnEnd = alnStart + alnSize
-    if strand == "+": return alnStart, alnEnd
-    else: return seqSize - alnEnd, seqSize - alnStart
+def pslEnds(headMafFields, tailMafFields):
+    seqName, seqLen, strand, letterSize, beg = headMafFields[0:5]
+    end = tailMafFields[5]
+    if strand == "-":
+        beg, end = seqLen - end, seqLen - beg
+    return seqName, seqLen, strand, letterSize, beg, end
 
-def caseSensitivePairwiseMatchCounts(columns, isProtein):
-    # repMatches is always zero
-    # for proteins, nCount is always zero, because that's what BLATv34 does
-    standardBases = "ACGTU"
-    matches = mismatches = repMatches = nCount = 0
-    for i in columns:
-        if "-" in i: continue
-        x, y = i
-        if x in standardBases and y in standardBases or isProtein:
-            if x == y: matches += 1
-            else: mismatches += 1
-        else: nCount += 1
-    return matches, mismatches, repMatches, nCount
-
-def writePsl(maf, isProtein):
-    dieUnlessPairwise(maf)
-
-    alnStrings = map(str.upper, maf.alnStrings)
-    alignmentColumns = zip(*alnStrings)
-    letterSizes = mafLetterSizes(maf)
-
-    matchCounts = caseSensitivePairwiseMatchCounts(alignmentColumns, isProtein)
+def writePsl(opts, mafs):
+    matchCounts = [0] * 4
+    blocks = list(pslBlocks(opts, mafs, matchCounts))
     matches, mismatches, repMatches, nCount = matchCounts
     numGaplessColumns = sum(matchCounts)
 
-    qNumInsert = gapRunCount(maf.alnStrings[0])
-    qBaseInsert = maf.alnSizes[1] - numGaplessColumns * letterSizes[1]
+    headA, headB = mafs[0][1]
+    tailA, tailB = mafs[-1][1]
 
-    tNumInsert = gapRunCount(maf.alnStrings[1])
-    tBaseInsert = maf.alnSizes[0] - numGaplessColumns * letterSizes[0]
+    seqNameA, seqLenA, strandA, letterSizeA, begA, endA = pslEnds(headA, tailA)
+    baseInsertA = endA - begA - numGaplessColumns * letterSizeA
 
-    strand = maf.strands[1]
-    if max(letterSizes) > 1:
-        strand += maf.strands[0]
-    elif maf.strands[0] != "+":
-        raise Exception("for non-translated PSL, the 1st strand in each alignment must be +")
+    seqNameB, seqLenB, strandB, letterSizeB, begB, endB = pslEnds(headB, tailB)
+    baseInsertB = endB - begB - numGaplessColumns * letterSizeB
 
-    tName, qName = maf.seqNames
-    tSeqSize, qSeqSize = maf.seqSizes
+    numInsertA, numInsertB = pslNumInserts(blocks, letterSizeA, letterSizeB)
 
-    rows = zip(maf.alnStarts, maf.alnSizes, maf.strands, maf.seqSizes)
-    tStart, tEnd = pslEndpoints(*rows[0])
-    qStart, qEnd = pslEndpoints(*rows[1])
+    strand = strandB
+    if letterSizeA > 1 or letterSizeB > 1:
+        strand += strandA
+    elif strandA != "+":
+        raise Exception("for non-translated PSL, the 1st strand in each alignment must be +")
 
-    blocks = list(pslBlocks(alignmentColumns, maf.alnStarts, letterSizes))
     blockCount = len(blocks)
-    blockSizes, tStarts, qStarts = map(pslCommaString, zip(*blocks))
+    blockSizes, blockStartsA, blockStartsB = map(pslCommaString, zip(*blocks))
 
     outWords = (matches, mismatches, repMatches, nCount,
-                qNumInsert, qBaseInsert, tNumInsert, tBaseInsert, strand,
-                qName, qSeqSize, qStart, qEnd, tName, tSeqSize, tStart, tEnd,
-                blockCount, blockSizes, qStarts, tStarts)
-    print joined(outWords, "\t")
+                numInsertB, baseInsertB, numInsertA, baseInsertA, strand,
+                seqNameB, seqLenB, begB, endB, seqNameA, seqLenA, begA, endA,
+                blockCount, blockSizes, blockStartsB, blockStartsA)
+
+    print "\t".join(map(str, outWords))
+
+def mafConvertToPsl(opts, lines):
+    if opts.join:
+        for i in mafGroupInput(opts, lines):
+            writePsl(opts, i)
+    else:
+        for i in mafInput(opts, lines):
+            writePsl(opts, [i])
 
 ##### Routines for converting to SAM format: #####
 
@@ -253,19 +321,19 @@ def readGroupId(readGroupItems):
             return i[3:]
     raise Exception("readgroup must include ID")
 
-def readSequenceLengths(lines):
+def readSequenceLengths(fileNames):
     """Read name & length of topmost sequence in each maf block."""
-    sequenceLengths = {}  # an OrderedDict might be nice
-    isSearching = True
-    for line in lines:
-        if line.isspace(): isSearching = True
-        elif isSearching:
-            w = line.split()
-            if w[0] == "s":
-                seqName, seqSize = w[1], w[5]
-                sequenceLengths[seqName] = seqSize
-                isSearching = False
-    return sequenceLengths
+    for i in fileNames:
+        with open(i) as f:
+            fields = None
+            for line in f:
+                if fields:
+                    if line.isspace():
+                        fields = None
+                else:
+                    if line[0] == "s":
+                        fields = line.split()
+                        yield fields[1], fields[5]
 
 def naturalSortKey(s):
     """Return a key that sorts strings in "natural" order."""
@@ -277,16 +345,30 @@ def karyotypicSortKey(s):
     if s == "MT": return ["~"]
     return naturalSortKey(s)
 
-def writeSamHeader(sequenceLengths, dictFile, readGroupItems):
+def copyDictFile(lines):
+    for line in lines:
+        if line.startswith("@SQ"):
+            sys.stdout.write(line)
+        elif not line[0] == "@":
+            break
+
+def writeSamHeader(opts, fileNames):
     print "@HD\tVN:1.3\tSO:unsorted"
-    for k in sorted(sequenceLengths, key=karyotypicSortKey):
-        print "@SQ\tSN:%s\tLN:%s" % (k, sequenceLengths[k])
-    if dictFile:
-        for i in fileinput.input(dictFile):
-            if i.startswith("@SQ"): print i,
-            elif not i.startswith("@"): break
-    if readGroupItems:
-        print "@RG\t" + "\t".join(readGroupItems)
+
+    if opts.dictionary:
+        sequenceLengths = dict(readSequenceLengths(fileNames))
+        for k in sorted(sequenceLengths, key=karyotypicSortKey):
+            print "@SQ\tSN:%s\tLN:%s" % (k, sequenceLengths[k])
+
+    if opts.dictfile:
+        if opts.dictfile == "-":
+            copyDictFile(sys.stdin)
+        else:
+            with open(opts.dictfile) as f:
+                copyDictFile(f)
+
+    if opts.readgroup:
+        print "@RG\t" + "\t".join(opts.readgroup.split())
 
 mapqMissing = "255"
 mapqMaximum = "254"
@@ -338,16 +420,22 @@ def cigarParts(beg, alignmentColumns, end):
 
     if end: yield str(end) + "H"
 
-def writeSam(maf, rg):
+def writeSam(readGroup, maf):
     aLine, sLines, qLines, pLines = maf
+    fieldsA, fieldsB = pairOrDie(sLines, "SAM")
+    seqNameA, seqLenA, strandA, letterSizeA, begA, endA, rowA = fieldsA
+    seqNameB, seqLenB, strandB, letterSizeB, begB, endB, rowB = fieldsB
 
-    if isTranslated(sLines):
+    if letterSizeA > 1 or letterSizeB > 1:
         raise Exception("this looks like translated DNA - can't convert to SAM format")
 
+    if strandA != "+":
+        raise Exception("for SAM, the 1st strand in each alignment must be +")
+
     score = None
     evalue = None
     mapq = mapqMissing
-    for i in aLine:
+    for i in aLine.split():
         if i.startswith("score="):
             v = i[6:]
             if v.isdigit(): score = "AS:i:" + v  # it must be an integer
@@ -356,176 +444,195 @@ def writeSam(maf, rg):
         elif i.startswith("mismap="):
             mapq = mapqFromProb(i[7:])
 
-    if len(sLines) != 2:
-        raise Exception("for SAM, each alignment must have 2 sequences")
-    rFields, qFields = sLines
-    s, rName, rStart, rAlnSize, rStrand, rSeqSize, rAlnString = rFields
-    s, qName, qStart, qAlnSize, qStrand, qSeqSize, qAlnString = qFields
-    if rStrand != "+":
-        raise Exception("for SAM, the 1st strand in each alignment must be +")
-    pos = str(int(rStart) + 1)  # convert to 1-based coordinate
+    pos = str(begA + 1)  # convert to 1-based coordinate
 
-    alignmentColumns = zip(rAlnString.upper(), qAlnString.upper())
+    alignmentColumns = zip(rowA.upper(), rowB.upper())
 
-    qStart = int(qStart)
-    qRevStart = int(qSeqSize) - qStart - int(qAlnSize)
-    cigar = "".join(cigarParts(qStart, iter(alignmentColumns), qRevStart))
+    revBegB = seqLenB - endB
+    cigar = "".join(cigarParts(begB, iter(alignmentColumns), revBegB))
 
-    seq = deleted(qAlnString, "-")
+    seq = rowB.translate(None, "-")
 
-    if qLines and qLines[-1][1] == qName:
-        qual = ''.join(j
-                       for i, j in izip(qAlnString, qLines[-1][2]) if i != "-")
-    else:
-        qual = "*"
+    qual = "*"
+    if qLines:
+        qFields = qLines[-1].split()
+        if qFields[1] == seqNameB:
+            qual = ''.join(j for i, j in izip(rowB, qFields[2]) if i != "-")
 
     # It's hard to get all the pair info, so this is very
     # incomplete, but hopefully good enough.
     # I'm not sure whether to add 2 and/or 8 to flag.
-    if qName.endswith("/1"):
-        qName = qName[:-2]
-        if qStrand == "+": flag = "99"  # 1 + 2 + 32 + 64
+    if seqNameB.endswith("/1"):
+        seqNameB = seqNameB[:-2]
+        if strandB == "+": flag = "99"  # 1 + 2 + 32 + 64
         else:              flag = "83"  # 1 + 2 + 16 + 64
-    elif qName.endswith("/2"):
-        qName = qName[:-2]
-        if qStrand == "+": flag = "163"  # 1 + 2 + 32 + 128
+    elif seqNameB.endswith("/2"):
+        seqNameB = seqNameB[:-2]
+        if strandB == "+": flag = "163"  # 1 + 2 + 32 + 128
         else:              flag = "147"  # 1 + 2 + 16 + 128
     else:
-        if qStrand == "+": flag = "0"
+        if strandB == "+": flag = "0"
         else:              flag = "16"
 
-    editDistance = sum(1 for x, y in alignmentColumns if x != y)
+    editDistance = sum(x != y for x, y in alignmentColumns)
     # no special treatment of ambiguous bases: might be a minor bug
     editDistance = "NM:i:" + str(editDistance)
 
-    outWords = [qName, flag, rName, pos, mapq, cigar, "*\t0\t0", seq, qual]
-    outWords.append(editDistance)
-    if score: outWords.append(score)
-    if evalue: outWords.append(evalue)
-    if rg: outWords.append(rg)
-    print "\t".join(outWords)
+    out = [seqNameB, flag, seqNameA, pos, mapq, cigar, "*\t0\t0", seq, qual]
+    out.append(editDistance)
+    if score: out.append(score)
+    if evalue: out.append(evalue)
+    if readGroup: out.append(readGroup)
+    print "\t".join(out)
+
+def mafConvertToSam(opts, lines):
+    readGroup = ""
+    if opts.readgroup:
+        readGroup = "RG:Z:" + readGroupId(opts.readgroup.split())
+    for maf in mafInput(opts, lines):
+        writeSam(readGroup, maf)
 
 ##### Routines for converting to BLAST-like format: #####
 
 def pairwiseMatchSymbol(alignmentColumn):
-    if isMatch(alignmentColumn): return "|"
-    else: return " "
+    if isMatch(alignmentColumn):
+        return "|"
+    else:
+        return " "
 
 def strandText(strand):
-    if strand == "+": return "Plus"
-    else: return "Minus"
+    if strand == "+":
+        return "Plus"
+    else:
+        return "Minus"
 
-def blastCoordinate(oneBasedCoordinate, strand, seqSize):
-    if strand == "-":
-        oneBasedCoordinate = seqSize - oneBasedCoordinate + 1
-    return str(oneBasedCoordinate)
+def blastBegCoordinate(zeroBasedCoordinate, strand, seqLen):
+    if strand == "+":
+        return str(zeroBasedCoordinate + 1)
+    else:
+        return str(seqLen - zeroBasedCoordinate)
+
+def blastEndCoordinate(zeroBasedCoordinate, strand, seqLen):
+    if strand == "+":
+        return str(zeroBasedCoordinate)
+    else:
+        return str(seqLen - zeroBasedCoordinate + 1)
+
+def nextCoordinate(coordinate, row, letterSize):
+    return coordinate + insertSize(row, letterSize)
 
 def chunker(things, chunkSize):
     for i in range(0, len(things), chunkSize):
         yield things[i:i+chunkSize]
 
-def blastChunker(maf, lineSize, alignmentColumns):
-    letterSizes = mafLetterSizes(maf)
-    coords = maf.alnStarts
+def blastChunker(sLines, lineSize, alignmentColumns):
+    seqLens = [i[1] for i in sLines]
+    strands = [i[2] for i in sLines]
+    letterSizes = [i[3] for i in sLines]
+    coords = [i[4] for i in sLines]
     for chunkCols in chunker(alignmentColumns, lineSize):
-        chunkRows = zip(*chunkCols)
-        chunkRows = map(''.join, chunkRows)
-        starts = [i + 1 for i in coords]  # change to 1-based coordinates
-        starts = map(blastCoordinate, starts, maf.strands, maf.seqSizes)
-        increments = map(insertionSize, chunkRows, letterSizes)
-        coords = map(operator.add, coords, increments)
-        ends = map(blastCoordinate, coords, maf.strands, maf.seqSizes)
-        yield chunkCols, chunkRows, starts, ends
-
-def writeBlast(maf, opts, oldQueryName):
-    dieUnlessPairwise(maf)
-
-    if maf.seqNames[1] != oldQueryName:
-        print "Query= %s" % maf.seqNames[1]
-        print "         (%s letters)" % maf.seqSizes[1]
+        chunkRows = list(alignmentRowsFromColumns(chunkCols))
+        begs = map(blastBegCoordinate, coords, strands, seqLens)
+        coords = map(nextCoordinate, coords, chunkRows, letterSizes)
+        ends = map(blastEndCoordinate, coords, strands, seqLens)
+        yield chunkCols, chunkRows, begs, ends
+
+def writeBlast(opts, maf, oldQueryName):
+    aLine, sLines, qLines, pLines = maf
+    fieldsA, fieldsB = pairOrDie(sLines, "Blast")
+    seqNameA, seqLenA, strandA, letterSizeA, begA, endA, rowA = fieldsA
+    seqNameB, seqLenB, strandB, letterSizeB, begB, endB, rowB = fieldsB
+
+    if seqNameB != oldQueryName:
+        print "Query= " + seqNameB
+        print "         (%s letters)" % seqLenB
         print
 
-    print ">%s" % maf.seqNames[0]
-    print "          Length = %s" % maf.seqSizes[0]
+    print ">" + seqNameA
+    print "          Length = %s" % seqLenA
     print
 
-    score = maf.namesAndValues["score"]
-    if opts.bitScoreA is not None and opts.bitScoreB is not None:
+    score, evalue = scoreAndEvalue(aLine)
+
+    if score and opts.bitScoreA is not None and opts.bitScoreB is not None:
         bitScore = opts.bitScoreA * float(score) - opts.bitScoreB
         scoreLine = " Score = %.3g bits (%s)" % (bitScore, score)
     else:
         scoreLine = " Score = %s" % score
-    try: scoreLine += ", Expect = %s" % maf.namesAndValues["E"]
-    except KeyError: pass
+
+    if evalue:
+        scoreLine += ", Expect = %s" % evalue
+
     print scoreLine
 
-    alignmentColumns = zip(*maf.alnStrings)
+    alignmentColumns = zip(rowA, rowB)
 
     alnSize = len(alignmentColumns)
-    matches = quantify(alignmentColumns, isMatch)
+    matches = sum(x.upper() == y.upper() for x, y in alignmentColumns)
     matchPercent = 100 * matches // alnSize  # round down, like BLAST
     identLine = " Identities = %s/%s (%s%%)" % (matches, alnSize, matchPercent)
-    gaps = alnSize - quantify(alignmentColumns, isGapless)
-    gapPercent = 100 * gaps // alnSize  # round down, like BLAST
-    if gaps: identLine += ", Gaps = %s/%s (%s%%)" % (gaps, alnSize, gapPercent)
+    gaps = rowA.count("-") + rowB.count("-")
+    if gaps:
+        gapPercent = 100 * gaps // alnSize  # round down, like BLAST
+        identLine += ", Gaps = %s/%s (%s%%)" % (gaps, alnSize, gapPercent)
     print identLine
 
-    strands = map(strandText, maf.strands)
-    print " Strand = %s / %s" % (strands[1], strands[0])
+    print " Strand = %s / %s" % (strandText(strandB), strandText(strandA))
     print
 
-    for chunk in blastChunker(maf, opts.linesize, alignmentColumns):
-        cols, rows, starts, ends = chunk
-        startWidth = maxlen(starts)
-        matchSymbols = map(pairwiseMatchSymbol, cols)
-        matchSymbols = ''.join(matchSymbols)
-        print "Query: %-*s %s %s" % (startWidth, starts[1], rows[1], ends[1])
-        print "       %-*s %s"    % (startWidth, " ", matchSymbols)
-        print "Sbjct: %-*s %s %s" % (startWidth, starts[0], rows[0], ends[0])
+    for chunk in blastChunker(sLines, opts.linesize, alignmentColumns):
+        cols, rows, begs, ends = chunk
+        begWidth = maxlen(begs)
+        matchSymbols = ''.join(map(pairwiseMatchSymbol, cols))
+        print "Query: %-*s %s %s" % (begWidth, begs[1], rows[1], ends[1])
+        print "       %-*s %s"    % (begWidth, " ", matchSymbols)
+        print "Sbjct: %-*s %s %s" % (begWidth, begs[0], rows[0], ends[0])
         print
 
-def blastEndsFromMafLine(fields):
-    beg = int(fields[2])
-    span = int(fields[3])
-    if fields[4] == "+":
-        return beg + 1, beg + span
+def mafConvertToBlast(opts, lines):
+    oldQueryName = ""
+    for maf in mafInput(opts, lines):
+        writeBlast(opts, maf, oldQueryName)
+        sLines = maf[1]
+        oldQueryName = sLines[1][0]
+
+def blastDataFromMafFields(fields):
+    seqName, seqLen, strand, letterSize, beg, end, row = fields
+    if strand == "+":
+        beg += 1
     else:
-        seqSize = int(fields[5])
-        beg2 = seqSize - beg
-        return beg2, beg2 - span + 1
+        beg = seqLen - beg
+        end = seqLen - end + 1
+    return seqName, str(beg), str(end), row.upper()
 
-def writeBlastTab(maf, opts):
+def writeBlastTab(opts, maf):
     aLine, sLines, qLines, pLines = maf
+    fieldsA, fieldsB = pairOrDie(sLines, "BlastTab")
+    seqNameA, begA, endA, rowA = blastDataFromMafFields(fieldsA)
+    seqNameB, begB, endB, rowB = blastDataFromMafFields(fieldsB)
 
-    score = None
-    evalue = None
-    for i in aLine:
-        if   i.startswith("score="): score = i[6:]
-        elif i.startswith("E="):     evalue = i[2:]
-
-    sLine, qLine = sLines
-    alnStrings = [i[6] for i in sLines]
-    alignmentColumns = zip(*alnStrings)
+    alignmentColumns = zip(rowA, rowB)
     alnSize = len(alignmentColumns)
-    matches = quantify(alignmentColumns, isMatch)
+    matches = sum(x == y for x, y in alignmentColumns)
     matchPercent = "%.2f" % (100.0 * matches / alnSize)
-    mismatches = quantify(alignmentColumns, isGapless) - matches
-    gapOpens = sum(map(gapRunCount, alnStrings))
-
-    qName = qLine[1]
-    qBeg, qEnd = blastEndsFromMafLine(qLine)
+    mismatches = alnSize - matches - rowA.count("-") - rowB.count("-")
+    gapOpens = gapRunCount(rowA) + gapRunCount(rowB)
 
-    sName = sLine[1]
-    sBeg, sEnd = blastEndsFromMafLine(sLine)
+    out = [seqNameB, seqNameA, matchPercent, str(alnSize), str(mismatches),
+           str(gapOpens), begB, endB, begA, endA]
 
-    out = [qName, sName, matchPercent, alnSize, mismatches, gapOpens,
-           qBeg, qEnd, sBeg, sEnd]
+    score, evalue = scoreAndEvalue(aLine)
     if evalue:
         out.append(evalue)
         if score and opts.bitScoreA is not None and opts.bitScoreB is not None:
             bitScore = opts.bitScoreA * float(score) - opts.bitScoreB
             out.append("%.3g" % bitScore)
-    print joined(out, "\t")
+
+    print "\t".join(out)
+
+def mafConvertToBlastTab(opts, lines):
+    for maf in mafInput(opts, lines):
+        writeBlastTab(opts, maf)
 
 ##### Routines for converting to HTML format: #####
 
@@ -586,110 +693,102 @@ def multipleMatchSymbol(alignmentColumn):
     if isMatch(alignmentColumn): return "*"
     else: return " "
 
-def writeHtml(maf, lineSize):
+def writeHtml(opts, maf):
+    aLine, sLines, qLines, pLines = maf
+
     scoreLine = "Alignment"
-    try:
-        scoreLine += " score=%s" % maf.namesAndValues["score"]
-        scoreLine += ", expect=%s" % maf.namesAndValues["E"]
-    except KeyError: pass
+    score, evalue = scoreAndEvalue(aLine)
+    if score:
+        scoreLine += " score=" + score
+        if evalue:
+            scoreLine += ", expect=" + evalue
     print "<h3>%s:</h3>" % scoreLine
 
-    if maf.pLines:
-        probRows = [i[1] for i in maf.pLines]
+    if pLines:
+        probRows = [i.split()[1] for i in pLines]
         probCols = izip(*probRows)
         classes = imap(probabilityClass, probCols)
     else:
         classes = repeat(None)
 
-    nameWidth = maxlen(maf.seqNames)
-    alignmentColumns = zip(*maf.alnStrings)
+    seqNames = [i[0] for i in sLines]
+    nameWidth = maxlen(seqNames)
+    rows = [i[6] for i in sLines]
+    alignmentColumns = zip(*rows)
 
     print '<pre>'
-    for chunk in blastChunker(maf, lineSize, alignmentColumns):
-        cols, rows, starts, ends = chunk
-        startWidth = maxlen(starts)
+    for chunk in blastChunker(sLines, opts.linesize, alignmentColumns):
+        cols, rows, begs, ends = chunk
+        begWidth = maxlen(begs)
         endWidth = maxlen(ends)
-        matchSymbols = map(multipleMatchSymbol, cols)
-        matchSymbols = ''.join(matchSymbols)
-        classChunk = islice(classes, lineSize)
+        matchSymbols = ''.join(map(multipleMatchSymbol, cols))
+        classChunk = islice(classes, opts.linesize)
         classRuns = list(identicalRuns(classChunk))
-        for n, s, r, e in zip(maf.seqNames, starts, rows, ends):
+        for n, b, r, e in zip(seqNames, begs, rows, ends):
             spans = [htmlSpan(r, i) for i in classRuns]
             spans = ''.join(spans)
-            formatParams = nameWidth, n, startWidth, s, spans, endWidth, e
+            formatParams = nameWidth, n, begWidth, b, spans, endWidth, e
             print '%-*s %*s %s %*s' % formatParams
-        print ' ' * nameWidth, ' ' * startWidth, matchSymbols
+        print ' ' * nameWidth, ' ' * begWidth, matchSymbols
         print
     print '</pre>'
 
-##### Routines for reading MAF format: #####
+def mafConvertToHtml(opts, lines):
+    for maf in mafInput(opts, lines):
+        writeHtml(opts, maf)
 
-def mafInput(lines, opts, isKeepComments):
-    a = []
-    s = []
-    q = []
-    p = []
-    for i in lines:
-        w = i.split()
-        if not w:
-            if a: yield a, s, q, p
-            a = []
-            s = []
-            q = []
-            p = []
-        elif w[0] == "a":
-            a = w
-        elif w[0] == "s":
-            s.append(w)
-        elif w[0] == "q":
-            q.append(w)
-        elif w[0] == "p":
-            p.append(w)
-        elif i[0] == "#":
-            for j in w:
-                try:
-                    k, v = j.split("=")
-                    x = float(v)
-                    if k == "lambda": opts.bitScoreA = x / math.log(2)
-                    if k == "K":      opts.bitScoreB = math.log(x, 2)
-                except ValueError: pass
-            if isKeepComments: print i,
-    if a: yield a, s, q, p
+##### Main program: #####
 
 def isFormat(myString, myFormat):
     return myFormat.startswith(myString)
 
+def mafConvertOneFile(opts, formatName, lines):
+    if   isFormat(formatName, "axt"):
+        mafConvertToAxt(opts, lines)
+    elif isFormat(formatName, "blast"):
+        mafConvertToBlast(opts, lines)
+    elif isFormat(formatName, "blasttab"):
+        mafConvertToBlastTab(opts, lines)
+    elif isFormat(formatName, "html"):
+        mafConvertToHtml(opts, lines)
+    elif isFormat(formatName, "psl"):
+        mafConvertToPsl(opts, lines)
+    elif isFormat(formatName, "sam"):
+        mafConvertToSam(opts, lines)
+    elif isFormat(formatName, "tabular"):
+        mafConvertToTab(opts, lines)
+    else:
+        raise Exception("unknown format: " + formatName)
+
 def mafConvert(opts, args):
-    format = args[0].lower()
-    if isFormat(format, "sam"):
-        if opts.dictionary: d = readSequenceLengths(fileinput.input(args[1:]))
-        else: d = {}
-        if opts.readgroup:
-            readGroupItems = opts.readgroup.split()
-            rg = "RG:Z:" + readGroupId(readGroupItems)
-        else:
-            readGroupItems = []
-            rg = ""
-        if not opts.noheader: writeSamHeader(d, opts.dictfile, readGroupItems)
-    inputLines = fileinput.input(args[1:])
-    if isFormat(format, "html") and not opts.noheader: writeHtmlHeader()
-    isKeepCommentLines = isFormat(format, "tabular") and not opts.noheader
+    formatName = args[0].lower()
+    fileNames = args[1:]
+
+    opts.isKeepComments = False
     opts.bitScoreA = None
     opts.bitScoreB = None
-    oldQueryName = ""
-    for maf in mafInput(inputLines, opts, isKeepCommentLines):
-        if   isFormat(format, "axt"): writeAxt(Maf(*maf))
-        elif isFormat(format, "blast"):
-            maf = Maf(*maf)
-            writeBlast(maf, opts, oldQueryName)
-            oldQueryName = maf.seqNames[1]
-        elif isFormat(format, "blasttab"): writeBlastTab(maf, opts)
-        elif isFormat(format, "html"): writeHtml(Maf(*maf), opts.linesize)
-        elif isFormat(format, "psl"): writePsl(Maf(*maf), opts.protein)
-        elif isFormat(format, "sam"): writeSam(maf, rg)
-        elif isFormat(format, "tabular"): writeTab(maf)
-        else: raise Exception("unknown format: " + format)
-    if isFormat(format, "html") and not opts.noheader: print "</body></html>"
+
+    if not opts.noheader:
+        if isFormat(formatName, "html"):
+            writeHtmlHeader()
+        if isFormat(formatName, "sam"):
+            writeSamHeader(opts, fileNames)
+        if isFormat(formatName, "tabular"):
+            opts.isKeepComments = True
+
+    if fileNames:
+        for i in fileNames:
+            if i == "-":
+                mafConvertOneFile(opts, formatName, sys.stdin)
+            else:
+                with open(i) as f:
+                    mafConvertOneFile(opts, formatName, f)
+    else:
+        mafConvertOneFile(opts, formatName, sys.stdin)
+
+    if not opts.noheader:
+        if isFormat(formatName, "html"):
+            print "</body></html>"
 
 if __name__ == "__main__":
     signal.signal(signal.SIGPIPE, signal.SIG_DFL)  # avoid silly error message
@@ -709,6 +808,8 @@ if __name__ == "__main__":
     op = optparse.OptionParser(usage=usage, description=description)
     op.add_option("-p", "--protein", action="store_true",
                   help="assume protein alignments, for psl match counts")
+    op.add_option("-j", "--join", type="float", metavar="N",
+                  help="join co-linear alignments separated by <= N letters")
     op.add_option("-n", "--noheader", action="store_true",
                   help="omit any header lines from the output")
     op.add_option("-d", "--dictionary", action="store_true",
@@ -719,7 +820,7 @@ if __name__ == "__main__":
                   help="read group info for sam format")
     op.add_option("-l", "--linesize", type="int", default=60, #metavar="CHARS",
                   help="line length for blast and html formats (default: %default)")
-    (opts, args) = op.parse_args()
+    opts, args = op.parse_args()
     if opts.linesize <= 0: op.error("option -l: should be >= 1")
     if opts.dictionary and opts.dictfile: op.error("can't use both -d and -f")
     if len(args) < 1: op.error("I need a format-name and some MAF alignments")
diff --git a/src/LastalArguments.cc b/src/LastalArguments.cc
index f9b2dbf..0a16f11 100644
--- a/src/LastalArguments.cc
+++ b/src/LastalArguments.cc
@@ -477,6 +477,8 @@ void LastalArguments::setDefaultsFromAlphabet( bool isDna, bool isProtein,
       batchSize = 0x100000;   // 1 Mbyte
     else if( outputType == 0 )
       batchSize = 0x1000000;  // 16 Mbytes
+    else if( !isVolumes )
+      batchSize = 0x800000;   // 8 Mbytes
     else if( inputFormat == sequenceFormat::prb )
       batchSize = 0x2000000;  // 32 Mbytes (?)
     else
diff --git a/src/split/cbrc_split_aligner.cc b/src/split/cbrc_split_aligner.cc
index af29ae0..c395a5f 100644
--- a/src/split/cbrc_split_aligner.cc
+++ b/src/split/cbrc_split_aligner.cc
@@ -129,54 +129,51 @@ int SplitAligner::calcSpliceScore(double dist) const {
     return std::floor(scale * s + 0.5);
 }
 
-// The dinucleotide immediately downstream of the given coordinate
-unsigned SplitAligner::spliceBegSignal(unsigned coordinate,
-				       char strand) const {
-  if (strand == '+') {
-    const uchar *genomeBeg = genome.seqReader();
-    const uchar *p = genomeBeg + coordinate;
-    unsigned n1 = alphabet.numbersToUppercase[*p];
-    if (n1 >= 4) return 16;
-    unsigned n2 = alphabet.numbersToUppercase[*(p + 1)];
-    if (n2 >= 4) return 16;
-    return n1 * 4 + n2;
-  } else {
-    const uchar *genomeEnd = genome.seqReader() + genome.finishedSize();
-    const uchar *p = genomeEnd - coordinate;
-    unsigned n1 = alphabet.numbersToUppercase[*(p - 1)];
-    if (n1 >= 4) return 16;
-    unsigned n2 = alphabet.numbersToUppercase[*(p - 2)];
-    if (n2 >= 4) return 16;
-    return 15 - (n1 * 4 + n2);  // reverse-complement
-  }
+// The dinucleotide immediately downstream on the forward strand
+static unsigned spliceBegSignalFwd(const uchar *seqPtr,
+				   const uchar *toUnmasked) {
+  unsigned n1 = toUnmasked[*seqPtr];
+  if (n1 >= 4) return 16;
+  unsigned n2 = toUnmasked[*(seqPtr + 1)];
+  if (n2 >= 4) return 16;
+  return n1 * 4 + n2;
 }
 
-// The dinucleotide immediately upstream of the given coordinate
-unsigned SplitAligner::spliceEndSignal(unsigned coordinate,
-				       char strand) const {
-  if (strand == '+') {
-    const uchar *genomeBeg = genome.seqReader();
-    const uchar *p = genomeBeg + coordinate;
-    unsigned n2 = alphabet.numbersToUppercase[*(p - 1)];
-    if (n2 >= 4) return 16;
-    unsigned n1 = alphabet.numbersToUppercase[*(p - 2)];
-    if (n1 >= 4) return 16;
-    return n1 * 4 + n2;
-  } else {
-    const uchar *genomeEnd = genome.seqReader() + genome.finishedSize();
-    const uchar *p = genomeEnd - coordinate;
-    unsigned n2 = alphabet.numbersToUppercase[*p];
-    if (n2 >= 4) return 16;
-    unsigned n1 = alphabet.numbersToUppercase[*(p + 1)];
-    if (n1 >= 4) return 16;
-    return 15 - (n1 * 4 + n2);  // reverse-complement
-  }
+// The dinucleotide immediately downstream on the reverse strand
+static unsigned spliceBegSignalRev(const uchar *seqPtr,
+				   const uchar *toUnmasked) {
+  unsigned n1 = toUnmasked[*(seqPtr - 1)];
+  if (n1 >= 4) return 16;
+  unsigned n2 = toUnmasked[*(seqPtr - 2)];
+  if (n2 >= 4) return 16;
+  return 15 - (n1 * 4 + n2);  // reverse-complement
+}
+
+// The dinucleotide immediately upstream on the forward strand
+static unsigned spliceEndSignalFwd(const uchar *seqPtr,
+				   const uchar *toUnmasked) {
+  unsigned n2 = toUnmasked[*(seqPtr - 1)];
+  if (n2 >= 4) return 16;
+  unsigned n1 = toUnmasked[*(seqPtr - 2)];
+  if (n1 >= 4) return 16;
+  return n1 * 4 + n2;
+}
+
+// The dinucleotide immediately upstream on the reverse strand
+static unsigned spliceEndSignalRev(const uchar *seqPtr,
+				   const uchar *toUnmasked) {
+  unsigned n2 = toUnmasked[*seqPtr];
+  if (n2 >= 4) return 16;
+  unsigned n1 = toUnmasked[*(seqPtr + 1)];
+  if (n1 >= 4) return 16;
+  return 15 - (n1 * 4 + n2);  // reverse-complement
 }
 
 unsigned SplitAligner::findScore(unsigned j, long score) const {
   for (unsigned i = 0; i < numAlns; ++i) {
     if (dpBeg(i) >= j || dpEnd(i) < j) continue;
-    if (cell(Vmat, i, j) + spliceBegScore(i, j) == score) return i;
+    size_t ij = matrixRowOrigins[i] + j;
+    if (Vmat[ij] + spliceBegScore(ij) == score) return i;
   }
   return numAlns;
 }
@@ -184,16 +181,18 @@ unsigned SplitAligner::findScore(unsigned j, long score) const {
 unsigned SplitAligner::findSpliceScore(unsigned i, unsigned j,
 				       long score) const {
     assert(splicePrior > 0.0);
+    size_t ij = matrixRowOrigins[i] + j;
     unsigned iSeq = rnameAndStrandIds[i];
-    unsigned iEnd = cell(spliceEndCoords, i, j);
-    int iScore = spliceEndScore(i, j);
+    unsigned iEnd = spliceEndCoords[ij];
+    int iScore = spliceEndScore(ij);
     for (unsigned k = 0; k < numAlns; k++) {
 	if (rnameAndStrandIds[k] != iSeq) continue;
 	if (dpBeg(k) >= j || dpEnd(k) < j) continue;
-	unsigned kBeg = cell(spliceBegCoords, k, j);
+	size_t kj = matrixRowOrigins[k] + j;
+	unsigned kBeg = spliceBegCoords[kj];
 	if (iEnd <= kBeg) continue;
-	int s = iScore + spliceBegScore(k, j) + spliceScore(iEnd - kBeg);
-	if (cell(Vmat, k, j) + s == score) return k;
+	int s = iScore + spliceBegScore(kj) + spliceScore(iEnd - kBeg);
+	if (Vmat[kj] + s == score) return k;
     }
     return numAlns;
 }
@@ -201,30 +200,29 @@ unsigned SplitAligner::findSpliceScore(unsigned i, unsigned j,
 long SplitAligner::scoreFromSplice(unsigned i, unsigned j,
 				   unsigned oldNumInplay,
 				   unsigned& oldInplayPos) const {
+  size_t ij = matrixRowOrigins[i] + j;
   long score = LONG_MIN;
   unsigned iSeq = rnameAndStrandIds[i];
-  unsigned iEnd = cell(spliceEndCoords, i, j);
+  unsigned iEnd = spliceEndCoords[ij];
 
   for (/* noop */; oldInplayPos < oldNumInplay; ++oldInplayPos) {
     unsigned k = oldInplayAlnIndices[oldInplayPos];
     if (rnameAndStrandIds[k] < iSeq) continue;
-    if (rnameAndStrandIds[k] > iSeq) return score;
-    if (rBegs[k] >= iEnd) return score;
-    unsigned kBeg = cell(spliceBegCoords, k, j);
+    if (rnameAndStrandIds[k] > iSeq || rBegs[k] >= iEnd) return score;
+    size_t kj = matrixRowOrigins[k] + j;
+    unsigned kBeg = spliceBegCoords[kj];
     if (kBeg >= rBegs[i] || rBegs[i] - kBeg <= maxSpliceDist) break;
   }
 
-  int iScore = spliceEndScore(i, j);
-
   for (unsigned y = oldInplayPos; y < oldNumInplay; ++y) {
     unsigned k = oldInplayAlnIndices[y];
-    if (rnameAndStrandIds[k] > iSeq) break;
-    if (rBegs[k] >= iEnd) break;
-    unsigned kBeg = cell(spliceBegCoords, k, j);
+    if (rnameAndStrandIds[k] > iSeq || rBegs[k] >= iEnd) break;
+    size_t kj = matrixRowOrigins[k] + j;
+    unsigned kBeg = spliceBegCoords[kj];
     if (iEnd <= kBeg) continue;
     if (iEnd - kBeg > maxSpliceDist) continue;
-    int s = iScore + spliceBegScore(k, j) + spliceScore(iEnd - kBeg);
-    score = std::max(score, cell(Vmat, k, j) + s);
+    score = std::max(score,
+		     Vmat[kj] + spliceBegScore(kj) + spliceScore(iEnd - kBeg));
   }
 
   return score;
@@ -319,18 +317,19 @@ long SplitAligner::viterbi() {
 	long sMax = INT_MIN/2;
 	for (unsigned x = 0; x < newNumInplay; ++x) {
 	    unsigned i = newInplayAlnIndices[x];
-	    size_t k = matrixRowOrigins[i] + j;
+	    size_t ij = matrixRowOrigins[i] + j;
 
-	    long s = std::max(Vmat[k] + Dmat[k],
-			      scoreFromJump + spliceEndScore(i, j));
-	    if (restartProb <= 0 && alns[i].qstart == j && s < 0) s = 0;
+	    long s = scoreFromJump;
 	    if (splicePrior > 0.0)
 	      s = std::max(s,
 			   scoreFromSplice(i, j, oldNumInplay, oldInplayPos));
-	    s += Amat[k];
+	    s += spliceEndScore(ij);
+	    s = std::max(s, Vmat[ij] + Dmat[ij]);
+	    if (restartProb <= 0 && alns[i].qstart == j && s < 0) s = 0;
+	    s += Amat[ij];
 
-	    Vmat[k+1] = s;
-	    sMax = std::max(sMax, s + spliceBegScore(i, j+1));
+	    Vmat[ij + 1] = s;
+	    sMax = std::max(sMax, s + spliceBegScore(ij + 1));
 	}
 	maxScore = std::max(sMax, maxScore);
 	cell(Vvec, j+1) = maxScore;
@@ -376,9 +375,9 @@ void SplitAligner::traceBack(long viterbiScore,
   queryEnds.push_back(j);
 
   for (;;) {
-    long score = cell(Vmat, i, j);
     --j;
-    score -= cell(Amat, i, j);
+    size_t ij = matrixRowOrigins[i] + j;
+    long score = Vmat[ij + 1] - Amat[ij];
     if (restartProb <= 0 && alns[i].qstart == j && score == 0) {
       queryBegs.push_back(j);
       return;
@@ -390,10 +389,10 @@ void SplitAligner::traceBack(long viterbiScore,
     // makes some other kinds of boundary less clean.  What's the best
     // procedure for tied scores?
 
-    bool isStay = (score == scoreIndel(i, j));
+    bool isStay = (score == Vmat[ij] + Dmat[ij]);
     if (isStay && alns[i].qstrand == '+') continue;
 
-    long s = score - spliceEndScore(i, j);
+    long s = score - spliceEndScore(ij);
     long t = s - restartScore;
     if (t == cell(Vvec, j)) {
       queryBegs.push_back(j);
@@ -417,8 +416,9 @@ int SplitAligner::segmentScore(unsigned alnNum,
   int score = 0;
   unsigned i = alnNum;
   for (unsigned j = queryBeg; j < queryEnd; ++j) {
-    score += cell(Amat, i, j);
-    if (j > queryBeg) score += cell(Dmat, i, j);
+    size_t ij = matrixRowOrigins[i] + j;
+    score += Amat[ij];
+    if (j > queryBeg) score += Dmat[ij];
   }
   return score;
 }
@@ -426,30 +426,28 @@ int SplitAligner::segmentScore(unsigned alnNum,
 double SplitAligner::probFromSpliceF(unsigned i, unsigned j,
 				     unsigned oldNumInplay,
 				     unsigned& oldInplayPos) const {
+  size_t ij = matrixRowOrigins[i] + j;
   double sum = 0.0;
   unsigned iSeq = rnameAndStrandIds[i];
-  unsigned iEnd = cell(spliceEndCoords, i, j);
+  unsigned iEnd = spliceEndCoords[ij];
 
   for (/* noop */; oldInplayPos < oldNumInplay; ++oldInplayPos) {
     unsigned k = oldInplayAlnIndices[oldInplayPos];
     if (rnameAndStrandIds[k] < iSeq) continue;
-    if (rnameAndStrandIds[k] > iSeq) return sum;
-    if (rBegs[k] >= iEnd) return sum;
-    unsigned kBeg = cell(spliceBegCoords, k, j);
+    if (rnameAndStrandIds[k] > iSeq || rBegs[k] >= iEnd) return sum;
+    size_t kj = matrixRowOrigins[k] + j;
+    unsigned kBeg = spliceBegCoords[kj];
     if (kBeg >= rBegs[i] || rBegs[i] - kBeg <= maxSpliceDist) break;
   }
 
-  double iProb = spliceEndProb(i, j);
-
   for (unsigned y = oldInplayPos; y < oldNumInplay; ++y) {
     unsigned k = oldInplayAlnIndices[y];
-    if (rnameAndStrandIds[k] > iSeq) break;
-    if (rBegs[k] >= iEnd) break;
-    unsigned kBeg = cell(spliceBegCoords, k, j);
+    if (rnameAndStrandIds[k] > iSeq || rBegs[k] >= iEnd) break;
+    size_t kj = matrixRowOrigins[k] + j;
+    unsigned kBeg = spliceBegCoords[kj];
     if (iEnd <= kBeg) continue;
     if (iEnd - kBeg > maxSpliceDist) continue;
-    double p = iProb * spliceBegProb(k, j) * spliceProb(iEnd - kBeg);
-    sum += cell(Fmat, k, j) * p;
+    sum += Fmat[kj] * spliceBegProb(kj) * spliceProb(iEnd - kBeg);
   }
 
   return sum;
@@ -458,30 +456,28 @@ double SplitAligner::probFromSpliceF(unsigned i, unsigned j,
 double SplitAligner::probFromSpliceB(unsigned i, unsigned j,
 				     unsigned oldNumInplay,
 				     unsigned& oldInplayPos) const {
+  size_t ij = matrixRowOrigins[i] + j;
   double sum = 0.0;
   unsigned iSeq = rnameAndStrandIds[i];
-  unsigned iBeg = cell(spliceBegCoords, i, j);
+  unsigned iBeg = spliceBegCoords[ij];
 
   for (/* noop */; oldInplayPos < oldNumInplay; ++oldInplayPos) {
     unsigned k = oldInplayAlnIndices[oldInplayPos];
     if (rnameAndStrandIds[k] < iSeq) continue;
-    if (rnameAndStrandIds[k] > iSeq) return sum;
-    if (rEnds[k] <= iBeg) return sum;
-    unsigned kEnd = cell(spliceEndCoords, k, j);
+    if (rnameAndStrandIds[k] > iSeq || rEnds[k] <= iBeg) return sum;
+    size_t kj = matrixRowOrigins[k] + j;
+    unsigned kEnd = spliceEndCoords[kj];
     if (kEnd <= rEnds[i] || kEnd - rEnds[i] <= maxSpliceDist) break;
   }
 
-  double iProb = spliceBegProb(i, j);
-
   for (unsigned y = oldInplayPos; y < oldNumInplay; ++y) {
     unsigned k = oldInplayAlnIndices[y];
-    if (rnameAndStrandIds[k] > iSeq) break;
-    if (rEnds[k] <= iBeg) break;
-    unsigned kEnd = cell(spliceEndCoords, k, j);
+    if (rnameAndStrandIds[k] > iSeq || rEnds[k] <= iBeg) break;
+    size_t kj = matrixRowOrigins[k] + j;
+    unsigned kEnd = spliceEndCoords[kj];
     if (kEnd <= iBeg) continue;
     if (kEnd - iBeg > maxSpliceDist) continue;
-    double p = iProb * spliceEndProb(k, j) * spliceProb(kEnd - iBeg);
-    sum += cell(Bmat, k, j) * p;
+    sum += Bmat[kj] * spliceEndProb(kj) * spliceProb(kEnd - iBeg);
   }
 
   return sum;
@@ -513,17 +509,19 @@ void SplitAligner::forward() {
 	double rNew = 1.0;
 	for (unsigned x = 0; x < newNumInplay; ++x) {
 	    unsigned i = newInplayAlnIndices[x];
-	    size_t k = matrixRowOrigins[i] + j;
+	    size_t ij = matrixRowOrigins[i] + j;
 
-	    double p = Fmat[k] * Dexp[k] + probFromJump * spliceEndProb(i, j);
-	    if (restartProb <= 0 && alns[i].qstart == j) p += begprob;
+	    double p = probFromJump;
 	    if (splicePrior > 0.0)
 	      p += probFromSpliceF(i, j, oldNumInplay, oldInplayPos);
-	    p = p * Aexp[k] / r;
+	    p *= spliceEndProb(ij);
+	    p += Fmat[ij] * Dexp[ij];
+	    if (restartProb <= 0 && alns[i].qstart == j) p += begprob;
+	    p = p * Aexp[ij] / r;
 
-	    Fmat[k+1] = p;
+	    Fmat[ij + 1] = p;
 	    if (alns[i].qend == j+1) zF += p;
-	    pSum += p * spliceBegProb(i, j+1);
+	    pSum += p * spliceBegProb(ij + 1);
 	    rNew += p;
         }
         begprob /= r;
@@ -559,17 +557,19 @@ void SplitAligner::backward() {
 	double pSum = 0.0;
 	for (unsigned x = 0; x < newNumInplay; ++x) {
 	    unsigned i = newInplayAlnIndices[x];
-	    size_t k = matrixRowOrigins[i] + j;
+	    size_t ij = matrixRowOrigins[i] + j;
 
-	    double p = Bmat[k] * Dexp[k] + probFromJump * spliceBegProb(i, j);
-	    if (restartProb <= 0 && alns[i].qend == j) p += endprob;
+	    double p = probFromJump;
 	    if (splicePrior > 0.0)
 	      p += probFromSpliceB(i, j, oldNumInplay, oldInplayPos);
-	    p = p * Aexp[k-1] / r;
+	    p *= spliceBegProb(ij);
+	    p += Bmat[ij] * Dexp[ij];
+	    if (restartProb <= 0 && alns[i].qend == j) p += endprob;
+	    p = p * Aexp[ij - 1] / r;
 
-	    Bmat[k-1] = p;
+	    Bmat[ij - 1] = p;
 	    //if (alns[i].qstart == j-1) zB += p;
-	    pSum += p * spliceEndProb(i, j-1);
+	    pSum += p * spliceEndProb(ij - 1);
         }
         endprob /= r;
 	sumProb = pSum * restartProb + sumProb / r;
@@ -587,11 +587,12 @@ SplitAligner::marginalProbs(unsigned queryBeg, unsigned alnNum,
     unsigned i = alnNum;
     unsigned j = queryBeg;
     for (unsigned pos = alnBeg; pos < alnEnd; ++pos) {
+	size_t ij = matrixRowOrigins[i] + j;
         if (alns[i].qalign[pos] == '-') {
-            double value = cell(Fmat, i, j) * cell(Bmat, i, j) * cell(Dexp, i, j) / cell(rescales, j);
+            double value = Fmat[ij] * Bmat[ij] * Dexp[ij] / cell(rescales, j);
             output.push_back(value);
         } else {
-            double value = cell(Fmat, i, j+1) * cell(Bmat, i, j) / cell(Aexp, i, j);
+            double value = Fmat[ij + 1] * Bmat[ij] / Aexp[ij];
             if (value != value) value = 0.0;
             output.push_back(value);
             j++;
@@ -600,7 +601,7 @@ SplitAligner::marginalProbs(unsigned queryBeg, unsigned alnNum,
     return output;
 }
 
-// The next 2 routines represent affine gap scores in a cunning way.
+// The next routine represents affine gap scores in a cunning way.
 // Amat holds scores at query bases, and at every base that is aligned
 // to a gap it gets a score of insExistenceScore + insExtensionScore.
 // Dmat holds scores between query bases, and between every pair of
@@ -609,149 +610,133 @@ SplitAligner::marginalProbs(unsigned queryBeg, unsigned alnNum,
 // if we jump from one alignment to another in the middle of a gap.
 
 void SplitAligner::calcBaseScores(unsigned i) {
-  int firstGapScore = insExistenceScore + insExtensionScore;
+  const int firstInsScore = insExistenceScore + insExtensionScore;
+  const int tweenInsScore = -insExistenceScore;
+
   const UnsplitAlignment& a = alns[i];
+  const size_t origin = matrixRowOrigins[i];
 
-  int *b = &cell(Amat, i, dpBeg(i));
-  int *s = &cell(Amat, i, a.qstart);
-  int *f = &cell(Amat, i, a.qend);
-  int *e = &cell(Amat, i, dpEnd(i));
+  int *AmatB = &Amat[origin + dpBeg(i)];
+  int *DmatB = &Dmat[origin + dpBeg(i)];
+  int *AmatS = &Amat[origin + a.qstart];
+  int *AmatE = &Amat[origin + dpEnd(i)];
 
-  while (b < s) *b++ = firstGapScore;
+  int delScore = 0;
+  int insCompensationScore = 0;
+
+  // treat any query letters before the alignment as insertions:
+  while (AmatB < AmatS) {
+    *AmatB++ = firstInsScore;
+    *DmatB++ = delScore + insCompensationScore;
+    delScore = 0;
+    insCompensationScore = tweenInsScore;
+  }
 
   const char *rAlign = a.ralign;
   const char *qAlign = a.qalign;
   const char *qQual = a.qQual;
 
-  while (b < f) {
+  while (*qAlign) {
     unsigned char x = *rAlign++;
     unsigned char y = *qAlign++;
     int q = qQual ? (*qQual++ - qualityOffset) : (numQualCodes - 1);
-    if (y == '-') /* noop */;
-    else if (x == '-') *b++ = firstGapScore;
-    else {
+    if (x == '-') {  // gap in reference sequence: insertion
+      *AmatB++ = firstInsScore;
+      *DmatB++ = delScore + insCompensationScore;
+      delScore = 0;
+      insCompensationScore = tweenInsScore;
+    } else if (y == '-') {  // gap in query sequence: deletion
+      if (delScore == 0) delScore = gapExistenceScore;
+      delScore += gapExtensionScore;
+      insCompensationScore = 0;
+    } else {
       assert(q >= 0);
       if (q >= numQualCodes) q = numQualCodes - 1;
-      *b++ = score_mat[x % 64][y % 64][q];
+      *AmatB++ = score_mat[x % 64][y % 64][q];
+      *DmatB++ = delScore;
+      delScore = 0;
+      insCompensationScore = 0;
     }
     // Amazingly, in ASCII, '.' equals 'n' mod 64.
     // So '.' will get the same scores as 'n'.
   }
 
-  while (b < e) *b++ = firstGapScore;
-}
-
-void SplitAligner::calcInsScores(unsigned i) {
-  const UnsplitAlignment& a = alns[i];
-  bool isExt = false;
-
-  int *b = &cell(Dmat, i, dpBeg(i));
-  int *s = &cell(Dmat, i, a.qstart);
-  int *e = &cell(Dmat, i, dpEnd(i));
-
-  while (b < s) {
-    *b++ = (isExt ? -insExistenceScore : 0);
-    isExt = true;
+  // treat any query letters after the alignment as insertions:
+  while (AmatB < AmatE) {
+    *AmatB++ = firstInsScore;
+    *DmatB++ = delScore + insCompensationScore;
+    delScore = 0;
+    insCompensationScore = tweenInsScore;
   }
 
-  const char *rAlign = a.ralign;
-  const char *qAlign = a.qalign;
-
-  while (*qAlign) {
-    bool isDel = (*qAlign++ == '-');
-    bool isIns = (*rAlign++ == '-');
-    if (!isDel) *b++ = (isIns && isExt ? -insExistenceScore : 0);
-    isExt = isIns;
-  }
-
-  while (b < e) {
-    *b++ = (isExt ? -insExistenceScore : 0);
-    isExt = true;
-  }
-
-  *b++ = 0;
-}
-
-void SplitAligner::calcDelScores(unsigned i) {
-  const UnsplitAlignment& a = alns[i];
-  int *b = &cell(Dmat, i, a.qstart);
-  const char *qAlign = a.qalign;
-  int delScore = 0;
-  while (*qAlign) {
-    if (*qAlign++ == '-') {  // deletion in query
-      if (delScore == 0) delScore = gapExistenceScore;
-      delScore += gapExtensionScore;
-    } else {
-      *b++ += delScore;
-      delScore = 0;
-    }
-  }
-  *b++ += delScore;
+  *DmatB++ = delScore;
 }
 
-void SplitAligner::calcScoreMatrices() {
-  resizeMatrix(Amat);
-  resizeMatrix(Dmat);
-
-  for (unsigned i = 0; i < numAlns; i++) {
-    calcBaseScores(i);
-    calcInsScores(i);
-    calcDelScores(i);
+void SplitAligner::initRbegsAndEnds() {
+  for (unsigned i = 0; i < numAlns; ++i) {
+    const UnsplitAlignment& a = alns[i];
+    rBegs[i] = a.rstart;
+    rEnds[i] = a.rend;
   }
 }
 
-void SplitAligner::initSpliceCoords() {
-  resizeMatrix(spliceBegCoords);
-  resizeMatrix(spliceEndCoords);
-
-  for (unsigned i = 0; i < numAlns; ++i) {
-    const UnsplitAlignment& a = alns[i];
-    unsigned j = dpBeg(i);
-    unsigned k = a.rstart;
-
-    if (!chromosomeIndex.empty()) {
-      StringNumMap::const_iterator f = chromosomeIndex.find(a.rname);
-      if (f == chromosomeIndex.end())
-	err("can't find " + std::string(a.rname) + " in the genome");
-      unsigned c = f->second;
-      if (a.qstrand == '+') k += genome.seqBeg(c);
-      else                  k += genome.finishedSize() - genome.seqEnd(c);
-    }
+void SplitAligner::initSpliceCoords(unsigned i) {
+  const UnsplitAlignment& a = alns[i];
+  unsigned j = dpBeg(i);
+  unsigned k = a.rstart;
 
-    rBegs[i] = k;
+  cell(spliceBegCoords, i, j) = k;
+  while (j < a.qstart) {
+    cell(spliceEndCoords, i, j) = k;
+    ++j;
     cell(spliceBegCoords, i, j) = k;
-    while (j < a.qstart) {
-      cell(spliceEndCoords, i, j) = k;
-      ++j;
-      cell(spliceBegCoords, i, j) = k;
-    }
-    for (unsigned x = 0; a.ralign[x]; ++x) {
-      if (a.qalign[x] != '-') cell(spliceEndCoords, i, j) = k;
-      if (a.qalign[x] != '-') ++j;
-      if (a.ralign[x] != '-') ++k;
-      if (a.qalign[x] != '-') cell(spliceBegCoords, i, j) = k;
-    }
-    while (j < dpEnd(i)) {
-      cell(spliceEndCoords, i, j) = k;
-      ++j;
-      cell(spliceBegCoords, i, j) = k;
-    }
+  }
+  for (unsigned x = 0; a.ralign[x]; ++x) {
+    if (a.qalign[x] != '-') cell(spliceEndCoords, i, j) = k;
+    if (a.qalign[x] != '-') ++j;
+    if (a.ralign[x] != '-') ++k;
+    if (a.qalign[x] != '-') cell(spliceBegCoords, i, j) = k;
+  }
+  while (j < dpEnd(i)) {
     cell(spliceEndCoords, i, j) = k;
-    rEnds[i] = k;
+    ++j;
+    cell(spliceBegCoords, i, j) = k;
   }
+  cell(spliceEndCoords, i, j) = k;
+
+  assert(k == a.rend);  // xxx
 }
 
-void SplitAligner::initSpliceSignals() {
-  resizeMatrix(spliceBegSignals);
-  resizeMatrix(spliceEndSignals);
+void SplitAligner::initSpliceSignals(unsigned i) {
+  const uchar *toUnmasked = alphabet.numbersToUppercase;
+  const UnsplitAlignment& a = alns[i];
 
-  for (unsigned i = 0; i < numAlns; ++i) {
-    char strand = alns[i].qstrand;
-    for (unsigned j = dpBeg(i); j <= dpEnd(i); ++j) {
-      cell(spliceBegSignals, i, j) =
-	spliceBegSignal(cell(spliceBegCoords, i, j), strand);
-      cell(spliceEndSignals, i, j) =
-	spliceEndSignal(cell(spliceEndCoords, i, j), strand);
+  StringNumMap::const_iterator f = chromosomeIndex.find(a.rname);
+  if (f == chromosomeIndex.end())
+    err("can't find " + std::string(a.rname) + " in the genome");
+  unsigned v = f->second % maxGenomeVolumes();
+  unsigned c = f->second / maxGenomeVolumes();
+  const uchar *chromBeg = genome[v].seqReader() + genome[v].seqBeg(c);
+  const uchar *chromEnd = genome[v].seqReader() + genome[v].seqEnd(c);
+  if (a.rend > chromEnd - chromBeg)
+    err("alignment beyond the end of " + std::string(a.rname));
+
+  size_t rowBeg = matrixRowOrigins[i] + dpBeg(i);
+  const unsigned *begCoords = &spliceBegCoords[rowBeg];
+  const unsigned *endCoords = &spliceEndCoords[rowBeg];
+  unsigned char *begSignals = &spliceBegSignals[rowBeg];
+  unsigned char *endSignals = &spliceEndSignals[rowBeg];
+  unsigned dpLen = dpEnd(i) - dpBeg(i);
+
+  if (a.qstrand == '+') {
+    for (unsigned j = 0; j <= dpLen; ++j) {
+      begSignals[j] = spliceBegSignalFwd(chromBeg + begCoords[j], toUnmasked);
+      endSignals[j] = spliceEndSignalFwd(chromBeg + endCoords[j], toUnmasked);
+    }
+  } else {
+    for (unsigned j = 0; j <= dpLen; ++j) {
+      begSignals[j] = spliceBegSignalRev(chromEnd - begCoords[j], toUnmasked);
+      endSignals[j] = spliceEndSignalRev(chromEnd - endCoords[j], toUnmasked);
     }
   }
 }
@@ -780,27 +765,13 @@ void SplitAligner::initRnameAndStrandIds() {
   }
 }
 
-void SplitAligner::initForwardBackward() {
-  resizeMatrix(Aexp);
-  transform(Amat.begin(), Amat.end(), Aexp.begin(), scaledExp);
-
-  resizeMatrix(Dexp);
-  transform(Dmat.begin(), Dmat.end(), Dexp.begin(), scaledExp);
-
-  // if x/scale < about -745, then exp(x/scale) will be exactly 0.0
-}
-
-int SplitAligner::maxJumpScore() const {
-  int m = jumpScore;
-  if (splicePrior > 0.0) {
-    double s = spliceTerm1 - meanLogDist + sdevLogDist * sdevLogDist / 2.0;
-    int maxSpliceScore = std::floor(scale * s + 0.5);
-    m = std::max(m, maxSpliceScore);
-  }
-  if (!chromosomeIndex.empty()) {
-    m += arrayMax(spliceBegScores) + arrayMax(spliceEndScores);
-  }
-  return m;
+void SplitAligner::dpExtensionMinScores(int maxJumpScore,
+					size_t& minScore1,
+					size_t& minScore2) const {
+  if (!chromosomeIndex.empty()) maxJumpScore += maxSpliceBegEndScore;
+  assert(maxJumpScore + insExistenceScore <= 0);
+  minScore1 = 1 - (maxJumpScore + insExistenceScore);
+  minScore2 = 1 - (maxJumpScore + maxJumpScore + insExistenceScore);
 }
 
 static size_t dpExtension(size_t maxScore, size_t minScore, size_t divisor) {
@@ -842,10 +813,8 @@ void SplitAligner::initDpBounds() {
   size_t minScore1 = -1;
   size_t minScore2 = -1;
   if (jumpProb > 0.0 || splicePrior > 0.0) {
-    int m = maxJumpScore();
-    assert(m + insExistenceScore <= 0);
-    minScore1 = 1 - (m + insExistenceScore);
-    minScore2 = 1 - (m + m + insExistenceScore);
+    int m = (splicePrior > 0.0) ? maxSpliceScore : jumpScore;
+    dpExtensionMinScores(m, minScore1, minScore2);
   }
 
   for (unsigned i = 0; i < numAlns; ++i) {
@@ -875,15 +844,12 @@ void SplitAligner::initDpBounds() {
   }
 }
 
-void SplitAligner::initForOneQuery(std::vector<UnsplitAlignment>::const_iterator beg,
-				   std::vector<UnsplitAlignment>::const_iterator end) {
+void SplitAligner::layout(std::vector<UnsplitAlignment>::const_iterator beg,
+			  std::vector<UnsplitAlignment>::const_iterator end) {
     assert(end > beg);
     numAlns = end - beg;
     alns = beg;
 
-    initDpBounds();
-    calcScoreMatrices();
-
     sortedAlnIndices.resize(numAlns);
     for (unsigned i = 0; i < numAlns; ++i) sortedAlnIndices[i] = i;
     oldInplayAlnIndices.resize(numAlns);
@@ -893,13 +859,47 @@ void SplitAligner::initForOneQuery(std::vector<UnsplitAlignment>::const_iterator
     rEnds.resize(numAlns);
 
     if (splicePrior > 0.0 || !chromosomeIndex.empty()) {
-        initSpliceCoords();
-	if (!chromosomeIndex.empty()) initSpliceSignals();
+	initRbegsAndEnds();
 	//initRnameAndStrandIds();
     }
     initRnameAndStrandIds();
 
-    initForwardBackward();
+    initDpBounds();
+}
+
+size_t SplitAligner::memory(bool isViterbi, bool isBothSpliceStrands) const {
+  size_t numOfStrands = isBothSpliceStrands ? 2 : 1;
+  size_t x = 2 * sizeof(int) + 2 * sizeof(double);
+  if (splicePrior > 0 || !chromosomeIndex.empty()) x += 2 * sizeof(unsigned);
+  if (!chromosomeIndex.empty()) x += 2;
+  if (isViterbi) x += sizeof(long) * numOfStrands;
+  x += 2 * sizeof(double) * numOfStrands;
+  return x * cellsPerDpMatrix();
+}
+
+void SplitAligner::initMatricesForOneQuery() {
+  resizeMatrix(Amat);
+  resizeMatrix(Dmat);
+  resizeMatrix(Aexp);
+  resizeMatrix(Dexp);
+
+  for (unsigned i = 0; i < numAlns; i++) calcBaseScores(i);
+
+  if (splicePrior > 0.0 || !chromosomeIndex.empty()) {
+    resizeMatrix(spliceBegCoords);
+    resizeMatrix(spliceEndCoords);
+    for (unsigned i = 0; i < numAlns; ++i) initSpliceCoords(i);
+  }
+
+  if (!chromosomeIndex.empty()) {
+    resizeMatrix(spliceBegSignals);
+    resizeMatrix(spliceEndSignals);
+    for (unsigned i = 0; i < numAlns; ++i) initSpliceSignals(i);
+  }
+
+  transform(Amat.begin(), Amat.end(), Aexp.begin(), scaledExp);
+  transform(Dmat.begin(), Dmat.end(), Dexp.begin(), scaledExp);
+  // if x/scale < about -745, then exp(x/scale) will be exactly 0.0
 }
 
 void SplitAligner::flipSpliceSignals() {
@@ -951,14 +951,18 @@ void SplitAligner::setSpliceParams(double splicePriorIn,
   if (splicePrior <= 0.0) return;
 
   const double rootTwoPi = std::sqrt(8.0 * std::atan(1.0));
+  double s2 = sdevLogDist * sdevLogDist;
   spliceTerm1 = -std::log(sdevLogDist * rootTwoPi / splicePrior);
-  spliceTerm2 = -1.0 / (2.0 * sdevLogDist * sdevLogDist);
+  spliceTerm2 = -0.5 / s2;
+
+  double max1 = spliceTerm1 - meanLogDist + s2 * 0.5;
+  int max2 = std::floor(scale * max1 + 0.5);
+  maxSpliceScore = std::max(max2, jumpScore);
 
   // Set maxSpliceDist so as to ignore splices whose score would be
   // less than jumpScore.  By solving this quadratic equation:
   // spliceTerm1 + spliceTerm2 * (logDist - meanLogDist)^2 - logDist =
   // jumpScore / scale
-  double s2 = sdevLogDist * sdevLogDist;
   double r = s2 + 2 * (spliceTerm1 - meanLogDist - jumpScore / scale);
   if (r < 0) {
     maxSpliceDist = 0;
@@ -1053,6 +1057,8 @@ void SplitAligner::setSpliceSignals() {
     spliceBegProbs[i] = scaledExp(spliceBegScores[i]);
     spliceEndProbs[i] = scaledExp(spliceEndScores[i]);
   }
+
+  maxSpliceBegEndScore = arrayMax(spliceBegScores) + arrayMax(spliceEndScores);
 }
 
 void SplitAligner::printParameters() const {
@@ -1080,10 +1086,11 @@ void SplitAligner::printParameters() const {
   }
 }
 
-void SplitAligner::readGenome(const std::string& baseName) {
-  std::string alphabetLetters;
-  unsigned seqCount = -1;
-  unsigned volumes = -1;
+static void readPrjFile(const std::string& baseName,
+			std::string& alphabetLetters,
+			unsigned& seqCount,
+			unsigned& volumes) {
+  seqCount = volumes = -1;
 
   std::string fileName = baseName + ".prj";
   std::ifstream f(fileName.c_str());
@@ -1098,19 +1105,40 @@ void SplitAligner::readGenome(const std::string& baseName) {
     if( word == "volumes" ) iss >> volumes;
   }
 
-  if (alphabetLetters != "ACGT" || seqCount+1 == 0)
-    err("can't read file: " + fileName);
+  if (alphabetLetters != "ACGT") err("can't read file: " + fileName);
+}
 
-  if (volumes+1 > 0 && volumes > 1)
-    err("can't read multi-volume lastdb files, sorry");
+void SplitAligner::readGenomeVolume(const std::string& baseName,
+				    unsigned seqCount,
+				    unsigned volumeNumber) {
+  if (seqCount + 1 == 0) err("can't read: " + baseName);
 
-  genome.fromFiles(baseName, seqCount, 0);
+  genome[volumeNumber].fromFiles(baseName, seqCount, 0);
 
-  for (unsigned i = 0; i < seqCount; ++i) {
-    std::string n = genome.seqName(i);
-    if (!chromosomeIndex.insert(std::make_pair(n, i)).second)
+  for (unsigned long long i = 0; i < seqCount; ++i) {
+    std::string n = genome[volumeNumber].seqName(i);
+    unsigned long long j = i * maxGenomeVolumes() + volumeNumber;
+    if (!chromosomeIndex.insert(std::make_pair(n, j)).second)
       err("duplicate sequence name: " + n);
   }
+}
+
+void SplitAligner::readGenome(const std::string& baseName) {
+  std::string alphabetLetters;
+  unsigned seqCount, volumes;
+  readPrjFile(baseName, alphabetLetters, seqCount, volumes);
+
+  if (volumes + 1 > 0 && volumes > 1) {
+    if (volumes > maxGenomeVolumes()) err("too many volumes: " + baseName);
+    for (unsigned i = 0; i < volumes; ++i) {
+      std::string b = baseName + stringify(i);
+      unsigned c, v;
+      readPrjFile(b, alphabetLetters, c, v);
+      readGenomeVolume(b, c, i);
+    }
+  } else {
+    readGenomeVolume(baseName, seqCount, 0);
+  }
 
   alphabet.fromString(alphabetLetters);
 }
diff --git a/src/split/cbrc_split_aligner.hh b/src/split/cbrc_split_aligner.hh
index 73161dc..53db5b0 100644
--- a/src/split/cbrc_split_aligner.hh
+++ b/src/split/cbrc_split_aligner.hh
@@ -59,9 +59,20 @@ public:
     // Outputs some algorithm parameters on lines starting with "#"
     void printParameters() const;
 
-    // Prepares to analyze some candidate alignments for one query sequence
-    void initForOneQuery(std::vector<UnsplitAlignment>::const_iterator beg,
-			 std::vector<UnsplitAlignment>::const_iterator end);
+    // Prepares to analyze some candidate alignments for one query
+    // sequence: sets the number of DP matrix cells (and thus memory)
+    void layout(std::vector<UnsplitAlignment>::const_iterator beg,
+		std::vector<UnsplitAlignment>::const_iterator end);
+
+    // The number of cells in each dynamic programming matrix
+    size_t cellsPerDpMatrix() const
+    { return matrixRowOrigins[numAlns-1] + dpEnd(numAlns-1) + 1; }
+
+    // Bytes of memory needed for the current query sequence (roughly)
+    size_t memory(bool isViterbi, bool isBothSpliceStrands) const;
+
+    // Call this before viterbi/forward/backward, and after layout
+    void initMatricesForOneQuery();
 
     long viterbi();  // returns the optimal split-alignment score
 
@@ -143,6 +154,8 @@ private:
     double spliceTerm1;
     double spliceTerm2;
     unsigned maxSpliceDist;
+    int maxSpliceScore;
+    int maxSpliceBegEndScore;
     std::vector<unsigned> spliceBegCoords;
     std::vector<unsigned> spliceEndCoords;
     std::vector<unsigned char> spliceBegSignals;
@@ -153,31 +166,29 @@ private:
     std::vector<int> spliceScoreTable;  // lookup table
     std::vector<double> spliceProbTable;  // lookup table
     unsigned spliceTableSize;
-    MultiSequence genome;
+    MultiSequence genome[32];
     Alphabet alphabet;
-    typedef std::map<std::string, unsigned> StringNumMap;
+    typedef std::map<std::string, unsigned long long> StringNumMap;
     StringNumMap chromosomeIndex;
     int spliceBegScores[4 * 4 + 1];  // donor score for any dinucleotide
     int spliceEndScores[4 * 4 + 1];  // acceptor score for any dinucleotide
     double spliceBegProbs[4 * 4 + 1];
     double spliceEndProbs[4 * 4 + 1];
-    unsigned spliceBegSignal(unsigned coordinate, char strand) const;
-    unsigned spliceEndSignal(unsigned coordinate, char strand) const;
-    int spliceBegScore(unsigned i, unsigned j) const {
+    int spliceBegScore(size_t ij) const {
       if (chromosomeIndex.empty()) return 0;
-      return spliceBegScores[cell(spliceBegSignals, i, j)];
+      return spliceBegScores[spliceBegSignals[ij]];
     }
-    int spliceEndScore(unsigned i, unsigned j) const {
+    int spliceEndScore(size_t ij) const {
       if (chromosomeIndex.empty()) return 0;
-      return spliceEndScores[cell(spliceEndSignals, i, j)];
+      return spliceEndScores[spliceEndSignals[ij]];
     }
-    double spliceBegProb(unsigned i, unsigned j) const {
+    double spliceBegProb(size_t ij) const {
       if (chromosomeIndex.empty()) return 1;
-      return spliceBegProbs[cell(spliceBegSignals, i, j)];
+      return spliceBegProbs[spliceBegSignals[ij]];
     }
-    double spliceEndProb(unsigned i, unsigned j) const {
+    double spliceEndProb(size_t ij) const {
       if (chromosomeIndex.empty()) return 1;
-      return spliceEndProbs[cell(spliceEndSignals, i, j)];
+      return spliceEndProbs[spliceEndSignals[ij]];
     }
     int calcSpliceScore(double dist) const;
     int spliceScore(unsigned d) const
@@ -186,11 +197,16 @@ private:
     { return scaledExp(calcSpliceScore(dist)); }
     double spliceProb(unsigned d) const
     { return d < spliceTableSize ? spliceProbTable[d] : calcSpliceProb(d); }
-    void initSpliceCoords();
-    void initSpliceSignals();
+    void initSpliceCoords(unsigned i);
+    void initSpliceSignals(unsigned i);
     void initRnameAndStrandIds();
+    void initRbegsAndEnds();
+    static size_t maxGenomeVolumes() { return sizeof genome / sizeof *genome; }
+    void readGenomeVolume(const std::string& baseName,
+			  unsigned seqCount, unsigned volumeNumber);
 
-    int maxJumpScore() const;
+    void dpExtensionMinScores(int maxJumpScore,
+			      size_t& minScore1, size_t& minScore2) const;
 
     void updateInplayAlnIndicesF(unsigned& sortedAlnPos,
 				 unsigned& oldNumInplay,
@@ -239,7 +255,7 @@ private:
       // stored in a flat vector.  There are numAlns rows, and row i
       // has dpEnd(i) - dpBeg(i) + 1 cells.  (The final cell per row
       // is used in some matrices but not others.)
-      m.resize(matrixRowOrigins[numAlns-1] + dpEnd(numAlns-1) + 1);
+      m.resize(cellsPerDpMatrix());
     }
 
     double probFromSpliceF(unsigned i, unsigned j, unsigned oldNumInplay,
@@ -249,15 +265,7 @@ private:
 			   unsigned& oldInplayPos) const;
 
     void calcBaseScores(unsigned i);
-    void calcInsScores(unsigned i);
-    void calcDelScores(unsigned i);
-    void calcScoreMatrices();
-    void initForwardBackward();
     void initDpBounds();
-
-    long scoreIndel(unsigned i, unsigned j) const {
-      return cell(Vmat, i, j) + cell(Dmat, i, j);
-    }
 };
 
 }
diff --git a/src/split/last-split-main.cc b/src/split/last-split-main.cc
index 01cea8f..6453048 100644
--- a/src/split/last-split-main.cc
+++ b/src/split/last-split-main.cc
@@ -11,6 +11,16 @@
 #include <cstdlib>  // EXIT_SUCCESS, EXIT_FAILURE
 #include <iostream>
 
+static size_t defaultBytes(bool isSplicedAlignment) {
+  size_t b = isSplicedAlignment ? 8 : 8 * 1024;
+  for (int i = 0; i < 3; ++i) {
+    size_t n = b * 1024;
+    if (n / 1024 != b) return -1;
+    b = n;
+  }
+  return b;
+}
+
 static void run(int argc, char* argv[]) {
   LastSplitOptions opts;
 
@@ -22,6 +32,7 @@ static void run(int argc, char* argv[]) {
   opts.mismap = 0.01;
   opts.score = -1;
   opts.no_split = false;
+  opts.bytes = 0;
   opts.verbose = false;
   opts.isSplicedAlignment = false;
 
@@ -58,11 +69,12 @@ Options:\n\
     + cbrc::stringify(opts.mismap) + ")\n\
  -s, --score=INT    minimum alignment score (default=e OR e+t*ln[100])\n\
  -n, --no-split     write original, not split, alignments\n\
+ -b, --bytes=B      maximum memory (default=8T for split, 8G for spliced)\n\
  -v, --verbose      be verbose\n\
  -V, --version      show version information and exit\n\
 ";
 
-  const char sOpts[] = "hg:d:c:t:M:S:m:s:nvV";
+  const char sOpts[] = "hg:d:c:t:M:S:m:s:nb:vV";
 
   static struct option lOpts[] = {
     { "help",     no_argument,       0, 'h' },
@@ -75,6 +87,7 @@ Options:\n\
     { "mismap",   required_argument, 0, 'm' },
     { "score",    required_argument, 0, 's' },
     { "no-split", no_argument,       0, 'n' },
+    { "bytes",    required_argument, 0, 'b' },
     { "verbose",  no_argument,       0, 'v' },
     { "version",  no_argument,       0, 'V' },
     { 0, 0, 0, 0}
@@ -119,6 +132,9 @@ Options:\n\
     case 'n':
       opts.no_split = true;
       break;
+    case 'b':
+      cbrc::unstringifySize(opts.bytes, optarg);
+      break;
     case 'v':
       opts.verbose = true;
       break;
@@ -130,6 +146,8 @@ Options:\n\
     }
   }
 
+  if (!opts.bytes) opts.bytes = defaultBytes(opts.isSplicedAlignment);
+
   opts.inputFileNames.assign(argv + optind, argv + argc);
 
   if (opts.inputFileNames.empty()) opts.inputFileNames.push_back("-");
diff --git a/src/split/last-split.cc b/src/split/last-split.cc
index 7db0432..b18a156 100644
--- a/src/split/last-split.cc
+++ b/src/split/last-split.cc
@@ -127,7 +127,16 @@ static void doOneQuery(std::vector<cbrc::UnsplitAlignment>::const_iterator beg,
 		       cbrc::SplitAligner& sa, const LastSplitOptions& opts,
 		       bool isAlreadySplit) {
   if (opts.verbose) std::cerr << beg->qname << "\t" << (end - beg);
-  sa.initForOneQuery(beg, end);
+  sa.layout(beg, end);
+  if (opts.verbose) std::cerr << "\tcells=" << sa.cellsPerDpMatrix();
+  size_t bytes = sa.memory(!opts.no_split, opts.direction == 2);
+  if (bytes > opts.bytes) {
+    if (opts.verbose) std::cerr << "\n";
+    std::cerr << "last-split: skipping sequence " << beg->qname
+	      << " (" << bytes << " bytes)\n";
+    return;
+  }
+  sa.initMatricesForOneQuery();
 
   if (opts.direction != 0) {
     sa.forward();
diff --git a/src/split/last-split.hh b/src/split/last-split.hh
index 96a86c3..5c13c7f 100644
--- a/src/split/last-split.hh
+++ b/src/split/last-split.hh
@@ -13,6 +13,7 @@
 #ifndef LAST_SPLIT_HH
 #define LAST_SPLIT_HH
 
+#include <stddef.h>  // size_t
 #include <string>
 #include <vector>
 
@@ -26,6 +27,7 @@ struct LastSplitOptions {
   double mismap;
   int score;
   bool no_split;
+  size_t bytes;
   bool verbose;
   bool isSplicedAlignment;
   std::vector<std::string> inputFileNames;
diff --git a/src/stringify.hh b/src/stringify.hh
index ea846c7..6971726 100644
--- a/src/stringify.hh
+++ b/src/stringify.hh
@@ -38,15 +38,20 @@ void unstringifySize( T& x, const std::string& s ){
 
   std::string suffix;
   if( iss >> suffix ){
-    int multiplier;
-    /**/ if( suffix == "K" ) multiplier = 1024;  // "KibiBytes"
-    else if( suffix == "M" ) multiplier = 1024*1024;  // "MebiBytes"
-    else if( suffix == "G" ) multiplier = 1024*1024*1024;  // "GibiBytes"
+    int i;
+    /**/ if( suffix == "K" ) i = 1;  // "KibiBytes"
+    else if( suffix == "M" ) i = 2;  // "MebiBytes"
+    else if( suffix == "G" ) i = 3;  // "GibiBytes"
+    else if( suffix == "T" ) i = 4;  // "TebiBytes"
+    else if( suffix == "P" ) i = 5;  // "PebiBytes"
     else throw std::runtime_error( "can't interpret: " + s );
-    if( (x * multiplier) / multiplier != x ){  // check for overflow
-      throw std::runtime_error( "can't interpret (too big): " + s );
+
+    while( i-- ){
+      if( (x * 1024) / 1024 != x ){
+	throw std::runtime_error( "can't interpret (too big): " + s );
+      }
+      x *= 1024;
     }
-    x *= multiplier;
   }
 
   if( !(iss >> std::ws).eof() ){
@@ -54,5 +59,6 @@ void unstringifySize( T& x, const std::string& s ){
   }
 }
 
-}  // end namespace cbrc
-#endif  // STRINGIFY_HH
+}
+
+#endif
diff --git a/src/version.hh b/src/version.hh
index 406a230..9420baa 100644
--- a/src/version.hh
+++ b/src/version.hh
@@ -1 +1 @@
-"759"
+"809"

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/last-align.git