[pkg-java] r16252 - in branches/libcommons-compress-java/squeeze: . debian src src/main src/main/java src/main/java/org src/main/java/org/apache src/main/java/org/apache/commons src/main/java/org/apache/commons/compress src/main/java/org/apache/commons/compress/compressors src/main/java/org/apache/commons/compress/compressors/bzip2

Sat Jul 21 20:12:45 UTC 2012

Author: nomadium-guest
Date: 2012-07-21 20:12:44 +0000 (Sat, 21 Jul 2012)
New Revision: 16252

Added:
   branches/libcommons-compress-java/squeeze/src/
   branches/libcommons-compress-java/squeeze/src/main/
   branches/libcommons-compress-java/squeeze/src/main/java/
   branches/libcommons-compress-java/squeeze/src/main/java/org/
   branches/libcommons-compress-java/squeeze/src/main/java/org/apache/
   branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/
   branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/
   branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/compressors/
   branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/compressors/bzip2/
   branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java
   branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java
   branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java
Removed:
   branches/libcommons-compress-java/squeeze/debian/patches/
   branches/libcommons-compress-java/squeeze/debian/source/
Modified:
   branches/libcommons-compress-java/squeeze/debian/changelog
Log:
Revert update source format to 3.0

Modified: branches/libcommons-compress-java/squeeze/debian/changelog
===================================================================

--- branches/libcommons-compress-java/squeeze/debian/changelog	2012-07-21 14:20:17 UTC (rev 16251)
+++ branches/libcommons-compress-java/squeeze/debian/changelog	2012-07-21 20:12:44 UTC (rev 16252)
@@ -1,11 +1,10 @@
-libcommons-compress-java (1.0-1+squeeze1) stable-security; urgency=low
+libcommons-compress-java (1.0-1+squeeze1) stable; urgency=low
 
   * Team upload.
   * Fix an algorithmic complexity vulnerability in the sorting algorithms
     in bzip2 compressing stream. CVE-2012-2098. (Closes: #674448).
-  * Update source format to 3.0 (quilt).
 
- -- Miguel Landaeta <miguel at miguel.cc>  Tue, 17 Jul 2012 19:53:00 -0430
+ -- Miguel Landaeta <miguel at miguel.cc>  Sat, 21 Jul 2012 15:12:36 -0430
 
 libcommons-compress-java (1.0-1) unstable; urgency=low
 

Added: branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java
===================================================================
--- branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java	                        (rev 0)
+++ branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/compressors/bzip2/BZip2CompressorOutputStream.java	2012-07-21 20:12:44 UTC (rev 16252)
@@ -0,0 +1,1332 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.compressors.bzip2;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.commons.compress.compressors.CompressorOutputStream;
+
+/**
+ * An output stream that compresses into the BZip2 format into another stream.
+ *
+ * <p>
+ * The compression requires large amounts of memory. Thus you should call the
+ * {@link #close() close()} method as soon as possible, to force
+ * <tt>BZip2CompressorOutputStream</tt> to release the allocated memory.
+ * </p>
+ *
+ * <p> You can shrink the amount of allocated memory and maybe raise
+ * the compression speed by choosing a lower blocksize, which in turn
+ * may cause a lower compression ratio. You can avoid unnecessary
+ * memory allocation by avoiding using a blocksize which is bigger
+ * than the size of the input.  </p>
+ *
+ * <p> You can compute the memory usage for compressing by the
+ * following formula: </p>
+ *
+ * <pre>
+ * <code>400k + (9 * blocksize)</code>.
+ * </pre>
+ *
+ * <p> To get the memory required for decompression by {@link
+ * BZip2CompressorInputStream} use </p>
+ *
+ * <pre>
+ * <code>65k + (5 * blocksize)</code>.
+ * </pre>
+ *
+ * <table width="100%" border="1">
+ * <colgroup> <col width="33%" /> <col width="33%" /> <col width="33%" />
+ * </colgroup>
+ * <tr>
+ * <th colspan="3">Memory usage by blocksize</th>
+ * </tr>
+ * <tr>
+ * <th align="right">Blocksize</th> <th align="right">Compression<br>
+ * memory usage</th> <th align="right">Decompression<br>
+ * memory usage</th>
+ * </tr>
+ * <tr>
+ * <td align="right">100k</td>
+ * <td align="right">1300k</td>
+ * <td align="right">565k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">200k</td>
+ * <td align="right">2200k</td>
+ * <td align="right">1065k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">300k</td>
+ * <td align="right">3100k</td>
+ * <td align="right">1565k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">400k</td>
+ * <td align="right">4000k</td>
+ * <td align="right">2065k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">500k</td>
+ * <td align="right">4900k</td>
+ * <td align="right">2565k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">600k</td>
+ * <td align="right">5800k</td>
+ * <td align="right">3065k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">700k</td>
+ * <td align="right">6700k</td>
+ * <td align="right">3565k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">800k</td>
+ * <td align="right">7600k</td>
+ * <td align="right">4065k</td>
+ * </tr>
+ * <tr>
+ * <td align="right">900k</td>
+ * <td align="right">8500k</td>
+ * <td align="right">4565k</td>
+ * </tr>
+ * </table>
+ *
+ * <p>
+ * For decompression <tt>BZip2CompressorInputStream</tt> allocates less memory if the
+ * bzipped input is smaller than one block.
+ * </p>
+ *
+ * <p>
+ * Instances of this class are not threadsafe.
+ * </p>
+ *
+ * <p>
+ * TODO: Update to BZip2 1.0.1
+ * </p>
+ * @NotThreadSafe
+ */
+public class BZip2CompressorOutputStream extends CompressorOutputStream
+    implements BZip2Constants {
+
+    /**
+     * The minimum supported blocksize <tt> == 1</tt>.
+     */
+    public static final int MIN_BLOCKSIZE = 1;
+
+    /**
+     * The maximum supported blocksize <tt> == 9</tt>.
+     */
+    public static final int MAX_BLOCKSIZE = 9;
+
+    private static final int GREATER_ICOST = 15;
+    private static final int LESSER_ICOST = 0;
+
+    private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
+                                          final Data dat, final int alphaSize,
+                                          final int maxLen) {
+        /*
+         * Nodes and heap entries run from 1. Entry 0 for both the heap and
+         * nodes is a sentinel.
+         */
+        final int[] heap = dat.heap;
+        final int[] weight = dat.weight;
+        final int[] parent = dat.parent;
+
+        for (int i = alphaSize; --i >= 0;) {
+            weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
+        }
+
+        for (boolean tooLong = true; tooLong;) {
+            tooLong = false;
+
+            int nNodes = alphaSize;
+            int nHeap = 0;
+            heap[0] = 0;
+            weight[0] = 0;
+            parent[0] = -2;
+
+            for (int i = 1; i <= alphaSize; i++) {
+                parent[i] = -1;
+                nHeap++;
+                heap[nHeap] = i;
+
+                int zz = nHeap;
+                int tmp = heap[zz];
+                while (weight[tmp] < weight[heap[zz >> 1]]) {
+                    heap[zz] = heap[zz >> 1];
+                    zz >>= 1;
+                }
+                heap[zz] = tmp;
+            }
+
+            while (nHeap > 1) {
+                int n1 = heap[1];
+                heap[1] = heap[nHeap];
+                nHeap--;
+
+                int yy = 0;
+                int zz = 1;
+                int tmp = heap[1];
+
+                while (true) {
+                    yy = zz << 1;
+
+                    if (yy > nHeap) {
+                        break;
+                    }
+
+                    if ((yy < nHeap)
+                        && (weight[heap[yy + 1]] < weight[heap[yy]])) {
+                        yy++;
+                    }
+
+                    if (weight[tmp] < weight[heap[yy]]) {
+                        break;
+                    }
+
+                    heap[zz] = heap[yy];
+                    zz = yy;
+                }
+
+                heap[zz] = tmp;
+
+                int n2 = heap[1];
+                heap[1] = heap[nHeap];
+                nHeap--;
+
+                yy = 0;
+                zz = 1;
+                tmp = heap[1];
+
+                while (true) {
+                    yy = zz << 1;
+
+                    if (yy > nHeap) {
+                        break;
+                    }
+
+                    if ((yy < nHeap)
+                        && (weight[heap[yy + 1]] < weight[heap[yy]])) {
+                        yy++;
+                    }
+
+                    if (weight[tmp] < weight[heap[yy]]) {
+                        break;
+                    }
+
+                    heap[zz] = heap[yy];
+                    zz = yy;
+                }
+
+                heap[zz] = tmp;
+                nNodes++;
+                parent[n1] = parent[n2] = nNodes;
+
+                final int weight_n1 = weight[n1];
+                final int weight_n2 = weight[n2];
+                weight[nNodes] = ((weight_n1 & 0xffffff00)
+                                  + (weight_n2 & 0xffffff00))
+                    | (1 + (((weight_n1 & 0x000000ff)
+                             > (weight_n2 & 0x000000ff))
+                            ? (weight_n1 & 0x000000ff)
+                            : (weight_n2 & 0x000000ff)));
+
+                parent[nNodes] = -1;
+                nHeap++;
+                heap[nHeap] = nNodes;
+
+                tmp = 0;
+                zz = nHeap;
+                tmp = heap[zz];
+                final int weight_tmp = weight[tmp];
+                while (weight_tmp < weight[heap[zz >> 1]]) {
+                    heap[zz] = heap[zz >> 1];
+                    zz >>= 1;
+                }
+                heap[zz] = tmp;
+
+            }
+
+            for (int i = 1; i <= alphaSize; i++) {
+                int j = 0;
+                int k = i;
+
+                for (int parent_k; (parent_k = parent[k]) >= 0;) {
+                    k = parent_k;
+                    j++;
+                }
+
+                len[i - 1] = (byte) j;
+                if (j > maxLen) {
+                    tooLong = true;
+                }
+            }
+
+            if (tooLong) {
+                for (int i = 1; i < alphaSize; i++) {
+                    int j = weight[i] >> 8;
+                    j = 1 + (j >> 1);
+                    weight[i] = j << 8;
+                }
+            }
+        }
+    }
+
+    /**
+     * Index of the last char in the block, so the block size == last + 1.
+     */
+    private int last;
+
+    /**
+     * Always: in the range 0 .. 9. The current block size is 100000 * this
+     * number.
+     */
+    private final int blockSize100k;
+
+    private int bsBuff;
+    private int bsLive;
+    private final CRC crc = new CRC();
+
+    private int nInUse;
+
+    private int nMTF;
+
+    private int currentChar = -1;
+    private int runLength = 0;
+
+    private int blockCRC;
+    private int combinedCRC;
+    private final int allowableBlockSize;
+
+    /**
+     * All memory intensive stuff.
+     */
+    private Data data;
+    private BlockSort blockSorter;
+
+    private OutputStream out;
+
+    /**
+     * Chooses a blocksize based on the given length of the data to compress.
+     *
+     * @return The blocksize, between {@link #MIN_BLOCKSIZE} and
+     *         {@link #MAX_BLOCKSIZE} both inclusive. For a negative
+     *         <tt>inputLength</tt> this method returns <tt>MAX_BLOCKSIZE</tt>
+     *         always.
+     *
+     * @param inputLength
+     *            The length of the data which will be compressed by
+     *            <tt>CBZip2OutputStream</tt>.
+     */
+    public static int chooseBlockSize(long inputLength) {
+        return (inputLength > 0) ? (int) Math
+            .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE;
+    }
+
+    /**
+     * Constructs a new <tt>CBZip2OutputStream</tt> with a blocksize of 900k.
+     *
+     * @param out 
+     *            the destination stream.
+     *
+     * @throws IOException
+     *             if an I/O error occurs in the specified stream.
+     * @throws NullPointerException
+     *             if <code>out == null</code>.
+     */
+    public BZip2CompressorOutputStream(final OutputStream out)
+        throws IOException {
+        this(out, MAX_BLOCKSIZE);
+    }
+
+    /**
+     * Constructs a new <tt>CBZip2OutputStream</tt> with specified blocksize.
+     *
+     * @param out
+     *            the destination stream.
+     * @param blockSize
+     *            the blockSize as 100k units.
+     *
+     * @throws IOException
+     *             if an I/O error occurs in the specified stream.
+     * @throws IllegalArgumentException
+     *             if <code>(blockSize < 1) || (blockSize > 9)</code>.
+     * @throws NullPointerException
+     *             if <code>out == null</code>.
+     *
+     * @see #MIN_BLOCKSIZE
+     * @see #MAX_BLOCKSIZE
+     */
+    public BZip2CompressorOutputStream(final OutputStream out,
+                                       final int blockSize)
+        throws IOException {
+        super();
+
+        if (blockSize < 1) {
+            throw new IllegalArgumentException("blockSize(" + blockSize
+                                               + ") < 1");
+        }
+        if (blockSize > 9) {
+            throw new IllegalArgumentException("blockSize(" + blockSize
+                                               + ") > 9");
+        }
+
+        this.blockSize100k = blockSize;
+        /* 20 is just a paranoia constant */
+        this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
+        this.out = out;
+        init();
+    }
+
+    public void write(final int b) throws IOException {
+        if (this.out != null) {
+            write0(b);
+        } else {
+            throw new IOException("closed");
+        }
+    }
+
+    /**
+     * Writes the current byte to the buffer, run-length encoding it
+     * if it has been repeated at least four times (the first step
+     * RLEs sequences of four identical bytes).
+     *
+     * <p>Flushes the current block before writing data if it is
+     * full.</p>
+     *
+     * <p>"write to the buffer" means adding to data.buffer starting
+     * two steps "after" this.last - initially starting at index 1
+     * (not 0) - and updating this.last to point to the last index
+     * written minus 1.</p>
+     */
+    private void writeRun() throws IOException {
+        final int lastShadow = this.last;
+
+        if (lastShadow < this.allowableBlockSize) {
+            final int currentCharShadow = this.currentChar;
+            final Data dataShadow = this.data;
+            dataShadow.inUse[currentCharShadow] = true;
+            final byte ch = (byte) currentCharShadow;
+
+            int runLengthShadow = this.runLength;
+            this.crc.updateCRC(currentCharShadow, runLengthShadow);
+
+            switch (runLengthShadow) {
+            case 1:
+                dataShadow.block[lastShadow + 2] = ch;
+                this.last = lastShadow + 1;
+                break;
+
+            case 2:
+                dataShadow.block[lastShadow + 2] = ch;
+                dataShadow.block[lastShadow + 3] = ch;
+                this.last = lastShadow + 2;
+                break;
+
+            case 3: {
+                final byte[] block = dataShadow.block;
+                block[lastShadow + 2] = ch;
+                block[lastShadow + 3] = ch;
+                block[lastShadow + 4] = ch;
+                this.last = lastShadow + 3;
+            }
+                break;
+
+            default: {
+                runLengthShadow -= 4;
+                dataShadow.inUse[runLengthShadow] = true;
+                final byte[] block = dataShadow.block;
+                block[lastShadow + 2] = ch;
+                block[lastShadow + 3] = ch;
+                block[lastShadow + 4] = ch;
+                block[lastShadow + 5] = ch;
+                block[lastShadow + 6] = (byte) runLengthShadow;
+                this.last = lastShadow + 5;
+            }
+                break;
+
+            }
+        } else {
+            endBlock();
+            initBlock();
+            writeRun();
+        }
+    }
+
+    /**
+     * Overriden to close the stream.
+     */
+    protected void finalize() throws Throwable {
+        finish();
+        super.finalize();
+    }
+
+
+    public void finish() throws IOException {
+        if (out != null) {
+            try {
+                if (this.runLength > 0) {
+                    writeRun();
+                }
+                this.currentChar = -1;
+                endBlock();
+                endCompression();
+            } finally {
+                this.out = null;
+                this.data = null;
+                this.blockSorter = null;
+            }
+        }
+    }
+
+    public void close() throws IOException {
+        if (out != null) {
+            OutputStream outShadow = this.out;
+            finish();
+            outShadow.close();
+        }
+    }
+
+    public void flush() throws IOException {
+        OutputStream outShadow = this.out;
+        if (outShadow != null) {
+            outShadow.flush();
+        }
+    }
+
+    /**
+     * Writes magic bytes like BZ on the first position of the stream
+     * and bytes indiciating the file-format, which is 
+     * huffmanised, followed by a digit indicating blockSize100k.
+     * @throws IOException if the magic bytes could not been written
+     */
+    private void init() throws IOException {
+        bsPutUByte('B');
+        bsPutUByte('Z');
+
+        this.data = new Data(this.blockSize100k);
+        this.blockSorter = new BlockSort(this.data);
+
+        // huffmanised magic bytes
+        bsPutUByte('h');
+        bsPutUByte('0' + this.blockSize100k);
+
+        this.combinedCRC = 0;
+        initBlock();
+    }
+
+    private void initBlock() {
+        // blockNo++;
+        this.crc.initialiseCRC();
+        this.last = -1;
+        // ch = 0;
+
+        boolean[] inUse = this.data.inUse;
+        for (int i = 256; --i >= 0;) {
+            inUse[i] = false;
+        }
+    }
+
+    private void endBlock() throws IOException {
+        this.blockCRC = this.crc.getFinalCRC();
+        this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
+        this.combinedCRC ^= this.blockCRC;
+
+        // empty block at end of file
+        if (this.last == -1) {
+            return;
+        }
+
+        /* sort the block and establish posn of original string */
+        blockSort();
+
+        /*
+         * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
+         * :-). A 32 bit value does not really give a strong enough guarantee
+         * that the value will not appear by chance in the compressed
+         * datastream. Worst-case probability of this event, for a 900k block,
+         * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48
+         * bits. For a compressed file of size 100Gb -- about 100000 blocks --
+         * only a 48-bit marker will do. NB: normal compression/ decompression
+         * donot rely on these statistical properties. They are only important
+         * when trying to recover blocks from damaged files.
+         */
+        bsPutUByte(0x31);
+        bsPutUByte(0x41);
+        bsPutUByte(0x59);
+        bsPutUByte(0x26);
+        bsPutUByte(0x53);
+        bsPutUByte(0x59);
+
+        /* Now the block's CRC, so it is in a known place. */
+        bsPutInt(this.blockCRC);
+
+        /* Now a single bit indicating no randomisation. */
+        bsW(1, 0);
+
+        /* Finally, block's contents proper. */
+        moveToFrontCodeAndSend();
+    }
+
+    private void endCompression() throws IOException {
+        /*
+         * Now another magic 48-bit number, 0x177245385090, to indicate the end
+         * of the last block. (sqrt(pi), if you want to know. I did want to use
+         * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me
+         * to feel statistically comfortable. Call me paranoid.)
+         */
+        bsPutUByte(0x17);
+        bsPutUByte(0x72);
+        bsPutUByte(0x45);
+        bsPutUByte(0x38);
+        bsPutUByte(0x50);
+        bsPutUByte(0x90);
+
+        bsPutInt(this.combinedCRC);
+        bsFinishedWithStream();
+    }
+
+    /**
+     * Returns the blocksize parameter specified at construction time.
+     */
+    public final int getBlockSize() {
+        return this.blockSize100k;
+    }
+
+    public void write(final byte[] buf, int offs, final int len)
+        throws IOException {
+        if (offs < 0) {
+            throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
+        }
+        if (len < 0) {
+            throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
+        }
+        if (offs + len > buf.length) {
+            throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
+                                                + len + ") > buf.length("
+                                                + buf.length + ").");
+        }
+        if (this.out == null) {
+            throw new IOException("stream closed");
+        }
+
+        for (int hi = offs + len; offs < hi;) {
+            write0(buf[offs++]);
+        }
+    }
+
+    /**
+     * Keeps track of the last bytes written and implicitly performs
+     * run-length encoding as the first step of the bzip2 algorithm.
+     */
+    private void write0(int b) throws IOException {
+        if (this.currentChar != -1) {
+            b &= 0xff;
+            if (this.currentChar == b) {
+                if (++this.runLength > 254) {
+                    writeRun();
+                    this.currentChar = -1;
+                    this.runLength = 0;
+                }
+                // else nothing to do
+            } else {
+                writeRun();
+                this.runLength = 1;
+                this.currentChar = b;
+            }
+        } else {
+            this.currentChar = b & 0xff;
+            this.runLength++;
+        }
+    }
+
+    private static void hbAssignCodes(final int[] code, final byte[] length,
+                                      final int minLen, final int maxLen,
+                                      final int alphaSize) {
+        int vec = 0;
+        for (int n = minLen; n <= maxLen; n++) {
+            for (int i = 0; i < alphaSize; i++) {
+                if ((length[i] & 0xff) == n) {
+                    code[i] = vec;
+                    vec++;
+                }
+            }
+            vec <<= 1;
+        }
+    }
+
+    private void bsFinishedWithStream() throws IOException {
+        while (this.bsLive > 0) {
+            int ch = this.bsBuff >> 24;
+            this.out.write(ch); // write 8-bit
+            this.bsBuff <<= 8;
+            this.bsLive -= 8;
+        }
+    }
+
+    private void bsW(final int n, final int v) throws IOException {
+        final OutputStream outShadow = this.out;
+        int bsLiveShadow = this.bsLive;
+        int bsBuffShadow = this.bsBuff;
+
+        while (bsLiveShadow >= 8) {
+            outShadow.write(bsBuffShadow >> 24); // write 8-bit
+            bsBuffShadow <<= 8;
+            bsLiveShadow -= 8;
+        }
+
+        this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
+        this.bsLive = bsLiveShadow + n;
+    }
+
+    private void bsPutUByte(final int c) throws IOException {
+        bsW(8, c);
+    }
+
+    private void bsPutInt(final int u) throws IOException {
+        bsW(8, (u >> 24) & 0xff);
+        bsW(8, (u >> 16) & 0xff);
+        bsW(8, (u >> 8) & 0xff);
+        bsW(8, u & 0xff);
+    }
+
+    private void sendMTFValues() throws IOException {
+        final byte[][] len = this.data.sendMTFValues_len;
+        final int alphaSize = this.nInUse + 2;
+
+        for (int t = N_GROUPS; --t >= 0;) {
+            byte[] len_t = len[t];
+            for (int v = alphaSize; --v >= 0;) {
+                len_t[v] = GREATER_ICOST;
+            }
+        }
+
+        /* Decide how many coding tables to use */
+        // assert (this.nMTF > 0) : this.nMTF;
+        final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3
+            : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6;
+
+        /* Generate an initial set of coding tables */
+        sendMTFValues0(nGroups, alphaSize);
+
+        /*
+         * Iterate up to N_ITERS times to improve the tables.
+         */
+        final int nSelectors = sendMTFValues1(nGroups, alphaSize);
+
+        /* Compute MTF values for the selectors. */
+        sendMTFValues2(nGroups, nSelectors);
+
+        /* Assign actual codes for the tables. */
+        sendMTFValues3(nGroups, alphaSize);
+
+        /* Transmit the mapping table. */
+        sendMTFValues4();
+
+        /* Now the selectors. */
+        sendMTFValues5(nGroups, nSelectors);
+
+        /* Now the coding tables. */
+        sendMTFValues6(nGroups, alphaSize);
+
+        /* And finally, the block data proper */
+        sendMTFValues7(nSelectors);
+    }
+
+    private void sendMTFValues0(final int nGroups, final int alphaSize) {
+        final byte[][] len = this.data.sendMTFValues_len;
+        final int[] mtfFreq = this.data.mtfFreq;
+
+        int remF = this.nMTF;
+        int gs = 0;
+
+        for (int nPart = nGroups; nPart > 0; nPart--) {
+            final int tFreq = remF / nPart;
+            int ge = gs - 1;
+            int aFreq = 0;
+
+            for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
+                aFreq += mtfFreq[++ge];
+            }
+
+            if ((ge > gs) && (nPart != nGroups) && (nPart != 1)
+                && (((nGroups - nPart) & 1) != 0)) {
+                aFreq -= mtfFreq[ge--];
+            }
+
+            final byte[] len_np = len[nPart - 1];
+            for (int v = alphaSize; --v >= 0;) {
+                if ((v >= gs) && (v <= ge)) {
+                    len_np[v] = LESSER_ICOST;
+                } else {
+                    len_np[v] = GREATER_ICOST;
+                }
+            }
+
+            gs = ge + 1;
+            remF -= aFreq;
+        }
+    }
+
+    private int sendMTFValues1(final int nGroups, final int alphaSize) {
+        final Data dataShadow = this.data;
+        final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
+        final int[] fave = dataShadow.sendMTFValues_fave;
+        final short[] cost = dataShadow.sendMTFValues_cost;
+        final char[] sfmap = dataShadow.sfmap;
+        final byte[] selector = dataShadow.selector;
+        final byte[][] len = dataShadow.sendMTFValues_len;
+        final byte[] len_0 = len[0];
+        final byte[] len_1 = len[1];
+        final byte[] len_2 = len[2];
+        final byte[] len_3 = len[3];
+        final byte[] len_4 = len[4];
+        final byte[] len_5 = len[5];
+        final int nMTFShadow = this.nMTF;
+
+        int nSelectors = 0;
+
+        for (int iter = 0; iter < N_ITERS; iter++) {
+            for (int t = nGroups; --t >= 0;) {
+                fave[t] = 0;
+                int[] rfreqt = rfreq[t];
+                for (int i = alphaSize; --i >= 0;) {
+                    rfreqt[i] = 0;
+                }
+            }
+
+            nSelectors = 0;
+
+            for (int gs = 0; gs < this.nMTF;) {
+                /* Set group start & end marks. */
+
+                /*
+                 * Calculate the cost of this group as coded by each of the
+                 * coding tables.
+                 */
+
+                final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
+
+                if (nGroups == N_GROUPS) {
+                    // unrolled version of the else-block
+
+                    short cost0 = 0;
+                    short cost1 = 0;
+                    short cost2 = 0;
+                    short cost3 = 0;
+                    short cost4 = 0;
+                    short cost5 = 0;
+
+                    for (int i = gs; i <= ge; i++) {
+                        final int icv = sfmap[i];
+                        cost0 += len_0[icv] & 0xff;
+                        cost1 += len_1[icv] & 0xff;
+                        cost2 += len_2[icv] & 0xff;
+                        cost3 += len_3[icv] & 0xff;
+                        cost4 += len_4[icv] & 0xff;
+                        cost5 += len_5[icv] & 0xff;
+                    }
+
+                    cost[0] = cost0;
+                    cost[1] = cost1;
+                    cost[2] = cost2;
+                    cost[3] = cost3;
+                    cost[4] = cost4;
+                    cost[5] = cost5;
+
+                } else {
+                    for (int t = nGroups; --t >= 0;) {
+                        cost[t] = 0;
+                    }
+
+                    for (int i = gs; i <= ge; i++) {
+                        final int icv = sfmap[i];
+                        for (int t = nGroups; --t >= 0;) {
+                            cost[t] += len[t][icv] & 0xff;
+                        }
+                    }
+                }
+
+                /*
+                 * Find the coding table which is best for this group, and
+                 * record its identity in the selector table.
+                 */
+                int bt = -1;
+                for (int t = nGroups, bc = 999999999; --t >= 0;) {
+                    final int cost_t = cost[t];
+                    if (cost_t < bc) {
+                        bc = cost_t;
+                        bt = t;
+                    }
+                }
+
+                fave[bt]++;
+                selector[nSelectors] = (byte) bt;
+                nSelectors++;
+
+                /*
+                 * Increment the symbol frequencies for the selected table.
+                 */
+                final int[] rfreq_bt = rfreq[bt];
+                for (int i = gs; i <= ge; i++) {
+                    rfreq_bt[sfmap[i]]++;
+                }
+
+                gs = ge + 1;
+            }
+
+            /*
+             * Recompute the tables based on the accumulated frequencies.
+             */
+            for (int t = 0; t < nGroups; t++) {
+                hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
+            }
+        }
+
+        return nSelectors;
+    }
+
+    private void sendMTFValues2(final int nGroups, final int nSelectors) {
+        // assert (nGroups < 8) : nGroups;
+
+        final Data dataShadow = this.data;
+        byte[] pos = dataShadow.sendMTFValues2_pos;
+
+        for (int i = nGroups; --i >= 0;) {
+            pos[i] = (byte) i;
+        }
+
+        for (int i = 0; i < nSelectors; i++) {
+            final byte ll_i = dataShadow.selector[i];
+            byte tmp = pos[0];
+            int j = 0;
+
+            while (ll_i != tmp) {
+                j++;
+                byte tmp2 = tmp;
+                tmp = pos[j];
+                pos[j] = tmp2;
+            }
+
+            pos[0] = tmp;
+            dataShadow.selectorMtf[i] = (byte) j;
+        }
+    }
+
+    private void sendMTFValues3(final int nGroups, final int alphaSize) {
+        int[][] code = this.data.sendMTFValues_code;
+        byte[][] len = this.data.sendMTFValues_len;
+
+        for (int t = 0; t < nGroups; t++) {
+            int minLen = 32;
+            int maxLen = 0;
+            final byte[] len_t = len[t];
+            for (int i = alphaSize; --i >= 0;) {
+                final int l = len_t[i] & 0xff;
+                if (l > maxLen) {
+                    maxLen = l;
+                }
+                if (l < minLen) {
+                    minLen = l;
+                }
+            }
+
+            // assert (maxLen <= 20) : maxLen;
+            // assert (minLen >= 1) : minLen;
+
+            hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
+        }
+    }
+
+    private void sendMTFValues4() throws IOException {
+        final boolean[] inUse = this.data.inUse;
+        final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
+
+        for (int i = 16; --i >= 0;) {
+            inUse16[i] = false;
+            final int i16 = i * 16;
+            for (int j = 16; --j >= 0;) {
+                if (inUse[i16 + j]) {
+                    inUse16[i] = true;
+                }
+            }
+        }
+
+        for (int i = 0; i < 16; i++) {
+            bsW(1, inUse16[i] ? 1 : 0);
+        }
+
+        final OutputStream outShadow = this.out;
+        int bsLiveShadow = this.bsLive;
+        int bsBuffShadow = this.bsBuff;
+
+        for (int i = 0; i < 16; i++) {
+            if (inUse16[i]) {
+                final int i16 = i * 16;
+                for (int j = 0; j < 16; j++) {
+                    // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
+                    while (bsLiveShadow >= 8) {
+                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
+                        bsBuffShadow <<= 8;
+                        bsLiveShadow -= 8;
+                    }
+                    if (inUse[i16 + j]) {
+                        bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
+                    }
+                    bsLiveShadow++;
+                }
+            }
+        }
+
+        this.bsBuff = bsBuffShadow;
+        this.bsLive = bsLiveShadow;
+    }
+
+    private void sendMTFValues5(final int nGroups, final int nSelectors)
+        throws IOException {
+        bsW(3, nGroups);
+        bsW(15, nSelectors);
+
+        final OutputStream outShadow = this.out;
+        final byte[] selectorMtf = this.data.selectorMtf;
+
+        int bsLiveShadow = this.bsLive;
+        int bsBuffShadow = this.bsBuff;
+
+        for (int i = 0; i < nSelectors; i++) {
+            for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
+                // inlined: bsW(1, 1);
+                while (bsLiveShadow >= 8) {
+                    outShadow.write(bsBuffShadow >> 24);
+                    bsBuffShadow <<= 8;
+                    bsLiveShadow -= 8;
+                }
+                bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
+                bsLiveShadow++;
+            }
+
+            // inlined: bsW(1, 0);
+            while (bsLiveShadow >= 8) {
+                outShadow.write(bsBuffShadow >> 24);
+                bsBuffShadow <<= 8;
+                bsLiveShadow -= 8;
+            }
+            // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
+            bsLiveShadow++;
+        }
+
+        this.bsBuff = bsBuffShadow;
+        this.bsLive = bsLiveShadow;
+    }
+
+    private void sendMTFValues6(final int nGroups, final int alphaSize)
+        throws IOException {
+        final byte[][] len = this.data.sendMTFValues_len;
+        final OutputStream outShadow = this.out;
+
+        int bsLiveShadow = this.bsLive;
+        int bsBuffShadow = this.bsBuff;
+
+        for (int t = 0; t < nGroups; t++) {
+            byte[] len_t = len[t];
+            int curr = len_t[0] & 0xff;
+
+            // inlined: bsW(5, curr);
+            while (bsLiveShadow >= 8) {
+                outShadow.write(bsBuffShadow >> 24); // write 8-bit
+                bsBuffShadow <<= 8;
+                bsLiveShadow -= 8;
+            }
+            bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
+            bsLiveShadow += 5;
+
+            for (int i = 0; i < alphaSize; i++) {
+                int lti = len_t[i] & 0xff;
+                while (curr < lti) {
+                    // inlined: bsW(2, 2);
+                    while (bsLiveShadow >= 8) {
+                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
+                        bsBuffShadow <<= 8;
+                        bsLiveShadow -= 8;
+                    }
+                    bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
+                    bsLiveShadow += 2;
+
+                    curr++; /* 10 */
+                }
+
+                while (curr > lti) {
+                    // inlined: bsW(2, 3);
+                    while (bsLiveShadow >= 8) {
+                        outShadow.write(bsBuffShadow >> 24); // write 8-bit
+                        bsBuffShadow <<= 8;
+                        bsLiveShadow -= 8;
+                    }
+                    bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
+                    bsLiveShadow += 2;
+
+                    curr--; /* 11 */
+                }
+
+                // inlined: bsW(1, 0);
+                while (bsLiveShadow >= 8) {
+                    outShadow.write(bsBuffShadow >> 24); // write 8-bit
+                    bsBuffShadow <<= 8;
+                    bsLiveShadow -= 8;
+                }
+                // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
+                bsLiveShadow++;
+            }
+        }
+
+        this.bsBuff = bsBuffShadow;
+        this.bsLive = bsLiveShadow;
+    }
+
+    private void sendMTFValues7(final int nSelectors) throws IOException {
+        final Data dataShadow = this.data;
+        final byte[][] len = dataShadow.sendMTFValues_len;
+        final int[][] code = dataShadow.sendMTFValues_code;
+        final OutputStream outShadow = this.out;
+        final byte[] selector = dataShadow.selector;
+        final char[] sfmap = dataShadow.sfmap;
+        final int nMTFShadow = this.nMTF;
+
+        int selCtr = 0;
+
+        int bsLiveShadow = this.bsLive;
+        int bsBuffShadow = this.bsBuff;
+
+        for (int gs = 0; gs < nMTFShadow;) {
+            final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
+            final int selector_selCtr = selector[selCtr] & 0xff;
+            final int[] code_selCtr = code[selector_selCtr];
+            final byte[] len_selCtr = len[selector_selCtr];
+
+            while (gs <= ge) {
+                final int sfmap_i = sfmap[gs];
+
+                //
+                // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
+                // code_selCtr[sfmap_i]);
+                //
+                while (bsLiveShadow >= 8) {
+                    outShadow.write(bsBuffShadow >> 24);
+                    bsBuffShadow <<= 8;
+                    bsLiveShadow -= 8;
+                }
+                final int n = len_selCtr[sfmap_i] & 0xFF;
+                bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
+                bsLiveShadow += n;
+
+                gs++;
+            }
+
+            gs = ge + 1;
+            selCtr++;
+        }
+
+        this.bsBuff = bsBuffShadow;
+        this.bsLive = bsLiveShadow;
+    }
+
+    private void moveToFrontCodeAndSend() throws IOException {
+        bsW(24, this.data.origPtr);
+        generateMTFValues();
+        sendMTFValues();
+    }
+
+    private void blockSort() {
+        blockSorter.blockSort(data, last);
+    }
+
+    /*
+     * Performs Move-To-Front on the Burrows-Wheeler transformed
+     * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
+     * run-length-encoded form.
+     *
+     * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
+     */
+    private void generateMTFValues() {
+        final int lastShadow = this.last;
+        final Data dataShadow = this.data;
+        final boolean[] inUse = dataShadow.inUse;
+        final byte[] block = dataShadow.block;
+        final int[] fmap = dataShadow.fmap;
+        final char[] sfmap = dataShadow.sfmap;
+        final int[] mtfFreq = dataShadow.mtfFreq;
+        final byte[] unseqToSeq = dataShadow.unseqToSeq;
+        final byte[] yy = dataShadow.generateMTFValues_yy;
+
+        // make maps
+        int nInUseShadow = 0;
+        for (int i = 0; i < 256; i++) {
+            if (inUse[i]) {
+                unseqToSeq[i] = (byte) nInUseShadow;
+                nInUseShadow++;
+            }
+        }
+        this.nInUse = nInUseShadow;
+
+        final int eob = nInUseShadow + 1;
+
+        for (int i = eob; i >= 0; i--) {
+            mtfFreq[i] = 0;
+        }
+
+        for (int i = nInUseShadow; --i >= 0;) {
+            yy[i] = (byte) i;
+        }
+
+        int wr = 0;
+        int zPend = 0;
+
+        for (int i = 0; i <= lastShadow; i++) {
+            final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
+            byte tmp = yy[0];
+            int j = 0;
+
+            while (ll_i != tmp) {
+                j++;
+                byte tmp2 = tmp;
+                tmp = yy[j];
+                yy[j] = tmp2;
+            }
+            yy[0] = tmp;
+
+            if (j == 0) {
+                zPend++;
+            } else {
+                if (zPend > 0) {
+                    zPend--;
+                    while (true) {
+                        if ((zPend & 1) == 0) {
+                            sfmap[wr] = RUNA;
+                            wr++;
+                            mtfFreq[RUNA]++;
+                        } else {
+                            sfmap[wr] = RUNB;
+                            wr++;
+                            mtfFreq[RUNB]++;
+                        }
+
+                        if (zPend >= 2) {
+                            zPend = (zPend - 2) >> 1;
+                        } else {
+                            break;
+                        }
+                    }
+                    zPend = 0;
+                }
+                sfmap[wr] = (char) (j + 1);
+                wr++;
+                mtfFreq[j + 1]++;
+            }
+        }
+
+        if (zPend > 0) {
+            zPend--;
+            while (true) {
+                if ((zPend & 1) == 0) {
+                    sfmap[wr] = RUNA;
+                    wr++;
+                    mtfFreq[RUNA]++;
+                } else {
+                    sfmap[wr] = RUNB;
+                    wr++;
+                    mtfFreq[RUNB]++;
+                }
+
+                if (zPend >= 2) {
+                    zPend = (zPend - 2) >> 1;
+                } else {
+                    break;
+                }
+            }
+        }
+
+        sfmap[wr] = (char) eob;
+        mtfFreq[eob]++;
+        this.nMTF = wr + 1;
+    }
+
+    static final class Data extends Object {
+
+        // with blockSize 900k
+        /* maps unsigned byte => "does it occur in block" */
+        final boolean[] inUse = new boolean[256]; // 256 byte
+        final byte[] unseqToSeq = new byte[256]; // 256 byte
+        final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
+        final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
+        final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
+
+        final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
+        final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
+        // byte
+        final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
+        // byte
+        final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
+        final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
+        final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
+        // byte
+        final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
+        final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
+
+        final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
+        final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
+        final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
+
+        // ------------
+        // 333408 byte
+
+        /* holds the RLEd block of original data starting at index 1.
+         * After sorting the last byte added to the buffer is at index
+         * 0. */
+        final byte[] block; // 900021 byte
+        /* maps index in Burrows-Wheeler transformed block => index of
+         * byte in original block */
+        final int[] fmap; // 3600000 byte
+        final char[] sfmap; // 3600000 byte
+        // ------------
+        // 8433529 byte
+        // ============
+
+        /**
+         * Index of original line in Burrows-Wheeler table.
+         *
+         * <p>This is the index in fmap that points to the last byte
+         * of the original data.</p>
+         */
+        int origPtr;
+
+        Data(int blockSize100k) {
+            super();
+
+            final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE;
+            this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
+            this.fmap = new int[n];
+            this.sfmap = new char[2 * n];
+        }
+
+    }
+
+}

Added: branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java
===================================================================
--- branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java	                        (rev 0)
+++ branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSort.java	2012-07-21 20:12:44 UTC (rev 16252)
@@ -0,0 +1,1081 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.compressors.bzip2;
+
+import java.util.BitSet;
+
+/**
+ * Encapsulates the Burrows-Wheeler sorting algorithm needed by {@link
+ * BZip2CompressorOutputStream}.
+ *
+ * <p>This class is based on a Java port of Julian Seward's
+ * blocksort.c in his libbzip2</p>
+ *
+ * <p>The Burrows-Wheeler transform is a reversible transform of the
+ * original data that is supposed to group similiar bytes close to
+ * each other.  The idea is to sort all permutations of the input and
+ * only keep the last byte of each permutation.  E.g. for "Commons
+ * Compress" you'd get:</p>
+ *
+ * <pre>
+ *  CompressCommons
+ * Commons Compress
+ * CompressCommons 
+ * essCommons Compr
+ * mmons CompressCo
+ * mons CompressCom
+ * mpressCommons Co
+ * ns CompressCommo
+ * ommons CompressC
+ * ompressCommons C
+ * ons CompressComm
+ * pressCommons Com
+ * ressCommons Comp
+ * s CompressCommon
+ * sCommons Compres
+ * ssCommons Compre
+ * </pre>
+ *
+ * <p>Which results in a new text "ss romooCCmmpnse", in adition the
+ * index of the first line that contained the original text is kept -
+ * in this case it is 1.  The idea is that in a long English text all
+ * permutations that start with "he" are likely suffixes of a "the" and
+ * thus they end in "t" leading to a larger block of "t"s that can
+ * better be compressed by the subsequent Move-to-Front, run-length
+ * und Huffman encoding steps.</p>
+ *
+ * <p>For more information see for example:</p>
+ * <ul>
+ *   <li><a
+ *   href="http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf">Burrows,
+ *   M. and Wheeler, D.: A Block-sorting Lossless Data Compression
+ *   Algorithm</a></li>
+ *   <li><a href="http://webglimpse.net/pubs/suffix.pdf">Manber, U. and
+ *   Myers, G.: Suffix arrays: A new method for on-line string
+ *   searches</a></li>
+ *   <li><a
+ *   href="http://www.cs.tufts.edu/~nr/comp150fp/archive/bob-sedgewick/fast-strings.pdf">Bentley,
+ *   J.L. and Sedgewick, R.: Fast Algorithms for Sorting and Searching
+ *   Strings</a></li>
+ * </ul>
+ *
+ * @NotThreadSafe
+ */
+class BlockSort {
+
+    /*
+     * Some of the constructs used in the C code cannot be ported
+     * literally to Java - for example macros, unsigned types.  Some
+     * code has been hand-tuned to improve performance.  In order to
+     * avoid memory pressure some structures are reused for several
+     * blocks and some memory is even shared between sorting and the
+     * MTF stage even though either algorithm uses it for its own
+     * purpose.
+     *
+     * Comments preserved from the actual C code are prefixed with
+     * "LBZ2:".
+     */
+
+    /*
+     * 2012-05-20 Stefan Bodewig:
+     *
+     * This class seems to mix several revisions of libbzip2's code.
+     * The mainSort function and those used by it look closer to the
+     * 0.9.5 version but show some variations introduced later.  At
+     * the same time the logic of Compress 1.4 to randomize the block
+     * on bad input has been dropped after libbzip2 0.9.0 and replaced
+     * by a fallback sorting algorithm.
+     *
+     * I've added the fallbackSort function of 1.0.6 and tried to
+     * integrate it with the existing code without touching too much.
+     * I've also removed the now unused randomization code.
+     */
+
+    /*
+     * LBZ2: If you are ever unlucky/improbable enough to get a stack
+     * overflow whilst sorting, increase the following constant and
+     * try again. In practice I have never seen the stack go above 27
+     * elems, so the following limit seems very generous.
+     */
+    private static final int QSORT_STACK_SIZE = 1000;
+
+    private static final int FALLBACK_QSORT_STACK_SIZE = 100;
+
+    private static final int STACK_SIZE =
+        QSORT_STACK_SIZE < FALLBACK_QSORT_STACK_SIZE
+        ? FALLBACK_QSORT_STACK_SIZE : QSORT_STACK_SIZE;
+
+    /*
+     * Used when sorting. If too many long comparisons happen, we stop sorting,
+     * and use fallbackSort instead.
+     */
+    private int workDone;
+    private int workLimit;
+    private boolean firstAttempt;
+
+    private final int[] stack_ll = new int[STACK_SIZE]; // 4000 byte
+    private final int[] stack_hh = new int[STACK_SIZE]; // 4000 byte
+    private final int[] stack_dd = new int[QSORT_STACK_SIZE]; // 4000 byte
+
+    private final int[] mainSort_runningOrder = new int[256]; // 1024 byte
+    private final int[] mainSort_copy = new int[256]; // 1024 byte
+    private final boolean[] mainSort_bigDone = new boolean[256]; // 256 byte
+
+    private final int[] ftab = new int[65537]; // 262148 byte
+
+    /**
+     * Array instance identical to Data's sfmap, both are used only
+     * temporarily and indepently, so we do not need to allocate
+     * additional memory.
+     */
+    private final char[] quadrant;
+
+    BlockSort(final BZip2CompressorOutputStream.Data data) {
+        this.quadrant = data.sfmap;
+    }
+
+    void blockSort(final BZip2CompressorOutputStream.Data data, final int last) {
+        this.workLimit = WORK_FACTOR * last;
+        this.workDone = 0;
+        this.firstAttempt = true;
+
+        if (last + 1 < 10000) {
+            fallbackSort(data, last);
+        } else {
+            mainSort(data, last);
+
+            if (this.firstAttempt && (this.workDone > this.workLimit)) {
+                fallbackSort(data, last);
+            }
+        }
+
+        final int[] fmap = data.fmap;
+        data.origPtr = -1;
+        for (int i = 0; i <= last; i++) {
+            if (fmap[i] == 0) {
+                data.origPtr = i;
+                break;
+            }
+        }
+
+        // assert (data.origPtr != -1) : data.origPtr;
+    }
+
+    /**
+     * Adapt fallbackSort to the expected interface of the rest of the
+     * code, in particular deal with the fact that block starts at
+     * offset 1 (in libbzip2 1.0.6 it starts at 0).
+     */
+    final void fallbackSort(final BZip2CompressorOutputStream.Data data,
+                            final int last) {
+        data.block[0] = data.block[last + 1];
+        fallbackSort(data.fmap, data.block, last + 1);
+        for (int i = 0; i < last + 1; i++) {
+            --data.fmap[i];
+        }
+        for (int i = 0; i < last + 1; i++) {
+            if (data.fmap[i] == -1) {
+                data.fmap[i] = last;
+                break;
+            }
+        }
+    }
+
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
+/*--- LBZ2: Fallback O(N log(N)^2) sorting        ---*/
+/*--- algorithm, for repetitive blocks      ---*/
+/*---------------------------------------------*/
+
+    /*
+     * This is the fallback sorting algorithm libbzip2 1.0.6 uses for
+     * repetitive or very short inputs.
+     *
+     * The idea is inspired by Manber-Myers string suffix sorting
+     * algorithm.  First a bucket sort places each permutation of the
+     * block into a bucket based on its first byte.  Permutations are
+     * represented by pointers to their first character kept in
+     * (partially) sorted order inside the array ftab.
+     *
+     * The next step visits all buckets in order and performs a
+     * quicksort on all permutations of the bucket based on the index
+     * of the bucket the second byte of the permutation belongs to,
+     * thereby forming new buckets.  When arrived here the
+     * permutations are sorted up to the second character and we have
+     * buckets of permutations that are identical up to two
+     * characters.
+     *
+     * Repeat the step of quicksorting each bucket, now based on the
+     * bucket holding the sequence of the third and forth character
+     * leading to four byte buckets.  Repeat this doubling of bucket
+     * sizes until all buckets only contain single permutations or the
+     * bucket size exceeds the block size.
+     *
+     * I.e.
+     *
+     * "abraba" form three buckets for the chars "a", "b", and "r" in
+     * the first step with
+     *
+     * fmap = { 'a:' 5, 3, 0, 'b:' 4, 1, 'r', 2 }
+     *
+     * when looking at the bucket of "a"s the second characters are in
+     * the buckets that start with fmap-index 0 (rolled over), 3 and 3
+     * respectively, forming two new buckets "aa" and "ab", so we get
+     *
+     * fmap = { 'aa:' 5, 'ab:' 3, 0, 'ba:' 4, 'br': 1, 'ra:' 2 }
+     *
+     * since the last bucket only contained a single item it didn't
+     * have to be sorted at all.
+     *
+     * There now is just one bucket with more than one permutation
+     * that remains to be sorted.  For the permutation that starts
+     * with index 3 the third and forth char are in bucket 'aa' at
+     * index 0 and for the one starting at block index 0 they are in
+     * bucket 'ra' with sort index 5.  The fully sorted order then becomes.
+     *
+     * fmap = { 5, 3, 0, 4, 1, 2 }
+     * 
+     */
+
+    /**
+     * @param fmap points to the index of the starting point of a
+     *        permutation inside the block of data in the current
+     *        partially sorted order
+     * @param eclass points from the index of a character inside the
+     *        block to the first index in fmap that contains the
+     *        bucket of its suffix that is sorted in this step.
+     * @param lo lower boundary of the fmap-interval to be sorted 
+     * @param hi upper boundary of the fmap-interval to be sorted 
+     */
+    private void fallbackSimpleSort(int[] fmap, 
+                                    int[] eclass, 
+                                    int lo, 
+                                    int hi) {
+        if (lo == hi) {
+            return;
+        }
+
+        int j;
+        if (hi - lo > 3) {
+            for (int i = hi - 4; i >= lo; i--) {
+                int tmp = fmap[i];
+                int ec_tmp = eclass[tmp];
+                for (j = i + 4; j <= hi && ec_tmp > eclass[fmap[j]];
+                     j += 4) {
+                    fmap[j - 4] = fmap[j];
+                }
+                fmap[j - 4] = tmp;
+            }
+        }
+
+        for (int i = hi - 1; i >= lo; i--) {
+            int tmp = fmap[i];
+            int ec_tmp = eclass[tmp];
+            for (j = i + 1; j <= hi && ec_tmp > eclass[fmap[j]]; j++) {
+                fmap[j - 1] = fmap[j];
+            }
+            fmap[j-1] = tmp;
+        }
+    }
+
+    private static final int FALLBACK_QSORT_SMALL_THRESH = 10;
+
+    /**
+     * swaps two values in fmap
+     */
+    private void fswap(int[] fmap, int zz1, int zz2) {
+        int zztmp = fmap[zz1];
+        fmap[zz1] = fmap[zz2];
+        fmap[zz2] = zztmp;
+    }
+
+    /**
+     * swaps two intervals starting at yyp1 and yyp2 of length yyn inside fmap.
+     */
+    private void fvswap(int[] fmap, int yyp1, int yyp2, int yyn) {
+        while (yyn > 0) {
+            fswap(fmap, yyp1, yyp2);
+            yyp1++; yyp2++; yyn--;
+        }
+    }
+
+    private int fmin(int a, int b) {
+        return a < b ? a : b;
+    }
+
+    private void fpush(int sp, int lz, int hz) {
+        stack_ll[sp] = lz;
+        stack_hh[sp] = hz;
+    }
+
+    private int[] fpop(int sp) {
+        return new int[] { stack_ll[sp], stack_hh[sp] };
+    }
+
+    /**
+     * @param fmap points to the index of the starting point of a
+     *        permutation inside the block of data in the current
+     *        partially sorted order
+     * @param eclass points from the index of a character inside the
+     *        block to the first index in fmap that contains the
+     *        bucket of its suffix that is sorted in this step.
+     * @param loSt lower boundary of the fmap-interval to be sorted 
+     * @param hiSt upper boundary of the fmap-interval to be sorted 
+     */
+    private void fallbackQSort3(int[] fmap, 
+                                int[] eclass, 
+                                int loSt, 
+                                int hiSt) {
+        int lo, unLo, ltLo, hi, unHi, gtHi, n;
+
+        long r = 0;
+        int sp = 0;
+        fpush(sp++, loSt, hiSt);
+
+        while (sp > 0) {
+            int[] s = fpop(--sp);
+            lo = s[0]; hi = s[1];
+
+            if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
+                fallbackSimpleSort(fmap, eclass, lo, hi);
+                continue;
+            }
+
+            /* LBZ2: Random partitioning.  Median of 3 sometimes fails to
+               avoid bad cases.  Median of 9 seems to help but 
+               looks rather expensive.  This too seems to work but
+               is cheaper.  Guidance for the magic constants 
+               7621 and 32768 is taken from Sedgewick's algorithms
+               book, chapter 35.
+            */
+            r = ((r * 7621) + 1) % 32768;
+            long r3 = r % 3, med;
+            if (r3 == 0) {
+                med = eclass[fmap[lo]]; 
+            } else if (r3 == 1) {
+                med = eclass[fmap[(lo + hi) >>> 1]];
+            } else {
+                med = eclass[fmap[hi]];
+            }
+
+            unLo = ltLo = lo;
+            unHi = gtHi = hi;
+
+            // looks like the ternary partition attributed to Wegner
+            // in the cited Sedgewick paper
+            while (true) {
+                while (true) {
+                    if (unLo > unHi) {
+                        break;
+                    }
+                    n = eclass[fmap[unLo]] - (int) med;
+                    if (n == 0) { 
+                        fswap(fmap, unLo, ltLo); 
+                        ltLo++; unLo++; 
+                        continue; 
+                    }
+                    if (n > 0) {
+                        break;
+                    }
+                    unLo++;
+                }
+                while (true) {
+                    if (unLo > unHi) {
+                        break;
+                    }
+                    n = eclass[fmap[unHi]] - (int) med;
+                    if (n == 0) {
+                        fswap(fmap, unHi, gtHi); 
+                        gtHi--; unHi--; 
+                        continue; 
+                    }
+                    if (n < 0) {
+                        break;
+                    }
+                    unHi--;
+                }
+                if (unLo > unHi) {
+                    break;
+                }
+                fswap(fmap, unLo, unHi); unLo++; unHi--;
+            }
+
+            if (gtHi < ltLo) {
+                continue;
+            }
+
+            n = fmin(ltLo - lo, unLo - ltLo);
+            fvswap(fmap, lo, unLo - n, n);
+            int m = fmin(hi - gtHi, gtHi - unHi);
+            fvswap(fmap, unHi + 1, hi - m + 1, m);
+
+            n = lo + unLo - ltLo - 1;
+            m = hi - (gtHi - unHi) + 1;
+
+            if (n - lo > hi - m) {
+                fpush(sp++, lo, n);
+                fpush(sp++, m, hi);
+            } else {
+                fpush(sp++, m, hi);
+                fpush(sp++, lo, n);
+            }
+        }
+    }
+
+
+/*---------------------------------------------*/
+
+    private int[] eclass;
+
+    private int[] getEclass() {
+        return eclass == null
+            ? (eclass = new int[quadrant.length / 2]) : eclass;
+    }
+
+    /*
+     * The C code uses an array of ints (each int holding 32 flags) to
+     * represents the bucket-start flags (bhtab).  It also contains
+     * optimizations to skip over 32 consecutively set or
+     * consecutively unset bits on word boundaries at once.  For now
+     * I've chosen to use the simpler but potentially slower code
+     * using BitSet - also in the hope that using the BitSet#nextXXX
+     * methods may be fast enough.
+     */
+
+    /**
+     * @param fmap points to the index of the starting point of a
+     *        permutation inside the block of data in the current
+     *        partially sorted order
+     * @param block the original data
+     * @param nblock size of the block
+     * @param off offset of first byte to sort in block
+     */
+    final void fallbackSort(int[] fmap, byte[] block, int nblock) {
+        final int[] ftab = new int[257];
+        int H, i, j, k, l, r, cc, cc1;
+        int nNotDone;
+        int nBhtab;
+        final int[] eclass = getEclass();
+
+        for (i = 0; i < nblock; i++) {
+            eclass[i] = 0;
+        }
+        /*--
+          LBZ2: Initial 1-char radix sort to generate
+          initial fmap and initial BH bits.
+          --*/
+        for (i = 0; i < nblock; i++) {
+            ftab[block[i] & 0xff]++;
+        }
+        for (i = 1; i < 257;    i++) {
+            ftab[i] += ftab[i - 1];
+        }
+
+        for (i = 0; i < nblock; i++) {
+            j = block[i] & 0xff;
+            k = ftab[j] - 1;
+            ftab[j] = k;
+            fmap[k] = i;
+        }
+
+        nBhtab = 64 + nblock;
+        BitSet bhtab = new BitSet(nBhtab);
+        for (i = 0; i < 256; i++) {
+            bhtab.set(ftab[i]);
+        }
+
+        /*--
+          LBZ2: Inductively refine the buckets.  Kind-of an
+          "exponential radix sort" (!), inspired by the
+          Manber-Myers suffix array construction algorithm.
+          --*/
+
+        /*-- LBZ2: set sentinel bits for block-end detection --*/
+        for (i = 0; i < 32; i++) { 
+            bhtab.set(nblock + 2 * i);
+            bhtab.clear(nblock + 2 * i + 1);
+        }
+
+        /*-- LBZ2: the log(N) loop --*/
+        H = 1;
+        while (true) {
+
+            j = 0;
+            for (i = 0; i < nblock; i++) {
+                if (bhtab.get(i)) {
+                    j = i;
+                }
+                k = fmap[i] - H;
+                if (k < 0) {
+                    k += nblock;
+                }
+                eclass[k] = j;
+            }
+
+            nNotDone = 0;
+            r = -1;
+            while (true) {
+
+                /*-- LBZ2: find the next non-singleton bucket --*/
+                k = r + 1;
+                k = bhtab.nextClearBit(k);
+                l = k - 1;
+                if (l >= nblock) {
+                    break;
+                }
+                k = bhtab.nextSetBit(k + 1);
+                r = k - 1;
+                if (r >= nblock) {
+                    break;
+                }
+
+                /*-- LBZ2: now [l, r] bracket current bucket --*/
+                if (r > l) {
+                    nNotDone += (r - l + 1);
+                    fallbackQSort3(fmap, eclass, l, r);
+
+                    /*-- LBZ2: scan bucket and generate header bits-- */
+                    cc = -1;
+                    for (i = l; i <= r; i++) {
+                        cc1 = eclass[fmap[i]];
+                        if (cc != cc1) {
+                            bhtab.set(i);
+                            cc = cc1;
+                        }
+                    }
+                }
+            }
+
+            H *= 2;
+            if (H > nblock || nNotDone == 0) {
+                break;
+            }
+        }
+    }
+
+/*---------------------------------------------*/
+
+    /*
+     * LBZ2: Knuth's increments seem to work better than Incerpi-Sedgewick here.
+     * Possibly because the number of elems to sort is usually small, typically
+     * <= 20.
+     */
+    private static final int[] INCS = { 1, 4, 13, 40, 121, 364, 1093, 3280,
+                                        9841, 29524, 88573, 265720, 797161,
+                                        2391484 };
+
+    /**
+     * This is the most hammered method of this class.
+     *
+     * <p>
+     * This is the version using unrolled loops. Normally I never use such ones
+     * in Java code. The unrolling has shown a noticable performance improvement
+     * on JRE 1.4.2 (Linux i586 / HotSpot Client). Of course it depends on the
+     * JIT compiler of the vm.
+     * </p>
+     */
+    private boolean mainSimpleSort(final BZip2CompressorOutputStream.Data dataShadow,
+                                   final int lo, final int hi, final int d,
+                                   final int lastShadow) {
+        final int bigN = hi - lo + 1;
+        if (bigN < 2) {
+            return this.firstAttempt && (this.workDone > this.workLimit);
+        }
+
+        int hp = 0;
+        while (INCS[hp] < bigN) {
+            hp++;
+        }
+
+        final int[] fmap = dataShadow.fmap;
+        final char[] quadrant = this.quadrant;
+        final byte[] block = dataShadow.block;
+        final int lastPlus1 = lastShadow + 1;
+        final boolean firstAttemptShadow = this.firstAttempt;
+        final int workLimitShadow = this.workLimit;
+        int workDoneShadow = this.workDone;
+
+        // Following block contains unrolled code which could be shortened by
+        // coding it in additional loops.
+
+        HP: while (--hp >= 0) {
+            final int h = INCS[hp];
+            final int mj = lo + h - 1;
+
+            for (int i = lo + h; i <= hi;) {
+                // copy
+                for (int k = 3; (i <= hi) && (--k >= 0); i++) {
+                    final int v = fmap[i];
+                    final int vd = v + d;
+                    int j = i;
+
+                    // for (int a;
+                    // (j > mj) && mainGtU((a = fmap[j - h]) + d, vd,
+                    // block, quadrant, lastShadow);
+                    // j -= h) {
+                    // fmap[j] = a;
+                    // }
+                    //
+                    // unrolled version:
+
+                    // start inline mainGTU
+                    boolean onceRunned = false;
+                    int a = 0;
+
+                    HAMMER: while (true) {
+                        if (onceRunned) {
+                            fmap[j] = a;
+                            if ((j -= h) <= mj) {
+                                break HAMMER;
+                            }
+                        } else {
+                            onceRunned = true;
+                        }
+
+                        a = fmap[j - h];
+                        int i1 = a + d;
+                        int i2 = vd;
+
+                        // following could be done in a loop, but
+                        // unrolled it for performance:
+                        if (block[i1 + 1] == block[i2 + 1]) {
+                            if (block[i1 + 2] == block[i2 + 2]) {
+                                if (block[i1 + 3] == block[i2 + 3]) {
+                                    if (block[i1 + 4] == block[i2 + 4]) {
+                                        if (block[i1 + 5] == block[i2 + 5]) {
+                                            if (block[(i1 += 6)] == block[(i2 += 6)]) {
+                                                int x = lastShadow;
+                                                X: while (x > 0) {
+                                                    x -= 4;
+
+                                                    if (block[i1 + 1] == block[i2 + 1]) {
+                                                        if (quadrant[i1] == quadrant[i2]) {
+                                                            if (block[i1 + 2] == block[i2 + 2]) {
+                                                                if (quadrant[i1 + 1] == quadrant[i2 + 1]) {
+                                                                    if (block[i1 + 3] == block[i2 + 3]) {
+                                                                        if (quadrant[i1 + 2] == quadrant[i2 + 2]) {
+                                                                            if (block[i1 + 4] == block[i2 + 4]) {
+                                                                                if (quadrant[i1 + 3] == quadrant[i2 + 3]) {
+                                                                                    if ((i1 += 4) >= lastPlus1) {
+                                                                                        i1 -= lastPlus1;
+                                                                                    }
+                                                                                    if ((i2 += 4) >= lastPlus1) {
+                                                                                        i2 -= lastPlus1;
+                                                                                    }
+                                                                                    workDoneShadow++;
+                                                                                    continue X;
+                                                                                } else if ((quadrant[i1 + 3] > quadrant[i2 + 3])) {
+                                                                                    continue HAMMER;
+                                                                                } else {
+                                                                                    break HAMMER;
+                                                                                }
+                                                                            } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
+                                                                                continue HAMMER;
+                                                                            } else {
+                                                                                break HAMMER;
+                                                                            }
+                                                                        } else if ((quadrant[i1 + 2] > quadrant[i2 + 2])) {
+                                                                            continue HAMMER;
+                                                                        } else {
+                                                                            break HAMMER;
+                                                                        }
+                                                                    } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
+                                                                        continue HAMMER;
+                                                                    } else {
+                                                                        break HAMMER;
+                                                                    }
+                                                                } else if ((quadrant[i1 + 1] > quadrant[i2 + 1])) {
+                                                                    continue HAMMER;
+                                                                } else {
+                                                                    break HAMMER;
+                                                                }
+                                                            } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
+                                                                continue HAMMER;
+                                                            } else {
+                                                                break HAMMER;
+                                                            }
+                                                        } else if ((quadrant[i1] > quadrant[i2])) {
+                                                            continue HAMMER;
+                                                        } else {
+                                                            break HAMMER;
+                                                        }
+                                                    } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
+                                                        continue HAMMER;
+                                                    } else {
+                                                        break HAMMER;
+                                                    }
+
+                                                }
+                                                break HAMMER;
+                                            } // while x > 0
+                                            else {
+                                                if ((block[i1] & 0xff) > (block[i2] & 0xff)) {
+                                                    continue HAMMER;
+                                                } else {
+                                                    break HAMMER;
+                                                }
+                                            }
+                                        } else if ((block[i1 + 5] & 0xff) > (block[i2 + 5] & 0xff)) {
+                                            continue HAMMER;
+                                        } else {
+                                            break HAMMER;
+                                        }
+                                    } else if ((block[i1 + 4] & 0xff) > (block[i2 + 4] & 0xff)) {
+                                        continue HAMMER;
+                                    } else {
+                                        break HAMMER;
+                                    }
+                                } else if ((block[i1 + 3] & 0xff) > (block[i2 + 3] & 0xff)) {
+                                    continue HAMMER;
+                                } else {
+                                    break HAMMER;
+                                }
+                            } else if ((block[i1 + 2] & 0xff) > (block[i2 + 2] & 0xff)) {
+                                continue HAMMER;
+                            } else {
+                                break HAMMER;
+                            }
+                        } else if ((block[i1 + 1] & 0xff) > (block[i2 + 1] & 0xff)) {
+                            continue HAMMER;
+                        } else {
+                            break HAMMER;
+                        }
+
+                    } // HAMMER
+                    // end inline mainGTU
+
+                    fmap[j] = v;
+                }
+
+                if (firstAttemptShadow && (i <= hi)
+                    && (workDoneShadow > workLimitShadow)) {
+                    break HP;
+                }
+            }
+        }
+
+        this.workDone = workDoneShadow;
+        return firstAttemptShadow && (workDoneShadow > workLimitShadow);
+    }
+
+/*--
+   LBZ2: The following is an implementation of
+   an elegant 3-way quicksort for strings,
+   described in a paper "Fast Algorithms for
+   Sorting and Searching Strings", by Robert
+   Sedgewick and Jon L. Bentley.
+--*/
+
+    private static void vswap(int[] fmap, int p1, int p2, int n) {
+        n += p1;
+        while (p1 < n) {
+            int t = fmap[p1];
+            fmap[p1++] = fmap[p2];
+            fmap[p2++] = t;
+        }
+    }
+
+    private static byte med3(byte a, byte b, byte c) {
+        return (a < b) ? (b < c ? b : a < c ? c : a) : (b > c ? b : a > c ? c
+                                                        : a);
+    }
+
+    private static final int SMALL_THRESH = 20;
+    private static final int DEPTH_THRESH = 10;
+    private static final int WORK_FACTOR = 30;
+
+    /**
+     * Method "mainQSort3", file "blocksort.c", BZip2 1.0.2
+     */
+    private void mainQSort3(final BZip2CompressorOutputStream.Data dataShadow,
+                            final int loSt, final int hiSt, final int dSt,
+                            final int last) {
+        final int[] stack_ll = this.stack_ll;
+        final int[] stack_hh = this.stack_hh;
+        final int[] stack_dd = this.stack_dd;
+        final int[] fmap = dataShadow.fmap;
+        final byte[] block = dataShadow.block;
+
+        stack_ll[0] = loSt;
+        stack_hh[0] = hiSt;
+        stack_dd[0] = dSt;
+
+        for (int sp = 1; --sp >= 0;) {
+            final int lo = stack_ll[sp];
+            final int hi = stack_hh[sp];
+            final int d = stack_dd[sp];
+
+            if ((hi - lo < SMALL_THRESH) || (d > DEPTH_THRESH)) {
+                if (mainSimpleSort(dataShadow, lo, hi, d, last)) {
+                    return;
+                }
+            } else {
+                final int d1 = d + 1;
+                final int med = med3(block[fmap[lo] + d1],
+                                     block[fmap[hi] + d1], block[fmap[(lo + hi) >>> 1] + d1]) & 0xff;
+
+                int unLo = lo;
+                int unHi = hi;
+                int ltLo = lo;
+                int gtHi = hi;
+
+                while (true) {
+                    while (unLo <= unHi) {
+                        final int n = (block[fmap[unLo] + d1] & 0xff)
+                            - med;
+                        if (n == 0) {
+                            final int temp = fmap[unLo];
+                            fmap[unLo++] = fmap[ltLo];
+                            fmap[ltLo++] = temp;
+                        } else if (n < 0) {
+                            unLo++;
+                        } else {
+                            break;
+                        }
+                    }
+
+                    while (unLo <= unHi) {
+                        final int n = (block[fmap[unHi] + d1] & 0xff)
+                            - med;
+                        if (n == 0) {
+                            final int temp = fmap[unHi];
+                            fmap[unHi--] = fmap[gtHi];
+                            fmap[gtHi--] = temp;
+                        } else if (n > 0) {
+                            unHi--;
+                        } else {
+                            break;
+                        }
+                    }
+
+                    if (unLo <= unHi) {
+                        final int temp = fmap[unLo];
+                        fmap[unLo++] = fmap[unHi];
+                        fmap[unHi--] = temp;
+                    } else {
+                        break;
+                    }
+                }
+
+                if (gtHi < ltLo) {
+                    stack_ll[sp] = lo;
+                    stack_hh[sp] = hi;
+                    stack_dd[sp] = d1;
+                    sp++;
+                } else {
+                    int n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo)
+                        : (unLo - ltLo);
+                    vswap(fmap, lo, unLo - n, n);
+                    int m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi)
+                        : (gtHi - unHi);
+                    vswap(fmap, unLo, hi - m + 1, m);
+
+                    n = lo + unLo - ltLo - 1;
+                    m = hi - (gtHi - unHi) + 1;
+
+                    stack_ll[sp] = lo;
+                    stack_hh[sp] = n;
+                    stack_dd[sp] = d;
+                    sp++;
+
+                    stack_ll[sp] = n + 1;
+                    stack_hh[sp] = m - 1;
+                    stack_dd[sp] = d1;
+                    sp++;
+
+                    stack_ll[sp] = m;
+                    stack_hh[sp] = hi;
+                    stack_dd[sp] = d;
+                    sp++;
+                }
+            }
+        }
+    }
+
+    private static final int SETMASK = (1 << 21);
+    private static final int CLEARMASK = (~SETMASK);
+
+    final void mainSort(final BZip2CompressorOutputStream.Data dataShadow,
+                        final int lastShadow) {
+        final int[] runningOrder = this.mainSort_runningOrder;
+        final int[] copy = this.mainSort_copy;
+        final boolean[] bigDone = this.mainSort_bigDone;
+        final int[] ftab = this.ftab;
+        final byte[] block = dataShadow.block;
+        final int[] fmap = dataShadow.fmap;
+        final char[] quadrant = this.quadrant;
+        final int workLimitShadow = this.workLimit;
+        final boolean firstAttemptShadow = this.firstAttempt;
+
+        // LBZ2: Set up the 2-byte frequency table
+        for (int i = 65537; --i >= 0;) {
+            ftab[i] = 0;
+        }
+
+        /*
+         * In the various block-sized structures, live data runs from 0 to
+         * last+NUM_OVERSHOOT_BYTES inclusive. First, set up the overshoot area
+         * for block.
+         */
+        for (int i = 0; i < BZip2Constants.NUM_OVERSHOOT_BYTES; i++) {
+            block[lastShadow + i + 2] = block[(i % (lastShadow + 1)) + 1];
+        }
+        for (int i = lastShadow + BZip2Constants.NUM_OVERSHOOT_BYTES +1; --i >= 0;) {
+            quadrant[i] = 0;
+        }
+        block[0] = block[lastShadow + 1];
+
+        // LBZ2: Complete the initial radix sort:
+
+        int c1 = block[0] & 0xff;
+        for (int i = 0; i <= lastShadow; i++) {
+            final int c2 = block[i + 1] & 0xff;
+            ftab[(c1 << 8) + c2]++;
+            c1 = c2;
+        }
+
+        for (int i = 1; i <= 65536; i++) {
+            ftab[i] += ftab[i - 1];
+        }
+
+        c1 = block[1] & 0xff;
+        for (int i = 0; i < lastShadow; i++) {
+            final int c2 = block[i + 2] & 0xff;
+            fmap[--ftab[(c1 << 8) + c2]] = i;
+            c1 = c2;
+        }
+
+        fmap[--ftab[((block[lastShadow + 1] & 0xff) << 8) + (block[1] & 0xff)]] = lastShadow;
+
+        /*
+         * LBZ2: Now ftab contains the first loc of every small bucket. Calculate the
+         * running order, from smallest to largest big bucket.
+         */
+        for (int i = 256; --i >= 0;) {
+            bigDone[i] = false;
+            runningOrder[i] = i;
+        }
+
+        for (int h = 364; h != 1;) {
+            h /= 3;
+            for (int i = h; i <= 255; i++) {
+                final int vv = runningOrder[i];
+                final int a = ftab[(vv + 1) << 8] - ftab[vv << 8];
+                final int b = h - 1;
+                int j = i;
+                for (int ro = runningOrder[j - h]; (ftab[(ro + 1) << 8] - ftab[ro << 8]) > a; ro = runningOrder[j
+                                                                                                                - h]) {
+                    runningOrder[j] = ro;
+                    j -= h;
+                    if (j <= b) {
+                        break;
+                    }
+                }
+                runningOrder[j] = vv;
+            }
+        }
+
+        /*
+         * LBZ2: The main sorting loop.
+         */
+        for (int i = 0; i <= 255; i++) {
+            /*
+             * LBZ2: Process big buckets, starting with the least full.
+             */
+            final int ss = runningOrder[i];
+
+            // Step 1:
+            /*
+             * LBZ2: Complete the big bucket [ss] by quicksorting any unsorted small
+             * buckets [ss, j]. Hopefully previous pointer-scanning phases have
+             * already completed many of the small buckets [ss, j], so we don't
+             * have to sort them at all.
+             */
+            for (int j = 0; j <= 255; j++) {
+                final int sb = (ss << 8) + j;
+                final int ftab_sb = ftab[sb];
+                if ((ftab_sb & SETMASK) != SETMASK) {
+                    final int lo = ftab_sb & CLEARMASK;
+                    final int hi = (ftab[sb + 1] & CLEARMASK) - 1;
+                    if (hi > lo) {
+                        mainQSort3(dataShadow, lo, hi, 2, lastShadow);
+                        if (firstAttemptShadow
+                            && (this.workDone > workLimitShadow)) {
+                            return;
+                        }
+                    }
+                    ftab[sb] = ftab_sb | SETMASK;
+                }
+            }
+
+            // Step 2:
+            // LBZ2: Now scan this big bucket so as to synthesise the
+            // sorted order for small buckets [t, ss] for all t != ss.
+
+            for (int j = 0; j <= 255; j++) {
+                copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
+            }
+
+            for (int j = ftab[ss << 8] & CLEARMASK, hj = (ftab[(ss + 1) << 8] & CLEARMASK); j < hj; j++) {
+                final int fmap_j = fmap[j];
+                c1 = block[fmap_j] & 0xff;
+                if (!bigDone[c1]) {
+                    fmap[copy[c1]] = (fmap_j == 0) ? lastShadow : (fmap_j - 1);
+                    copy[c1]++;
+                }
+            }
+
+            for (int j = 256; --j >= 0;) {
+                ftab[(j << 8) + ss] |= SETMASK;
+            }
+
+            // Step 3:
+            /*
+             * LBZ2: The ss big bucket is now done. Record this fact, and update the
+             * quadrant descriptors. Remember to update quadrants in the
+             * overshoot area too, if necessary. The "if (i < 255)" test merely
+             * skips this updating for the last bucket processed, since updating
+             * for the last bucket is pointless.
+             */
+            bigDone[ss] = true;
+
+            if (i < 255) {
+                final int bbStart = ftab[ss << 8] & CLEARMASK;
+                final int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart;
+                int shifts = 0;
+
+                while ((bbSize >> shifts) > 65534) {
+                    shifts++;
+                }
+
+                for (int j = 0; j < bbSize; j++) {
+                    final int a2update = fmap[bbStart + j];
+                    final char qVal = (char) (j >> shifts);
+                    quadrant[a2update] = qVal;
+                    if (a2update < BZip2Constants.NUM_OVERSHOOT_BYTES) {
+                        quadrant[a2update + lastShadow + 1] = qVal;
+                    }
+                }
+            }
+
+        }
+    }
+
+}

Added: branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java
===================================================================
--- branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java	                        (rev 0)
+++ branches/libcommons-compress-java/squeeze/src/main/java/org/apache/commons/compress/compressors/bzip2/BlockSortTest.java	2012-07-21 20:12:44 UTC (rev 16252)
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.commons.compress.compressors.bzip2;
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+public class BlockSortTest {
+
+    private static final byte[] FIXTURE = { 0, 1, (byte) 252, (byte) 253, (byte) 255,
+                                            (byte) 254, 3, 2, (byte) 128 };
+
+    /*
+      Burrows-Wheeler transform of fixture the manual way:
+
+      * build the matrix
+
+      0, 1, 252, 253, 255, 254, 3, 2, 128
+      1, 252, 253, 255, 254, 3, 2, 128, 0
+      252, 253, 255, 254, 3, 2, 128, 0, 1
+      253, 255, 254, 3, 2, 128, 0, 1, 252
+      255, 254, 3, 2, 128, 0, 1, 252, 253
+      254, 3, 2, 128, 0, 1, 252, 253, 255
+      3, 2, 128, 0, 1, 252, 253, 255, 254
+      2, 128, 0, 1, 252, 253, 255, 254, 3
+      128, 0, 1, 252, 253, 255, 254, 3, 2
+
+      * sort it
+
+      0, 1, 252, 253, 255, 254, 3, 2, 128
+      1, 252, 253, 255, 254, 3, 2, 128, 0
+      2, 128, 0, 1, 252, 253, 255, 254, 3
+      3, 2, 128, 0, 1, 252, 253, 255, 254
+      128, 0, 1, 252, 253, 255, 254, 3, 2
+      252, 253, 255, 254, 3, 2, 128, 0, 1
+      253, 255, 254, 3, 2, 128, 0, 1, 252
+      254, 3, 2, 128, 0, 1, 252, 253, 255
+      255, 254, 3, 2, 128, 0, 1, 252, 253
+
+      * grab last column
+
+      128, 0, 3, 254, 2, 1, 252, 255, 253
+
+        and the original line has been 0
+    */
+
+    private static final byte[] FIXTURE_BWT = { (byte) 128, 0, 3, (byte) 254, 2, 1, 
+                                                (byte) 252, (byte) 255, (byte) 253 };
+
+    private static final int[] FIXTURE_SORTED = {
+        0, 1, 7, 6, 8, 2, 3, 5, 4
+    };
+
+    private static final byte[] FIXTURE2 = { 
+        'C', 'o', 'm', 'm', 'o', 'n', 's', ' ', 'C', 'o', 'm', 'p', 'r', 'e', 's', 's', 
+    };
+
+    private static final byte[] FIXTURE2_BWT = {
+        's', 's', ' ', 'r', 'o', 'm', 'o', 'o', 'C', 'C', 'm', 'm', 'p', 'n', 's', 'e', 
+    };
+
+    @Test
+    public void testSortFixture() {
+        DS ds = setUpFixture();
+        ds.s.blockSort(ds.data, FIXTURE.length - 1);
+        assertFixtureSorted(ds.data);
+        assertEquals(0, ds.data.origPtr);
+    }
+
+    @Test
+    public void testSortFixtureMainSort() {
+        DS ds = setUpFixture();
+        ds.s.mainSort(ds.data, FIXTURE.length - 1);
+        assertFixtureSorted(ds.data);
+    }
+
+    @Test
+    public void testSortFixtureFallbackSort() {
+        DS ds = setUpFixture();
+        ds.s.fallbackSort(ds.data, FIXTURE.length - 1);
+        assertFixtureSorted(ds.data);
+    }
+
+    @Test
+    public void testSortFixture2() {
+        DS ds = setUpFixture2();
+        ds.s.blockSort(ds.data, FIXTURE2.length - 1);
+        assertFixture2Sorted(ds.data);
+        assertEquals(1, ds.data.origPtr);
+    }
+
+    @Test
+    public void testSortFixture2MainSort() {
+        DS ds = setUpFixture2();
+        ds.s.mainSort(ds.data, FIXTURE2.length - 1);
+        assertFixture2Sorted(ds.data);
+    }
+
+    @Test
+    public void testSortFixture2FallbackSort() {
+        DS ds = setUpFixture2();
+        ds.s.fallbackSort(ds.data, FIXTURE2.length - 1);
+        assertFixture2Sorted(ds.data);
+    }
+
+    @Test
+    public void testFallbackSort() {
+        BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1);
+        BlockSort s = new BlockSort(data);
+        int[] fmap = new int[FIXTURE.length];
+        s.fallbackSort(fmap, FIXTURE, FIXTURE.length);
+        assertArrayEquals(FIXTURE_SORTED, fmap);
+    }
+
+    private DS setUpFixture() {
+        return setUpFixture(FIXTURE);
+    }
+
+    private void assertFixtureSorted(BZip2CompressorOutputStream.Data data) {
+        assertFixtureSorted(data, FIXTURE, FIXTURE_BWT);
+    }
+
+    private DS setUpFixture2() {
+        return setUpFixture(FIXTURE2);
+    }
+
+    private void assertFixture2Sorted(BZip2CompressorOutputStream.Data data) {
+        assertFixtureSorted(data, FIXTURE2, FIXTURE2_BWT);
+    }
+
+    private DS setUpFixture(byte[] fixture) {
+        BZip2CompressorOutputStream.Data data = new BZip2CompressorOutputStream.Data(1);
+        System.arraycopy(fixture, 0, data.block, 1, fixture.length);
+        return new DS(data, new BlockSort(data));
+    }
+
+    private void assertFixtureSorted(BZip2CompressorOutputStream.Data data,
+                                     byte[] fixture, byte[] fixtureBwt) {
+        assertEquals(fixture[fixture.length - 1], data.block[0]);
+        for (int i = 0; i < fixture.length; i++) {
+            assertEquals(fixtureBwt[i], data.block[data.fmap[i]]);
+        }
+    }
+
+    private static class DS {
+        private final BZip2CompressorOutputStream.Data data;
+        private final BlockSort s;
+        DS(BZip2CompressorOutputStream.Data data, BlockSort s) {
+            this.data = data;
+            this.s = s;
+        }
+    }
+}
\ No newline at end of file