[med-svn] [Git][med-team/beagle][master] 7 commits: Fix clean target Closes: #1043854

Andreas Tille (@tille) gitlab at salsa.debian.org
Mon Dec 16 15:23:46 GMT 2024



Andreas Tille pushed to branch master at Debian Med / beagle


Commits:
4c36d9b1 by Andreas Tille at 2024-12-16T15:52:35+01:00
Fix clean target Closes: #1043854

- - - - -
51b7c5c9 by Andreas Tille at 2024-12-16T15:53:55+01:00
New upstream version 241029
- - - - -
d8130812 by Andreas Tille at 2024-12-16T15:53:55+01:00
New upstream version

- - - - -
8149b6c5 by Andreas Tille at 2024-12-16T15:53:56+01:00
Update upstream source from tag 'upstream/241029'

Update to upstream version '241029'
with Debian dir 6ff5a3201ff432ad85e6def4213edd4547fa9dcc
- - - - -
7e09db76 by Andreas Tille at 2024-12-16T15:53:56+01:00
Standards-Version: 4.7.0 (routine-update)

- - - - -
61f30c04 by Andreas Tille at 2024-12-16T16:19:27+01:00
Fix lintian-overrides

- - - - -
914312b3 by Andreas Tille at 2024-12-16T16:21:47+01:00
Upload to unstable

- - - - -


25 changed files:

- beagleutil/SampleIds.java
- + blbutil/BGZipIt.java
- blbutil/InputIt.java
- blbutil/SampleFileIt.java
- + blbutil/VcfFileIt.java
- bref/Bref3.java
- bref/Bref3Reader.java
- bref/UnBref3.java
- debian/beagle-doc.lintian-overrides
- debian/changelog
- debian/clean
- debian/control
- main/Main.java
- main/Pedigree.java
- main/WindowWriter.java
- phase/FixedPhaseData.java
- phase/PhaseData.java
- phase/Stage2Baum.java
- vcf/MarkerMap.java
- vcf/RefTargSlidingWindow.java
- vcf/Samples.java
- vcf/TargSlidingWindow.java
- vcf/VcfHeader.java
- vcf/VcfIt.java
- vcf/VcfWriter.java


Changes:

=====================================
beagleutil/SampleIds.java
=====================================
@@ -19,6 +19,7 @@
 package beagleutil;
 
 import java.util.Arrays;
+import vcf.Samples;
 
 /**
  * <p>Class {@code SampleIds} is a singleton class that represents a
@@ -87,6 +88,33 @@ public final class SampleIds {
         return indexer.getIndices(ids);
     }
 
+    /**
+     * Returns an array mapping sample identifier indices to sample indices.
+     * Indices for sample identifiers not present in this list of samples
+     * are mapped to {@code -1}.
+     * @param samples a list of sample identifiers
+     * @return an array mapping sample identifier indices to sample indices
+     * @throws IllegalArgumentException if two sample identifiers are the
+     * same string
+     * @throws NullPointerException if {@code samples == null}
+     */
+    public int[] idIndexToIndex(Samples samples) {
+        String[] ids = samples.ids();
+        int[] idIndex = new int[ids.length];
+        for (int j=0; j<ids.length; ++j) {
+            idIndex[j] = sampleIds.getIndex(ids[j]);
+        }
+        int[] idIndexToIndex = new int[sampleIds.size()];
+        Arrays.fill(idIndexToIndex, -1);
+        for (int j=0; j<ids.length; ++j) {
+            if (idIndexToIndex[idIndex[j]] != -1) {
+                throw new IllegalArgumentException("Dupicate sample: " + ids[j]);
+            }
+            idIndexToIndex[idIndex[j]] = j;
+        }
+        return idIndexToIndex;
+     }
+
     /**
      * Returns the index of the specified sampled identifier, or returns
      * {@code -1} if the specified sample identifier is not indexed.


=====================================
blbutil/BGZipIt.java
=====================================
@@ -0,0 +1,371 @@
+/*
+ * Copyright (C) 2014-2021 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import ints.IntList;
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * <p>Class {@code BGZipIt} is a {@code blbutil.FileIt<String>} whose
+ * {@code next()} method returns lines of a bgzip-compressed file.
+ * </p>
+ * <p>The GZIP file format specification is described in
+ * <a href="https://www.ietf.org/rfc/rfc1952.txt">RFC 1952</a>
+ * and the BGZIP file format specification is described in the
+ * <a href="https://samtools.github.io/hts-specs/SAMv1.pdf">
+ * Sequence Alignment/Map Format Specification</a>
+ * </p>
+ * <p>Instances of class {@code BGZipIt} are not thread safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class BGZipIt implements FileIt<String> {
+
+    private static final byte CR = 0x0D;
+    private static final byte LF = 0x0A;
+    private static final byte[] EOF = new byte[0];
+
+    private static final byte GZIP_ID1 = 31;
+    private static final byte GZIP_ID2 = (byte) 139;
+    private static final byte GZIP_CM = 8;
+    private static final byte GZIP_FLG = (1 << 2); // only required set bit
+    private static final byte GZIP_XLEN1 = 6;
+    private static final byte GZIP_XLEN2 = 0;
+    private static final byte BGZIP_SI1 = 66;
+    private static final byte BGZIP_SI2 = 67;
+    private static final byte BGZIP_SLEN1 = 2;
+    private static final byte BGZIP_SLEN2 = 0;
+
+    private final InputStream is;
+    private final File source;
+    private final int nBufferedBlocks;
+    private final ArrayDeque<String> lines;
+    private byte[] leftOverBytes;
+
+    /**
+     * Constructs a new {@code BGZipIt} instance from the specified data
+     * @param is an input stream that reads from a gzip-compressed
+     * VCF file
+     * @param nBufferedBlocks the number of buffered gzip blocks
+     * @throws IllegalArgumentException if {@code nBufferedBlocks < 1}
+     * @throws NullPointerException if {@code is == null}
+     */
+    public BGZipIt(InputStream is, int nBufferedBlocks) {
+        this(is, nBufferedBlocks, null);
+    }
+
+    /**
+     * Constructs a new {@code BGZipIt} instance from the specified data
+     * @param is an input stream that reads gzip-compressed
+     * VCF data
+     * @param nBufferedBlocks the number of buffered gzip blocks
+     * @param source the gzip-compressed VCF file that is read
+     * @throws IllegalArgumentException if {@code nBufferedBlocks < 1}
+     * @throws NullPointerException if {@code is == null}
+     */
+    public BGZipIt(InputStream is, int nBufferedBlocks, File source) {
+        if (nBufferedBlocks < 1) {
+            throw new IllegalArgumentException(String.valueOf(nBufferedBlocks));
+        }
+        this.is = is;
+        this.source = source;
+        this.nBufferedBlocks = nBufferedBlocks;
+        this.leftOverBytes = new byte[0];
+        this.lines = new ArrayDeque<>();
+        fillBuffer();
+    }
+
+    @Override
+    public void close() {
+        try {
+            is.close();
+        } catch (IOException ex) {
+            Utilities.exit(ex);
+        }
+    }
+
+    /**
+     * Returns {@code true} if the iteration has more elements.
+     * @return {@code true} if the iteration has more elements
+     */
+    @Override
+    public boolean hasNext() {
+        return lines.isEmpty()==false;
+    }
+
+    /**
+     * Returns the next line of the VCF file. End of line characters are
+     * not included in the returned line.
+     * @return the next line of the VCF file
+     * @throws NoSuchElementException if the VCF file has no more lines
+     */
+    @Override
+    public String next() {
+        String s = lines.remove();
+        if (lines.isEmpty()) {
+            fillBuffer();
+        }
+        return s;
+    }
+
+    @Override
+    public File file() {
+        return source;
+    }
+
+    private void fillBuffer() {
+        byte[][] blocks = readAndInflateBlocks(is, leftOverBytes, nBufferedBlocks);
+        if (blocks.length>0) {
+            int[] eolIndices = IntStream.range(0, blocks.length)
+                    .parallel()
+                    .flatMap(j -> eolIndices(j, blocks[j]))
+                    .toArray();
+            leftOverBytes = leftOverBytes(blocks, eolIndices);
+            addToLines(blocks, eolIndices, lines);
+            if (lines.isEmpty() && leftOverBytes.length>0) {
+                fillBuffer();
+            }
+        }
+    }
+
+    private static IntStream eolIndices(int block, byte[] bytes) {
+        IntList il = new IntList();
+        for (int b=0; b<bytes.length; ++b) {
+            if (bytes[b]==LF) {
+                il.add(block);
+                il.add(b);
+            }
+        }
+        return il.stream();
+    }
+
+    private static byte[] leftOverBytes(byte[][] blocks, int[] eolIndices) {
+        if (blocks.length==0) {
+            return new byte[0];
+        }
+        else {
+            int lastBlock = blocks.length-1;
+            int endIndex = blocks[lastBlock].length;
+            if (eolIndices.length==0) {
+                return merge(blocks, 0, 0, lastBlock, endIndex);
+            }
+            else {
+                int startBlock = eolIndices[eolIndices.length-2];
+                int startIndex = eolIndices[eolIndices.length-1] + 1;
+                return merge(blocks, startBlock, startIndex, lastBlock, endIndex);
+            }
+        }
+    }
+
+    private static void addToLines(byte[][] blocks, int[] eolIndices,
+            ArrayDeque<String> lines) {
+        List<String> tmpList = IntStream.range(0, eolIndices.length)
+                .parallel()
+                .filter(j -> (j & 0b1)==0)
+                .mapToObj(j -> toString(blocks, eolIndices, j))
+                .collect(Collectors.toList());
+        lines.addAll(tmpList);
+    }
+
+    private static String toString(byte[][] blocks, int[] eolIndices, int index) {
+        int block = eolIndices[index];
+        int endIndex = eolIndices[index + 1];
+        byte[] merged;
+        if (index==0) {
+            merged = merge(blocks, 0, 0, block, endIndex);
+        }
+        else {
+            assert index>=2;
+            int startBlock = eolIndices[index-2];
+            int startIndex = eolIndices[index-1] + 1;
+            merged = merge(blocks, startBlock, startIndex, block, endIndex);
+        }
+        int lengthM1 = merged.length-1;
+        if (lengthM1>=0 && merged[lengthM1]==CR) {
+            // Correct for CR LF line ending on Windows systems
+            return new String(merged, 0, lengthM1, StandardCharsets.UTF_8);
+        }
+        else {
+            return new String(merged, StandardCharsets.UTF_8);
+        }
+    }
+
+    private static byte[] merge(byte[][] blocks, int startBlock, int startIndex,
+            int lastBlock, int endIndex) {
+        // merge correctly handles startIndex == blocks[startBlock].length
+        if (lastBlock==startBlock) {
+            return Arrays.copyOfRange(blocks[startBlock], startIndex, endIndex);
+        }
+        else {
+            int size = 0;
+            for (int j=startBlock; j<lastBlock; ++j) {
+                size += blocks[j].length;
+            }
+            size -= startIndex;
+            size += endIndex;
+            byte[] merged = new byte[size];
+            int len = (blocks[startBlock].length - startIndex);
+            System.arraycopy(blocks[startBlock], startIndex, merged, 0, len);
+            for (int j=(startBlock + 1); j<lastBlock; ++j) {
+                System.arraycopy(blocks[j], 0, merged, len, blocks[j].length);
+                len += blocks[j].length;
+            }
+            System.arraycopy(blocks[lastBlock], 0, merged, len, endIndex);
+            assert merged.length == (len + endIndex);
+            return merged;
+        }
+    }
+
+    private static byte[][] readAndInflateBlocks(InputStream is, byte[] initialBytes, int nBlocks) {
+        ArrayList<byte[]> compressedBlocks = new ArrayList<>(nBlocks);
+        for (int j=0; j<nBlocks; ++j) {
+            byte[] ba = readCompressedBlock(is);
+            if (ba.length>0) {
+                compressedBlocks.add(ba);
+            }
+            else if (ba==EOF) {
+                break;
+            }
+        }
+        byte[][] blocks = compressedBlocks.stream()
+                .parallel()
+                .map(ba -> inflateBlock(ba))
+                .toArray(byte[][]::new);
+        if (blocks.length>0 && initialBytes.length>0) {
+            int newLength = initialBytes.length + blocks[0].length;
+            byte[] prependedBlock = Arrays.copyOf(initialBytes, newLength);
+            System.arraycopy(blocks[0], 0, prependedBlock, initialBytes.length,
+                    blocks[0].length);
+            blocks[0] = prependedBlock;
+        }
+        return blocks;
+    }
+
+    private static byte[] readCompressedBlock(InputStream is) {
+        byte[] ba = new byte[18];
+        try {
+            int bytesRead = 0;
+            int offset = 0;
+            while (offset<ba.length
+                    && (bytesRead = is.read(ba, offset, ba.length - offset)) != -1) {
+                offset += bytesRead;
+            }
+            if (offset==0) {
+                return EOF;
+            }
+            if (offset==ba.length && isStartOfBgzipBlock(ba)) {
+                int blockSize = ((ba[16] & 0xff) | ((ba[17] & 0xff) << 8)) + 1;
+                ba = Arrays.copyOf(ba, blockSize);
+                while (offset<ba.length
+                        && (bytesRead = is.read(ba, offset, ba.length - offset)) != -1) {
+                    offset += bytesRead;
+                }
+                if (offset < ba.length) {
+                    Utilities.exit("Premature end of BGZIP block");
+                }
+            }
+            else {
+                Utilities.exit("Invalid BGZIP block header");
+            }
+        }
+        catch (IOException e) {
+            Utilities.exit(e);
+        }
+        return ba;
+    }
+
+    private static byte[] inflateBlock(byte[] ba) {
+        ByteArrayOutputStream os = new ByteArrayOutputStream(ba.length);
+        byte[] buffer = new byte[1<<13];
+        try (ByteArrayInputStream bais = new ByteArrayInputStream(ba);
+                GZIPInputStream gzis = new GZIPInputStream(bais)) {
+            int bytesRead;
+            while ((bytesRead = gzis.read(buffer)) != -1) {
+                os.write(buffer, 0, bytesRead);
+            }
+        }
+        catch (IOException e) {
+            Utilities.exit(e);
+        }
+        return os.toByteArray();
+    }
+
+    /**
+     * Returns {@code true} if the first 16 bytes of the specified input stream
+     * are a gzip header that includes a 6 byte extra field containing
+     * the block size as described in the bgzip specification, and returns
+     * {@code false} otherwise. The method sets a mark before reading
+     * the initial bytes from the stream, and resets the stream to the
+     * mark position before returning.
+     * @param bis a buffered input stream
+     * @return {@code true} if the first 16 bytes of the specified input stream
+     * are a gzip header that includes a 6 byte extra field containing
+     * the block size as described in the bgzip specification
+     */
+    public static boolean beginsWithBgzipBlock(BufferedInputStream bis) {
+        assert bis.markSupported();
+        int maxBytes = 16;
+        int bytesRead = 0;
+        int offset = 0;
+        byte[] ba = new byte[maxBytes];
+        bis.mark(maxBytes);
+        try {
+            while (offset<ba.length
+                    && (bytesRead = bis.read(ba, offset, ba.length - offset)) != -1) {
+                offset += bytesRead;
+            }
+            bis.reset();
+        }
+        catch(IOException ex) {
+            Utilities.exit(ex);
+        }
+        return offset==ba.length && isStartOfBgzipBlock(ba);
+    }
+
+    private static boolean isStartOfBgzipBlock(byte[] buffer) {
+    // isStartOfBgzipBlock() returns false if additional non-bgzip
+    // subfields are present
+        return (buffer.length >= 16
+                && buffer[0] == GZIP_ID1)
+                && (buffer[1] == GZIP_ID2)
+                && (buffer[2] == GZIP_CM)
+                && ((buffer[3] & GZIP_FLG)!=0)
+                && (buffer[10] == GZIP_XLEN1)
+                && (buffer[11] == GZIP_XLEN2)
+                && (buffer[12] == BGZIP_SI1)
+                && (buffer[13] == BGZIP_SI2)
+                && (buffer[14] == BGZIP_SLEN1)
+                && (buffer[15] == BGZIP_SLEN2);
+    }
+}


=====================================
blbutil/InputIt.java
=====================================
@@ -18,6 +18,7 @@
  */
 package blbutil;
 
+import java.io.BufferedInputStream;
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.FileInputStream;
@@ -37,7 +38,15 @@ import java.util.zip.GZIPInputStream;
  * is trapped, an error message is written to standard out, and the
  * Java Virtual Machine is terminated.
  * </p>
- * Instances of class {@code InputIt} are not thread-safe.
+ * <p>The GZIP file format specification is described in
+ * <a href="https://www.ietf.org/rfc/rfc1952.txt">RFC 1952</a>
+ * and the BGZIP file format specification is described in the
+ * <a href="https://samtools.github.io/hts-specs/SAMv1.pdf">
+ * Sequence Alignment/Map Format Specification</a>
+ * </p>
+ *
+ * <p>Instances of class {@code InputIt} are not thread-safe.
+ * </p>
  *
  * @author Brian L. Browning {@code <browning at uw.edu>}
  */
@@ -52,7 +61,7 @@ public class InputIt implements FileIt<String> {
      * size that will iterate through lines of the specified input stream.
      *
      * @param is input stream of text data
-     *
+     * @param file the file that is the source of the input stream
      */
     private InputIt(InputStream is, File file) {
         BufferedReader br = null;
@@ -73,6 +82,7 @@ public class InputIt implements FileIt<String> {
      * that will iterate through the lines of the specified input stream.
      *
      * @param is input stream of text data
+     * @param file the file that is the source of the input stream
      * @param bufferSize the buffer size in bytes
      *
      * @throws IllegalArgumentException if {@code bufferSize < 0}
@@ -175,38 +185,30 @@ public class InputIt implements FileIt<String> {
     }
 
     /**
-     * Constructs and returns an {@code InputIt} instance with the specified
-     * buffer size that iterates through lines of text read from standard input.
-     *
-     * @param bufferSize the buffer size in bytes
-     *
-     * @return a new {@code InputIt} instance that iterates
-     * through lines of text read from standard input
-     *
-     * @throws IllegalArgumentException if {@code bufferSize < 0}
-     */
-    public static InputIt fromStdIn(int bufferSize) {
-        File file = null;
-        return new InputIt(System.in, file, bufferSize);
-    }
-
-    /**
-     * Constructs and returns an {@code InputIt} instance with the default
-     * buffer size that iterates through lines of the specified compressed
-     * or uncompressed text file. If the filename ends in ".gz", the file
-     * must be either BGZIP-compressed or GZIP-compressed.
-     *
+     * Constructs and returns a buffered {@code FileIt<String>} instance
+     * that iterates through lines of the specified compressed or
+     * uncompressed text file. If the filename ends in ".gz" or ".bgz", the
+     * file must be GZIP-compressed.
      * @param file a compressed or uncompressed text file
-     * @return  a new {@code InputIt} instance that iterates
-     * through lines of the specified text file
+     * @param nBufferedBlocks the number of buffered GZIP blocks if the
+     * specified file is bgzip-compressed
+     * @return a {@code FileIt<String>} instance that iterates through the
+     * lines of the specified file
      *
      * @throws NullPointerException if {@code file == null}
      */
-    public static InputIt fromGzipFile(File file) {
+    public static FileIt<String> fromBGZipFile(File file, int nBufferedBlocks) {
+        String filename = file.getName();
         try {
             InputStream is = new FileInputStream(file);
-            if (file.getName().endsWith(".gz")) {
-                return new InputIt(new GZIPInputStream(is), file);
+            BufferedInputStream bis = new BufferedInputStream(is);
+            if (filename.endsWith(".gz") || filename.endsWith(".bgz")) {
+                if (BGZipIt.beginsWithBgzipBlock(bis)) {
+                    return new BGZipIt(bis, nBufferedBlocks, file);
+                }
+                else {
+                    return new InputIt(new GZIPInputStream(bis), file);
+                }
             }
             else {
                 return new InputIt(is, file);
@@ -223,24 +225,22 @@ public class InputIt implements FileIt<String> {
     }
 
     /**
-     * Constructs and returns an {@code InputIt} instance with the specified
-     * buffer size that iterates through lines of the specified compressed
-     * or uncompressed text file. If the filename ends in ".gz", the file must
-     * be either BGZIP-compressed or GZIP-compressed.
+     * Constructs and returns a buffered {@code InputIt} instance that
+     * iterates through lines of the specified compressed or uncompressed
+     * text file. If the filename ends in ".gz", the file must be
+     * GZIP-compressed.
      *
      * @param file a compressed or uncompressed text file
-     * @param bufferSize the buffer size in bytes
-     * @return  a new {@code InputIt} instance that iterates
+     * @return  a buffered {@code InputIt} instance that iterates
      * through lines of the specified text file
-     *
-     * @throws IllegalArgumentException if {@code bufferSize < 0}
      * @throws NullPointerException if {@code file == null}
      */
-    public static InputIt fromGzipFile(File file, int bufferSize) {
+    public static InputIt fromGzipFile(File file) {
+        String filename = file.getName();
         try {
             InputStream is = new FileInputStream(file);
-            if (file.getName().endsWith(".gz")) {
-                return new InputIt(new GZIPInputStream(is), file, bufferSize);
+            if (filename.endsWith(".gz") || filename.endsWith(".bgz")) {
+                return new InputIt(new GZIPInputStream(is), file);
             }
             else {
                 return new InputIt(is, file);
@@ -256,14 +256,13 @@ public class InputIt implements FileIt<String> {
         return null;
     }
 
-     /**
-     * Constructs and returns an {@code InputIt} instance with the default
-     * buffer size that iterates through lines of the specified text file.
+    /**
+     * Constructs and returns a buffered {@code InputIt} instance
+     * that iterates through lines of the specified text file.
      *
      * @param file a text file
-     * @return a new {@code InputIt} instance that iterates through
+     * @return a buffered {@code InputIt} instance that iterates through
      * lines of the specified text file
-     *
      * @throws NullPointerException if {@code filename == null}
      */
     public static InputIt fromTextFile(File file) {
@@ -276,27 +275,4 @@ public class InputIt implements FileIt<String> {
         assert false;
         return null;
     }
-
-     /**
-     * Constructs and returns an {@code InputIt} instance with the specified
-     * buffer size that iterates through lines of the specified text file.
-     *
-     * @param file a text file
-     * @param bufferSize the buffer size in bytes
-     * @return a new {@code InputIt} instance that iterates through
-     * lines of the specified text file
-     *
-     * @throws IllegalArgumentException if {@code bufferSize < 0}
-     * @throws NullPointerException if {@code filename == null}
-     */
-    public static InputIt fromTextFile(File file, int bufferSize) {
-        try {
-            return new InputIt(new FileInputStream(file), file, bufferSize);
-        }
-        catch(FileNotFoundException e) {
-            Utilities.exit(e, "Error opening " + file);
-        }
-        assert false;
-        return null;
-    }
 }


=====================================
blbutil/SampleFileIt.java
=====================================
@@ -21,7 +21,7 @@ package blbutil;
 import vcf.Samples;
 
 /**
- * <p>An iterator for records in a file.  Each records contains
+ * <p>An iterator for records in a file.  Each record contains
  * data for the same set of samples.
  *</p>
  * Instances of class {@code SampleFileIt} are not thread-safe.


=====================================
blbutil/VcfFileIt.java
=====================================
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2014-2021 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import vcf.VcfHeader;
+
+/**
+ * <p>An iterator for records in a VCF file.  Each record contains
+ * data for the same set of samples.
+ *</p>
+ * Instances of class {@code VcfFileIt} are not thread-safe.
+ *
+ * @param <E> the type of the elements returned by this iterator's
+ * {@code next()} method.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface VcfFileIt<E> extends SampleFileIt<E> {
+
+    /**
+     * Returns the VCF meta-information lines and header line
+     * @return the VCF meta-information lines and header line
+     */
+    VcfHeader vcfHeader();
+}


=====================================
bref/Bref3.java
=====================================
@@ -19,7 +19,6 @@
 package bref;
 
 import blbutil.Const;
-import blbutil.FileIt;
 import blbutil.InputIt;
 import blbutil.SampleFileIt;
 import blbutil.Utilities;
@@ -29,7 +28,7 @@ import vcf.RefIt;
 import vcf.Samples;
 
 /**
- * <p>Class {@code Bref3} converts files in VCF format into 
+ * <p>Class {@code Bref3} converts files in VCF format into
  * bref version 3 format.
  * </p>
  * <p>Instances of class {@code Bref3} are not thread-safe.</p>
@@ -38,7 +37,7 @@ import vcf.Samples;
  */
 public class Bref3 {
 
-    private static final String program = "bref3.22Jul22.46e.jar";
+    private static final String PROGRAM = "bref3.29Oct24.c8e.jar";
 
     /**
      * The {@code main()} method is the entry point to the bref program.
@@ -105,26 +104,26 @@ public class Bref3 {
     }
 
     private static SampleFileIt<RefGTRec> refIt(String fileName) {
-        FileIt<String> it = null;
         if (fileName==null) {
-            it = InputIt.fromStdIn();
-        }
-        else if (fileName.endsWith(".gz")) {
-            it = InputIt.fromGzipFile(new File(fileName));
+            return RefIt.create(InputIt.fromStdIn());
         }
         else {
-            it = InputIt.fromTextFile(new File(fileName));
+            File file = new File(fileName);
+            int nCores = Runtime.getRuntime().availableProcessors();
+            System.setProperty("java.util.concurrent.ForkJoinPool.common.parallelism",
+                    String.valueOf(nCores));
+            int nBufferedBlocks = nCores << 2;
+            return RefIt.create(InputIt.fromBGZipFile(file, nBufferedBlocks));
         }
-        return RefIt.create(it);
     }
 
     private static BrefWriter brefOut(Samples samples, int maxNSeq) {
         File outFile = null;    // write to standard output
         if (maxNSeq<0) {
-            return new AsIsBref3Writer(program, samples, outFile);
+            return new AsIsBref3Writer(PROGRAM, samples, outFile);
         }
         else {
-            return new CompressBref3Writer(program, samples, maxNSeq, outFile);
+            return new CompressBref3Writer(PROGRAM, samples, maxNSeq, outFile);
         }
     }
 
@@ -133,24 +132,24 @@ public class Bref3 {
         sb.append("usage:");
         sb.append(Const.nl);
         sb.append("  java -jar ");
-        sb.append(program);
+        sb.append(PROGRAM);
         sb.append(" help");
         sb.append(Const.nl);
         sb.append(Const.nl);
         sb.append("  java -jar ");
-        sb.append(program);
+        sb.append(PROGRAM);
         sb.append(" [vcf] <nseq>  > [bref3]");
         sb.append(Const.nl);
         sb.append(Const.nl);
         sb.append("  cat   [vcf]   | java -jar ");
-        sb.append(program);
+        sb.append(PROGRAM);
         sb.append(" <nseq>  > [bref3]");
         sb.append(Const.nl);
         sb.append(Const.nl);
         sb.append("where");
         sb.append(Const.nl);
         sb.append("  [bref3]  = the output bref3 file");
-        sb.append(Const.nl);        
+        sb.append(Const.nl);
         sb.append("  [vcf]    = A VCF file with phased, non-missing genotype data.  If the");
         sb.append(Const.nl);
         sb.append("             file is gzip-compressed, its filename must end in \".gz\"");


=====================================
bref/Bref3Reader.java
=====================================
@@ -85,7 +85,7 @@ public final class Bref3Reader {
         boolean[] isDiploid = new boolean[sampleIds.length];
         Arrays.fill(isDiploid, true);
         this.program = programString;
-        this.samples = Samples.fromIds(sampleIds, isDiploid);
+        this.samples = new Samples(sampleIds, isDiploid);
         this.markerFilter = markerFilter;
         this.nHaps = 2*samples.size();
         this.byteBuffer = new byte[2*nHaps];


=====================================
bref/UnBref3.java
=====================================
@@ -37,7 +37,7 @@ import vcf.VcfWriter;
  */
 public class UnBref3 {
 
-    private static final String program = "unbref3.22Jul22.46e.jar";
+    private static final String program = "unbref3.29Oct24.c8e.jar";
 
     /**
      * The {@code main()} method is the entry point to the bref program.


=====================================
debian/beagle-doc.lintian-overrides
=====================================
@@ -1,3 +1,4 @@
 # jQuery from libjs-jquery is not compatible
-beagle-doc: embedded-javascript-library * please use libjs-jquery
-beagle-doc: embedded-javascript-library * please use libjs-jquery-ui
+beagle-doc: embedded-javascript-library please use libjs-jquery [usr/share/doc/beagle/api/script-dir/jquery-3.7.1.min.js]
+beagle-doc: embedded-javascript-library please use libjs-jquery-ui [usr/share/doc/beagle/api/script-dir/jquery-ui.min.css]
+beagle-doc: embedded-javascript-library please use libjs-jquery-ui [usr/share/doc/beagle/api/script-dir/jquery-ui.min.js]


=====================================
debian/changelog
=====================================
@@ -1,3 +1,14 @@
+beagle (241029-1) unstable; urgency=medium
+
+  * Team upload.
+  * New upstream version
+  * Fix clean target
+    Closes: #1043854
+  * Standards-Version: 4.7.0 (routine-update)
+  * Fix lintian-overrides
+
+ -- Andreas Tille <tille at debian.org>  Mon, 16 Dec 2024 16:19:36 +0100
+
 beagle (220722-1) unstable; urgency=medium
 
   * Team upload.


=====================================
debian/clean
=====================================
@@ -1 +1,2 @@
 SRC/
+b*.jar


=====================================
debian/control
=====================================
@@ -7,7 +7,7 @@ Build-Depends: debhelper-compat (= 13),
                javahelper
 Build-Depends-Indep: default-jdk,
                      libhtsjdk-java
-Standards-Version: 4.6.1
+Standards-Version: 4.7.0
 Vcs-Browser: https://salsa.debian.org/med-team/beagle
 Vcs-Git: https://salsa.debian.org/med-team/beagle.git
 Homepage: https://faculty.washington.edu/browning/beagle/beagle.html


=====================================
main/Main.java
=====================================
@@ -58,12 +58,12 @@ public class Main {
     /**
      * The program name and commit version.
      */
-    public static final String PROGRAM = "beagle.22Jul22.46e.jar";
+    public static final String PROGRAM = "beagle.29Oct24.c8e.jar";
 
     /**
      * The command to invoke the program.
      */
-    public static final String COMMAND = "java -jar beagle.22Jul22.46e.jar";
+    public static final String COMMAND = "java -jar beagle.29Oct24.c8e.jar";
 
     /**
      * The copyright string.


=====================================
main/Pedigree.java
=====================================
@@ -99,7 +99,7 @@ public class Pedigree {
     private static void readPedFile(Samples samples, File pedFile,
             int[] fathers, int[] mothers, int[][] offspring) {
         if (pedFile != null) {
-            int[] idIndexToIndex = samples.idIndexToIndex();
+            int[] idIndexToIndex = SampleIds.instance().idIndexToIndex(samples);
             boolean[] processed = new boolean[samples.size()];
             IntList[] children = new IntList[samples.size()];
             try (FileIt<String> pedIt=InputIt.fromGzipFile(pedFile)) {


=====================================
main/WindowWriter.java
=====================================
@@ -122,7 +122,7 @@ public class WindowWriter implements Closeable {
      * @throws IllegalArgumentException if
      * {@code stateProbs.size() != impData.nTargHaps()}
      * @throws IndexOutOfBoundsException if
-     * {@code refStart < 0 || refEnd > impData.refGT().nMarkers()}
+     * {@code start < 0 || end > impData.refGT().nMarkers()}
      * @throws NullPointerException if {@code impData==null || stateProbs==null}
      * @throws NullPointerException if any element of {@code stateProbs} is
      * {@code null}


=====================================
phase/FixedPhaseData.java
=====================================
@@ -60,7 +60,7 @@ public class FixedPhaseData {
     private final MarkerMap map;
     private final Steps stage1Steps;
     private final GT targGT;
-    private final Optional<RefGT> refGT;
+    private final Optional<RefGT> restrictedRefGT;
     private final int overlap;
 
     private final MarkerMap stage1Map;
@@ -117,7 +117,7 @@ public class FixedPhaseData {
         this.map = markerMap(window.genMap(), window.targGT().markers());
         this.targGT = phasedOverlap==null ? window.targGT() :
                 new SplicedGT(phasedOverlap, window.targGT());
-        this.refGT = window.restrictRefGT();
+        this.restrictedRefGT = window.restrictRefGT();
         this.overlap = phasedOverlap==null ? 0 : phasedOverlap.nMarkers();
         this.nHaps = nHaps(window);
 
@@ -132,7 +132,7 @@ public class FixedPhaseData {
             this.ibsStep = par.step_scale()*medianDiff(stage1Map.genPos());
             this.stage1Steps = new Steps(stage1Map, ibsStep);
             this.stage1TargGT = targGT;
-            this.stage1RefGT = refGT;
+            this.stage1RefGT = restrictedRefGT;
             this.stage1XRefGT = stage1RefGT.isPresent()
                     ? Optional.of(XRefGT.fromPhasedGT(stage1RefGT.get(), par.nthreads()))
                     : Optional.empty();
@@ -152,7 +152,7 @@ public class FixedPhaseData {
             this.ibsStep = par.step_scale()*medianDiff(stage1Map.genPos());
             this.stage1Steps = new Steps(stage1Map, ibsStep);
             this.stage1TargGT = targGT.restrict(hiFreqMarkers, hiFreqInd);
-            this.stage1RefGT = restrict(refGT, hiFreqMarkers, hiFreqInd);
+            this.stage1RefGT = restrict(restrictedRefGT, hiFreqMarkers, hiFreqInd);
             this.stage1XRefGT = stage1RefGT.isPresent()
                     ? Optional.of(XRefGT.fromPhasedGT(stage1RefGT.get(), par.nthreads()))
                     : Optional.empty();
@@ -370,8 +370,8 @@ public class FixedPhaseData {
      * Returns the optional phased, nonmissing reference genotypes.
      * @return the optional phased, nonmissing reference genotypes
      */
-    public Optional<RefGT> refGT() {
-        return refGT;
+    public Optional<RefGT> restrictedRefGT() {
+        return restrictedRefGT;
     }
 
     /**


=====================================
phase/PhaseData.java
=====================================
@@ -137,11 +137,11 @@ public class PhaseData {
      * Sets the allele mismatch probability to the specified value.
      * @param pMismatch the allele mismatch probability
      * @throws IllegalArgumentException if
-     * {@code pMismatch <= 0.0 || pMismatch >= 1.0
+     * {@code pMismatch < 0.0 || pMismatch > 1.0
      *      || Float.isFinite(pMismatch) == false}
      */
     public void updatePMismatch(float pMismatch) {
-        if (pMismatch <= 0.0 || pMismatch >= 1.0
+        if (pMismatch < 0.0 || pMismatch > 1.0
                 || Float.isFinite(pMismatch)==false) {
             throw new IllegalArgumentException(String.valueOf(pMismatch));
         }


=====================================
phase/Stage2Baum.java
=====================================
@@ -68,7 +68,7 @@ public class Stage2Baum {
         this.probs = new float[2][nStage1Markers][stateProbs.maxStates()];
 
         this.unphTargGT = fpd.targGT();
-        this.refGT = fpd.refGT();
+        this.refGT = fpd.restrictedRefGT();
         this.nTargHaps = fpd.targGT().nHaps();
         this.stage2Haps = stage2Haps;
         this.stage1To2 = fpd.stage1To2();


=====================================
vcf/MarkerMap.java
=====================================
@@ -97,8 +97,10 @@ public class MarkerMap {
             String s = "Window has only one position: CHROM=" + a.chrom() + " POS=" + a.pos();
             throw new IllegalArgumentException(s);
         }
-        return Math.abs(genMap.genPos(b)-genMap.genPos(a))
+        double meanSingleBaseDist = Math.abs(genMap.genPos(b) - genMap.genPos(a))
                 / Math.abs(b.pos()-a.pos());
+        // require meanSingleBaseDist to be >= 0.01 * mean human single base genetic distance
+        return Math.max(meanSingleBaseDist, 1e-8);
     }
 
     private MarkerMap(double[] gPos) {


=====================================
vcf/RefTargSlidingWindow.java
=====================================
@@ -18,6 +18,7 @@
  */
 package vcf;
 
+import beagleutil.ChromIds;
 import blbutil.Const;
 import blbutil.FileIt;
 import blbutil.Filter;
@@ -90,7 +91,8 @@ public class RefTargSlidingWindow implements SlidingWindow {
 
     private static SampleFileIt<GTRec> targIt(Par par, Filter<String> sFilter,
             Filter<Marker> mFilter) {
-        FileIt<String> it = InputIt.fromGzipFile(par.gt());
+        int nBufferedBlocks = par.nthreads() << 3;
+        FileIt<String> it = InputIt.fromBGZipFile(par.gt(), nBufferedBlocks);
         SampleFileIt<GTRec> targIt = VcfIt.create(it, sFilter, mFilter,
                 VcfIt.TO_LOWMEM_GT_REC);
         if (par.chromInt() != null) {
@@ -100,7 +102,7 @@ public class RefTargSlidingWindow implements SlidingWindow {
     }
 
     private static SampleFileIt<RefGTRec> refIt(Par par,
-            Filter<String> sFilter, Filter<Marker> mFilter) {
+            Filter<String> sampleFilter, Filter<Marker> markerFilter) {
         SampleFileIt<RefGTRec> refIt;
         String filename = par.ref().toString();
         if (filename.endsWith(".bref")) {
@@ -109,17 +111,20 @@ public class RefTargSlidingWindow implements SlidingWindow {
             Utilities.exit(s);
         }
         if (filename.endsWith(".bref3")) {
-            refIt = new Bref3It(par.ref(), mFilter);
+            refIt = new Bref3It(par.ref(), markerFilter);
         } else {
             if (filename.endsWith(".vcf") == false
-                    && filename.endsWith(".vcf.gz") == false) {
+                    && filename.endsWith(".vcf.gz") == false
+                    && filename.endsWith(".vcf.bgz") == false) {
                 System.err.println(Const.nl
-                        + "WARNING: unrecognized reference file type "
-                        + "(expected \".bref3\", \".vcf\", or \".vcf.gz\")"
+                        + "ERROR: unrecognized reference filename extension: "
+                        + Const.nl
+                        + "       Expected \".bref3\", \".vcf\", \".vcf.gz\", or \".vcf.bgz\""
                         + Const.nl);
             }
-            FileIt<String> it = InputIt.fromGzipFile(par.ref());
-            refIt = RefIt.create(it, sFilter, mFilter);
+            int nBufferedBlocks = par.nthreads() << 3;
+            FileIt<String> it = InputIt.fromBGZipFile(par.ref(), nBufferedBlocks);
+            refIt = RefIt.create(it, sampleFilter, markerFilter);
         }
         if (par.chromInt() != null) {
             refIt = new IntervalVcfIt<>(refIt, par.chromInt());
@@ -286,7 +291,6 @@ public class RefTargSlidingWindow implements SlidingWindow {
                     nextRefRec = refIt.hasNext() ? refIt.next() : null;
                 }
                 nextTargRec = targIt.hasNext() ? targIt.next() : null;
-
             }
             if (impute) {
                 while (nextRefRec!=null
@@ -297,7 +301,7 @@ public class RefTargSlidingWindow implements SlidingWindow {
                     nextRefRec = refIt.hasNext() ? refIt.next() : null;
                 }
             }
-            return window(refOverlapEnd, windowIndex);
+            return window(refOverlapEnd, windowIndex, chromIndex, endPos);
         }
 
         private void resetLists() {
@@ -312,27 +316,48 @@ public class RefTargSlidingWindow implements SlidingWindow {
             inTargOverlap.clear();
         }
 
-        private Window window(int refOverlapEnd, int windowIndex) {
-            if (refRecs.isEmpty()) {
-                String s = Const.nl
-                        + Const.nl + "Marker window contains no reference markers. Do the reference and"
-                        + Const.nl + "target VCF files contain the same chromosomes in the same order?"
-                        + Const.nl;
-                throw new IllegalArgumentException(s);
+        private Window window(int refOverlapEnd, int windowIndex, int chromIndex,
+                int endPos) {
+            if (targRecs.isEmpty() || refRecs.isEmpty()) {
+                throw new IllegalArgumentException(
+                        emptyWindowErrorMessage(chromIndex, endPos));
             }
             RefGT refGT = new RefGT(refRecs.toArray(new RefGTRec[0]));
             BasicGT targGT = new BasicGT(targRecs.toArray(new GTRec[0]));
             boolean lastWindow = (nextTargRec==null || nextRefRec==null);
-            MarkerIndices markerIndices = markerIndices(refGT, lastWindow, refOverlapEnd);
+            MarkerIndices markerIndices = markerIndices(refGT, lastWindow,
+                    refOverlapEnd, endPos);
             return new Window(genMap, windowIndex, lastWindow,
                     markerIndices, refGT, targGT);
         }
 
+        private String emptyWindowErrorMessage(int chromIndex, int endPos) {
+            assert refRecs.isEmpty() || targRecs.isEmpty();
+            if (refRecs.isEmpty()) {
+                return "The window ending at "
+                        + ChromIds.instance().id(chromIndex) + ":" + endPos
+                        + Const.nl + "contains no reference markers"
+                        + Const.nl + "Do the reference and target VCF files contain the same"
+                        + Const.nl + "chromosomes in the same order?"
+                        + Const.nl;
+            }
+            else {
+                assert targRecs.isEmpty();
+                return "The reference and target VCF files contain no markers in common in the window: "
+                        + Const.nl + ChromIds.instance().id(chromIndex)
+                        + ":" + refRecs.get(0).marker().pos()
+                        + "-" + endPos
+                        + Const.nl + "Do both VCF files share any markers in this window?"
+                        + Const.nl + "Do both VCF files contain the same chromosomes in the same order?"
+                        + Const.nl;
+            }
+        }
+
         private MarkerIndices markerIndices(RefGT refGT, boolean lastWindow,
-                int refOverlapEnd) {
+                int refOverlapEnd, int endPos) {
             boolean chromEnd =  lastWindow
                     || (refRecs.get(0).marker().chromIndex() != nextRefRec.marker().chromIndex());
-            int refOverlapStart = overlapStart(refGT, chromEnd, overlapCM);
+            int refOverlapStart = overlapStart(refGT, chromEnd, overlapCM, endPos);
             boolean[] inTarget = new boolean[inTarg.size()];
             for (int j=0; j<inTarget.length; ++j) {
                 inTarget[j] = inTarg.get(j);
@@ -340,15 +365,16 @@ public class RefTargSlidingWindow implements SlidingWindow {
             return new MarkerIndices(inTarget, refOverlapEnd, refOverlapStart);
         }
 
-        private int overlapStart(RefGT refGT, boolean chromEnd, float overlapCM) {
+        private int overlapStart(RefGT refGT, boolean chromEnd, float overlapCM,
+                int endPos) {
             if (chromEnd) {
                 return refGT.nMarkers();
             } else {
                 int nMarkersM1 = refGT.nMarkers() - 1;
-                Marker lastMarker = refGT.marker(nMarkersM1);
-                double endGenPos = genMap.genPos(lastMarker);
+                int chromIndex = refGT.marker(nMarkersM1).chromIndex();
+                double endGenPos = genMap.genPos(chromIndex, endPos-1);
                 double startGenPos = endGenPos - overlapCM;
-                int key = genMap.basePos(lastMarker.chromIndex(), startGenPos);
+                int key = genMap.basePos(chromIndex, startGenPos);
                 int low = 0;
                 int high = nMarkersM1;
                 while (low <= high) {


=====================================
vcf/Samples.java
=====================================
@@ -18,7 +18,6 @@
  */
 package vcf;
 
-import beagleutil.SampleIds;
 import java.util.Arrays;
 
 /**
@@ -30,49 +29,50 @@ import java.util.Arrays;
  */
 public final class Samples {
 
-    private static final SampleIds sampleIds = SampleIds.instance();
-    private final int[] idIndices;
+    private final String[] ids;
     private final boolean[] isDiploid;
 
-    /**
-     * Constructs a new instance of {@code Samples} corresponding to
-     * the specified list of diploid sample identifier indices.
-     * @param idIndices an array of sample identifier indices
+   /**
+     * Constructs a new {@code Samples} instance corresponding to the
+     * specified list of sample identifiers.  A warning is printed to standard
+     * error if any string occurs more than once in the {@code ids} array.
+     * @param ids an array of sample identifiers
      * @param isDiploid a boolean array whose {@code k}-th value is {@code true}
      * if the {@code k}-th sample is diploid, and is {@code false} if the
      * {@code k}-th sample is haploid
-     * @throws IllegalArgumentException if
-     * {@code idIndices.length != isDiploid.length}
-     * @throws IllegalArgumentException if the specified {@code idIndices} array
-     * has two or more elements that are equal
-     * @throws IndexOutOfBoundsException if any element of the specified
-     * {@code idIndices} array is negative or greater than or equal to
-     * {@code beagleutil.SampleIds.instance().size()}
-     * @throws NullPointerException if
-     * {@code idIndices == null || isDiploid == null}
+     *
+     * @throws IllegalArgumentException if {@code ids.length != isDiploid.length}
+     * @throws IllegalArgumentException if there exists {@code j} such that
+     * {@code ((0 <= j) && j < ids.length) && (ids[j].length()==0)}
+     * @throws NullPointerException if {@code ids == null || isDiploid == null}
+     * @throws NullPointerException if there exists {@code j} such that
+     * {@code ((0 <= j) && j < ids.length) && (ids[j]==null)}
      */
-    public Samples(int[] idIndices, boolean[] isDiploid) {
-        if (idIndices.length!=isDiploid.length) {
+    public Samples(String[] ids, boolean[] isDiploid) {
+        if (ids.length!=isDiploid.length) {
             throw new IllegalArgumentException(String.valueOf(isDiploid));
         }
-        checkForDuplicates(idIndices);
-        this.idIndices = idIndices.clone();
+        checkForNullsAndDuplicates(ids);
+        this.ids = ids.clone();
         this.isDiploid = isDiploid.clone();
     }
 
-    private static void checkForDuplicates(int[] idIndices) {
-        int[] copy = Arrays.stream(idIndices).parallel().sorted().toArray();
-        if (copy[0]<0) {
-            throw new IllegalArgumentException(String.valueOf(copy[0]));
+    private static void checkForNullsAndDuplicates(String[] ids) {
+        String[] sortedCopy = Arrays.stream(ids)
+                .parallel()
+                .sorted()
+                .toArray(String[]::new);
+        if (sortedCopy.length>0 && sortedCopy[0].length()==0) {
+            throw new IllegalArgumentException("Empty string identifier");
         }
-        for (int j=1; j<copy.length; ++j) {
-            if (copy[j-1]==copy[j]) {
-                throw new IllegalArgumentException(String.valueOf(copy[j]));
+        for (int j=1; j<sortedCopy.length; ++j) {
+            if (sortedCopy[j].length()==0) {
+                throw new IllegalArgumentException("Empty string identifier");
+            }
+            if (sortedCopy[j].equals(sortedCopy[j-1])) {
+                System.err.println("Warning: duplicate sample identifier: "
+                        + sortedCopy[j]);
             }
-        }
-        int last=idIndices.length-1;
-        if (copy[last]>=sampleIds.size()) {
-            throw new IllegalArgumentException(String.valueOf(copy[last]));
         }
     }
 
@@ -90,51 +90,14 @@ public final class Samples {
     public static Samples combine(Samples first, Samples second) {
         int n1 = first.size();
         int n2 = second.size();
-        int n = n1+n2;
-        int[] idIndices = new int[n];
+        int n = n1 + n2;
+        String[] ids = new String[n];
         boolean[] isDiploid = new boolean[n];
-        System.arraycopy(first.idIndices, 0, idIndices, 0, n1);
-        System.arraycopy(second.idIndices, 0, idIndices, n1, n2);
+        System.arraycopy(first.ids, 0, ids, 0, n1);
+        System.arraycopy(second.ids, 0, ids, n1, n2);
         System.arraycopy(first.isDiploid, 0, isDiploid, 0, n1);
         System.arraycopy(second.isDiploid, 0, isDiploid, n1, n2);
-        return new Samples(idIndices, isDiploid);
-    }
-
-    /**
-     * Returns an array mapping sample identifier indices to sample indices.
-     * Indices for sample identifiers not present in this list of samples
-     * are mapped to {@code -1}.
-     * @return an array mapping sample identifier indices to sample indices
-     */
-    public int[] idIndexToIndex() {
-        int[] idIndexToIndex = new int[sampleIds.size()];
-        Arrays.fill(idIndexToIndex, -1);
-        for (int j=0; j<idIndices.length; ++j) {
-            int idIndex = idIndices[j];
-            assert idIndexToIndex[idIndex] == -1; // no duplicate sample IDs
-            idIndexToIndex[idIndex] = j;
-        }
-        return idIndexToIndex;
-    }
-
-    /**
-     * Constructs and returns a {@code Samples} instance
-     * corresponding to the specified list of sample identifiers.
-     * @param ids an array of sample identifiers
-     * @param isDiploid a boolean array whose {@code k}-th value is {@code true}
-     * if the {@code k}-th sample is diploid, and is {@code false} if the
-     * {@code k}-th sample is haploid
-     * @return a {@code Samples} instance corresponding to the specified
-     * list of sample identifiers
-     *
-     * @throws IllegalArgumentException if
-     * {@code ids.length != isDiploid.length}
-     * @throws IllegalArgumentException if the specified array
-     * has two or more elements that are equal as strings
-     * @throws NullPointerException if {@code ids == null || isDiploid == null}
-     */
-    public static Samples fromIds(String[] ids, boolean[] isDiploid) {
-        return new Samples(sampleIds.getIndices(ids), isDiploid);
+        return new Samples(ids, isDiploid);
     }
 
     /**
@@ -144,8 +107,8 @@ public final class Samples {
     @Override
     public int hashCode() {
         int hash = 59;
-        hash += 31*Arrays.hashCode(this.isDiploid);
-        hash += 31*Arrays.hashCode(this.idIndices);
+        hash += 29*Arrays.hashCode(this.isDiploid);
+        hash += 29*Arrays.hashCode(this.ids);
         return hash;
     }
 
@@ -171,20 +134,7 @@ public final class Samples {
         if (Arrays.equals(this.isDiploid, other.isDiploid)==false) {
             return false;
         }
-        return Arrays.equals(this.idIndices, other.idIndices);
-    }
-
-    /**
-     * Returns the sample identifier index corresponding to the sample
-     * with the specified index in this list of samples.
-     * @param index a sample index
-     * @return the sample identifier index corresponding to the sample
-     * with the specified index in this list of samples
-     * @throws IndexOutOfBoundsException if
-     * {@code index < 0 || index >= this.size()}
-     */
-    public int idIndex(int index) {
-        return idIndices[index];
+        return Arrays.equals(this.ids, other.ids);
     }
 
     /**
@@ -192,7 +142,7 @@ public final class Samples {
      * @return the number of samples in this list
      */
     public int size() {
-        return idIndices.length;
+        return ids.length;
     }
 
     /**
@@ -205,7 +155,7 @@ public final class Samples {
      * {@code index < 0 || index >= this.size()}
      */
     public String id(int index) {
-        return sampleIds.id(idIndices[index]);
+        return ids[index];
     }
 
     /**
@@ -216,7 +166,7 @@ public final class Samples {
      * @return this list of samples as an array of sample identifiers
      */
     public String[] ids() {
-        return sampleIds.ids(idIndices);
+        return ids.clone();
     }
 
      /**


=====================================
vcf/TargSlidingWindow.java
=====================================
@@ -84,7 +84,8 @@ public class TargSlidingWindow implements SlidingWindow {
     }
 
     private static SampleFileIt<GTRec> targIt(Par par) {
-        FileIt<String> it = InputIt.fromGzipFile(par.gt());
+        int nBufferedBlocks = par.nthreads() << 3;
+        FileIt<String> it = InputIt.fromBGZipFile(par.gt(), nBufferedBlocks);
         Filter<String> sFilter = FilterUtil.sampleFilter(par.excludesamples());
         Filter<Marker> mFilter = FilterUtil.markerFilter(par.excludemarkers());
         SampleFileIt<GTRec> targIt = VcfIt.create(it, sFilter, mFilter,


=====================================
vcf/VcfHeader.java
=====================================
@@ -195,7 +195,7 @@ public final class VcfHeader  {
             ids[j] = headerFields[SAMPLE_OFFSET + includedIndices[j]];
             restrictedIsDiploid[j] = isDiploid[includedIndices[j]];
         }
-        return Samples.fromIds(ids, restrictedIsDiploid);
+        return new Samples(ids, restrictedIsDiploid);
     }
 
     /**


=====================================
vcf/VcfIt.java
=====================================
@@ -20,7 +20,7 @@ package vcf;
 
 import blbutil.FileIt;
 import blbutil.Filter;
-import blbutil.SampleFileIt;
+import blbutil.VcfFileIt;
 import java.io.File;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
@@ -46,7 +46,7 @@ import java.util.stream.IntStream;
  *
  * @author Brian L. Browning {@code <browning at uw.edu>}
  */
-public class VcfIt<E extends GTRec> implements SampleFileIt<E> {
+public class VcfIt<E extends GTRec> implements VcfFileIt<E> {
 
     private static final float DEFAULT_MAX_LR = Float.MAX_VALUE;
 
@@ -327,6 +327,11 @@ public class VcfIt<E extends GTRec> implements SampleFileIt<E> {
         return vcfHeader.samples();
     }
 
+    @Override
+    public VcfHeader vcfHeader() {
+        return vcfHeader;
+    }
+
     @Override
     public String toString() {
         StringBuilder sb = new StringBuilder(80);


=====================================
vcf/VcfWriter.java
=====================================
@@ -44,6 +44,8 @@ public final class VcfWriter {
             + "estimated REF dose [P(RA) + 2*P(RR)] and true REF dose\">";
     private static final String IMP_INFO = "##INFO=<ID=IMP,Number=0,Type=Flag,"
             + "Description=\"Imputed marker\">";
+    private static final String END_INFO = "##INFO=<ID=END,Number=1,Type=Integer,"
+            + "Description=\"End position of the variant described in this record  (for use with symbolic alleles)\">";
 
     private static final String GT_FORMAT = "##FORMAT=<ID=GT,Number=1,Type=String,"
             + "Description=\"Genotype\">";
@@ -132,6 +134,7 @@ public final class VcfWriter {
             out.println(DR2_INFO);
             out.println(IMP_INFO);
         }
+        out.println(END_INFO);
         out.println(GT_FORMAT);
         if (ds) {
             out.println(DS_FORMAT);



View it on GitLab: https://salsa.debian.org/med-team/beagle/-/compare/c9568fa92ceda801780b470be0bbdc4126e7e245...914312b3fa341e7d8df05239a711b68733facae5

-- 
View it on GitLab: https://salsa.debian.org/med-team/beagle/-/compare/c9568fa92ceda801780b470be0bbdc4126e7e245...914312b3fa341e7d8df05239a711b68733facae5
You're receiving this email because of your account on salsa.debian.org.


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20241216/8389a3f0/attachment-0001.htm>


More information about the debian-med-commit mailing list