[med-svn] [Git][med-team/beagle][master] 7 commits: Fix clean target Closes: #1043854
Andreas Tille (@tille)
gitlab at salsa.debian.org
Mon Dec 16 15:23:46 GMT 2024
Andreas Tille pushed to branch master at Debian Med / beagle
Commits:
4c36d9b1 by Andreas Tille at 2024-12-16T15:52:35+01:00
Fix clean target Closes: #1043854
- - - - -
51b7c5c9 by Andreas Tille at 2024-12-16T15:53:55+01:00
New upstream version 241029
- - - - -
d8130812 by Andreas Tille at 2024-12-16T15:53:55+01:00
New upstream version
- - - - -
8149b6c5 by Andreas Tille at 2024-12-16T15:53:56+01:00
Update upstream source from tag 'upstream/241029'
Update to upstream version '241029'
with Debian dir 6ff5a3201ff432ad85e6def4213edd4547fa9dcc
- - - - -
7e09db76 by Andreas Tille at 2024-12-16T15:53:56+01:00
Standards-Version: 4.7.0 (routine-update)
- - - - -
61f30c04 by Andreas Tille at 2024-12-16T16:19:27+01:00
Fix lintian-overrides
- - - - -
914312b3 by Andreas Tille at 2024-12-16T16:21:47+01:00
Upload to unstable
- - - - -
25 changed files:
- beagleutil/SampleIds.java
- + blbutil/BGZipIt.java
- blbutil/InputIt.java
- blbutil/SampleFileIt.java
- + blbutil/VcfFileIt.java
- bref/Bref3.java
- bref/Bref3Reader.java
- bref/UnBref3.java
- debian/beagle-doc.lintian-overrides
- debian/changelog
- debian/clean
- debian/control
- main/Main.java
- main/Pedigree.java
- main/WindowWriter.java
- phase/FixedPhaseData.java
- phase/PhaseData.java
- phase/Stage2Baum.java
- vcf/MarkerMap.java
- vcf/RefTargSlidingWindow.java
- vcf/Samples.java
- vcf/TargSlidingWindow.java
- vcf/VcfHeader.java
- vcf/VcfIt.java
- vcf/VcfWriter.java
Changes:
=====================================
beagleutil/SampleIds.java
=====================================
@@ -19,6 +19,7 @@
package beagleutil;
import java.util.Arrays;
+import vcf.Samples;
/**
* <p>Class {@code SampleIds} is a singleton class that represents a
@@ -87,6 +88,33 @@ public final class SampleIds {
return indexer.getIndices(ids);
}
+ /**
+ * Returns an array mapping sample identifier indices to sample indices.
+ * Indices for sample identifiers not present in the specified list of
+ * samples are mapped to {@code -1}.
+ * @param samples a list of samples
+ * @return an array mapping sample identifier indices to sample indices
+ * @throws IllegalArgumentException if two sample identifiers are the
+ * same string
+ * @throws NullPointerException if {@code samples == null}
+ */
+ public int[] idIndexToIndex(Samples samples) {
+ String[] ids = samples.ids();
+ int[] idIndex = new int[ids.length];
+ for (int j=0; j<ids.length; ++j) {
+ idIndex[j] = sampleIds.getIndex(ids[j]);
+ }
+ int[] idIndexToIndex = new int[sampleIds.size()];
+ Arrays.fill(idIndexToIndex, -1); // -1 marks identifiers absent from samples
+ for (int j=0; j<ids.length; ++j) {
+ if (idIndexToIndex[idIndex[j]] != -1) { // identifier was already mapped
+ throw new IllegalArgumentException("Duplicate sample: " + ids[j]);
+ }
+ idIndexToIndex[idIndex[j]] = j;
+ }
+ return idIndexToIndex;
+ }
+
/**
* Returns the index of the specified sampled identifier, or returns
* {@code -1} if the specified sample identifier is not indexed.
=====================================
blbutil/BGZipIt.java
=====================================
@@ -0,0 +1,371 @@
+/*
+ * Copyright (C) 2014-2021 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import ints.IntList;
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * <p>Class {@code BGZipIt} is a {@code blbutil.FileIt<String>} whose
+ * {@code next()} method returns lines of a bgzip-compressed file.
+ * </p>
+ * <p>The GZIP file format specification is described in
+ * <a href="https://www.ietf.org/rfc/rfc1952.txt">RFC 1952</a>
+ * and the BGZIP file format specification is described in the
+ * <a href="https://samtools.github.io/hts-specs/SAMv1.pdf">
+ * Sequence Alignment/Map Format Specification</a>
+ * </p>
+ * <p>Instances of class {@code BGZipIt} are not thread safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class BGZipIt implements FileIt<String> {
+
+ private static final byte CR = 0x0D;
+ private static final byte LF = 0x0A;
+ private static final byte[] EOF = new byte[0];
+
+ private static final byte GZIP_ID1 = 31;
+ private static final byte GZIP_ID2 = (byte) 139;
+ private static final byte GZIP_CM = 8;
+ private static final byte GZIP_FLG = (1 << 2); // only required set bit
+ private static final byte GZIP_XLEN1 = 6;
+ private static final byte GZIP_XLEN2 = 0;
+ private static final byte BGZIP_SI1 = 66;
+ private static final byte BGZIP_SI2 = 67;
+ private static final byte BGZIP_SLEN1 = 2;
+ private static final byte BGZIP_SLEN2 = 0;
+
+ private final InputStream is;
+ private final File source;
+ private final int nBufferedBlocks;
+ private final ArrayDeque<String> lines;
+ private byte[] leftOverBytes;
+
+ /**
+ * Constructs a new {@code BGZipIt} instance from the specified data
+ * @param is an input stream that reads from a gzip-compressed
+ * VCF file
+ * @param nBufferedBlocks the number of buffered gzip blocks
+ * @throws IllegalArgumentException if {@code nBufferedBlocks < 1}
+ * @throws NullPointerException if {@code is == null}
+ */
+ public BGZipIt(InputStream is, int nBufferedBlocks) {
+ this(is, nBufferedBlocks, null);
+ }
+
+ /**
+ * Constructs a new {@code BGZipIt} instance from the specified data
+ * @param is an input stream that reads gzip-compressed
+ * VCF data
+ * @param nBufferedBlocks the number of buffered gzip blocks
+ * @param source the gzip-compressed VCF file that is read
+ * @throws IllegalArgumentException if {@code nBufferedBlocks < 1}
+ * @throws NullPointerException if {@code is == null}
+ */
+ public BGZipIt(InputStream is, int nBufferedBlocks, File source) {
+ if (nBufferedBlocks < 1) {
+ throw new IllegalArgumentException(String.valueOf(nBufferedBlocks));
+ }
+ this.is = is;
+ this.source = source;
+ this.nBufferedBlocks = nBufferedBlocks;
+ this.leftOverBytes = new byte[0];
+ this.lines = new ArrayDeque<>();
+ fillBuffer();
+ }
+
+ @Override
+ public void close() {
+ try {
+ is.close();
+ } catch (IOException ex) {
+ Utilities.exit(ex);
+ }
+ }
+
+ /**
+ * Returns {@code true} if the iteration has more elements.
+ * @return {@code true} if the iteration has more elements
+ */
+ @Override
+ public boolean hasNext() {
+ return lines.isEmpty()==false;
+ }
+
+ /**
+ * Returns the next line of the VCF file. End of line characters are
+ * not included in the returned line.
+ * @return the next line of the VCF file
+ * @throws NoSuchElementException if the VCF file has no more lines
+ */
+ @Override
+ public String next() {
+ String s = lines.remove();
+ if (lines.isEmpty()) {
+ fillBuffer();
+ }
+ return s;
+ }
+
+ @Override
+ public File file() {
+ return source;
+ }
+
+ private void fillBuffer() {
+ byte[][] blocks = readAndInflateBlocks(is, leftOverBytes, nBufferedBlocks);
+ if (blocks.length>0) {
+ int[] eolIndices = IntStream.range(0, blocks.length)
+ .parallel()
+ .flatMap(j -> eolIndices(j, blocks[j]))
+ .toArray();
+ leftOverBytes = leftOverBytes(blocks, eolIndices);
+ addToLines(blocks, eolIndices, lines);
+ if (lines.isEmpty() && leftOverBytes.length>0) {
+ fillBuffer();
+ }
+ }
+ }
+
+ private static IntStream eolIndices(int block, byte[] bytes) {
+ IntList il = new IntList();
+ for (int b=0; b<bytes.length; ++b) {
+ if (bytes[b]==LF) {
+ il.add(block);
+ il.add(b);
+ }
+ }
+ return il.stream();
+ }
+
+ private static byte[] leftOverBytes(byte[][] blocks, int[] eolIndices) {
+ if (blocks.length==0) {
+ return new byte[0];
+ }
+ else {
+ int lastBlock = blocks.length-1;
+ int endIndex = blocks[lastBlock].length;
+ if (eolIndices.length==0) {
+ return merge(blocks, 0, 0, lastBlock, endIndex);
+ }
+ else {
+ int startBlock = eolIndices[eolIndices.length-2];
+ int startIndex = eolIndices[eolIndices.length-1] + 1;
+ return merge(blocks, startBlock, startIndex, lastBlock, endIndex);
+ }
+ }
+ }
+
+ private static void addToLines(byte[][] blocks, int[] eolIndices,
+ ArrayDeque<String> lines) {
+ List<String> tmpList = IntStream.range(0, eolIndices.length)
+ .parallel()
+ .filter(j -> (j & 0b1)==0)
+ .mapToObj(j -> toString(blocks, eolIndices, j))
+ .collect(Collectors.toList());
+ lines.addAll(tmpList);
+ }
+
+ private static String toString(byte[][] blocks, int[] eolIndices, int index) {
+ int block = eolIndices[index];
+ int endIndex = eolIndices[index + 1];
+ byte[] merged;
+ if (index==0) {
+ merged = merge(blocks, 0, 0, block, endIndex);
+ }
+ else {
+ assert index>=2;
+ int startBlock = eolIndices[index-2];
+ int startIndex = eolIndices[index-1] + 1;
+ merged = merge(blocks, startBlock, startIndex, block, endIndex);
+ }
+ int lengthM1 = merged.length-1;
+ if (lengthM1>=0 && merged[lengthM1]==CR) {
+ // Correct for CR LF line ending on Windows systems
+ return new String(merged, 0, lengthM1, StandardCharsets.UTF_8);
+ }
+ else {
+ return new String(merged, StandardCharsets.UTF_8);
+ }
+ }
+
+ private static byte[] merge(byte[][] blocks, int startBlock, int startIndex,
+ int lastBlock, int endIndex) {
+ // merge correctly handles startIndex == blocks[startBlock].length
+ if (lastBlock==startBlock) {
+ return Arrays.copyOfRange(blocks[startBlock], startIndex, endIndex);
+ }
+ else {
+ int size = 0;
+ for (int j=startBlock; j<lastBlock; ++j) {
+ size += blocks[j].length;
+ }
+ size -= startIndex;
+ size += endIndex;
+ byte[] merged = new byte[size];
+ int len = (blocks[startBlock].length - startIndex);
+ System.arraycopy(blocks[startBlock], startIndex, merged, 0, len);
+ for (int j=(startBlock + 1); j<lastBlock; ++j) {
+ System.arraycopy(blocks[j], 0, merged, len, blocks[j].length);
+ len += blocks[j].length;
+ }
+ System.arraycopy(blocks[lastBlock], 0, merged, len, endIndex);
+ assert merged.length == (len + endIndex);
+ return merged;
+ }
+ }
+
+ private static byte[][] readAndInflateBlocks(InputStream is, byte[] initialBytes, int nBlocks) {
+ ArrayList<byte[]> compressedBlocks = new ArrayList<>(nBlocks);
+ for (int j=0; j<nBlocks; ++j) {
+ byte[] ba = readCompressedBlock(is);
+ if (ba.length>0) {
+ compressedBlocks.add(ba);
+ }
+ else if (ba==EOF) {
+ break;
+ }
+ }
+ byte[][] blocks = compressedBlocks.stream()
+ .parallel()
+ .map(ba -> inflateBlock(ba))
+ .toArray(byte[][]::new);
+ if (blocks.length>0 && initialBytes.length>0) {
+ int newLength = initialBytes.length + blocks[0].length;
+ byte[] prependedBlock = Arrays.copyOf(initialBytes, newLength);
+ System.arraycopy(blocks[0], 0, prependedBlock, initialBytes.length,
+ blocks[0].length);
+ blocks[0] = prependedBlock;
+ }
+ return blocks;
+ }
+
+ private static byte[] readCompressedBlock(InputStream is) {
+ byte[] ba = new byte[18];
+ try {
+ int bytesRead = 0;
+ int offset = 0;
+ while (offset<ba.length
+ && (bytesRead = is.read(ba, offset, ba.length - offset)) != -1) {
+ offset += bytesRead;
+ }
+ if (offset==0) {
+ return EOF;
+ }
+ if (offset==ba.length && isStartOfBgzipBlock(ba)) {
+ int blockSize = ((ba[16] & 0xff) | ((ba[17] & 0xff) << 8)) + 1;
+ ba = Arrays.copyOf(ba, blockSize);
+ while (offset<ba.length
+ && (bytesRead = is.read(ba, offset, ba.length - offset)) != -1) {
+ offset += bytesRead;
+ }
+ if (offset < ba.length) {
+ Utilities.exit("Premature end of BGZIP block");
+ }
+ }
+ else {
+ Utilities.exit("Invalid BGZIP block header");
+ }
+ }
+ catch (IOException e) {
+ Utilities.exit(e);
+ }
+ return ba;
+ }
+
+ private static byte[] inflateBlock(byte[] ba) {
+ ByteArrayOutputStream os = new ByteArrayOutputStream(ba.length);
+ byte[] buffer = new byte[1<<13];
+ try (ByteArrayInputStream bais = new ByteArrayInputStream(ba);
+ GZIPInputStream gzis = new GZIPInputStream(bais)) {
+ int bytesRead;
+ while ((bytesRead = gzis.read(buffer)) != -1) {
+ os.write(buffer, 0, bytesRead);
+ }
+ }
+ catch (IOException e) {
+ Utilities.exit(e);
+ }
+ return os.toByteArray();
+ }
+
+ /**
+ * Returns {@code true} if the first 16 bytes of the specified input stream
+ * are a gzip header that includes a 6 byte extra field containing
+ * the block size as described in the bgzip specification, and returns
+ * {@code false} otherwise. The method sets a mark before reading
+ * the initial bytes from the stream, and resets the stream to the
+ * mark position before returning.
+ * @param bis a buffered input stream
+ * @return {@code true} if the first 16 bytes of the specified input stream
+ * are a gzip header that includes a 6 byte extra field containing
+ * the block size as described in the bgzip specification
+ */
+ public static boolean beginsWithBgzipBlock(BufferedInputStream bis) {
+ assert bis.markSupported();
+ int maxBytes = 16;
+ int bytesRead = 0;
+ int offset = 0;
+ byte[] ba = new byte[maxBytes];
+ bis.mark(maxBytes);
+ try {
+ while (offset<ba.length
+ && (bytesRead = bis.read(ba, offset, ba.length - offset)) != -1) {
+ offset += bytesRead;
+ }
+ bis.reset();
+ }
+ catch(IOException ex) {
+ Utilities.exit(ex);
+ }
+ return offset==ba.length && isStartOfBgzipBlock(ba);
+ }
+
+ private static boolean isStartOfBgzipBlock(byte[] buffer) {
+ // isStartOfBgzipBlock() returns false if additional non-bgzip
+ // subfields are present
+ return (buffer.length >= 16
+ && buffer[0] == GZIP_ID1)
+ && (buffer[1] == GZIP_ID2)
+ && (buffer[2] == GZIP_CM)
+ && ((buffer[3] & GZIP_FLG)!=0)
+ && (buffer[10] == GZIP_XLEN1)
+ && (buffer[11] == GZIP_XLEN2)
+ && (buffer[12] == BGZIP_SI1)
+ && (buffer[13] == BGZIP_SI2)
+ && (buffer[14] == BGZIP_SLEN1)
+ && (buffer[15] == BGZIP_SLEN2);
+ }
+}
=====================================
blbutil/InputIt.java
=====================================
@@ -18,6 +18,7 @@
*/
package blbutil;
+import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
@@ -37,7 +38,15 @@ import java.util.zip.GZIPInputStream;
* is trapped, an error message is written to standard out, and the
* Java Virtual Machine is terminated.
* </p>
- * Instances of class {@code InputIt} are not thread-safe.
+ * <p>The GZIP file format specification is described in
+ * <a href="https://www.ietf.org/rfc/rfc1952.txt">RFC 1952</a>
+ * and the BGZIP file format specification is described in the
+ * <a href="https://samtools.github.io/hts-specs/SAMv1.pdf">
+ * Sequence Alignment/Map Format Specification</a>
+ * </p>
+ *
+ * <p>Instances of class {@code InputIt} are not thread-safe.
+ * </p>
*
* @author Brian L. Browning {@code <browning at uw.edu>}
*/
@@ -52,7 +61,7 @@ public class InputIt implements FileIt<String> {
* size that will iterate through lines of the specified input stream.
*
* @param is input stream of text data
- *
+ * @param file the file that is the source of the input stream
*/
private InputIt(InputStream is, File file) {
BufferedReader br = null;
@@ -73,6 +82,7 @@ public class InputIt implements FileIt<String> {
* that will iterate through the lines of the specified input stream.
*
* @param is input stream of text data
+ * @param file the file that is the source of the input stream
* @param bufferSize the buffer size in bytes
*
* @throws IllegalArgumentException if {@code bufferSize < 0}
@@ -175,38 +185,30 @@ public class InputIt implements FileIt<String> {
}
/**
- * Constructs and returns an {@code InputIt} instance with the specified
- * buffer size that iterates through lines of text read from standard input.
- *
- * @param bufferSize the buffer size in bytes
- *
- * @return a new {@code InputIt} instance that iterates
- * through lines of text read from standard input
- *
- * @throws IllegalArgumentException if {@code bufferSize < 0}
- */
- public static InputIt fromStdIn(int bufferSize) {
- File file = null;
- return new InputIt(System.in, file, bufferSize);
- }
-
- /**
- * Constructs and returns an {@code InputIt} instance with the default
- * buffer size that iterates through lines of the specified compressed
- * or uncompressed text file. If the filename ends in ".gz", the file
- * must be either BGZIP-compressed or GZIP-compressed.
- *
+ * Constructs and returns a buffered {@code FileIt<String>} instance
+ * that iterates through lines of the specified compressed or
+ * uncompressed text file. If the filename ends in ".gz" or ".bgz", the
+ * file must be GZIP-compressed.
* @param file a compressed or uncompressed text file
- * @return a new {@code InputIt} instance that iterates
- * through lines of the specified text file
+ * @param nBufferedBlocks the number of buffered GZIP blocks if the
+ * specified file is bgzip-compressed
+ * @return a {@code FileIt<String>} instance that iterates through the
+ * lines of the specified file
*
* @throws NullPointerException if {@code file == null}
*/
- public static InputIt fromGzipFile(File file) {
+ public static FileIt<String> fromBGZipFile(File file, int nBufferedBlocks) {
+ String filename = file.getName();
try {
InputStream is = new FileInputStream(file);
- if (file.getName().endsWith(".gz")) {
- return new InputIt(new GZIPInputStream(is), file);
+ BufferedInputStream bis = new BufferedInputStream(is);
+ if (filename.endsWith(".gz") || filename.endsWith(".bgz")) {
+ if (BGZipIt.beginsWithBgzipBlock(bis)) {
+ return new BGZipIt(bis, nBufferedBlocks, file);
+ }
+ else {
+ return new InputIt(new GZIPInputStream(bis), file);
+ }
}
else {
return new InputIt(is, file);
@@ -223,24 +225,22 @@ public class InputIt implements FileIt<String> {
}
/**
- * Constructs and returns an {@code InputIt} instance with the specified
- * buffer size that iterates through lines of the specified compressed
- * or uncompressed text file. If the filename ends in ".gz", the file must
- * be either BGZIP-compressed or GZIP-compressed.
+ * Constructs and returns a buffered {@code InputIt} instance that
+ * iterates through lines of the specified compressed or uncompressed
+ * text file. If the filename ends in ".gz", the file must be
+ * GZIP-compressed.
*
* @param file a compressed or uncompressed text file
- * @param bufferSize the buffer size in bytes
- * @return a new {@code InputIt} instance that iterates
+ * @return a buffered {@code InputIt} instance that iterates
* through lines of the specified text file
- *
- * @throws IllegalArgumentException if {@code bufferSize < 0}
* @throws NullPointerException if {@code file == null}
*/
- public static InputIt fromGzipFile(File file, int bufferSize) {
+ public static InputIt fromGzipFile(File file) {
+ String filename = file.getName();
try {
InputStream is = new FileInputStream(file);
- if (file.getName().endsWith(".gz")) {
- return new InputIt(new GZIPInputStream(is), file, bufferSize);
+ if (filename.endsWith(".gz") || filename.endsWith(".bgz")) {
+ return new InputIt(new GZIPInputStream(is), file);
}
else {
return new InputIt(is, file);
@@ -256,14 +256,13 @@ public class InputIt implements FileIt<String> {
return null;
}
- /**
- * Constructs and returns an {@code InputIt} instance with the default
- * buffer size that iterates through lines of the specified text file.
+ /**
+ * Constructs and returns a buffered {@code InputIt} instance
+ * that iterates through lines of the specified text file.
*
* @param file a text file
- * @return a new {@code InputIt} instance that iterates through
+ * @return a buffered {@code InputIt} instance that iterates through
* lines of the specified text file
- *
* @throws NullPointerException if {@code filename == null}
*/
public static InputIt fromTextFile(File file) {
@@ -276,27 +275,4 @@ public class InputIt implements FileIt<String> {
assert false;
return null;
}
-
- /**
- * Constructs and returns an {@code InputIt} instance with the specified
- * buffer size that iterates through lines of the specified text file.
- *
- * @param file a text file
- * @param bufferSize the buffer size in bytes
- * @return a new {@code InputIt} instance that iterates through
- * lines of the specified text file
- *
- * @throws IllegalArgumentException if {@code bufferSize < 0}
- * @throws NullPointerException if {@code filename == null}
- */
- public static InputIt fromTextFile(File file, int bufferSize) {
- try {
- return new InputIt(new FileInputStream(file), file, bufferSize);
- }
- catch(FileNotFoundException e) {
- Utilities.exit(e, "Error opening " + file);
- }
- assert false;
- return null;
- }
}
=====================================
blbutil/SampleFileIt.java
=====================================
@@ -21,7 +21,7 @@ package blbutil;
import vcf.Samples;
/**
- * <p>An iterator for records in a file. Each records contains
+ * <p>An iterator for records in a file. Each record contains
* data for the same set of samples.
*</p>
* Instances of class {@code SampleFileIt} are not thread-safe.
=====================================
blbutil/VcfFileIt.java
=====================================
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2014-2021 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import vcf.VcfHeader;
+
+/**
+ * <p>An iterator for records in a VCF file. Each record contains
+ * data for the same set of samples.
+ *</p>
+ * Instances of class {@code VcfFileIt} are not thread-safe.
+ *
+ * @param <E> the type of the elements returned by this iterator's
+ * {@code next()} method.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface VcfFileIt<E> extends SampleFileIt<E> {
+
+ /**
+ * Returns the VCF meta-information lines and header line
+ * @return the VCF meta-information lines and header line
+ */
+ VcfHeader vcfHeader();
+}
=====================================
bref/Bref3.java
=====================================
@@ -19,7 +19,6 @@
package bref;
import blbutil.Const;
-import blbutil.FileIt;
import blbutil.InputIt;
import blbutil.SampleFileIt;
import blbutil.Utilities;
@@ -29,7 +28,7 @@ import vcf.RefIt;
import vcf.Samples;
/**
- * <p>Class {@code Bref3} converts files in VCF format into
+ * <p>Class {@code Bref3} converts files in VCF format into
* bref version 3 format.
* </p>
* <p>Instances of class {@code Bref3} are not thread-safe.</p>
@@ -38,7 +37,7 @@ import vcf.Samples;
*/
public class Bref3 {
- private static final String program = "bref3.22Jul22.46e.jar";
+ private static final String PROGRAM = "bref3.29Oct24.c8e.jar";
/**
* The {@code main()} method is the entry point to the bref program.
@@ -105,26 +104,26 @@ public class Bref3 {
}
private static SampleFileIt<RefGTRec> refIt(String fileName) {
- FileIt<String> it = null;
if (fileName==null) {
- it = InputIt.fromStdIn();
- }
- else if (fileName.endsWith(".gz")) {
- it = InputIt.fromGzipFile(new File(fileName));
+ return RefIt.create(InputIt.fromStdIn());
}
else {
- it = InputIt.fromTextFile(new File(fileName));
+ File file = new File(fileName);
+ int nCores = Runtime.getRuntime().availableProcessors();
+ System.setProperty("java.util.concurrent.ForkJoinPool.common.parallelism",
+ String.valueOf(nCores));
+ int nBufferedBlocks = nCores << 2;
+ return RefIt.create(InputIt.fromBGZipFile(file, nBufferedBlocks));
}
- return RefIt.create(it);
}
private static BrefWriter brefOut(Samples samples, int maxNSeq) {
File outFile = null; // write to standard output
if (maxNSeq<0) {
- return new AsIsBref3Writer(program, samples, outFile);
+ return new AsIsBref3Writer(PROGRAM, samples, outFile);
}
else {
- return new CompressBref3Writer(program, samples, maxNSeq, outFile);
+ return new CompressBref3Writer(PROGRAM, samples, maxNSeq, outFile);
}
}
@@ -133,24 +132,24 @@ public class Bref3 {
sb.append("usage:");
sb.append(Const.nl);
sb.append(" java -jar ");
- sb.append(program);
+ sb.append(PROGRAM);
sb.append(" help");
sb.append(Const.nl);
sb.append(Const.nl);
sb.append(" java -jar ");
- sb.append(program);
+ sb.append(PROGRAM);
sb.append(" [vcf] <nseq> > [bref3]");
sb.append(Const.nl);
sb.append(Const.nl);
sb.append(" cat [vcf] | java -jar ");
- sb.append(program);
+ sb.append(PROGRAM);
sb.append(" <nseq> > [bref3]");
sb.append(Const.nl);
sb.append(Const.nl);
sb.append("where");
sb.append(Const.nl);
sb.append(" [bref3] = the output bref3 file");
- sb.append(Const.nl);
+ sb.append(Const.nl);
sb.append(" [vcf] = A VCF file with phased, non-missing genotype data. If the");
sb.append(Const.nl);
sb.append(" file is gzip-compressed, its filename must end in \".gz\"");
=====================================
bref/Bref3Reader.java
=====================================
@@ -85,7 +85,7 @@ public final class Bref3Reader {
boolean[] isDiploid = new boolean[sampleIds.length];
Arrays.fill(isDiploid, true);
this.program = programString;
- this.samples = Samples.fromIds(sampleIds, isDiploid);
+ this.samples = new Samples(sampleIds, isDiploid);
this.markerFilter = markerFilter;
this.nHaps = 2*samples.size();
this.byteBuffer = new byte[2*nHaps];
=====================================
bref/UnBref3.java
=====================================
@@ -37,7 +37,7 @@ import vcf.VcfWriter;
*/
public class UnBref3 {
- private static final String program = "unbref3.22Jul22.46e.jar";
+ private static final String program = "unbref3.29Oct24.c8e.jar";
/**
* The {@code main()} method is the entry point to the bref program.
=====================================
debian/beagle-doc.lintian-overrides
=====================================
@@ -1,3 +1,4 @@
# jQuery from libjs-jquery is not compatible
-beagle-doc: embedded-javascript-library * please use libjs-jquery
-beagle-doc: embedded-javascript-library * please use libjs-jquery-ui
+beagle-doc: embedded-javascript-library please use libjs-jquery [usr/share/doc/beagle/api/script-dir/jquery-3.7.1.min.js]
+beagle-doc: embedded-javascript-library please use libjs-jquery-ui [usr/share/doc/beagle/api/script-dir/jquery-ui.min.css]
+beagle-doc: embedded-javascript-library please use libjs-jquery-ui [usr/share/doc/beagle/api/script-dir/jquery-ui.min.js]
=====================================
debian/changelog
=====================================
@@ -1,3 +1,14 @@
+beagle (241029-1) unstable; urgency=medium
+
+ * Team upload.
+ * New upstream version
+ * Fix clean target
+ Closes: #1043854
+ * Standards-Version: 4.7.0 (routine-update)
+ * Fix lintian-overrides
+
+ -- Andreas Tille <tille at debian.org> Mon, 16 Dec 2024 16:19:36 +0100
+
beagle (220722-1) unstable; urgency=medium
* Team upload.
=====================================
debian/clean
=====================================
@@ -1 +1,2 @@
SRC/
+b*.jar
=====================================
debian/control
=====================================
@@ -7,7 +7,7 @@ Build-Depends: debhelper-compat (= 13),
javahelper
Build-Depends-Indep: default-jdk,
libhtsjdk-java
-Standards-Version: 4.6.1
+Standards-Version: 4.7.0
Vcs-Browser: https://salsa.debian.org/med-team/beagle
Vcs-Git: https://salsa.debian.org/med-team/beagle.git
Homepage: https://faculty.washington.edu/browning/beagle/beagle.html
=====================================
main/Main.java
=====================================
@@ -58,12 +58,12 @@ public class Main {
/**
* The program name and commit version.
*/
- public static final String PROGRAM = "beagle.22Jul22.46e.jar";
+ public static final String PROGRAM = "beagle.29Oct24.c8e.jar";
/**
* The command to invoke the program.
*/
- public static final String COMMAND = "java -jar beagle.22Jul22.46e.jar";
+ public static final String COMMAND = "java -jar beagle.29Oct24.c8e.jar";
/**
* The copyright string.
=====================================
main/Pedigree.java
=====================================
@@ -99,7 +99,7 @@ public class Pedigree {
private static void readPedFile(Samples samples, File pedFile,
int[] fathers, int[] mothers, int[][] offspring) {
if (pedFile != null) {
- int[] idIndexToIndex = samples.idIndexToIndex();
+ int[] idIndexToIndex = SampleIds.instance().idIndexToIndex(samples);
boolean[] processed = new boolean[samples.size()];
IntList[] children = new IntList[samples.size()];
try (FileIt<String> pedIt=InputIt.fromGzipFile(pedFile)) {
=====================================
main/WindowWriter.java
=====================================
@@ -122,7 +122,7 @@ public class WindowWriter implements Closeable {
* @throws IllegalArgumentException if
* {@code stateProbs.size() != impData.nTargHaps()}
* @throws IndexOutOfBoundsException if
- * {@code refStart < 0 || refEnd > impData.refGT().nMarkers()}
+ * {@code start < 0 || end > impData.refGT().nMarkers()}
* @throws NullPointerException if {@code impData==null || stateProbs==null}
* @throws NullPointerException if any element of {@code stateProbs} is
* {@code null}
=====================================
phase/FixedPhaseData.java
=====================================
@@ -60,7 +60,7 @@ public class FixedPhaseData {
private final MarkerMap map;
private final Steps stage1Steps;
private final GT targGT;
- private final Optional<RefGT> refGT;
+ private final Optional<RefGT> restrictedRefGT;
private final int overlap;
private final MarkerMap stage1Map;
@@ -117,7 +117,7 @@ public class FixedPhaseData {
this.map = markerMap(window.genMap(), window.targGT().markers());
this.targGT = phasedOverlap==null ? window.targGT() :
new SplicedGT(phasedOverlap, window.targGT());
- this.refGT = window.restrictRefGT();
+ this.restrictedRefGT = window.restrictRefGT();
this.overlap = phasedOverlap==null ? 0 : phasedOverlap.nMarkers();
this.nHaps = nHaps(window);
@@ -132,7 +132,7 @@ public class FixedPhaseData {
this.ibsStep = par.step_scale()*medianDiff(stage1Map.genPos());
this.stage1Steps = new Steps(stage1Map, ibsStep);
this.stage1TargGT = targGT;
- this.stage1RefGT = refGT;
+ this.stage1RefGT = restrictedRefGT;
this.stage1XRefGT = stage1RefGT.isPresent()
? Optional.of(XRefGT.fromPhasedGT(stage1RefGT.get(), par.nthreads()))
: Optional.empty();
@@ -152,7 +152,7 @@ public class FixedPhaseData {
this.ibsStep = par.step_scale()*medianDiff(stage1Map.genPos());
this.stage1Steps = new Steps(stage1Map, ibsStep);
this.stage1TargGT = targGT.restrict(hiFreqMarkers, hiFreqInd);
- this.stage1RefGT = restrict(refGT, hiFreqMarkers, hiFreqInd);
+ this.stage1RefGT = restrict(restrictedRefGT, hiFreqMarkers, hiFreqInd);
this.stage1XRefGT = stage1RefGT.isPresent()
? Optional.of(XRefGT.fromPhasedGT(stage1RefGT.get(), par.nthreads()))
: Optional.empty();
@@ -370,8 +370,8 @@ public class FixedPhaseData {
* Returns the optional phased, nonmissing reference genotypes.
* @return the optional phased, nonmissing reference genotypes
*/
- public Optional<RefGT> refGT() {
- return refGT;
+ public Optional<RefGT> restrictedRefGT() {
+ return restrictedRefGT;
}
/**
=====================================
phase/PhaseData.java
=====================================
@@ -137,11 +137,11 @@ public class PhaseData {
* Sets the allele mismatch probability to the specified value.
* @param pMismatch the allele mismatch probability
* @throws IllegalArgumentException if
- * {@code pMismatch <= 0.0 || pMismatch >= 1.0
+ * {@code pMismatch < 0.0 || pMismatch > 1.0
* || Float.isFinite(pMismatch) == false}
*/
public void updatePMismatch(float pMismatch) {
- if (pMismatch <= 0.0 || pMismatch >= 1.0
+ if (pMismatch < 0.0 || pMismatch > 1.0
|| Float.isFinite(pMismatch)==false) {
throw new IllegalArgumentException(String.valueOf(pMismatch));
}
=====================================
phase/Stage2Baum.java
=====================================
@@ -68,7 +68,7 @@ public class Stage2Baum {
this.probs = new float[2][nStage1Markers][stateProbs.maxStates()];
this.unphTargGT = fpd.targGT();
- this.refGT = fpd.refGT();
+ this.refGT = fpd.restrictedRefGT();
this.nTargHaps = fpd.targGT().nHaps();
this.stage2Haps = stage2Haps;
this.stage1To2 = fpd.stage1To2();
=====================================
vcf/MarkerMap.java
=====================================
@@ -97,8 +97,10 @@ public class MarkerMap {
String s = "Window has only one position: CHROM=" + a.chrom() + " POS=" + a.pos();
throw new IllegalArgumentException(s);
}
- return Math.abs(genMap.genPos(b)-genMap.genPos(a))
+ double meanSingleBaseDist = Math.abs(genMap.genPos(b) - genMap.genPos(a))
/ Math.abs(b.pos()-a.pos());
+ // require meanSingleBaseDist to be >= 0.01 * mean human single base genetic distance
+ return Math.max(meanSingleBaseDist, 1e-8);
}
private MarkerMap(double[] gPos) {
=====================================
vcf/RefTargSlidingWindow.java
=====================================
@@ -18,6 +18,7 @@
*/
package vcf;
+import beagleutil.ChromIds;
import blbutil.Const;
import blbutil.FileIt;
import blbutil.Filter;
@@ -90,7 +91,8 @@ public class RefTargSlidingWindow implements SlidingWindow {
private static SampleFileIt<GTRec> targIt(Par par, Filter<String> sFilter,
Filter<Marker> mFilter) {
- FileIt<String> it = InputIt.fromGzipFile(par.gt());
+ int nBufferedBlocks = par.nthreads() << 3;
+ FileIt<String> it = InputIt.fromBGZipFile(par.gt(), nBufferedBlocks);
SampleFileIt<GTRec> targIt = VcfIt.create(it, sFilter, mFilter,
VcfIt.TO_LOWMEM_GT_REC);
if (par.chromInt() != null) {
@@ -100,7 +102,7 @@ public class RefTargSlidingWindow implements SlidingWindow {
}
private static SampleFileIt<RefGTRec> refIt(Par par,
- Filter<String> sFilter, Filter<Marker> mFilter) {
+ Filter<String> sampleFilter, Filter<Marker> markerFilter) {
SampleFileIt<RefGTRec> refIt;
String filename = par.ref().toString();
if (filename.endsWith(".bref")) {
@@ -109,17 +111,20 @@ public class RefTargSlidingWindow implements SlidingWindow {
Utilities.exit(s);
}
if (filename.endsWith(".bref3")) {
- refIt = new Bref3It(par.ref(), mFilter);
+ refIt = new Bref3It(par.ref(), markerFilter);
} else {
if (filename.endsWith(".vcf") == false
- && filename.endsWith(".vcf.gz") == false) {
+ && filename.endsWith(".vcf.gz") == false
+ && filename.endsWith(".vcf.bgz") == false) {
System.err.println(Const.nl
- + "WARNING: unrecognized reference file type "
- + "(expected \".bref3\", \".vcf\", or \".vcf.gz\")"
+ + "ERROR: unrecognized reference filename extension: "
+ + Const.nl
+ + " Expected \".bref3\", \".vcf\", \".vcf.gz\", or \".vcf.bgz\""
+ Const.nl);
}
- FileIt<String> it = InputIt.fromGzipFile(par.ref());
- refIt = RefIt.create(it, sFilter, mFilter);
+ int nBufferedBlocks = par.nthreads() << 3;
+ FileIt<String> it = InputIt.fromBGZipFile(par.ref(), nBufferedBlocks);
+ refIt = RefIt.create(it, sampleFilter, markerFilter);
}
if (par.chromInt() != null) {
refIt = new IntervalVcfIt<>(refIt, par.chromInt());
@@ -286,7 +291,6 @@ public class RefTargSlidingWindow implements SlidingWindow {
nextRefRec = refIt.hasNext() ? refIt.next() : null;
}
nextTargRec = targIt.hasNext() ? targIt.next() : null;
-
}
if (impute) {
while (nextRefRec!=null
@@ -297,7 +301,7 @@ public class RefTargSlidingWindow implements SlidingWindow {
nextRefRec = refIt.hasNext() ? refIt.next() : null;
}
}
- return window(refOverlapEnd, windowIndex);
+ return window(refOverlapEnd, windowIndex, chromIndex, endPos);
}
private void resetLists() {
@@ -312,27 +316,48 @@ public class RefTargSlidingWindow implements SlidingWindow {
inTargOverlap.clear();
}
- private Window window(int refOverlapEnd, int windowIndex) {
- if (refRecs.isEmpty()) {
- String s = Const.nl
- + Const.nl + "Marker window contains no reference markers. Do the reference and"
- + Const.nl + "target VCF files contain the same chromosomes in the same order?"
- + Const.nl;
- throw new IllegalArgumentException(s);
+ private Window window(int refOverlapEnd, int windowIndex, int chromIndex,
+ int endPos) {
+ if (targRecs.isEmpty() || refRecs.isEmpty()) {
+ throw new IllegalArgumentException(
+ emptyWindowErrorMessage(chromIndex, endPos));
}
RefGT refGT = new RefGT(refRecs.toArray(new RefGTRec[0]));
BasicGT targGT = new BasicGT(targRecs.toArray(new GTRec[0]));
boolean lastWindow = (nextTargRec==null || nextRefRec==null);
- MarkerIndices markerIndices = markerIndices(refGT, lastWindow, refOverlapEnd);
+ MarkerIndices markerIndices = markerIndices(refGT, lastWindow,
+ refOverlapEnd, endPos);
return new Window(genMap, windowIndex, lastWindow,
markerIndices, refGT, targGT);
}
+ private String emptyWindowErrorMessage(int chromIndex, int endPos) {
+ assert refRecs.isEmpty() || targRecs.isEmpty();
+ if (refRecs.isEmpty()) {
+ return "The window ending at "
+ + ChromIds.instance().id(chromIndex) + ":" + endPos
+ + Const.nl + "contains no reference markers"
+ + Const.nl + "Do the reference and target VCF files contain the same"
+ + Const.nl + "chromosomes in the same order?"
+ + Const.nl;
+ }
+ else {
+ assert targRecs.isEmpty();
+ return "The reference and target VCF files contain no markers in common in the window: "
+ + Const.nl + ChromIds.instance().id(chromIndex)
+ + ":" + refRecs.get(0).marker().pos()
+ + "-" + endPos
+ + Const.nl + "Do both VCF files share any markers in this window?"
+ + Const.nl + "Do both VCF files contain the same chromosomes in the same order?"
+ + Const.nl;
+ }
+ }
+
private MarkerIndices markerIndices(RefGT refGT, boolean lastWindow,
- int refOverlapEnd) {
+ int refOverlapEnd, int endPos) {
boolean chromEnd = lastWindow
|| (refRecs.get(0).marker().chromIndex() != nextRefRec.marker().chromIndex());
- int refOverlapStart = overlapStart(refGT, chromEnd, overlapCM);
+ int refOverlapStart = overlapStart(refGT, chromEnd, overlapCM, endPos);
boolean[] inTarget = new boolean[inTarg.size()];
for (int j=0; j<inTarget.length; ++j) {
inTarget[j] = inTarg.get(j);
@@ -340,15 +365,16 @@ public class RefTargSlidingWindow implements SlidingWindow {
return new MarkerIndices(inTarget, refOverlapEnd, refOverlapStart);
}
- private int overlapStart(RefGT refGT, boolean chromEnd, float overlapCM) {
+ private int overlapStart(RefGT refGT, boolean chromEnd, float overlapCM,
+ int endPos) {
if (chromEnd) {
return refGT.nMarkers();
} else {
int nMarkersM1 = refGT.nMarkers() - 1;
- Marker lastMarker = refGT.marker(nMarkersM1);
- double endGenPos = genMap.genPos(lastMarker);
+ int chromIndex = refGT.marker(nMarkersM1).chromIndex();
+ double endGenPos = genMap.genPos(chromIndex, endPos-1);
double startGenPos = endGenPos - overlapCM;
- int key = genMap.basePos(lastMarker.chromIndex(), startGenPos);
+ int key = genMap.basePos(chromIndex, startGenPos);
int low = 0;
int high = nMarkersM1;
while (low <= high) {
=====================================
vcf/Samples.java
=====================================
@@ -18,7 +18,6 @@
*/
package vcf;
-import beagleutil.SampleIds;
import java.util.Arrays;
/**
@@ -30,49 +29,50 @@ import java.util.Arrays;
*/
public final class Samples {
- private static final SampleIds sampleIds = SampleIds.instance();
- private final int[] idIndices;
+ private final String[] ids;
private final boolean[] isDiploid;
- /**
- * Constructs a new instance of {@code Samples} corresponding to
- * the specified list of diploid sample identifier indices.
- * @param idIndices an array of sample identifier indices
+ /**
+ * Constructs a new {@code Samples} instance corresponding to the
+ * specified list of sample identifiers. A warning is printed to standard
+ * error if any string occurs more than once in the {@code ids} array.
+ * @param ids an array of sample identifiers
* @param isDiploid a boolean array whose {@code k}-th value is {@code true}
* if the {@code k}-th sample is diploid, and is {@code false} if the
* {@code k}-th sample is haploid
- * @throws IllegalArgumentException if
- * {@code idIndices.length != isDiploid.length}
- * @throws IllegalArgumentException if the specified {@code idIndices} array
- * has two or more elements that are equal
- * @throws IndexOutOfBoundsException if any element of the specified
- * {@code idIndices} array is negative or greater than or equal to
- * {@code beagleutil.SampleIds.instance().size()}
- * @throws NullPointerException if
- * {@code idIndices == null || isDiploid == null}
+ *
+ * @throws IllegalArgumentException if {@code ids.length != isDiploid.length}
+ * @throws IllegalArgumentException if there exists {@code j} such that
+ * {@code ((0 <= j) && j < ids.length) && (ids[j].length()==0)}
+ * @throws NullPointerException if {@code ids == null || isDiploid == null}
+ * @throws NullPointerException if there exists {@code j} such that
+ * {@code ((0 <= j) && j < ids.length) && (ids[j]==null)}
*/
- public Samples(int[] idIndices, boolean[] isDiploid) {
- if (idIndices.length!=isDiploid.length) {
+ public Samples(String[] ids, boolean[] isDiploid) {
+ if (ids.length!=isDiploid.length) {
throw new IllegalArgumentException(String.valueOf(isDiploid));
}
- checkForDuplicates(idIndices);
- this.idIndices = idIndices.clone();
+ checkForNullsAndDuplicates(ids);
+ this.ids = ids.clone();
this.isDiploid = isDiploid.clone();
}
- private static void checkForDuplicates(int[] idIndices) {
- int[] copy = Arrays.stream(idIndices).parallel().sorted().toArray();
- if (copy[0]<0) {
- throw new IllegalArgumentException(String.valueOf(copy[0]));
+ private static void checkForNullsAndDuplicates(String[] ids) {
+ String[] sortedCopy = Arrays.stream(ids)
+ .parallel()
+ .sorted()
+ .toArray(String[]::new);
+ if (sortedCopy.length>0 && sortedCopy[0].length()==0) {
+ throw new IllegalArgumentException("Empty string identifier");
}
- for (int j=1; j<copy.length; ++j) {
- if (copy[j-1]==copy[j]) {
- throw new IllegalArgumentException(String.valueOf(copy[j]));
+ for (int j=1; j<sortedCopy.length; ++j) {
+ if (sortedCopy[j].length()==0) {
+ throw new IllegalArgumentException("Empty string identifier");
+ }
+ if (sortedCopy[j].equals(sortedCopy[j-1])) {
+ System.err.println("Warning: duplicate sample identifier: "
+ + sortedCopy[j]);
}
- }
- int last=idIndices.length-1;
- if (copy[last]>=sampleIds.size()) {
- throw new IllegalArgumentException(String.valueOf(copy[last]));
}
}
@@ -90,51 +90,14 @@ public final class Samples {
public static Samples combine(Samples first, Samples second) {
int n1 = first.size();
int n2 = second.size();
- int n = n1+n2;
- int[] idIndices = new int[n];
+ int n = n1 + n2;
+ String[] ids = new String[n];
boolean[] isDiploid = new boolean[n];
- System.arraycopy(first.idIndices, 0, idIndices, 0, n1);
- System.arraycopy(second.idIndices, 0, idIndices, n1, n2);
+ System.arraycopy(first.ids, 0, ids, 0, n1);
+ System.arraycopy(second.ids, 0, ids, n1, n2);
System.arraycopy(first.isDiploid, 0, isDiploid, 0, n1);
System.arraycopy(second.isDiploid, 0, isDiploid, n1, n2);
- return new Samples(idIndices, isDiploid);
- }
-
- /**
- * Returns an array mapping sample identifier indices to sample indices.
- * Indices for sample identifiers not present in this list of samples
- * are mapped to {@code -1}.
- * @return an array mapping sample identifier indices to sample indices
- */
- public int[] idIndexToIndex() {
- int[] idIndexToIndex = new int[sampleIds.size()];
- Arrays.fill(idIndexToIndex, -1);
- for (int j=0; j<idIndices.length; ++j) {
- int idIndex = idIndices[j];
- assert idIndexToIndex[idIndex] == -1; // no duplicate sample IDs
- idIndexToIndex[idIndex] = j;
- }
- return idIndexToIndex;
- }
-
- /**
- * Constructs and returns a {@code Samples} instance
- * corresponding to the specified list of sample identifiers.
- * @param ids an array of sample identifiers
- * @param isDiploid a boolean array whose {@code k}-th value is {@code true}
- * if the {@code k}-th sample is diploid, and is {@code false} if the
- * {@code k}-th sample is haploid
- * @return a {@code Samples} instance corresponding to the specified
- * list of sample identifiers
- *
- * @throws IllegalArgumentException if
- * {@code ids.length != isDiploid.length}
- * @throws IllegalArgumentException if the specified array
- * has two or more elements that are equal as strings
- * @throws NullPointerException if {@code ids == null || isDiploid == null}
- */
- public static Samples fromIds(String[] ids, boolean[] isDiploid) {
- return new Samples(sampleIds.getIndices(ids), isDiploid);
+ return new Samples(ids, isDiploid);
}
/**
@@ -144,8 +107,8 @@ public final class Samples {
@Override
public int hashCode() {
int hash = 59;
- hash += 31*Arrays.hashCode(this.isDiploid);
- hash += 31*Arrays.hashCode(this.idIndices);
+ hash += 29*Arrays.hashCode(this.isDiploid);
+ hash += 29*Arrays.hashCode(this.ids);
return hash;
}
@@ -171,20 +134,7 @@ public final class Samples {
if (Arrays.equals(this.isDiploid, other.isDiploid)==false) {
return false;
}
- return Arrays.equals(this.idIndices, other.idIndices);
- }
-
- /**
- * Returns the sample identifier index corresponding to the sample
- * with the specified index in this list of samples.
- * @param index a sample index
- * @return the sample identifier index corresponding to the sample
- * with the specified index in this list of samples
- * @throws IndexOutOfBoundsException if
- * {@code index < 0 || index >= this.size()}
- */
- public int idIndex(int index) {
- return idIndices[index];
+ return Arrays.equals(this.ids, other.ids);
}
/**
@@ -192,7 +142,7 @@ public final class Samples {
* @return the number of samples in this list
*/
public int size() {
- return idIndices.length;
+ return ids.length;
}
/**
@@ -205,7 +155,7 @@ public final class Samples {
* {@code index < 0 || index >= this.size()}
*/
public String id(int index) {
- return sampleIds.id(idIndices[index]);
+ return ids[index];
}
/**
@@ -216,7 +166,7 @@ public final class Samples {
* @return this list of samples as an array of sample identifiers
*/
public String[] ids() {
- return sampleIds.ids(idIndices);
+ return ids.clone();
}
/**
=====================================
vcf/TargSlidingWindow.java
=====================================
@@ -84,7 +84,8 @@ public class TargSlidingWindow implements SlidingWindow {
}
private static SampleFileIt<GTRec> targIt(Par par) {
- FileIt<String> it = InputIt.fromGzipFile(par.gt());
+ int nBufferedBlocks = par.nthreads() << 3;
+ FileIt<String> it = InputIt.fromBGZipFile(par.gt(), nBufferedBlocks);
Filter<String> sFilter = FilterUtil.sampleFilter(par.excludesamples());
Filter<Marker> mFilter = FilterUtil.markerFilter(par.excludemarkers());
SampleFileIt<GTRec> targIt = VcfIt.create(it, sFilter, mFilter,
=====================================
vcf/VcfHeader.java
=====================================
@@ -195,7 +195,7 @@ public final class VcfHeader {
ids[j] = headerFields[SAMPLE_OFFSET + includedIndices[j]];
restrictedIsDiploid[j] = isDiploid[includedIndices[j]];
}
- return Samples.fromIds(ids, restrictedIsDiploid);
+ return new Samples(ids, restrictedIsDiploid);
}
/**
=====================================
vcf/VcfIt.java
=====================================
@@ -20,7 +20,7 @@ package vcf;
import blbutil.FileIt;
import blbutil.Filter;
-import blbutil.SampleFileIt;
+import blbutil.VcfFileIt;
import java.io.File;
import java.util.ArrayDeque;
import java.util.ArrayList;
@@ -46,7 +46,7 @@ import java.util.stream.IntStream;
*
* @author Brian L. Browning {@code <browning at uw.edu>}
*/
-public class VcfIt<E extends GTRec> implements SampleFileIt<E> {
+public class VcfIt<E extends GTRec> implements VcfFileIt<E> {
private static final float DEFAULT_MAX_LR = Float.MAX_VALUE;
@@ -327,6 +327,11 @@ public class VcfIt<E extends GTRec> implements SampleFileIt<E> {
return vcfHeader.samples();
}
+ @Override
+ public VcfHeader vcfHeader() {
+ return vcfHeader;
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder(80);
=====================================
vcf/VcfWriter.java
=====================================
@@ -44,6 +44,8 @@ public final class VcfWriter {
+ "estimated REF dose [P(RA) + 2*P(RR)] and true REF dose\">";
private static final String IMP_INFO = "##INFO=<ID=IMP,Number=0,Type=Flag,"
+ "Description=\"Imputed marker\">";
+ private static final String END_INFO = "##INFO=<ID=END,Number=1,Type=Integer,"
+ + "Description=\"End position of the variant described in this record (for use with symbolic alleles)\">";
private static final String GT_FORMAT = "##FORMAT=<ID=GT,Number=1,Type=String,"
+ "Description=\"Genotype\">";
@@ -132,6 +134,7 @@ public final class VcfWriter {
out.println(DR2_INFO);
out.println(IMP_INFO);
}
+ out.println(END_INFO);
out.println(GT_FORMAT);
if (ds) {
out.println(DS_FORMAT);
View it on GitLab: https://salsa.debian.org/med-team/beagle/-/compare/c9568fa92ceda801780b470be0bbdc4126e7e245...914312b3fa341e7d8df05239a711b68733facae5
--
View it on GitLab: https://salsa.debian.org/med-team/beagle/-/compare/c9568fa92ceda801780b470be0bbdc4126e7e245...914312b3fa341e7d8df05239a711b68733facae5
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20241216/8389a3f0/attachment-0001.htm>
More information about the debian-med-commit
mailing list