[med-svn] [htsjdk] 01/05: New upstream version 2.8.1+dfsg
Andreas Tille
tille at debian.org
Sun Dec 11 07:57:13 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository htsjdk.
commit ed6db4b8ee34111af375c725d60b0c5c1f88a9b7
Author: Andreas Tille <tille at debian.org>
Date: Sun Dec 11 08:48:34 2016 +0100
New upstream version 2.8.1+dfsg
---
.../samtools/SAMSequenceDictionaryCodec.java | 114 ++++
.../java/htsjdk/samtools/SAMTextHeaderCodec.java | 38 +-
.../java/htsjdk/samtools/SamFileValidator.java | 39 +-
.../java/htsjdk/samtools/SamInputResource.java | 26 +-
src/main/java/htsjdk/samtools/SamPairUtil.java | 49 +-
src/main/java/htsjdk/samtools/SamReader.java | 24 +
.../java/htsjdk/samtools/SamReaderFactory.java | 44 +-
.../seekablestream/ByteArraySeekableStream.java | 24 +
.../htsjdk/samtools/util/AbstractLocusInfo.java | 146 +++++
...cusIterator.java => AbstractLocusIterator.java} | 407 +++++---------
.../samtools/util/AbstractRecordAndOffset.java | 130 +++++
.../samtools/util/BlockCompressedInputStream.java | 61 +-
.../java/htsjdk/samtools/util/CollectionUtil.java | 10 +-
.../samtools/util/CustomGzipOutputStream.java | 24 +
.../htsjdk/samtools/util/EdgeReadIterator.java | 228 ++++++++
.../samtools/util/EdgingRecordAndOffset.java | 219 ++++++++
src/main/java/htsjdk/samtools/util/IOUtil.java | 21 +-
.../java/htsjdk/samtools/util/IntervalList.java | 49 +-
src/main/java/htsjdk/samtools/util/Log.java | 2 +-
.../htsjdk/samtools/util/SamLocusIterator.java | 613 ++++-----------------
src/main/java/htsjdk/tribble/Feature.java | 4 +-
src/main/java/htsjdk/tribble/SimpleFeature.java | 5 -
.../java/htsjdk/tribble/bed/SimpleBEDFeature.java | 8 -
.../java/htsjdk/tribble/example/CountRecords.java | 4 -
.../htsjdk/tribble/example/ExampleBinaryCodec.java | 2 +-
.../htsjdk/tribble/gelitext/GeliTextFeature.java | 6 -
.../java/htsjdk/tribble/index/IndexFactory.java | 12 +-
.../index/interval/IntervalIndexCreator.java | 4 +-
.../tribble/index/linear/LinearIndexCreator.java | 4 +-
.../tribble/index/tabix/TabixIndexCreator.java | 2 +-
.../htsjdk/variant/variantcontext/CommonInfo.java | 54 +-
.../htsjdk/variant/variantcontext/JEXLMap.java | 115 ++--
.../variantcontext/JexlMissingValueTreatment.java | 39 ++
.../variantcontext/StructuralVariantType.java} | 32 +-
.../variant/variantcontext/VariantContext.java | 117 ++--
.../variantcontext/VariantContextComparator.java | 2 +-
.../variantcontext/VariantContextUtils.java | 35 +-
.../variant/variantcontext/VariantJEXLContext.java | 2 +-
.../variantcontext/writer/BCF2FieldWriter.java | 2 +-
.../variant/variantcontext/writer/BCF2Writer.java | 6 +-
.../writer/SortingVariantContextWriterBase.java | 10 +-
src/main/java/htsjdk/variant/vcf/VCFConstants.java | 5 +-
src/main/java/htsjdk/variant/vcf/VCFEncoder.java | 4 +-
.../java/htsjdk/variant/vcf/VCFFileReader.java | 2 +-
.../htsjdk/variant/vcf/VCFFilterHeaderLine.java | 18 +-
.../samtools/SAMSequenceDictionaryCodecTest.java | 122 ++++
.../java/htsjdk/samtools/SamReaderFactoryTest.java | 38 +-
.../java/htsjdk/samtools/ValidateSamFileTest.java | 108 +++-
.../java/htsjdk/samtools/sra/AbstractSRATest.java | 4 +-
.../samtools/util/AbstractLocusInfoTest.java | 79 +++
.../util/AbstractLocusIteratorTestTemplate.java | 68 +++
.../samtools/util/AbstractRecordAndOffsetTest.java | 63 +++
.../util/BlockCompressedOutputStreamTest.java | 57 +-
.../htsjdk/samtools/util/EdgeReadIteratorTest.java | 402 ++++++++++++++
.../samtools/util/EdgingRecordAndOffsetTest.java | 94 ++++
.../htsjdk/samtools/util/SamLocusIteratorTest.java | 35 +-
src/test/java/htsjdk/tribble/bed/BEDCodecTest.java | 18 +-
.../java/htsjdk/tribble/gelitext/GeliTextTest.java | 2 +-
.../variantcontext/VariantContextTestProvider.java | 2 +-
.../variantcontext/VariantContextUnitTest.java | 195 ++++++-
.../variantcontext/VariantJEXLContextUnitTest.java | 52 +-
.../java/htsjdk/variant/vcf/VCFHeaderUnitTest.java | 4 +-
.../htsjdk/tribble/vcfexample.vcf.truncated.gz | Bin 0 -> 470 bytes
.../htsjdk/tribble/vcfexample.vcf.truncated.hdr.gz | Bin 0 -> 460 bytes
.../htsjdk/variant/structuralvariants.vcf | 22 +
65 files changed, 3012 insertions(+), 1115 deletions(-)
diff --git a/src/main/java/htsjdk/samtools/SAMSequenceDictionaryCodec.java b/src/main/java/htsjdk/samtools/SAMSequenceDictionaryCodec.java
new file mode 100644
index 0000000..e6e3ba5
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/SAMSequenceDictionaryCodec.java
@@ -0,0 +1,114 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.samtools;
+
+import htsjdk.samtools.util.LineReader;
+import java.io.BufferedWriter;
+
+/**
+ * "On the fly" codec SAMSequenceDictionaryCodec.
+ * Encodes each sequence and directly writes it to the Dictionary file.
+ *
+ * To use this class you should provide a BufferedWriter to it; you are responsible for closing the writer when done.
+ * You can work with this class as shown below.
+ *
+ * Example of using this class:
+ *
+ * List<SAMSequenceRecord> dict = ...;
+ *
+ * //open BufferedWriter and close in try-with-resources
+ * try(BufferedWriter writer = new BufferedWriter(new FileWriter("path/to/file"))) {
+ * SAMSequenceDictionaryCodec codec = new SAMSequenceDictionaryCodec(writer);
+ *
+ * //we have list of sequences, so encode header line and after that encode each sequence
+ * codec.encodeHeaderLine(false);
+ * dict.forEach(codec::encodeSequenceRecord);
+ *}
+ *
+ * or
+ *
+ * SAMSequenceDictionary dict = ...;
+ *
+ * //open BufferedWriter and close in try-with-resources
+ * try(BufferedWriter writer = new BufferedWriter(new FileWriter("path/to/file"))) {
+ * SAMSequenceDictionaryCodec codec = new SAMSequenceDictionaryCodec(writer);
+ *
+ * //we have complete {@link SAMSequenceDictionary}, so just encode it.
+ * codec.encode(dict);
+ *}
+ *
+ * @author Pavel_Silin at epam.com, EPAM Systems, Inc. <www.epam.com>
+ */
+public class SAMSequenceDictionaryCodec {
+
+ private static final SAMFileHeader EMPTY_HEADER = new SAMFileHeader();
+
+ private final SAMTextHeaderCodec codec;
+
+ public SAMSequenceDictionaryCodec(final BufferedWriter writer) {
+ codec = new SAMTextHeaderCodec();
+ codec.setmFileHeader(EMPTY_HEADER);
+ codec.setWriter(writer);
+ }
+
+ /**
+ * Write {@link SAMSequenceRecord}.
+ * @param sequenceRecord object to be converted to text.
+ */
+ public void encodeSequenceRecord(final SAMSequenceRecord sequenceRecord) {
+ codec.encodeSequenceRecord(sequenceRecord);
+ }
+
+ /**
+ * Write Header line.
+ * @param keepExistingVersionNumber boolean flag to keep existing version number.
+ */
+ public void encodeHeaderLine(final boolean keepExistingVersionNumber) {
+ codec.encodeHeaderLine(keepExistingVersionNumber);
+ }
+
+ /**
+ * Reads text SAM header and converts to a SAMSequenceDictionary object.
+ * @param reader Where to get header text from.
+ * @param source Name of the input file, for error messages. May be null.
+ * @return complete SAMSequenceDictionary object.
+ */
+ public SAMSequenceDictionary decode(final LineReader reader, final String source) {
+ return codec.decode(reader, source).getSequenceDictionary();
+ }
+
+ /**
+ * Convert {@link SAMSequenceDictionary} from in-memory representation to text representation.
+ * @param dictionary object to be converted to text.
+ */
+ public void encode(final SAMSequenceDictionary dictionary) {
+ codec.encodeHeaderLine(false);
+ dictionary.getSequences().forEach(this::encodeSequenceRecord);
+ }
+
+ public void setValidationStringency(final ValidationStringency validationStringency) {
+ codec.setValidationStringency(validationStringency);
+ }
+}
diff --git a/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java b/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java
index 491bf9b..fb4b02a 100644
--- a/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java
+++ b/src/main/java/htsjdk/samtools/SAMTextHeaderCodec.java
@@ -70,6 +70,14 @@ public class SAMTextHeaderCodec {
public static final String COMMENT_PREFIX = HEADER_LINE_START + HeaderRecordType.CO.name() + FIELD_SEPARATOR;
+ void setWriter(final BufferedWriter writer) {
+ this.writer = writer;
+ }
+
+ void setmFileHeader(final SAMFileHeader header) {
+ this.mFileHeader = header;
+ }
+
/**
* Reads text SAM header and converts to a SAMFileHeader object.
* @param reader Where to get header text from.
@@ -80,8 +88,8 @@ public class SAMTextHeaderCodec {
mFileHeader = new SAMFileHeader();
mReader = reader;
mSource = source;
- sequences = new ArrayList<SAMSequenceRecord>();
- readGroups = new ArrayList<SAMReadGroupRecord>();
+ sequences = new ArrayList<>();
+ readGroups = new ArrayList<>();
while (advanceLine() != null) {
final ParsedHeaderLine parsedHeaderLine = new ParsedHeaderLine(mCurrentLine);
@@ -387,6 +395,30 @@ public class SAMTextHeaderCodec {
}
}
+ /**
+ * Encode {@link SAMSequenceRecord}.
+ * Designed for using in {@link SAMSequenceDictionaryCodec}, allows to implement recording on the fly.
+ * @throws IllegalStateException if writer is null.
+ */
+ void encodeSequenceRecord(final SAMSequenceRecord sequenceRecord) {
+ if (writer == null) {
+ throw new IllegalStateException("writer couldn't be null");
+ }
+ writeSQLine(sequenceRecord);
+ }
+
+ /**
+ * Encode HD line.
+ * Designed for using in {@link SAMSequenceDictionaryCodec}, allows to implement recording on the fly.
+ * @throws IllegalStateException if writer is null.
+ */
+ void encodeHeaderLine(final boolean keepExistingVersionNumber) {
+ if (writer == null) {
+ throw new IllegalStateException("writer couldn't be null");
+ }
+ writeHDLine(keepExistingVersionNumber);
+ }
+
private void println(final String s) {
try {
writer.append(s);
@@ -438,7 +470,7 @@ public class SAMTextHeaderCodec {
}
private void writeSQLine(final SAMSequenceRecord sequenceRecord) {
- final int numAttributes =sequenceRecord.getAttributes() != null ? sequenceRecord.getAttributes().size() : 0;
+ final int numAttributes = sequenceRecord.getAttributes() != null ? sequenceRecord.getAttributes().size() : 0;
final String[] fields = new String[3 + numAttributes];
fields[0] = HEADER_LINE_START + HeaderRecordType.SQ;
fields[1] = SAMSequenceRecord.SEQUENCE_NAME_TAG + TAG_KEY_VALUE_SEPARATOR + sequenceRecord.getSequenceName();
diff --git a/src/main/java/htsjdk/samtools/SamFileValidator.java b/src/main/java/htsjdk/samtools/SamFileValidator.java
index cf18a7f..e40bfe9 100644
--- a/src/main/java/htsjdk/samtools/SamFileValidator.java
+++ b/src/main/java/htsjdk/samtools/SamFileValidator.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2009-2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -81,25 +81,40 @@ import java.util.Set;
* @see SAMRecord#isValid()
*/
public class SamFileValidator {
- private Histogram<Type> errorsByType = new Histogram<Type>();
+
+ private final static Log log = Log.getInstance(SamFileValidator.class);
+
private final PrintWriter out;
+ private Histogram<Type> errorsByType;
private PairEndInfoMap pairEndInfoByName;
- private ReferenceSequenceFileWalker refFileWalker = null;
- private boolean verbose = false;
- private int maxVerboseOutput = 100;
+ private ReferenceSequenceFileWalker refFileWalker;
+ private boolean verbose;
+ private int maxVerboseOutput;
private SAMSortOrderChecker orderChecker;
- private Set<Type> errorsToIgnore = EnumSet.noneOf(Type.class);
- private boolean ignoreWarnings = false;
- private boolean bisulfiteSequenced = false;
- private IndexValidationStringency indexValidationStringency = IndexValidationStringency.NONE;
- private boolean sequenceDictionaryEmptyAndNoWarningEmitted = false;
- private final int maxTempFiles;
+ private Set<Type> errorsToIgnore;
+ private boolean ignoreWarnings;
+ private boolean bisulfiteSequenced;
+ private IndexValidationStringency indexValidationStringency;
+ private boolean sequenceDictionaryEmptyAndNoWarningEmitted;
- private final static Log log = Log.getInstance(SamFileValidator.class);
+ private final int maxTempFiles;
public SamFileValidator(final PrintWriter out, final int maxTempFiles) {
this.out = out;
this.maxTempFiles = maxTempFiles;
+ this.errorsByType = new Histogram<>();
+ this.refFileWalker = null;
+ this.maxVerboseOutput = 100;
+ this.indexValidationStringency = IndexValidationStringency.NONE;
+ this.errorsToIgnore = EnumSet.noneOf(Type.class);
+ this.verbose = false;
+ this.ignoreWarnings = false;
+ this.bisulfiteSequenced = false;
+ this.sequenceDictionaryEmptyAndNoWarningEmitted = false;
+ }
+
+ Histogram<Type> getErrorsByType() {
+ return errorsByType;
}
/**
diff --git a/src/main/java/htsjdk/samtools/SamInputResource.java b/src/main/java/htsjdk/samtools/SamInputResource.java
index 39d679d..f25d97b 100644
--- a/src/main/java/htsjdk/samtools/SamInputResource.java
+++ b/src/main/java/htsjdk/samtools/SamInputResource.java
@@ -1,3 +1,27 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package htsjdk.samtools;
import htsjdk.samtools.seekablestream.SeekableFileStream;
@@ -470,4 +494,4 @@ class SRAInputResource extends InputResource {
public SRAAccession asSRAAccession() {
return accession;
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/htsjdk/samtools/SamPairUtil.java b/src/main/java/htsjdk/samtools/SamPairUtil.java
index 01a59cb..ee1707b 100644
--- a/src/main/java/htsjdk/samtools/SamPairUtil.java
+++ b/src/main/java/htsjdk/samtools/SamPairUtil.java
@@ -182,6 +182,15 @@ public class SamPairUtil {
}
/**
+ * Write the mate info for two SAMRecords. This will always clear/remove any mate cigar tag that is present.
+ * @param rec1 the first SAM record
+ * @param rec2 the second SAM record
+ */
+ public static void setMateInfo(final SAMRecord rec1, final SAMRecord rec2) {
+ setMateInfo(rec1, rec2, false);
+ }
+
+ /**
* Write the mate info for two SAMRecords
* @param rec1 the first SAM record. Must have a non-null SAMFileHeader.
* @param rec2 the second SAM record. Must have a non-null SAMFileHeader.
@@ -270,6 +279,7 @@ public class SamPairUtil {
* @param rec2 the second SAM record
* @param header the SAM file header
* @param setMateCigar true if we are to update/create the Mate CIGAR (MC) optional tag, false if we are to clear any mate cigar tag that is present.
+ * @deprecated use {@link #setMateInfo(SAMRecord, SAMRecord, boolean)} instead
*/
@Deprecated
public static void setMateInfo(final SAMRecord rec1, final SAMRecord rec2, final SAMFileHeader header, final boolean setMateCigar) {
@@ -281,9 +291,11 @@ public class SamPairUtil {
* @param rec1 the first SAM record
* @param rec2 the second SAM record
* @param header the SAM file header
+ * @deprecated use {@link #setMateInfo(SAMRecord, SAMRecord)} instead
*/
+ @Deprecated
public static void setMateInfo(final SAMRecord rec1, final SAMRecord rec2, final SAMFileHeader header) {
- setMateInfo(rec1, rec2, false);
+ setMateInfo(rec1, rec2);
}
/**
@@ -322,26 +334,43 @@ public class SamPairUtil {
/**
* This method will clear any mate cigar already present.
+ * @deprecated use {@link #setProperPairAndMateInfo(SAMRecord, SAMRecord, List)} instead
*/
+ @Deprecated
public static void setProperPairAndMateInfo(final SAMRecord rec1, final SAMRecord rec2,
final SAMFileHeader header,
- final List<PairOrientation> exepectedOrientations) {
- setProperPairAndMateInfo(rec1, rec2, header, exepectedOrientations, false);
+ final List<PairOrientation> expectedOrientations) {
+ setProperPairAndMateInfo(rec1, rec2, expectedOrientations);
}
/**
- * @param rec1
- * @param rec2
- * @param header
- * @param exepectedOrientations
* @param addMateCigar true if we are to update/create the Mate CIGAR (MC) optional tag, false if we are to clear any mate cigar tag that is present.
+ * @deprecated use {@link #setProperPairAndMateInfo(SAMRecord, SAMRecord, List, boolean)}
*/
+ @Deprecated
public static void setProperPairAndMateInfo(final SAMRecord rec1, final SAMRecord rec2,
final SAMFileHeader header,
- final List<PairOrientation> exepectedOrientations,
+ final List<PairOrientation> expectedOrientations,
+ final boolean addMateCigar) {
+ setProperPairAndMateInfo(rec1, rec2, expectedOrientations, addMateCigar);
+ }
+
+ /**
+ * This method will clear any mate cigar already present.
+ */
+ public static void setProperPairAndMateInfo(final SAMRecord rec1, final SAMRecord rec2,
+ final List<PairOrientation> expectedOrientations) {
+ setProperPairAndMateInfo(rec1, rec2, expectedOrientations, false);
+ }
+
+ /**
+ * @param addMateCigar true if we are to update/create the Mate CIGAR (MC) optional tag, false if we are to clear any mate cigar tag that is present.
+ */
+ public static void setProperPairAndMateInfo(final SAMRecord rec1, final SAMRecord rec2,
+ final List<PairOrientation> expectedOrientations,
final boolean addMateCigar) {
- setMateInfo(rec1, rec2, header, addMateCigar);
- setProperPairFlags(rec1, rec2, exepectedOrientations);
+ setMateInfo(rec1, rec2, addMateCigar);
+ setProperPairFlags(rec1, rec2, expectedOrientations);
}
public static void setProperPairFlags(final SAMRecord rec1, final SAMRecord rec2, final List<PairOrientation> expectedOrientations) {
diff --git a/src/main/java/htsjdk/samtools/SamReader.java b/src/main/java/htsjdk/samtools/SamReader.java
index 6bd6c21..2f1b2f9 100644
--- a/src/main/java/htsjdk/samtools/SamReader.java
+++ b/src/main/java/htsjdk/samtools/SamReader.java
@@ -1,3 +1,27 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package htsjdk.samtools;
import htsjdk.samtools.util.CloseableIterator;
diff --git a/src/main/java/htsjdk/samtools/SamReaderFactory.java b/src/main/java/htsjdk/samtools/SamReaderFactory.java
index 40f7113..8769f48 100644
--- a/src/main/java/htsjdk/samtools/SamReaderFactory.java
+++ b/src/main/java/htsjdk/samtools/SamReaderFactory.java
@@ -1,3 +1,27 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package htsjdk.samtools;
import htsjdk.samtools.cram.ref.CRAMReferenceSource;
@@ -283,11 +307,27 @@ public abstract class SamReaderFactory {
Math.max(Defaults.BUFFER_SIZE, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE)
);
File sourceFile = data.asFile();
+ // calling asFile is safe even if indexMaybe is a Google Cloud Storage bucket
+ // (in that case we just get null)
final File indexFile = indexMaybe == null ? null : indexMaybe.asFile();
if (SamStreams.isBAMFile(bufferedStream)) {
if (sourceFile == null || !sourceFile.isFile()) {
- // Handle case in which file is a named pipe, e.g. /dev/stdin or created by mkfifo
- primitiveSamReader = new BAMFileReader(bufferedStream, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory);
+ // check whether we can seek
+ final SeekableStream indexSeekable = indexMaybe == null ? null : indexMaybe.asUnbufferedSeekableStream();
+ // do not close bufferedStream, it's the same stream we're getting here.
+ SeekableStream sourceSeekable = data.asUnbufferedSeekableStream();
+ if (null == sourceSeekable || null == indexSeekable) {
+ // not seekable.
+ // it's OK that we consumed a bit of the stream already, this ctor expects it.
+ primitiveSamReader = new BAMFileReader(bufferedStream, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory);
+ } else {
+ // seekable.
+ // need to return to the beginning because it's the same stream we used earlier
+ // and read a bit from, and that form of the ctor expects the stream to start at 0.
+ sourceSeekable.seek(0);
+ primitiveSamReader = new BAMFileReader(
+ sourceSeekable, indexSeekable, false, asynchronousIO, validationStringency, this.samRecordFactory);
+ }
} else {
bufferedStream.close();
primitiveSamReader = new BAMFileReader(sourceFile, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory);
diff --git a/src/main/java/htsjdk/samtools/seekablestream/ByteArraySeekableStream.java b/src/main/java/htsjdk/samtools/seekablestream/ByteArraySeekableStream.java
index a0ebaaa..4f8c322 100644
--- a/src/main/java/htsjdk/samtools/seekablestream/ByteArraySeekableStream.java
+++ b/src/main/java/htsjdk/samtools/seekablestream/ByteArraySeekableStream.java
@@ -1,3 +1,27 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package htsjdk.samtools.seekablestream;
import htsjdk.samtools.seekablestream.SeekableStream;
diff --git a/src/main/java/htsjdk/samtools/util/AbstractLocusInfo.java b/src/main/java/htsjdk/samtools/util/AbstractLocusInfo.java
new file mode 100644
index 0000000..4e02007
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/util/AbstractLocusInfo.java
@@ -0,0 +1,146 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools.util;
+
+import htsjdk.samtools.SAMSequenceRecord;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * The unit of iteration for AbstractLocusIterator.
+ * Holds information about the locus (the SAMSequenceRecord and 1-based position on the reference),
+ * plus list of AbstractRecordAndOffset objects,
+ * If <code>RecordAndOffset</code> class is used, one object represents one aligned read that overlaps the locus.
+ * If <code>TypedRecordAndOffset</code> class is used, one object represents one aligned read,
+ * that starts or ends at the locus.
+ *
+ * @author Darina_Nikolaeva at epam.com, EPAM Systems, Inc. <www.epam.com>
+ *
+ */
+public class AbstractLocusInfo<E extends AbstractRecordAndOffset> implements Locus {
+ /**
+ * Reference sequence, to which the reads are aligned.
+ **/
+ private final SAMSequenceRecord referenceSequence;
+ /**
+ * Position in the sequence, to which the reads are aligned.
+ **/
+ private final int position;
+
+ /**
+ * Initial size for the list of <code>AbstractRecordAndOffset</code> objects
+ **/
+ private final static int INITIAL_LIST_SIZE = 100;
+
+ /**
+ * List of reads aligned to the current position
+ **/
+ private final List<E> recordAndOffsets = new ArrayList<>(INITIAL_LIST_SIZE);
+
+ /**
+ * @param referenceSequence reference sequence to which the reads are aligned
+ * @param position position in the sequence to which the reads are aligned
+ */
+ public AbstractLocusInfo(final SAMSequenceRecord referenceSequence, final int position) {
+ this.referenceSequence = referenceSequence;
+ this.position = position;
+ }
+
+ /**
+ * Accumulates info for one read aligned to the locus. This method doesn't check that <code>recordAndOffset</code>
+ * is really aligned to the current reference position, so it must have a valid reference sequence and
+ * position or further processing can go wrong.
+ *
+ * @param recordAndOffset object to add to current locus
+ */
+ public void add(E recordAndOffset) {
+ recordAndOffsets.add(recordAndOffset);
+ }
+
+ /**
+ * @return the index of reference sequence
+ */
+ public int getSequenceIndex() {
+ return referenceSequence.getSequenceIndex();
+ }
+
+ /**
+ * @return 1-based reference position
+ */
+ public int getPosition() {
+ return position;
+ }
+
+ /**
+ * @deprecated since name of the method can be confusing, new implementation should be used
+ * {@code getRecordAndOffsets()}
+ * @return unmodifiable list of aligned to the reference position <code>recordsAndOffsets</code>
+ */
+ @Deprecated
+ public List<E> getRecordAndPositions() {
+ return Collections.unmodifiableList(recordAndOffsets);
+ }
+
+ /**
+ * @return unmodifiable list of aligned to the reference position <code>recordsAndOffsets</code>
+ */
+ public List<E> getRecordAndOffsets() {
+ return Collections.unmodifiableList(recordAndOffsets);
+ }
+
+ /**
+ * @return the name of reference sequence
+ */
+ public String getSequenceName() {
+ return referenceSequence.getSequenceName();
+ }
+
+ @Override
+ public String toString() {
+ return referenceSequence.getSequenceName() + ":" + position;
+ }
+
+ /**
+ * @return the length of reference sequence
+ */
+ public int getSequenceLength() {
+ return referenceSequence.getSequenceLength();
+ }
+
+ /**
+ * @return the number of records overlapping the position
+ */
+ public int size() {
+ return this.recordAndOffsets.size();
+ }
+
+ /**
+ * @return <code>true</code> if the RecordAndOffset list is empty.
+ */
+ public boolean isEmpty() {
+ return getRecordAndOffsets().isEmpty();
+ }
+}
diff --git a/src/main/java/htsjdk/samtools/util/SamLocusIterator.java b/src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java
similarity index 58%
copy from src/main/java/htsjdk/samtools/util/SamLocusIterator.java
copy to src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java
index 33bcfd3..6ff8e83 100644
--- a/src/main/java/htsjdk/samtools/util/SamLocusIterator.java
+++ b/src/main/java/htsjdk/samtools/util/AbstractLocusIterator.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2010 The Broad Institute
+ * Copyright (c) 2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,12 +21,28 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+
package htsjdk.samtools.util;
-import htsjdk.samtools.*;
-import htsjdk.samtools.filter.*;
-import java.util.*;
+import htsjdk.samtools.Cigar;
+import htsjdk.samtools.CigarElement;
+import htsjdk.samtools.CigarOperator;
+import htsjdk.samtools.SAMException;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.filter.AggregateFilter;
+import htsjdk.samtools.filter.DuplicateReadFilter;
+import htsjdk.samtools.filter.FilteringSamIterator;
+import htsjdk.samtools.filter.SamRecordFilter;
+import htsjdk.samtools.filter.SecondaryOrSupplementaryFilter;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
/**
* Iterator that traverses a SAM File, accumulating information on a per-locus basis.
@@ -35,138 +51,56 @@ import java.util.*;
* By default duplicate reads and non-primary alignments are filtered out. Filtering may be changed
* via setSamFilters().
*
- * @author alecw at broadinstitute.org
+ * @author Darina_Nikolaeva at epam.com, EPAM Systems, Inc. <www.epam.com>
+ * @author Mariia_Zueva at epam.com, EPAM Systems, Inc. <www.epam.com>
*/
-public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, CloseableIterator<SamLocusIterator.LocusInfo> {
- private static final Log LOG = Log.getInstance(SamLocusIterator.class);
-
- /**
- * Holds a SAMRecord plus the zero-based offset into that SAMRecord's bases and quality scores that corresponds
- * to the base and quality at the genomic position described the containing LocusInfo.
- */
- public static class RecordAndOffset {
- private final SAMRecord record;
- private final int offset;
- public RecordAndOffset(final SAMRecord record, final int offset) {
- this.offset = offset;
- this.record = record;
- }
-
- /** Zero-based offset into the read corresponding to the current position in LocusInfo */
- public int getOffset() { return offset; }
- public SAMRecord getRecord() { return record; }
- public byte getReadBase() { return record.getReadBases()[offset]; }
- public byte getBaseQuality() { return record.getBaseQualities()[offset]; }
- }
-
- /**
- * The unit of iteration. Holds information about the locus (the SAMSequenceRecord and 1-based position
- * on the reference), plus List of ReadAndOffset objects, one for each read that overlaps the locus;
- * two more List_s_ of ReadAndOffset objects include reads that overlap the locus with insertions and deletions
- * respectively
- */
- public static final class LocusInfo implements Locus {
- private final SAMSequenceRecord referenceSequence;
- private final int position;
- private final List<RecordAndOffset> recordAndOffsets = new ArrayList<RecordAndOffset>(100);
- private List<RecordAndOffset> deletedInRecord = null;
- private List<RecordAndOffset> insertedInRecord = null;
-
- LocusInfo(final SAMSequenceRecord referenceSequence, final int position) {
- this.referenceSequence = referenceSequence;
- this.position = position;
- }
-
- /** Accumulate info for one read at the locus. */
- public void add(final SAMRecord read, final int position) {
- recordAndOffsets.add(new RecordAndOffset(read, position));
- }
-
- /** Accumulate info for one read with a deletion */
- public void addDeleted(final SAMRecord read, int previousPosition) {
- if (deletedInRecord == null) {
- deletedInRecord = new ArrayList<>();
- }
- deletedInRecord.add(new RecordAndOffset(read, previousPosition));
- }
-
- /**
- * Accumulate info for one read with an insertion.
- * For this locus, the reads in the insertion are included also in recordAndOffsets
- */
- public void addInserted(final SAMRecord read, int firstPosition) {
- if (insertedInRecord == null) {
- insertedInRecord = new ArrayList<>();
- }
- insertedInRecord.add(new RecordAndOffset(read, firstPosition));
- }
-
- public int getSequenceIndex() { return referenceSequence.getSequenceIndex(); }
-
- /** @return 1-based reference position */
- public int getPosition() { return position; }
- public List<RecordAndOffset> getRecordAndPositions() { return Collections.unmodifiableList(recordAndOffsets); }
- public String getSequenceName() { return referenceSequence.getSequenceName(); }
- @Override public String toString() { return referenceSequence.getSequenceName() + ":" + position; }
- public int getSequenceLength() {return referenceSequence.getSequenceLength();}
- public List<RecordAndOffset> getDeletedInRecord() {
- return (deletedInRecord == null) ? Collections.emptyList() : Collections.unmodifiableList(deletedInRecord);
- }
- public List<RecordAndOffset> getInsertedInRecord() {
- return (insertedInRecord == null) ? Collections.emptyList() : Collections.unmodifiableList(insertedInRecord);
- }
- /** @return the number of records overlapping the position, with deletions included if they are being tracked. */
- public int size() { return this.recordAndOffsets.size() + ((deletedInRecord == null) ? 0 : deletedInRecord.size()); }
-
- /**
- * @return <code>true</code> if all the RecordAndOffset lists are empty;
- * <code>false</code> if at least one have records
- */
- public boolean isEmpty() {
- return recordAndOffsets.isEmpty() &&
- (deletedInRecord == null || deletedInRecord.isEmpty()) &&
- (insertedInRecord == null || insertedInRecord.isEmpty());
- }
- }
+public abstract class AbstractLocusIterator<T extends AbstractRecordAndOffset, K extends AbstractLocusInfo<T>> implements Iterable<K>, CloseableIterator<K> {
+ static final Log LOG = Log.getInstance(AbstractLocusIterator.class);
private final SamReader samReader;
- private final ReferenceSequenceMask referenceSequenceMask;
+ final private ReferenceSequenceMask referenceSequenceMask;
private PeekableIterator<SAMRecord> samIterator;
private List<SamRecordFilter> samFilters = Arrays.asList(new SecondaryOrSupplementaryFilter(),
new DuplicateReadFilter());
- private final List<Interval> intervals;
+ final List<Interval> intervals;
+
+ /**
+ * If true, do indexed lookup to improve performance. Not relevant if intervalList == null.
+ * It is no longer the case that useIndex==true can make performance worse. It should always perform at least
+ * as well as useIndex==false, and generally will be much faster.
+ */
private final boolean useIndex;
/**
* LocusInfos on this list are ready to be returned by iterator. All reads that overlap
- * the locus have been accumulated before the LocusInfo is moved into this list.
+ * the locus have been accumulated before the AbstractLocusInfo is moved into this list.
*/
- private final ArrayList<LocusInfo> complete = new ArrayList<LocusInfo>(100);
+ private final ArrayList<K> complete = new ArrayList<>(100);
/**
* LocusInfos for which accumulation is in progress. When {@link #accumulateSamRecord(SAMRecord)} is called
* the state of this list is guaranteed to be either:
- * a) Empty, or
- * b) That the element at index 0 corresponds to the same genomic locus as the first aligned base
- * in the read being accumulated
- *
+ * a) Empty, or
+ * b) That the element at index 0 corresponds to the same genomic locus as the first aligned base
+ * in the read being accumulated
+ * <p>
* Before each new read is accumulated the accumulator is examined and:
- * i) any LocusInfos at positions earlier than the read start are moved to {@link #complete}
- * ii) any uncovered positions between the last LocusInfo and the first aligned base of the new read
- * have LocusInfos created and added to {@link #complete} if we are emitting uncovered loci
+ * i) any LocusInfos at positions earlier than the read start are moved to {@link #complete}
+ * ii) any uncovered positions between the last AbstractLocusInfo and the first aligned base of the new read
+ * have LocusInfos created and added to {@link #complete} if we are emitting uncovered loci
*/
- private final ArrayList<LocusInfo> accumulator = new ArrayList<LocusInfo>(100);
+ final ArrayList<K> accumulator = new ArrayList<>(100);
private int qualityScoreCutoff = Integer.MIN_VALUE;
private int mappingQualityScoreCutoff = Integer.MIN_VALUE;
private boolean includeNonPfReads = true;
/**
- * If true, emit a LocusInfo for every locus in the target map, or if no target map,
- * emit a LocusInfo for every locus in the reference sequence.
- * If false, emit a LocusInfo only if a locus has coverage.
+ * If true, emit an AbstractLocusInfo for every locus in the target map, or if no target map,
+ * emit an AbstractLocusInfo for every locus in the reference sequence.
+ * If false, emit an AbstractLocusInfo only if a locus has coverage.
*/
private boolean emitUncoveredLoci = true;
@@ -180,44 +114,38 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
*/
private int maxReadsToAccumulatePerLocus = Integer.MAX_VALUE;
- // Set to true when we have enforced the accumulation limit for the first time
+ /**
+ * Set to true when we have enforced the accumulation limit for the first time
+ */
private boolean enforcedAccumulationLimit = false;
/**
* If true, include indels in the LocusInfo
*/
- private boolean includeIndels = false;
+ protected boolean includeIndels = false;
-
- // When there is a target mask, these members remember the last locus for which a LocusInfo has been
- // returned, so that any uncovered locus in the target mask can be covered by a 0-coverage LocusInfo
+ /**
+ * When there is a target mask, these members remember the last locus for which an AbstractLocusInfo has been
+ * returned, so that any uncovered locus in the target mask can be covered by a 0-coverage AbstractLocusInfo
+ */
private int lastReferenceSequence = 0;
+
+ /**
+ * Last processed locus position in the reference
+ */
private int lastPosition = 0;
- // Set to true when past all aligned reads in input SAM file
+ /**
+ * Set to true when past all aligned reads in input SAM file
+ */
private boolean finishedAlignedReads = false;
- private final LocusComparator<Locus> locusComparator = new LocusComparator<Locus>();
-
+ private final LocusComparator<Locus> locusComparator = new LocusComparator<>();
/**
- * Prepare to iterate through the given SAM records, skipping non-primary alignments. Do not use
- * BAM index even if available.
+ * Last processed interval, relevant only if list of intervals is defined.
*/
- public SamLocusIterator(final SamReader samReader) {
- this(samReader, null);
- }
-
- /**
- * Prepare to iterate through the given SAM records, skipping non-primary alignments. Do not use
- * BAM index even if available.
- *
- * @param intervalList Either the list of desired intervals, or null. Note that if an intervalList is
- * passed in that is not coordinate sorted, it will eventually be coordinated sorted by this class.
- */
- public SamLocusIterator(final SamReader samReader, final IntervalList intervalList) {
- this(samReader, intervalList, samReader.hasIndex());
- }
+ private int lastInterval = 0;
/**
* Prepare to iterate through the given SAM records, skipping non-primary alignments
@@ -229,12 +157,15 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
* It is no longer the case the useIndex==true can make performance worse. It should always perform at least
* as well as useIndex==false, and generally will be much faster.
*/
- public SamLocusIterator(final SamReader samReader, final IntervalList intervalList, final boolean useIndex) {
+
+ public AbstractLocusIterator(final SamReader samReader, final IntervalList intervalList, final boolean useIndex) {
+ final String className = this.getClass().getSimpleName();
if (samReader.getFileHeader().getSortOrder() == null || samReader.getFileHeader().getSortOrder() == SAMFileHeader.SortOrder.unsorted) {
- LOG.warn("SamLocusIterator constructed with samReader that has SortOrder == unsorted. ", "" +
+
+ LOG.warn(className + " constructed with samReader that has SortOrder == unsorted. ", "" +
"Assuming SAM is coordinate sorted, but exceptions may occur if it is not.");
} else if (samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
- throw new SAMException("SamLocusIterator cannot operate on a SAM file that is not coordinate sorted.");
+ throw new SAMException(className + " cannot operate on a SAM file that is not coordinate sorted.");
}
this.samReader = samReader;
this.useIndex = useIndex;
@@ -247,9 +178,13 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
}
}
- public Iterator<LocusInfo> iterator() {
+ /**
+ * @return iterator over all locus positions, or only the covered ones, in the reference according to the <code>emitUncoveredLoci</code>
+ * value.
+ */
+ public Iterator<K> iterator() {
if (samIterator != null) {
- throw new IllegalStateException("Cannot call iterator() more than once on SamLocusIterator");
+ throw new IllegalStateException("Cannot call iterator() more than once on " + this.getClass().getSimpleName());
}
CloseableIterator<SAMRecord> tempIterator;
if (intervals != null) {
@@ -260,10 +195,13 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
if (samFilters != null) {
tempIterator = new FilteringSamIterator(tempIterator, new AggregateFilter(samFilters));
}
- samIterator = new PeekableIterator<SAMRecord>(tempIterator);
+ samIterator = new PeekableIterator<>(tempIterator);
return this;
}
+ /**
+ * Closes the inner <code>SamIterator</code>.
+ */
public void close() {
this.samIterator.close();
}
@@ -273,10 +211,10 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
}
/**
- * Returns true if there are more LocusInfo objects that can be returned, due to any of the following reasons:
- * 1) there are more aligned reads in the SAM file
- * 2) there are LocusInfos in some stage of accumulation
- * 3) there are loci in the target mask that have yet to be accumulated (even if there are no reads covering them)
+ * Returns true if there are more AbstractLocusInfo<T> objects that can be returned, due to any of the following reasons:
+ * 1) there are more aligned reads in the SAM file
+ * 2) there are AbstractLocusInfo<T>s in some stage of accumulation
+ * 3) there are loci in the target mask that have yet to be accumulated (even if there are no reads covering them)
*/
public boolean hasNext() {
if (this.samIterator == null) {
@@ -284,7 +222,7 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
}
while (complete.isEmpty() && ((!accumulator.isEmpty()) || samHasMore() || hasRemainingMaskBases())) {
- final LocusInfo locusInfo = next();
+ final K locusInfo = next();
if (locusInfo != null) {
complete.add(0, locusInfo);
}
@@ -293,7 +231,7 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
}
/**
- * Returns true if there are more bases at which the locus iterator must emit LocusInfos because
+ * @return true if there are more bases at which the locus iterator must emit AbstractLocusInfo<T>s because
* there are loci beyond the last emitted loci which are in the set of loci to be emitted and
* the iterator is setup to emit uncovered loci - so we can guarantee we'll emit those loci.
*/
@@ -312,9 +250,10 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
/**
* hasNext() has been fixed so that if it returns true, next() is now guaranteed not to return null.
+ *
+ * @return information about next locus position in reference sequence
*/
- public LocusInfo next() {
-
+ public K next() {
// if we don't have any completed entries to return, try and make some!
while (complete.isEmpty() && samHasMore()) {
final SAMRecord rec = samIterator.peek();
@@ -345,7 +284,7 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
// emit everything that is before the start of the current read, because we know no more
// coverage will be accumulated for those loci.
while (!accumulator.isEmpty() && locusComparator.compare(accumulator.get(0), alignmentStart) < 0) {
- final LocusInfo first = accumulator.get(0);
+ final K first = accumulator.get(0);
populateCompleteQueue(alignmentStart);
if (!complete.isEmpty()) {
return complete.remove(0);
@@ -359,7 +298,7 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
// be the same position as the first base of the read (or insertion if first)
if (!accumulator.isEmpty()) {
if (accumulator.get(0).getSequenceIndex() != rec.getReferenceIndex() ||
- accumulator.get(0).position != start) {
+ accumulator.get(0).getPosition() != start) {
throw new IllegalStateException("accumulator should be empty or aligned with current SAMRecord");
}
}
@@ -401,11 +340,13 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
}
}
+
/**
* @return true if we have surpassed the maximum accumulation threshold for the first locus in the accumulator, false otherwise
*/
+
private boolean surpassedAccumulationThreshold() {
- final boolean surpassesThreshold = !accumulator.isEmpty() && accumulator.get(0).recordAndOffsets.size() >= maxReadsToAccumulatePerLocus;
+ final boolean surpassesThreshold = !accumulator.isEmpty() && accumulator.get(0).getRecordAndOffsets().size() >= maxReadsToAccumulatePerLocus;
if (surpassesThreshold && !enforcedAccumulationLimit) {
LOG.warn("We have encountered greater than " + maxReadsToAccumulatePerLocus + " reads at position " + accumulator.get(0).toString() + " and will ignore the remaining reads at this position. Note that further warnings will be suppressed.");
enforcedAccumulationLimit = true;
@@ -414,95 +355,37 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
}
/**
- * Check if cigar start with an insertion, ignoring other operators that do not consume references bases
- * @param cigar the cigar
- * @return <code>true</code> if the first operator to consume reference bases or be an insertion, is an insertion; <code>false</code> otherwise
- */
- private static boolean startWithInsertion(final Cigar cigar) {
- for (final CigarElement element : cigar.getCigarElements()) {
- if (element.getOperator()==CigarOperator.I) return true;
- if (!element.getOperator().consumesReferenceBases()) continue;
- break;
- }
- return false;
- }
-
- /**
* Capture the loci covered by the given SAMRecord in the LocusInfos in the accumulator,
* creating new LocusInfos as needed.
+ *
+ * @param rec record to add to accumulator
*/
- private void accumulateSamRecord(final SAMRecord rec) {
-
- // get the accumulator offset
- int accOffset = getAccumulatorOffset(rec);
-
- final int minQuality = getQualityScoreCutoff();
- final boolean dontCheckQualities = minQuality == 0;
- final byte[] baseQualities = dontCheckQualities ? null : rec.getBaseQualities();
-
- // interpret the CIGAR string and add the base info
- for (final AlignmentBlock alignmentBlock : rec.getAlignmentBlocks()) {
- final int readStart = alignmentBlock.getReadStart();
- final int refStart = alignmentBlock.getReferenceStart();
- final int blockLength = alignmentBlock.getLength();
-
- for (int i = 0; i < blockLength; ++i) {
- // 0-based offset into the read of the current base
- final int readOffset = readStart + i - 1;
-
- // if the quality score cutoff is met, accumulate the base info
- if (dontCheckQualities || baseQualities.length == 0 || baseQualities[readOffset] >= minQuality) {
- // 0-based offset from the aligned position of the first base in the read to the aligned position of the current base.
- final int refOffset = refStart + i - accOffset;
- accumulator.get(refOffset).add(rec, readOffset);
- }
- }
- }
- }
+ abstract void accumulateSamRecord(final SAMRecord rec);
+
/**
* Requires that the accumulator for the record is previously fill with
* {@link #accumulateSamRecord(htsjdk.samtools.SAMRecord)}.
* Include in the LocusInfo the indels; the quality threshold does not affect insertions/deletions
*/
- private void accumulateIndels(final SAMRecord rec) {
- // get the cigar elements
- final List<CigarElement> cigar = rec.getCigar().getCigarElements();
- // 0-based offset into the read of the current base
- int readBase = 0;
- // 0-based offset for the reference of the current base
- // the accumulator could have the previous position because an indel is accumulating
- int refBase = rec.getAlignmentStart() - getAccumulatorOffset(rec);
- // iterate over the cigar element
- for (int elementIndex = 0; elementIndex < cigar.size(); elementIndex++) {
- final CigarElement e = cigar.get(elementIndex);
- final CigarOperator operator = e.getOperator();
- if (operator.equals(CigarOperator.I)) {
- System.err.println("");
- // insertions are included in the previous base
- accumulator.get(refBase - 1).addInserted(rec, readBase);
- readBase += e.getLength();
- } else if (operator.equals(CigarOperator.D)) {
- // accumulate for each position that spans the deletion
- for (int i = 0; i < e.getLength(); i++) {
- // the offset is the one for the previous base
- accumulator.get(refBase + i).addDeleted(rec, readBase - 1);
- }
- refBase += e.getLength();
- } else {
- if (operator.consumesReadBases()) readBase += e.getLength();
- if (operator.consumesReferenceBases()) refBase += e.getLength();
- }
- }
- }
+ abstract void accumulateIndels(final SAMRecord rec);
/**
- * Create the next relevant zero-coverage LocusInfo
+ * @param rec aligned SamRecord
+ * @param readOffset offset from start of read
+ * @param length length of aligned block
+ * @param refPosition position in the reference sequence
+ * @return RecordAndOffset
+ */
+ abstract T createRecordAndOffset(SAMRecord rec, int readOffset, int length, int refPosition);
+
+ /**
+ * Create the next relevant zero-coverage AbstractLocusInfo<T>
*
* @param stopBeforeLocus don't go up to this sequence and position
- * @return a zero-coverage LocusInfo, or null if there is none before the stopBefore locus
+ * @return a zero-coverage AbstractLocusInfo<T>, or null if there is none before the stopBefore locus
*/
- private LocusInfo createNextUncoveredLocusInfo(final Locus stopBeforeLocus) {
+ private K createNextUncoveredLocusInfo(final Locus stopBeforeLocus) {
while (lastReferenceSequence <= stopBeforeLocus.getSequenceIndex() &&
lastReferenceSequence <= referenceSequenceMask.getMaxSequenceIndex()) {
@@ -524,21 +407,29 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
lastPosition = 0;
} else if (lastReferenceSequence < stopBeforeLocus.getSequenceIndex() || nextbit < stopBeforeLocus.getPosition()) {
lastPosition = nextbit;
- return new LocusInfo(getReferenceSequence(lastReferenceSequence), lastPosition);
+ return createLocusInfo(getReferenceSequence(lastReferenceSequence), lastPosition);
} else if (nextbit >= stopBeforeLocus.getPosition()) {
return null;
}
}
-
return null;
}
/**
- * Pop the first entry from the LocusInfo accumulator into the complete queue. In addition,
+ * @param referenceSequence processed reference sequence
+ * @param lastPosition last processed reference locus position
+ * @return <code>AbstractLocusInfo<T></code> for the lastPosition
+ */
+ abstract K createLocusInfo(SAMSequenceRecord referenceSequence, int lastPosition);
+
+ /**
+ * Pop the first entry from the AbstractLocusInfo<T> accumulator into the complete queue. In addition,
* check the ReferenceSequenceMask and if there are intervening mask positions between the last popped base and the one
* about to be popped, put those on the complete queue as well.
* Note that a single call to this method may not empty the accumulator completely, or even
- * empty it at all, because it may just put a zero-coverage LocusInfo into the complete queue.
+ * empty it at all, because it may just put a zero-coverage AbstractLocusInfo<T> into the complete queue.
+ *
+ * @param stopBeforeLocus don't go up to this sequence and position
*/
private void populateCompleteQueue(final Locus stopBeforeLocus) {
// Because of gapped alignments, it is possible to create LocusInfo's with no reads associated with them.
@@ -550,14 +441,14 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
if (accumulator.isEmpty()) {
return;
}
- final LocusInfo locusInfo = accumulator.get(0);
+ final K locusInfo = accumulator.get(0);
if (locusComparator.compare(stopBeforeLocus, locusInfo) <= 0) {
return;
}
// If necessary, emit a zero-coverage LocusInfo
if (emitUncoveredLoci) {
- final LocusInfo zeroCoverage = createNextUncoveredLocusInfo(locusInfo);
+ final K zeroCoverage = createNextUncoveredLocusInfo(locusInfo);
if (zeroCoverage != null) {
complete.add(zeroCoverage);
return;
@@ -580,29 +471,7 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
lastPosition = locusInfo.getPosition();
}
- /**
- * Ensure that the queue is populated and get the accumulator offset for the current record
- */
- private int getAccumulatorOffset(SAMRecord rec) {
- final SAMSequenceRecord ref = getReferenceSequence(rec.getReferenceIndex());
- final int alignmentStart = rec.getAlignmentStart();
- final int alignmentEnd = rec.getAlignmentEnd();
- final int alignmentLength = alignmentEnd - alignmentStart;
- // get the offset for an insertion if we are tracking them
- final int insOffset = (includeIndels && startWithInsertion(rec.getCigar())) ? 1 : 0;
- // if there is an insertion in the first base and it is not tracked in the accumulator, add it
- if (insOffset == 1 && accumulator.isEmpty()) {
- accumulator.add(new LocusInfo(ref, alignmentStart - 1));
- }
-
- // Ensure there are LocusInfos up to and including this position
- for (int i = accumulator.size(); i <= alignmentLength + insOffset; ++i) {
- accumulator.add(new LocusInfo(ref, alignmentStart + i - insOffset));
- }
- return alignmentStart - insOffset;
- }
-
- private SAMSequenceRecord getReferenceSequence(final int referenceSequenceIndex) {
+ protected SAMSequenceRecord getReferenceSequence(final int referenceSequenceIndex) {
return samReader.getFileHeader().getSequence(referenceSequenceIndex);
}
@@ -610,6 +479,21 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
}
+ /**
+ * Check if the cigar starts with an insertion, ignoring other operators that do not consume reference bases
+ *
+ * @param cigar the cigar
+ * @return <code>true</code> if the first operator that either consumes reference bases or is an insertion is an insertion; <code>false</code> otherwise
+ */
+ protected static boolean startWithInsertion(final Cigar cigar) {
+ for (final CigarElement element : cigar.getCigarElements()) {
+ if (element.getOperator() == CigarOperator.I) return true;
+ if (!element.getOperator().consumesReferenceBases()) continue;
+ break;
+ }
+ return false;
+ }
+
// --------------------------------------------------------------------------------------------
// Helper methods below this point...
// --------------------------------------------------------------------------------------------
@@ -669,6 +553,15 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
this.maxReadsToAccumulatePerLocus = maxReadsToAccumulatePerLocus;
}
+ protected List<Interval> getIntervals() {
+ return intervals;
+ }
+
+ protected Interval getCurrentInterval() {
+ if (intervals == null) return null;
+ return intervals.get(lastInterval);
+ }
+
public boolean isIncludeIndels() {
return includeIndels;
}
@@ -676,6 +569,4 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
public void setIncludeIndels(final boolean includeIndels) {
this.includeIndels = includeIndels;
}
-
}
-
diff --git a/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java b/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java
new file mode 100644
index 0000000..28b9d34
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/util/AbstractRecordAndOffset.java
@@ -0,0 +1,130 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.samtools.util;
+
+import htsjdk.samtools.SAMRecord;
+
+/**
+ * Holds a SAMRecord plus the zero-based offset into that SAMRecord's bases and quality scores that corresponds
+ * to the base and quality at the genomic position described by the containing AbstractLocusInfo. One object represents
+ * one base for <code>SamLocusIterator.RecordAndOffset</code> implementation or one alignment block of
+ * <code>SAMRecord</code> for <code>TypedRecordAndOffset</code> implementation.
+ *
+ * @author Darina_Nikolaeva at epam.com, EPAM Systems, Inc. <www.epam.com>
+ * @author Mariia_Zueva at epam.com, EPAM Systems, Inc. <www.epam.com>
+ */
+public class AbstractRecordAndOffset {
+
+ /**
+ * A SAMRecord aligned to reference position
+ */
+ protected final SAMRecord record;
+ /**
+ * Zero-based offset in the read corresponding to the current position in AbstractLocusInfo
+ */
+ protected final int offset;
+
+ /**
+ * @param record inner SAMRecord
+ * @param offset from the start of the read
+ * @param length of alignment block
+ * @param refPos corresponding to read offset reference position
+ */
+ public AbstractRecordAndOffset(final SAMRecord record, final int offset, int length, int refPos) {
+ this(record, offset);
+ }
+
+ /**
+ * @param record inner SAMRecord
+ * @param offset from the start of the read
+ */
+ public AbstractRecordAndOffset(final SAMRecord record, final int offset) {
+ this.offset = offset;
+ this.record = record;
+ }
+
+ /**
+ * @return offset of aligned read base from the start of the read.
+ */
+ public int getOffset() {
+ return offset;
+ }
+
+ /**
+ * @return inner <code>SAMRecord</code> object.
+ */
+ public SAMRecord getRecord() {
+ return record;
+ }
+
+ /**
+ * @return the read base according to <code>offset</code>.
+ */
+ public byte getReadBase() {
+ return record.getReadBases()[offset];
+ }
+
+ /**
+ * @return the length of alignment block represented by the object.
+ */
+ public int getLength() {
+ return 1;
+ }
+
+ /**
+ * @return the position in reference sequence, to which the start of alignment block is aligned.
+ */
+ public int getRefPos() {
+ return -1;
+ }
+
+ /**
+ * @return read name of inner SAMRecord.
+ */
+ public String getReadName() {
+ return record.getReadName();
+ }
+
+ /**
+ * @return array of base qualities of inner SAMRecord.
+ */
+ public byte[] getBaseQualities() {
+ return record.getBaseQualities();
+ }
+
+ /**
+ * @return the base quality according to <code>offset</code>.
+ */
+ public byte getBaseQuality() {
+ return record.getBaseQualities()[offset];
+ }
+
+ protected void validateOffset(int offset, final byte[] array) {
+ if (offset < 0 || offset >= array.length) {
+ throw new IllegalArgumentException("The requested position is not covered by this " + this.getClass().getSimpleName() +
+ " object.");
+ }
+ }
+}
diff --git a/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java b/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java
index 0261b19..b0ac001 100755
--- a/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java
+++ b/src/main/java/htsjdk/samtools/util/BlockCompressedInputStream.java
@@ -50,6 +50,13 @@ import java.util.Arrays;
* c.f. http://samtools.sourceforge.net/SAM1.pdf for details of BGZF format
*/
public class BlockCompressedInputStream extends InputStream implements LocationAware {
+
+ public final static String INCORRECT_HEADER_SIZE_MSG = "Incorrect header size for file: ";
+ public final static String UNEXPECTED_BLOCK_LENGTH_MSG = "Unexpected compressed block length: ";
+ public final static String PREMATURE_END_MSG = "Premature end of file: ";
+ public final static String CANNOT_SEEK_STREAM_MSG = "Cannot seek on stream based file ";
+ public final static String INVALID_FILE_PTR_MSG = "Invalid file pointer: ";
+
private InputStream mStream = null;
private SeekableStream mFile = null;
private byte[] mFileBuffer = null;
@@ -84,8 +91,7 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
/**
* Use this ctor if you wish to call seek()
*/
- public BlockCompressedInputStream(final File file)
- throws IOException {
+ public BlockCompressedInputStream(final File file) throws IOException {
mFile = new SeekableFileStream(file);
mStream = null;
@@ -121,8 +127,7 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
* Note that although the next caller can read this many bytes without blocking, the available() method call itself
* may block in order to fill an internal buffer if it has been exhausted.
*/
- public int available()
- throws IOException {
+ public int available() throws IOException {
if (mCurrentBlock == null || mCurrentOffset == mCurrentBlock.length) {
readBlock();
}
@@ -143,8 +148,7 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
/**
* Closes the underlying InputStream or RandomAccessFile
*/
- public void close()
- throws IOException {
+ public void close() throws IOException {
if (mFile != null) {
mFile.close();
mFile = null;
@@ -164,8 +168,7 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
* @return the next byte of data, or -1 if the end of the stream is reached.
*/
- public int read()
- throws IOException {
+ public int read() throws IOException {
return (available() > 0) ? (mCurrentBlock[mCurrentOffset++] & 0xFF) : -1;
}
@@ -180,8 +183,7 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
* @return the total number of bytes read into the buffer, or -1 is there is no more data because the end of
* the stream has been reached.
*/
- public int read(final byte[] buffer)
- throws IOException {
+ public int read(final byte[] buffer) throws IOException {
return read(buffer, 0, buffer.length);
}
@@ -253,8 +255,7 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
* @return the total number of bytes read into the buffer, or -1 if there is no more data because the end of
* the stream has been reached.
*/
- public int read(final byte[] buffer, int offset, int length)
- throws IOException {
+ public int read(final byte[] buffer, int offset, int length) throws IOException {
final int originalLength = length;
while (length > 0) {
final int available = available();
@@ -280,10 +281,9 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
*
* @param pos virtual file pointer
*/
- public void seek(final long pos)
- throws IOException {
+ public void seek(final long pos) throws IOException {
if (mFile == null) {
- throw new IOException("Cannot seek on stream based file");
+ throw new IOException(CANNOT_SEEK_STREAM_MSG);
}
// Decode virtual file pointer
// Upper 48 bits is the byte offset into the compressed stream of a block.
@@ -302,7 +302,7 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
}
if (uncompressedOffset > available ||
(uncompressedOffset == available && !eof())) {
- throw new IOException("Invalid file pointer: " + pos);
+ throw new IOException(INVALID_FILE_PTR_MSG + pos + " for " + mFile.getSource());
}
mCurrentOffset = uncompressedOffset;
}
@@ -342,8 +342,7 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
* @param stream Must be at start of file. Throws RuntimeException if !stream.markSupported().
* @return true if the given file looks like a valid BGZF file.
*/
- public static boolean isValidFile(final InputStream stream)
- throws IOException {
+ public static boolean isValidFile(final InputStream stream) throws IOException {
if (!stream.markSupported()) {
throw new RuntimeException("Cannot test non-buffered stream");
}
@@ -363,8 +362,7 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
buffer[13] == BlockCompressedStreamConstants.BGZF_ID2);
}
- private void readBlock()
- throws IOException {
+ private void readBlock() throws IOException {
if (mFileBuffer == null) {
mFileBuffer = new byte[BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE];
@@ -378,16 +376,16 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
return;
}
if (count != BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH) {
- throw new IOException("Premature end of file");
+ throw new IOException(INCORRECT_HEADER_SIZE_MSG + mFile.getSource());
}
final int blockLength = unpackInt16(mFileBuffer, BlockCompressedStreamConstants.BLOCK_LENGTH_OFFSET) + 1;
if (blockLength < BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH || blockLength > mFileBuffer.length) {
- throw new IOException("Unexpected compressed block length: " + blockLength);
+ throw new IOException(UNEXPECTED_BLOCK_LENGTH_MSG + blockLength + " for " + mFile.getSource());
}
final int remaining = blockLength - BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH;
count = readBytes(mFileBuffer, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH, remaining);
if (count != remaining) {
- throw new FileTruncatedException("Premature end of file");
+ throw new FileTruncatedException(PREMATURE_END_MSG + mFile.getSource());
}
inflateBlock(mFileBuffer, blockLength);
mCurrentOffset = 0;
@@ -395,8 +393,7 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
mLastBlockLength = blockLength;
}
- private void inflateBlock(final byte[] compressedBlock, final int compressedLength)
- throws IOException {
+ private void inflateBlock(final byte[] compressedBlock, final int compressedLength) throws IOException {
final int uncompressedLength = unpackInt32(compressedBlock, compressedLength-4);
byte[] buffer = mCurrentBlock;
mCurrentBlock = null;
@@ -404,15 +401,14 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
try {
buffer = new byte[uncompressedLength];
} catch (final NegativeArraySizeException e) {
- throw new RuntimeIOException("BGZF file has invalid uncompressedLength: " + uncompressedLength, e);
+ throw new RuntimeIOException(mFile.getSource() + " has invalid uncompressedLength: " + uncompressedLength, e);
}
}
blockGunzipper.unzipBlock(buffer, compressedBlock, compressedLength);
mCurrentBlock = buffer;
}
- private int readBytes(final byte[] buffer, final int offset, final int length)
- throws IOException {
+ private int readBytes(final byte[] buffer, final int offset, final int length) throws IOException {
if (mFile != null) {
return readBytes(mFile, buffer, offset, length);
} else if (mStream != null) {
@@ -422,8 +418,7 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
}
}
- private static int readBytes(final SeekableStream file, final byte[] buffer, final int offset, final int length)
- throws IOException {
+ private static int readBytes(final SeekableStream file, final byte[] buffer, final int offset, final int length) throws IOException {
int bytesRead = 0;
while (bytesRead < length) {
final int count = file.read(buffer, offset + bytesRead, length - bytesRead);
@@ -435,8 +430,7 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
return bytesRead;
}
- private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length)
- throws IOException {
+ private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length) throws IOException {
int bytesRead = 0;
while (bytesRead < length) {
final int count = stream.read(buffer, offset + bytesRead, length - bytesRead);
@@ -462,8 +456,7 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
public enum FileTermination {HAS_TERMINATOR_BLOCK, HAS_HEALTHY_LAST_BLOCK, DEFECTIVE}
- public static FileTermination checkTermination(final File file)
- throws IOException {
+ public static FileTermination checkTermination(final File file) throws IOException {
final long fileSize = file.length();
if (fileSize < BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length) {
return FileTermination.DEFECTIVE;
diff --git a/src/main/java/htsjdk/samtools/util/CollectionUtil.java b/src/main/java/htsjdk/samtools/util/CollectionUtil.java
index a80319b..0354a5b 100755
--- a/src/main/java/htsjdk/samtools/util/CollectionUtil.java
+++ b/src/main/java/htsjdk/samtools/util/CollectionUtil.java
@@ -104,8 +104,10 @@ public class CollectionUtil {
/**
* Partitions a collection into groups based on a characteristics of that group. Partitions are embodied in a map, whose keys are the
* value of that characteristic, and the values are the partition of elements whose characteristic evaluate to that key.
+ *
+ * @deprecated use java8 .stream().collect(Collectors.groupingBy(()-> function)) instead
*/
- @Deprecated //use java8 .stream().collect(Collectors.groupingBy(()-> function)) instead
+ @Deprecated
public static <K, V> Map<K, Collection<V>> partition(final Collection<V> collection, final Partitioner<V, K> p) {
final MultiMap<K, V> partitionToValues = new MultiMap<>();
for (final V entry : collection) {
@@ -113,7 +115,11 @@ public class CollectionUtil {
}
return partitionToValues;
}
- @Deprecated //not needed, use Collectors.groupingBy instead
+
+ /**
+ * @deprecated use Collectors.groupingBy instead
+ */
+ @Deprecated
public static abstract class Partitioner<V, K> {
public abstract K getPartition(final V v);
}
diff --git a/src/main/java/htsjdk/samtools/util/CustomGzipOutputStream.java b/src/main/java/htsjdk/samtools/util/CustomGzipOutputStream.java
new file mode 100644
index 0000000..cb3652e
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/util/CustomGzipOutputStream.java
@@ -0,0 +1,24 @@
+package htsjdk.samtools.util;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.zip.GZIPOutputStream;
+
+/**
+ * Hacky little class used to allow us to set the compression level on a GZIP output stream which, for some
+ * bizarre reason, is not exposed in the standard API.
+ *
+ * @author Tim Fennell
+ */
/**
 * Hacky little class used to allow us to set the compression level on a GZIP output stream which, for some
 * bizarre reason, is not exposed in the standard API.
 *
 * Constructors are public: the class lives in its own file as a public type precisely so that
 * code outside this package can use it; package-private constructors would make the public
 * class unconstructible anywhere else.
 *
 * @author Tim Fennell
 */
public class CustomGzipOutputStream extends GZIPOutputStream {
    /**
     * @param outputStream     stream the compressed bytes are written to
     * @param bufferSize       size of the deflater buffer, in bytes
     * @param compressionLevel deflate level, e.g. {@link java.util.zip.Deflater#BEST_COMPRESSION}
     * @throws IOException if the GZIP header cannot be written to the underlying stream
     */
    public CustomGzipOutputStream(final OutputStream outputStream, final int bufferSize, final int compressionLevel) throws
            IOException {
        super(outputStream, bufferSize);
        this.def.setLevel(compressionLevel);
    }

    /**
     * @param outputStream     stream the compressed bytes are written to
     * @param compressionLevel deflate level, e.g. {@link java.util.zip.Deflater#BEST_COMPRESSION}
     * @throws IOException if the GZIP header cannot be written to the underlying stream
     */
    public CustomGzipOutputStream(final OutputStream outputStream, final int compressionLevel) throws IOException {
        super(outputStream);
        this.def.setLevel(compressionLevel);
    }
}
diff --git a/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java b/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java
new file mode 100644
index 0000000..0d779a8
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/util/EdgeReadIterator.java
@@ -0,0 +1,228 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.samtools.util;
+
+import htsjdk.samtools.AlignmentBlock;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.SamReader;
+
+/**
+ * Iterator that traverses a SAM File, accumulating information on a per-locus basis.
+ * Optionally takes a target interval list, in which case the loci returned are the ones covered by
+ * the interval list. If no target interval list, whatever loci are covered by the input reads are returned.
+ * By default duplicate reads and non-primary alignments are filtered out. Filtering may be changed
+ * via setSamFilters(). Difference from SamLocusIterator is that this implementation accumulates data
+ * only about start and end of alignment blocks from reads, not about each aligned base.
+ *
+ * @author Darina_Nikolaeva at epam.com, EPAM Systems, Inc. <www.epam.com>
+ * @author Mariia_Zueva at epam.com, EPAM Systems, Inc. <www.epam.com>
+ *
+ */
public class EdgeReadIterator extends AbstractLocusIterator<EdgingRecordAndOffset, AbstractLocusInfo<EdgingRecordAndOffset>> {

    /**
     * Prepare to iterate through the given SAM records, skipping non-primary alignments.
     * No target interval list is supplied, so whether a BAM index exists is irrelevant
     * (see the three-argument constructor: useIndex matters only with an interval list).
     *
     * @param samReader must be coordinate sorted
     */
    public EdgeReadIterator(final SamReader samReader) {
        this(samReader, null);
    }

    /**
     * Prepare to iterate through the given SAM records, skipping non-primary alignments.
     * The BAM index is used when the reader has one.
     *
     * @param samReader    must be coordinate sorted
     * @param intervalList Either the list of desired intervals, or null. Note that if an intervalList is
     *                     passed in that is not coordinate sorted, it will eventually be coordinate sorted by this class.
     */
    public EdgeReadIterator(final SamReader samReader, final IntervalList intervalList) {
        this(samReader, intervalList, samReader.hasIndex());
    }

    /**
     * Prepare to iterate through the given SAM records, skipping non-primary alignments
     *
     * @param samReader    must be coordinate sorted
     * @param intervalList Either the list of desired intervals, or null. Note that if an intervalList is
     *                     passed in that is not coordinate sorted, it will eventually be coordinate sorted by this class.
     * @param useIndex     If true, do indexed lookup to improve performance. Not relevant if intervalList == null.
     *                     It is no longer the case the useIndex==true can make performance worse. It should always perform at least
     *                     as well as useIndex==false, and generally will be much faster.
     */
    public EdgeReadIterator(final SamReader samReader, final IntervalList intervalList, final boolean useIndex) {
        super(samReader, intervalList, useIndex);
    }

    /**
     * Capture the loci covered by the given SAMRecord in the AbstractLocusInfos in the accumulator,
     * creating new AbstractLocusInfos as needed. EdgingRecordAndOffset objects are created only for the start
     * and end of each alignment block of the SAMRecord.
     * If a list of intervals is defined, the start and/or length of the alignment block are shifted to match
     * the interval, to prevent exceeding the interval.
     *
     * @param rec SAMRecord to process and add to <code>AbstractLocusInfo</code>
     */
    @Override
    void accumulateSamRecord(SAMRecord rec) {
        // interpret the CIGAR string and add the base info
        for (final AlignmentBlock alignmentBlock : rec.getAlignmentBlocks()) {
            // 0-based offset into the read of the current base
            final int readOffset = alignmentBlock.getReadStart() - 1;
            // 1-based reference position that the current base aligns to
            final int refPos = alignmentBlock.getReferenceStart();

            // 0-based offset from the aligned position of the first base in the read to the aligned position
            // of the current base.
            final int refOffset = refPos - rec.getAlignmentStart();
            // exclusive end offset: one past the last base of this alignment block
            final int refOffsetEnd = refPos - rec.getAlignmentStart() + alignmentBlock.getLength();


            // Ensure there are AbstractLocusInfos up to and including this position
            for (int j = accumulator.size(); j <= refOffsetEnd; ++j) {
                accumulator.add(createLocusInfo(getReferenceSequence(rec.getReferenceIndex()),
                        rec.getAlignmentStart() + j));
            }

            /* Let's assume an alignment block starts in some locus.
             * We put two records to the accumulator. The first one has the "begin" type which corresponds to the locus
             * where the block starts. The second one has the "end" type which corresponds to the other locus where the block ends.
             */
            int refOffsetInterval = refOffset; // corresponds to the beginning of the alignment block
            int refOffsetEndInterval = refOffsetEnd;
            int startShift = 0;

            // intersect intervals and alignment block
            if (getIntervals() != null) {
                // get the current interval we're processing
                Interval interval = getCurrentInterval();
                if (interval != null) {
                    final int intervalEnd = interval.getEnd();
                    final int intervalStart = interval.getStart();
                    // skip the whole alignment block if it does not overlap the interval at all
                    if (!CoordMath.overlaps(refPos, refPos + alignmentBlock.getLength(), intervalStart, intervalEnd)) {
                        continue;
                    }
                    // if the alignment block starts out of an interval, shift the starting position
                    if (refPos < intervalStart) {
                        startShift = intervalStart - refPos;
                        refOffsetInterval = refOffsetInterval + startShift;
                    }
                    // if the alignment block ends out of an interval, shift the ending position;
                    // the +1 keeps the end offset exclusive (intervalEnd is inclusive), matching
                    // the unclipped refOffsetEnd convention above
                    final int readEnd = refPos + alignmentBlock.getLength();
                    if (refPos + alignmentBlock.getLength() > intervalEnd) {
                        refOffsetEndInterval = refOffsetEndInterval - (readEnd - intervalEnd) + 1;
                    }
                }
            }
            final int length = refOffsetEndInterval - refOffsetInterval;
            // add the alignment block to the accumulator when it starts and when it ends
            final EdgingRecordAndOffset recordAndOffset = createRecordAndOffset(rec, readOffset + startShift, length, refPos + startShift);
            // accumulate start of the alignment block
            accumulator.get(refOffsetInterval).add(recordAndOffset);
            final EdgingRecordAndOffset recordAndOffsetEnd = createRecordAndOffset(recordAndOffset);
            // accumulate end of the alignment block (placed one locus past the last covered base)
            accumulator.get(refOffsetEndInterval).add(recordAndOffsetEnd);
        }
    }

    /**
     * Indel accumulation is not supported: this iterator only tracks alignment-block edges.
     *
     * @param rec ignored
     * @throws UnsupportedOperationException always
     */
    @Override
    void accumulateIndels(SAMRecord rec) {
        throw new UnsupportedOperationException("Indels accumulation is not supported for " + getClass().getSimpleName() + ".");
    }

    /**
     * Creates a new <code>EdgingRecordAndOffset</code> for given input values
     *
     * @param rec        aligned SamRecord
     * @param readOffset offset from start of read
     * @param length     length of alignment block
     * @param refPos     position in the reference sequence
     * @return created <code>EdgingRecordAndOffset</code>
     */
    @Override
    EdgingRecordAndOffset createRecordAndOffset(SAMRecord rec, int readOffset, int length, int refPos) {
        return EdgingRecordAndOffset.createBeginRecord(rec, readOffset, length, refPos);
    }

    /**
     * Creates the END-typed counterpart for a previously created BEGIN record.
     *
     * @param startRecord the BEGIN-typed record that opened the alignment block
     * @return an END-typed <code>EdgingRecordAndOffset</code> referencing {@code startRecord}
     */
    EdgingRecordAndOffset createRecordAndOffset(EdgingRecordAndOffset startRecord) {
        return EdgingRecordAndOffset.createEndRecord(startRecord);
    }

    /**
     * @param referenceSequence processed reference sequence
     * @param lastPosition      last processed reference locus position
     * @return <code>AbstractLocusInfo</code> for the lastPosition
     */
    @Override
    AbstractLocusInfo<EdgingRecordAndOffset> createLocusInfo(SAMSequenceRecord referenceSequence, int lastPosition) {
        return new AbstractLocusInfo<>(referenceSequence, lastPosition);
    }

    /**
     * This method isn't supported in current implementation.
     *
     * NOTE(review): the requested value itself is ignored; the call only throws when a
     * non-zero cap is already in place — confirm this guard is the intended behavior.
     *
     * @param maxReadsToAccumulatePerLocus maximum number of <code>RecordAndOffset</code> objects to store for
     *                                     one loci in reference sequence.
     * @throws UnsupportedOperationException if a non-zero cap has already been set
     */
    @Override
    public void setMaxReadsToAccumulatePerLocus(int maxReadsToAccumulatePerLocus) {
        if (getMaxReadsToAccumulatePerLocus() != 0) {
            throw new UnsupportedOperationException("Locus cap is not supported for " + getClass().getSimpleName() + ".");
        }
    }

    /**
     * This method isn't supported in current implementation.
     *
     * @param qualityScoreCutoff the minimum base quality to include in <code>AbstractLocusInfo</code>.
     * @throws UnsupportedOperationException always
     */
    @Override
    public void setQualityScoreCutoff(int qualityScoreCutoff) {
        throw new UnsupportedOperationException("Quality filtering is not supported for " + getClass().getSimpleName() + ".");
    }

    /**
     * For correct work of <code>EdgeReadIterator</code> value <code>emitUncoveredLoci</code> must be true.
     * Only a no-op call (requesting the current setting) is accepted.
     *
     * @param emitUncoveredLoci if false, iterator will skip uncovered loci in reference sequence, otherwise
     *                          empty <code>AbstractLocusInfo</code> will be created for each loci.
     * @throws UnsupportedOperationException if the requested value differs from the current setting
     */
    @Override
    public void setEmitUncoveredLoci(boolean emitUncoveredLoci) {
        if (isEmitUncoveredLoci() != emitUncoveredLoci) {
            throw new UnsupportedOperationException(getClass().getSimpleName() + " doesn't support work with skipping " +
                    "uncovered bases.");
        }
    }

    /**
     * Indel accumulation cannot be enabled for this iterator; only a no-op call is accepted.
     *
     * @param includeIndels requested setting; must equal the current one
     * @throws UnsupportedOperationException if the requested value differs from the current setting
     */
    @Override
    public void setIncludeIndels(boolean includeIndels) {
        if (isIncludeIndels() != includeIndels) {
            throw new UnsupportedOperationException("Indels accumulation is not supported for " + getClass().getSimpleName() + ".");
        }
    }
}
diff --git a/src/main/java/htsjdk/samtools/util/EdgingRecordAndOffset.java b/src/main/java/htsjdk/samtools/util/EdgingRecordAndOffset.java
new file mode 100644
index 0000000..b83a169
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/util/EdgingRecordAndOffset.java
@@ -0,0 +1,219 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.samtools.util;
+
+import htsjdk.samtools.SAMRecord;
+
+import static htsjdk.samtools.util.EdgingRecordAndOffset.Type.BEGIN;
+import static htsjdk.samtools.util.EdgingRecordAndOffset.Type.END;
+
+/**
+ * Holds a SAMRecord plus the zero-based offset into that SAMRecord's bases and quality scores that corresponds
+ * to the base and quality for the start of alignment block at the genomic position described by the AbstractLocusInfo.
+ * This is implementation for EdgeReadIterator, field <code>type</code> added to indicate whether object
+ * represents the start or the end of an alignment block.
+ * <p>
+ * Subclasses StartEdgingRecordAndOffset and EndEdgingRecordAndOffset are used in EdgeReadIterator to
+ * distinguish starting and ending of the alignment block
+ * as for each alignment block two objects of <code>EdgingRecordAndOffset</code> are created with two different types.
+ * The main idea of using EdgeReadIterator is to process alignment block starting from locus where BEGIN type occurs,
+ * aggregate information per locus and keep it until END type occurs, then remove alignment block from consideration.
+ *
+ * @author Darina_Nikolaeva at epam.com, EPAM Systems, Inc. <www.epam.com>
+ * @author Mariia_Zueva at epam.com, EPAM Systems, Inc. <www.epam.com>
+ */
public abstract class EdgingRecordAndOffset extends AbstractRecordAndOffset {

    // Private: instances are created only through the static factory methods below.
    private EdgingRecordAndOffset(SAMRecord record, int offset) {
        super(record, offset);
    }

    /**
     * @return for an END-typed object, the BEGIN-typed record that opened the same
     *         alignment block; null for a BEGIN-typed object.
     */
    public abstract EdgingRecordAndOffset getStart();

    /**
     * @return whether this object marks the BEGIN or the END of its alignment block.
     */
    public abstract Type getType();

    /**
     * @param position 1-based reference position
     * @return base quality of the read base aligned to the given reference position
     */
    public abstract byte getBaseQuality(int position);

    /**
     * Creates a BEGIN-typed record marking the start of an alignment block.
     *
     * @param record inner SAMRecord
     * @param offset 0-based offset of the block start within the read
     * @param length length of the alignment block
     * @param refPos 1-based reference position of the block start
     * @return BEGIN-typed <code>EdgingRecordAndOffset</code>
     */
    public static EdgingRecordAndOffset createBeginRecord(SAMRecord record, int offset, int length, int refPos) {
        return new StartEdgingRecordAndOffset(record, offset, length, refPos);
    }

    /**
     * Creates an END-typed record that references its BEGIN counterpart.
     *
     * @param startRecord BEGIN-typed record for the same alignment block
     * @return END-typed <code>EdgingRecordAndOffset</code>
     */
    public static EdgingRecordAndOffset createEndRecord(EdgingRecordAndOffset startRecord) {
        return new EndEdgingRecordAndOffset(startRecord);
    }

    /**
     * Describes the type of <code>EdgingRecordAndOffset</code>, whether it represents the start or the end of
     * an alignment block.
     */
    public enum Type {
        BEGIN, END
    }

    private static class StartEdgingRecordAndOffset extends EdgingRecordAndOffset {
        /**
         * Length of alignment block of the read
         */
        private final int length;
        /**
         * A reference position to which read offset is aligned.
         */
        private final int refPos;

        // cached hash code; 0 means "not yet computed"
        private int hash = 0;

        /**
         * @param record inner SAMRecord
         * @param offset from the start of the read
         * @param length of alignment block
         * @param refPos corresponding to read offset reference position
         * @throws IllegalArgumentException if length exceeds the whole read length
         */
        protected StartEdgingRecordAndOffset(SAMRecord record, int offset, int length, int refPos) {
            super(record, offset);
            if (length > record.getReadLength()) {
                throw new IllegalArgumentException("Block length cannot be larger than whole read length");
            }
            this.length = length;
            this.refPos = refPos;
        }

        /**
         * @param position in the reference
         * @return base quality of a read base, corresponding to a given reference position
         * @throws IllegalArgumentException if the position is not covered by this block
         *         (bounds enforced via validateOffset)
         */
        public byte getBaseQuality(int position) {
            int rOffset = getRelativeOffset(position);
            byte[] baseQualities = record.getBaseQualities();
            validateOffset(rOffset, baseQualities);
            return baseQualities[rOffset];
        }

        /**
         * @return the length of alignment block represented by the object.
         */
        @Override
        public int getLength() {
            return length;
        }

        /**
         * @return the position in reference sequence, to which the start of alignment block is aligned.
         */
        @Override
        public int getRefPos() {
            return refPos;
        }

        /**
         * @return type of object
         */
        @Override
        public Type getType() {
            return BEGIN;
        }

        /**
         * @return null: only END-typed objects carry a reference to the start of their alignment block.
         */
        @Override
        public EdgingRecordAndOffset getStart() {
            return null;
        }

        // NOTE(review): hashCode() is overridden without equals(), so equality remains
        // identity-based while hashes are value-based. Confirm no caller relies on value
        // equality of these objects in hash-based collections.
        @Override
        public int hashCode() {
            if (hash != 0) return hash;
            hash = record.hashCode();
            hash = 31 * hash + length;
            hash = 31 * hash + offset;
            hash = 31 * hash + refPos;
            return hash;
        }

        // maps a 1-based reference position to the corresponding 0-based read offset
        private int getRelativeOffset(int position) {
            return position - refPos + offset;
        }
    }

    private static class EndEdgingRecordAndOffset extends EdgingRecordAndOffset {

        /**
         * For object with type END this field holds the reference to object with type BEGIN for the read.
         */
        final private EdgingRecordAndOffset start;

        // Wraps the same SAMRecord/offset as the BEGIN record; all block attributes delegate to it.
        EndEdgingRecordAndOffset(EdgingRecordAndOffset record) {
            super(record.getRecord(), record.getOffset());
            this.start = record;
        }

        /**
         * @param position in the reference
         * @return base quality of a read base, corresponding to a given reference position
         */
        public byte getBaseQuality(int position) {
            return start.getBaseQuality(position);
        }

        /**
         * @return the length of alignment block represented by the object.
         */
        @Override
        public int getLength() {
            return start.getLength();
        }

        /**
         * @return the position in reference sequence, to which the start of alignment block is aligned.
         */
        @Override
        public int getRefPos() {
            return start.getRefPos();
        }

        /**
         * @return type of object
         */
        @Override
        public Type getType() {
            return END;
        }

        /**
         * @return <code>EdgingRecordAndOffset</code> that represents the start of alignment block of the read
         *         for object with type END
         */
        @Override
        public EdgingRecordAndOffset getStart() {
            return start;
        }

        // Delegates to the BEGIN record so both edges of a block hash identically.
        // NOTE(review): as above, hashCode() without equals() — equality stays identity-based.
        @Override
        public int hashCode() {
            return start.hashCode();
        }
    }
}
diff --git a/src/main/java/htsjdk/samtools/util/IOUtil.java b/src/main/java/htsjdk/samtools/util/IOUtil.java
index 7f0495d..07ae900 100644
--- a/src/main/java/htsjdk/samtools/util/IOUtil.java
+++ b/src/main/java/htsjdk/samtools/util/IOUtil.java
@@ -71,7 +71,7 @@ import java.util.zip.GZIPOutputStream;
*/
public class IOUtil {
/**
- * @deprecated Use Defaults.NON_ZERO_BUFFER_SIZE instead.
+ * @deprecated Use {@link Defaults#NON_ZERO_BUFFER_SIZE} instead.
*/
@Deprecated
public static final int STANDARD_BUFFER_SIZE = Defaults.NON_ZERO_BUFFER_SIZE;
@@ -944,22 +944,3 @@ public class IOUtil {
return output;
}
}
-
-
-/**
- * Hacky little class used to allow us to set the compression level on a GZIP output stream which, for some
- * bizarre reason, is not exposed in the standard API.
- *
- * @author Tim Fennell
- */
-class CustomGzipOutputStream extends GZIPOutputStream {
- CustomGzipOutputStream(final OutputStream outputStream, final int bufferSize, final int compressionLevel) throws IOException {
- super(outputStream, bufferSize);
- this.def.setLevel(compressionLevel);
- }
-
- CustomGzipOutputStream(final OutputStream outputStream, final int compressionLevel) throws IOException {
- super(outputStream);
- this.def.setLevel(compressionLevel);
- }
-}
diff --git a/src/main/java/htsjdk/samtools/util/IntervalList.java b/src/main/java/htsjdk/samtools/util/IntervalList.java
index 32b7176..76cb508 100644
--- a/src/main/java/htsjdk/samtools/util/IntervalList.java
+++ b/src/main/java/htsjdk/samtools/util/IntervalList.java
@@ -102,9 +102,14 @@ public class IntervalList implements Iterable<Interval> {
}
}
- /** Sorts the internal collection of intervals by coordinate. */
- @Deprecated // Use sorted() instead of sort(). The sort() function modifies the object in-place and
- // is therefore difficult to work with. sorted() returns a new IntervalList that is sorted
+ /**
+ * Sorts the internal collection of intervals by coordinate.
+ *
+ * Note: this function modifies the object in-place and is therefore difficult to work with.
+ *
+ * @deprecated use {@link #sorted()} instead.
+ */
+ @Deprecated
public void sort() {
Collections.sort(this.intervals, new IntervalCoordinateComparator(this.header));
this.header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
@@ -154,19 +159,27 @@ public class IntervalList implements Iterable<Interval> {
return value;
}
- /** Sorts and uniques the list of intervals held within this interval list. */
- @Deprecated//use uniqued() instead. This function modifies the object in-place and
- // is therefore difficult to work with.
+ /**
+ * Sorts and uniques the list of intervals held within this interval list.
+ *
+ * Note: this function modifies the object in-place and is therefore difficult to work with.
+ *
+ * @deprecated use {@link #uniqued()} instead.
+ */
+ @Deprecated
public void unique() {
unique(true);
}
/**
* Sorts and uniques the list of intervals held within this interval list.
+ *
+ * Note: this function modifies the object in-place and is therefore difficult to work with.
+ *
* @param concatenateNames If false, interval names are not concatenated when merging intervals to save space.
+ * @deprecated use {@link #uniqued(boolean)} instead.
*/
- @Deprecated//use uniqued() instead. This function modifies the object in-place and
- // is therefore difficult to work with
+ @Deprecated
public void unique(final boolean concatenateNames) {
sort();
final List<Interval> tmp = getUniqueIntervals(concatenateNames);
@@ -186,10 +199,12 @@ public class IntervalList implements Iterable<Interval> {
*
* Note: has the side-effect of sorting the stored intervals in coordinate order if not already sorted.
*
+ * Note: this function modifies the object in-place and is therefore difficult to work with.
+ *
* @return the set of unique intervals condensed from the contained intervals
+ * @deprecated use {@code uniqued().getIntervals()} instead.
*/
- @Deprecated//use uniqued().getIntervals() instead. This function modifies the object in-place and
- // is therefore difficult to work with
+ @Deprecated
public List<Interval> getUniqueIntervals() {
return getUniqueIntervals(true);
}
@@ -249,14 +264,14 @@ public class IntervalList implements Iterable<Interval> {
}
/**
- * Merges list of intervals and reduces them like htsjdk.samtools.util.IntervalList#getUniqueIntervals()
- * @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter.
- */
- @Deprecated //use uniqued(concatenateNames).getIntervals() or the static version instead to avoid changing the underlying object.
- /**
- * Merges list of intervals and reduces them like htsjdk.samtools.util.IntervalList#getUniqueIntervals()
- * @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter.
+ * Merges list of intervals and reduces them like {@link #getUniqueIntervals()}.
+ *
+ * Note: this function modifies the object in-place and is therefore difficult to work with.
+ *
+ * @param concatenateNames If false, the merged interval has the name of the earlier interval. This keeps name shorter.
+ * @deprecated use {@code uniqued(concatenateNames).getIntervals()} or {@link #getUniqueIntervals(IntervalList, boolean)} instead.
*/
+ @Deprecated
public List<Interval> getUniqueIntervals(final boolean concatenateNames) {
if (getHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
sort();
diff --git a/src/main/java/htsjdk/samtools/util/Log.java b/src/main/java/htsjdk/samtools/util/Log.java
index efd7b67..acbd3c4 100644
--- a/src/main/java/htsjdk/samtools/util/Log.java
+++ b/src/main/java/htsjdk/samtools/util/Log.java
@@ -43,7 +43,7 @@ public final class Log {
/** Enumeration for setting log levels. */
public static enum LogLevel { ERROR, WARNING, INFO, DEBUG }
- private static LogLevel globalLogLevel = LogLevel.DEBUG;
+ private static LogLevel globalLogLevel = LogLevel.INFO;
private final Class<?> clazz;
private final String className;
diff --git a/src/main/java/htsjdk/samtools/util/SamLocusIterator.java b/src/main/java/htsjdk/samtools/util/SamLocusIterator.java
index 33bcfd3..7a60756 100644
--- a/src/main/java/htsjdk/samtools/util/SamLocusIterator.java
+++ b/src/main/java/htsjdk/samtools/util/SamLocusIterator.java
@@ -23,10 +23,16 @@
*/
package htsjdk.samtools.util;
-import htsjdk.samtools.*;
-import htsjdk.samtools.filter.*;
+import htsjdk.samtools.AlignmentBlock;
+import htsjdk.samtools.CigarElement;
+import htsjdk.samtools.CigarOperator;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.SamReader;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
/**
* Iterator that traverses a SAM File, accumulating information on a per-locus basis.
@@ -37,172 +43,14 @@ import java.util.*;
*
* @author alecw at broadinstitute.org
*/
-public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, CloseableIterator<SamLocusIterator.LocusInfo> {
- private static final Log LOG = Log.getInstance(SamLocusIterator.class);
-
- /**
- * Holds a SAMRecord plus the zero-based offset into that SAMRecord's bases and quality scores that corresponds
- * to the base and quality at the genomic position described the containing LocusInfo.
- */
- public static class RecordAndOffset {
- private final SAMRecord record;
- private final int offset;
-
- public RecordAndOffset(final SAMRecord record, final int offset) {
- this.offset = offset;
- this.record = record;
- }
-
- /** Zero-based offset into the read corresponding to the current position in LocusInfo */
- public int getOffset() { return offset; }
- public SAMRecord getRecord() { return record; }
- public byte getReadBase() { return record.getReadBases()[offset]; }
- public byte getBaseQuality() { return record.getBaseQualities()[offset]; }
- }
-
- /**
- * The unit of iteration. Holds information about the locus (the SAMSequenceRecord and 1-based position
- * on the reference), plus List of ReadAndOffset objects, one for each read that overlaps the locus;
- * two more List_s_ of ReadAndOffset objects include reads that overlap the locus with insertions and deletions
- * respectively
- */
- public static final class LocusInfo implements Locus {
- private final SAMSequenceRecord referenceSequence;
- private final int position;
- private final List<RecordAndOffset> recordAndOffsets = new ArrayList<RecordAndOffset>(100);
- private List<RecordAndOffset> deletedInRecord = null;
- private List<RecordAndOffset> insertedInRecord = null;
-
- LocusInfo(final SAMSequenceRecord referenceSequence, final int position) {
- this.referenceSequence = referenceSequence;
- this.position = position;
- }
-
- /** Accumulate info for one read at the locus. */
- public void add(final SAMRecord read, final int position) {
- recordAndOffsets.add(new RecordAndOffset(read, position));
- }
-
- /** Accumulate info for one read with a deletion */
- public void addDeleted(final SAMRecord read, int previousPosition) {
- if (deletedInRecord == null) {
- deletedInRecord = new ArrayList<>();
- }
- deletedInRecord.add(new RecordAndOffset(read, previousPosition));
- }
-
- /**
- * Accumulate info for one read with an insertion.
- * For this locus, the reads in the insertion are included also in recordAndOffsets
- */
- public void addInserted(final SAMRecord read, int firstPosition) {
- if (insertedInRecord == null) {
- insertedInRecord = new ArrayList<>();
- }
- insertedInRecord.add(new RecordAndOffset(read, firstPosition));
- }
-
- public int getSequenceIndex() { return referenceSequence.getSequenceIndex(); }
-
- /** @return 1-based reference position */
- public int getPosition() { return position; }
- public List<RecordAndOffset> getRecordAndPositions() { return Collections.unmodifiableList(recordAndOffsets); }
- public String getSequenceName() { return referenceSequence.getSequenceName(); }
- @Override public String toString() { return referenceSequence.getSequenceName() + ":" + position; }
- public int getSequenceLength() {return referenceSequence.getSequenceLength();}
- public List<RecordAndOffset> getDeletedInRecord() {
- return (deletedInRecord == null) ? Collections.emptyList() : Collections.unmodifiableList(deletedInRecord);
- }
- public List<RecordAndOffset> getInsertedInRecord() {
- return (insertedInRecord == null) ? Collections.emptyList() : Collections.unmodifiableList(insertedInRecord);
- }
- /** @return the number of records overlapping the position, with deletions included if they are being tracked. */
- public int size() { return this.recordAndOffsets.size() + ((deletedInRecord == null) ? 0 : deletedInRecord.size()); }
-
- /**
- * @return <code>true</code> if all the RecordAndOffset lists are empty;
- * <code>false</code> if at least one have records
- */
- public boolean isEmpty() {
- return recordAndOffsets.isEmpty() &&
- (deletedInRecord == null || deletedInRecord.isEmpty()) &&
- (insertedInRecord == null || insertedInRecord.isEmpty());
- }
- }
-
-
- private final SamReader samReader;
- private final ReferenceSequenceMask referenceSequenceMask;
- private PeekableIterator<SAMRecord> samIterator;
- private List<SamRecordFilter> samFilters = Arrays.asList(new SecondaryOrSupplementaryFilter(),
- new DuplicateReadFilter());
- private final List<Interval> intervals;
- private final boolean useIndex;
-
- /**
- * LocusInfos on this list are ready to be returned by iterator. All reads that overlap
- * the locus have been accumulated before the LocusInfo is moved into this list.
- */
- private final ArrayList<LocusInfo> complete = new ArrayList<LocusInfo>(100);
-
- /**
- * LocusInfos for which accumulation is in progress. When {@link #accumulateSamRecord(SAMRecord)} is called
- * the state of this list is guaranteed to be either:
- * a) Empty, or
- * b) That the element at index 0 corresponds to the same genomic locus as the first aligned base
- * in the read being accumulated
- *
- * Before each new read is accumulated the accumulator is examined and:
- * i) any LocusInfos at positions earlier than the read start are moved to {@link #complete}
- * ii) any uncovered positions between the last LocusInfo and the first aligned base of the new read
- * have LocusInfos created and added to {@link #complete} if we are emitting uncovered loci
- */
- private final ArrayList<LocusInfo> accumulator = new ArrayList<LocusInfo>(100);
-
- private int qualityScoreCutoff = Integer.MIN_VALUE;
- private int mappingQualityScoreCutoff = Integer.MIN_VALUE;
- private boolean includeNonPfReads = true;
-
- /**
- * If true, emit a LocusInfo for every locus in the target map, or if no target map,
- * emit a LocusInfo for every locus in the reference sequence.
- * If false, emit a LocusInfo only if a locus has coverage.
- */
- private boolean emitUncoveredLoci = true;
-
- /**
- * If set, this will cap the number of reads we accumulate for any given position.
- * Note that if we hit the maximum threshold at the first position in the accumulation queue,
- * then we throw further reads overlapping that position completely away (including for subsequent positions).
- * This is a useful feature if one wants to minimize the memory footprint in files with a few massively large pileups,
- * but it must be pointed out that it could cause major bias because of the non-random nature with which the cap is
- * applied (the first maxReadsToAccumulatePerLocus reads are kept and all subsequent ones are dropped).
- */
- private int maxReadsToAccumulatePerLocus = Integer.MAX_VALUE;
-
- // Set to true when we have enforced the accumulation limit for the first time
- private boolean enforcedAccumulationLimit = false;
-
- /**
- * If true, include indels in the LocusInfo
- */
- private boolean includeIndels = false;
-
-
- // When there is a target mask, these members remember the last locus for which a LocusInfo has been
- // returned, so that any uncovered locus in the target mask can be covered by a 0-coverage LocusInfo
- private int lastReferenceSequence = 0;
- private int lastPosition = 0;
-
- // Set to true when past all aligned reads in input SAM file
- private boolean finishedAlignedReads = false;
-
- private final LocusComparator<Locus> locusComparator = new LocusComparator<Locus>();
+public class SamLocusIterator extends AbstractLocusIterator<SamLocusIterator.RecordAndOffset, SamLocusIterator.LocusInfo> {
/**
* Prepare to iterate through the given SAM records, skipping non-primary alignments. Do not use
* BAM index even if available.
+ *
+ * @param samReader must be coordinate sorted
*/
public SamLocusIterator(final SamReader samReader) {
this(samReader, null);
@@ -212,6 +60,7 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
* Prepare to iterate through the given SAM records, skipping non-primary alignments. Do not use
* BAM index even if available.
*
+ * @param samReader must be coordinate sorted
* @param intervalList Either the list of desired intervals, or null. Note that if an intervalList is
* passed in that is not coordinate sorted, it will eventually be coordinated sorted by this class.
*/
@@ -230,209 +79,18 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
* as well as useIndex==false, and generally will be much faster.
*/
public SamLocusIterator(final SamReader samReader, final IntervalList intervalList, final boolean useIndex) {
- if (samReader.getFileHeader().getSortOrder() == null || samReader.getFileHeader().getSortOrder() == SAMFileHeader.SortOrder.unsorted) {
- LOG.warn("SamLocusIterator constructed with samReader that has SortOrder == unsorted. ", "" +
- "Assuming SAM is coordinate sorted, but exceptions may occur if it is not.");
- } else if (samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
- throw new SAMException("SamLocusIterator cannot operate on a SAM file that is not coordinate sorted.");
- }
- this.samReader = samReader;
- this.useIndex = useIndex;
- if (intervalList != null) {
- intervals = intervalList.uniqued().getIntervals();
- this.referenceSequenceMask = new IntervalListReferenceSequenceMask(intervalList);
- } else {
- intervals = null;
- this.referenceSequenceMask = new WholeGenomeReferenceSequenceMask(samReader.getFileHeader());
- }
- }
-
- public Iterator<LocusInfo> iterator() {
- if (samIterator != null) {
- throw new IllegalStateException("Cannot call iterator() more than once on SamLocusIterator");
- }
- CloseableIterator<SAMRecord> tempIterator;
- if (intervals != null) {
- tempIterator = new SamRecordIntervalIteratorFactory().makeSamRecordIntervalIterator(samReader, intervals, useIndex);
- } else {
- tempIterator = samReader.iterator();
- }
- if (samFilters != null) {
- tempIterator = new FilteringSamIterator(tempIterator, new AggregateFilter(samFilters));
- }
- samIterator = new PeekableIterator<SAMRecord>(tempIterator);
- return this;
- }
-
- public void close() {
- this.samIterator.close();
- }
-
- private boolean samHasMore() {
- return !finishedAlignedReads && (samIterator.peek() != null);
- }
-
- /**
- * Returns true if there are more LocusInfo objects that can be returned, due to any of the following reasons:
- * 1) there are more aligned reads in the SAM file
- * 2) there are LocusInfos in some stage of accumulation
- * 3) there are loci in the target mask that have yet to be accumulated (even if there are no reads covering them)
- */
- public boolean hasNext() {
- if (this.samIterator == null) {
- iterator();
- }
-
- while (complete.isEmpty() && ((!accumulator.isEmpty()) || samHasMore() || hasRemainingMaskBases())) {
- final LocusInfo locusInfo = next();
- if (locusInfo != null) {
- complete.add(0, locusInfo);
- }
- }
- return !complete.isEmpty();
- }
-
- /**
- * Returns true if there are more bases at which the locus iterator must emit LocusInfos because
- * there are loci beyond the last emitted loci which are in the set of loci to be emitted and
- * the iterator is setup to emit uncovered loci - so we can guarantee we'll emit those loci.
- */
- private boolean hasRemainingMaskBases() {
- // if there are more sequences in the mask, by definition some of them must have
- // marked bases otherwise if we're in the last sequence, but we're not at the last marked position,
- // there is also more in the mask
- if (!emitUncoveredLoci) {
- // If not emitting uncovered loci, this check is irrelevant
- return false;
- }
- return (lastReferenceSequence < referenceSequenceMask.getMaxSequenceIndex() ||
- (lastReferenceSequence == referenceSequenceMask.getMaxSequenceIndex() &&
- lastPosition < referenceSequenceMask.nextPosition(lastReferenceSequence, lastPosition)));
- }
-
- /**
- * hasNext() has been fixed so that if it returns true, next() is now guaranteed not to return null.
- */
- public LocusInfo next() {
-
- // if we don't have any completed entries to return, try and make some!
- while (complete.isEmpty() && samHasMore()) {
- final SAMRecord rec = samIterator.peek();
-
- // There might be unmapped reads mixed in with the mapped ones, but when a read
- // is encountered with no reference index it means that all the mapped reads have been seen.
- if (rec.getReferenceIndex() == -1) {
- this.finishedAlignedReads = true;
- continue;
-
- }
- // Skip over an unaligned read that has been forced to be sorted with the aligned reads
- if (rec.getReadUnmappedFlag()
- || rec.getMappingQuality() < this.mappingQualityScoreCutoff
- || (!this.includeNonPfReads && rec.getReadFailsVendorQualityCheckFlag())) {
- samIterator.next();
- continue;
- }
-
- int start = rec.getAlignmentStart();
- // only if we are including indels and the record does not start in the first base of the reference
- // the stop locus to populate the queue is not the same if the record starts with an insertion
- if (includeIndels && start != 1 && startWithInsertion(rec.getCigar())) {
- // the start to populate is one less
- start--;
- }
- final Locus alignmentStart = new LocusImpl(rec.getReferenceIndex(), start);
- // emit everything that is before the start of the current read, because we know no more
- // coverage will be accumulated for those loci.
- while (!accumulator.isEmpty() && locusComparator.compare(accumulator.get(0), alignmentStart) < 0) {
- final LocusInfo first = accumulator.get(0);
- populateCompleteQueue(alignmentStart);
- if (!complete.isEmpty()) {
- return complete.remove(0);
- }
- if (!accumulator.isEmpty() && first == accumulator.get(0)) {
- throw new SAMException("Stuck in infinite loop");
- }
- }
-
- // at this point, either the accumulator list is empty or the head should
- // be the same position as the first base of the read (or insertion if first)
- if (!accumulator.isEmpty()) {
- if (accumulator.get(0).getSequenceIndex() != rec.getReferenceIndex() ||
- accumulator.get(0).position != start) {
- throw new IllegalStateException("accumulator should be empty or aligned with current SAMRecord");
- }
- }
-
- // Store the loci for the read in the accumulator
- if (!surpassedAccumulationThreshold()) {
- accumulateSamRecord(rec);
- // Store the indels if requested
- if (includeIndels) {
- accumulateIndels(rec);
- }
- }
- samIterator.next();
- }
-
- final Locus endLocus = new LocusImpl(Integer.MAX_VALUE, Integer.MAX_VALUE);
- // if we have nothing to return to the user, and we're at the end of the SAM iterator,
- // push everything into the complete queue
- if (complete.isEmpty() && !samHasMore()) {
- while (!accumulator.isEmpty()) {
- populateCompleteQueue(endLocus);
- if (!complete.isEmpty()) {
- return complete.remove(0);
- }
- }
- }
-
- // if there are completed entries, return those
- if (!complete.isEmpty()) {
- return complete.remove(0);
- } else if (emitUncoveredLoci) {
- final Locus afterLastMaskPositionLocus = new LocusImpl(referenceSequenceMask.getMaxSequenceIndex(),
- referenceSequenceMask.getMaxPosition() + 1);
- // In this case... we're past the last read from SAM so see if we can
- // fill out any more (zero coverage) entries from the mask
- return createNextUncoveredLocusInfo(afterLastMaskPositionLocus);
- } else {
- return null;
- }
- }
-
- /**
- * @return true if we have surpassed the maximum accumulation threshold for the first locus in the accumulator, false otherwise
- */
- private boolean surpassedAccumulationThreshold() {
- final boolean surpassesThreshold = !accumulator.isEmpty() && accumulator.get(0).recordAndOffsets.size() >= maxReadsToAccumulatePerLocus;
- if (surpassesThreshold && !enforcedAccumulationLimit) {
- LOG.warn("We have encountered greater than " + maxReadsToAccumulatePerLocus + " reads at position " + accumulator.get(0).toString() + " and will ignore the remaining reads at this position. Note that further warnings will be suppressed.");
- enforcedAccumulationLimit = true;
- }
- return surpassesThreshold;
- }
-
- /**
- * Check if cigar start with an insertion, ignoring other operators that do not consume references bases
- * @param cigar the cigar
- * @return <code>true</code> if the first operator to consume reference bases or be an insertion, is an insertion; <code>false</code> otherwise
- */
- private static boolean startWithInsertion(final Cigar cigar) {
- for (final CigarElement element : cigar.getCigarElements()) {
- if (element.getOperator()==CigarOperator.I) return true;
- if (!element.getOperator().consumesReferenceBases()) continue;
- break;
- }
- return false;
+ super(samReader, intervalList, useIndex);
}
/**
* Capture the loci covered by the given SAMRecord in the LocusInfos in the accumulator,
- * creating new LocusInfos as needed.
+ * creating new LocusInfos as needed. RecordAndOffset objects are created for each aligned base of
+ * <code>SAMRecord</code>.
+ *
+ * @param rec SAMRecord to process and add to <code>LocusInfo</code>
*/
- private void accumulateSamRecord(final SAMRecord rec) {
-
+ @Override
+ void accumulateSamRecord(final SAMRecord rec) {
// get the accumulator offset
int accOffset = getAccumulatorOffset(rec);
@@ -454,7 +112,7 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
if (dontCheckQualities || baseQualities.length == 0 || baseQualities[readOffset] >= minQuality) {
// 0-based offset from the aligned position of the first base in the read to the aligned position of the current base.
final int refOffset = refStart + i - accOffset;
- accumulator.get(refOffset).add(rec, readOffset);
+ accumulator.get(refOffset).add(new RecordAndOffset(rec, readOffset));
}
}
}
@@ -465,7 +123,8 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
* {@link #accumulateSamRecord(htsjdk.samtools.SAMRecord)}.
* Include in the LocusInfo the indels; the quality threshold does not affect insertions/deletions
*/
- private void accumulateIndels(final SAMRecord rec) {
+ @Override
+ void accumulateIndels(SAMRecord rec) {
// get the cigar elements
final List<CigarElement> cigar = rec.getCigar().getCigarElements();
// 0-based offset into the read of the current base
@@ -497,90 +156,6 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
}
/**
- * Create the next relevant zero-coverage LocusInfo
- *
- * @param stopBeforeLocus don't go up to this sequence and position
- * @return a zero-coverage LocusInfo, or null if there is none before the stopBefore locus
- */
- private LocusInfo createNextUncoveredLocusInfo(final Locus stopBeforeLocus) {
- while (lastReferenceSequence <= stopBeforeLocus.getSequenceIndex() &&
- lastReferenceSequence <= referenceSequenceMask.getMaxSequenceIndex()) {
-
- if (lastReferenceSequence == stopBeforeLocus.getSequenceIndex() &&
- lastPosition + 1 >= stopBeforeLocus.getPosition()) {
- return null;
- }
-
- final int nextbit = referenceSequenceMask.nextPosition(lastReferenceSequence, lastPosition);
-
- // try the next reference sequence
- if (nextbit == -1) {
- // No more in this reference sequence
- if (lastReferenceSequence == stopBeforeLocus.getSequenceIndex()) {
- lastPosition = stopBeforeLocus.getPosition();
- return null;
- }
- lastReferenceSequence++;
- lastPosition = 0;
- } else if (lastReferenceSequence < stopBeforeLocus.getSequenceIndex() || nextbit < stopBeforeLocus.getPosition()) {
- lastPosition = nextbit;
- return new LocusInfo(getReferenceSequence(lastReferenceSequence), lastPosition);
- } else if (nextbit >= stopBeforeLocus.getPosition()) {
- return null;
- }
- }
-
- return null;
- }
-
- /**
- * Pop the first entry from the LocusInfo accumulator into the complete queue. In addition,
- * check the ReferenceSequenceMask and if there are intervening mask positions between the last popped base and the one
- * about to be popped, put those on the complete queue as well.
- * Note that a single call to this method may not empty the accumulator completely, or even
- * empty it at all, because it may just put a zero-coverage LocusInfo into the complete queue.
- */
- private void populateCompleteQueue(final Locus stopBeforeLocus) {
- // Because of gapped alignments, it is possible to create LocusInfo's with no reads associated with them.
- // Skip over these if not including indels
- while (!accumulator.isEmpty() && accumulator.get(0).isEmpty() &&
- locusComparator.compare(accumulator.get(0), stopBeforeLocus) < 0) {
- accumulator.remove(0);
- }
- if (accumulator.isEmpty()) {
- return;
- }
- final LocusInfo locusInfo = accumulator.get(0);
- if (locusComparator.compare(stopBeforeLocus, locusInfo) <= 0) {
- return;
- }
-
- // If necessary, emit a zero-coverage LocusInfo
- if (emitUncoveredLoci) {
- final LocusInfo zeroCoverage = createNextUncoveredLocusInfo(locusInfo);
- if (zeroCoverage != null) {
- complete.add(zeroCoverage);
- return;
- }
- }
-
- // At this point we know we're going to process the LocusInfo, so remove it from the accumulator.
- accumulator.remove(0);
-
- // fill in any gaps based on our genome mask
- final int sequenceIndex = locusInfo.getSequenceIndex();
-
-
- // only add to the complete queue if it's in the mask (or we have no mask!)
- if (referenceSequenceMask.get(locusInfo.getSequenceIndex(), locusInfo.getPosition())) {
- complete.add(locusInfo);
- }
-
- lastReferenceSequence = sequenceIndex;
- lastPosition = locusInfo.getPosition();
- }
-
- /**
* Ensure that the queue is populated and get the accumulator offset for the current record
*/
private int getAccumulatorOffset(SAMRecord rec) {
@@ -594,7 +169,6 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
if (insOffset == 1 && accumulator.isEmpty()) {
accumulator.add(new LocusInfo(ref, alignmentStart - 1));
}
-
// Ensure there are LocusInfos up to and including this position
for (int i = accumulator.size(); i <= alignmentLength + insOffset; ++i) {
accumulator.add(new LocusInfo(ref, alignmentStart + i - insOffset));
@@ -602,12 +176,28 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
return alignmentStart - insOffset;
}
- private SAMSequenceRecord getReferenceSequence(final int referenceSequenceIndex) {
- return samReader.getFileHeader().getSequence(referenceSequenceIndex);
+
+ /**
+ * @param rec aligned SamRecord
+ * @param readOffset offset from start of read
+ * @param length 1, as object represents only one aligned base
+ * @param refPos -1, as this field isn't used for this implementation
+ *
+ * @return created RecordAndOffset
+ */
+ @Override
+ RecordAndOffset createRecordAndOffset(SAMRecord rec, int readOffset, int length, int refPos) {
+ return new RecordAndOffset(rec, readOffset);
}
- public void remove() {
- throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
+ /**
+ * @param referenceSequence processed reference sequence
+ * @param lastPosition last processed reference locus position
+ * @return {@code LocusInfo} for the lastPosition
+ */
+ @Override
+ LocusInfo createLocusInfo(SAMSequenceRecord referenceSequence, int lastPosition) {
+ return new LocusInfo(referenceSequence, lastPosition);
}
// --------------------------------------------------------------------------------------------
@@ -615,67 +205,88 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
// --------------------------------------------------------------------------------------------
/**
- * Controls which, if any, SAMRecords are filtered. By default duplicate reads and non-primary alignments
- * are filtered out. The list of filters passed here replaces any existing filters.
- *
- * @param samFilters list of filters, or null if no filtering is desired.
+ * Implementation of <code>AbstractRecordAndOffset</code> class for <code>SamLocusIterator</code>.
+ * One object represents one aligned base of inner <code>SAMRecord</code>.
*/
- public void setSamFilters(final List<SamRecordFilter> samFilters) {
- this.samFilters = samFilters;
- }
+ public static class RecordAndOffset extends AbstractRecordAndOffset {
- public int getQualityScoreCutoff() {
- return qualityScoreCutoff;
+ /**
+ * @param record inner <code>SAMRecord</code>
+ * @param offset 0-based offset from the start of <code>SAMRecord</code>
+ */
+ public RecordAndOffset(final SAMRecord record, final int offset) {
+ super(record, offset);
+ }
}
- public void setQualityScoreCutoff(final int qualityScoreCutoff) {
- this.qualityScoreCutoff = qualityScoreCutoff;
- }
+ /**
+ * The unit of iteration. Holds information about the locus (the SAMSequenceRecord and 1-based position
+ * on the reference), plus List of ReadAndOffset objects, one for each read that overlaps the locus;
+ * two more List_s_ of ReadAndOffset objects include reads that overlap the locus with insertions and deletions
+ * respectively
+ */
+ public static final class LocusInfo extends AbstractLocusInfo<RecordAndOffset> {
- public int getMappingQualityScoreCutoff() {
- return mappingQualityScoreCutoff;
- }
+ private List<RecordAndOffset> deletedInRecord = null;
+ private List<RecordAndOffset> insertedInRecord = null;
- public void setMappingQualityScoreCutoff(final int mappingQualityScoreCutoff) {
- this.mappingQualityScoreCutoff = mappingQualityScoreCutoff;
- }
+ /**
+ * @param referenceSequence reference sequence at which the reads are aligned
+ * @param position position in the sequence at which the reads are aligned
+ */
+ public LocusInfo(SAMSequenceRecord referenceSequence, int position) {
+ super(referenceSequence, position);
+ }
- public boolean isIncludeNonPfReads() {
- return includeNonPfReads;
- }
+ /**
+ * Accumulate info for one read with a deletion
+ */
+ public void addDeleted(final SAMRecord read, int previousPosition) {
+ if (deletedInRecord == null) {
+ deletedInRecord = new ArrayList<>();
+ }
+ deletedInRecord.add(new RecordAndOffset(read, previousPosition));
+ }
- public void setIncludeNonPfReads(final boolean includeNonPfReads) {
- this.includeNonPfReads = includeNonPfReads;
- }
+ /**
+ * Accumulate info for one read with an insertion.
+ * For this locus, the reads in the insertion are included also in recordAndOffsets
+ */
- public boolean isEmitUncoveredLoci() {
- return emitUncoveredLoci;
- }
+ public void addInserted(final SAMRecord read, int firstPosition) {
- public void setEmitUncoveredLoci(final boolean emitUncoveredLoci) {
- this.emitUncoveredLoci = emitUncoveredLoci;
- }
+ if (insertedInRecord == null) {
+ insertedInRecord = new ArrayList<>();
+ }
+ insertedInRecord.add(new RecordAndOffset(read, firstPosition));
+ }
- public int getMaxReadsToAccumulatePerLocus() {
- return maxReadsToAccumulatePerLocus;
- }
+ public List<RecordAndOffset> getDeletedInRecord() {
+ return (deletedInRecord == null) ? Collections.emptyList() : Collections.unmodifiableList(deletedInRecord);
+ }
- /**
- * If set, this will cap the number of reads we accumulate for any given position.
- * As is pointed out above, setting this could cause major bias because of the non-random nature with which the
- * cap is applied (the first maxReadsToAccumulatePerLocus reads are kept and all subsequent ones are dropped).
- */
- public void setMaxReadsToAccumulatePerLocus(final int maxReadsToAccumulatePerLocus) {
- this.maxReadsToAccumulatePerLocus = maxReadsToAccumulatePerLocus;
- }
+ public List<RecordAndOffset> getInsertedInRecord() {
+ return (insertedInRecord == null) ? Collections.emptyList() : Collections.unmodifiableList(insertedInRecord);
+ }
+
+ /**
+ * @return the number of records overlapping the position, with deletions included if they are being tracked.
+ */
+ @Override
+ public int size() {
+ return super.size() + ((deletedInRecord == null) ? 0 : deletedInRecord.size());
+ }
- public boolean isIncludeIndels() {
- return includeIndels;
- }
- public void setIncludeIndels(final boolean includeIndels) {
- this.includeIndels = includeIndels;
+ /**
+ * @return <code>true</code> if all the RecordAndOffset lists are empty;
+ * <code>false</code> if at least one list has records
+ */
+ @Override
+ public boolean isEmpty() {
+ return getRecordAndPositions().isEmpty() &&
+ (deletedInRecord == null || deletedInRecord.isEmpty()) &&
+ (insertedInRecord == null || insertedInRecord.isEmpty());
+ }
}
-
}
-
diff --git a/src/main/java/htsjdk/tribble/Feature.java b/src/main/java/htsjdk/tribble/Feature.java
index 791986d..941790f 100644
--- a/src/main/java/htsjdk/tribble/Feature.java
+++ b/src/main/java/htsjdk/tribble/Feature.java
@@ -36,6 +36,8 @@ public interface Feature extends Locatable {
* @deprecated use getContig() instead
*/
@Deprecated
- public String getChr();
+ default public String getChr() {
+ return getContig();
+ }
}
diff --git a/src/main/java/htsjdk/tribble/SimpleFeature.java b/src/main/java/htsjdk/tribble/SimpleFeature.java
index c85cdcc..ddc62fa 100644
--- a/src/main/java/htsjdk/tribble/SimpleFeature.java
+++ b/src/main/java/htsjdk/tribble/SimpleFeature.java
@@ -39,11 +39,6 @@ public class SimpleFeature implements Feature {
this.end = end;
}
- @Deprecated
- public String getChr() {
- return contig;
- }
-
public String getContig() {
return contig;
}
diff --git a/src/main/java/htsjdk/tribble/bed/SimpleBEDFeature.java b/src/main/java/htsjdk/tribble/bed/SimpleBEDFeature.java
index 415b628..77a030f 100644
--- a/src/main/java/htsjdk/tribble/bed/SimpleBEDFeature.java
+++ b/src/main/java/htsjdk/tribble/bed/SimpleBEDFeature.java
@@ -51,14 +51,6 @@ public class SimpleBEDFeature implements BEDFeature {
this.chr = chr;
}
-
- @Deprecated
- @Override
- public String getChr() {
- return getContig();
- }
-
-
@Override
public String getContig() {
return chr;
diff --git a/src/main/java/htsjdk/tribble/example/CountRecords.java b/src/main/java/htsjdk/tribble/example/CountRecords.java
index 94d9707..230c1bf 100644
--- a/src/main/java/htsjdk/tribble/example/CountRecords.java
+++ b/src/main/java/htsjdk/tribble/example/CountRecords.java
@@ -193,12 +193,8 @@ public class CountRecords {
// return new VCFCodec();
if (featureFile.getName().endsWith(".bed") || featureFile.getName().endsWith(".BED") )
return new BEDCodec();
- //if (featureFile.getName().endsWith(".snp") || featureFile.getName().endsWith(".rod") )
- // return new OldDbSNPCodec();
if (featureFile.getName().endsWith(".geli.calls") || featureFile.getName().endsWith(".geli") )
return new GeliTextCodec();
- //if (featureFile.getName().endsWith(".txt") || featureFile.getName().endsWith(".TXT") )
- // return new SoapSNPCodec();
throw new IllegalArgumentException("Unable to determine correct file type based on the file name, for file -> " + featureFile);
}
}
diff --git a/src/main/java/htsjdk/tribble/example/ExampleBinaryCodec.java b/src/main/java/htsjdk/tribble/example/ExampleBinaryCodec.java
index 219d13a..9628cc4 100644
--- a/src/main/java/htsjdk/tribble/example/ExampleBinaryCodec.java
+++ b/src/main/java/htsjdk/tribble/example/ExampleBinaryCodec.java
@@ -118,7 +118,7 @@ public class ExampleBinaryCodec extends BinaryFeatureCodec<Feature> {
Iterator<FEATURE_TYPE> it = reader.iterator();
while ( it.hasNext() ) {
final Feature f = it.next();
- dos.writeUTF(f.getChr());
+ dos.writeUTF(f.getContig());
dos.writeInt(f.getStart());
dos.writeInt(f.getEnd());
}
diff --git a/src/main/java/htsjdk/tribble/gelitext/GeliTextFeature.java b/src/main/java/htsjdk/tribble/gelitext/GeliTextFeature.java
index 41d3877..baad1ca 100644
--- a/src/main/java/htsjdk/tribble/gelitext/GeliTextFeature.java
+++ b/src/main/java/htsjdk/tribble/gelitext/GeliTextFeature.java
@@ -83,12 +83,6 @@ public class GeliTextFeature implements Feature {
this.likelihoods = likelihoods;
}
- /** Return the features reference sequence name, e.g chromosome or contig */
- @Deprecated
- public String getChr() {
- return getContig();
- }
-
@Override
public String getContig() {
return this.contig;
diff --git a/src/main/java/htsjdk/tribble/index/IndexFactory.java b/src/main/java/htsjdk/tribble/index/IndexFactory.java
index 3cd1b79..4e23e93 100644
--- a/src/main/java/htsjdk/tribble/index/IndexFactory.java
+++ b/src/main/java/htsjdk/tribble/index/IndexFactory.java
@@ -345,8 +345,8 @@ public class IndexFactory {
checkSorted(inputFile, lastFeature, currentFeature);
//should only visit chromosomes once
- final String curChr = currentFeature.getChr();
- final String lastChr = lastFeature != null ? lastFeature.getChr() : null;
+ final String curChr = currentFeature.getContig();
+ final String lastChr = lastFeature != null ? lastFeature.getContig() : null;
if(!curChr.equals(lastChr)){
if(visitedChromos.containsKey(curChr)){
String msg = "Input file must have contiguous chromosomes.";
@@ -369,15 +369,15 @@ public class IndexFactory {
}
private static String featToString(final Feature feature){
- return feature.getChr() + ":" + feature.getStart() + "-" + feature.getEnd();
+ return feature.getContig() + ":" + feature.getStart() + "-" + feature.getEnd();
}
private static void checkSorted(final File inputFile, final Feature lastFeature, final Feature currentFeature){
// if the last currentFeature is after the current currentFeature, exception out
- if (lastFeature != null && currentFeature.getStart() < lastFeature.getStart() && lastFeature.getChr().equals(currentFeature.getChr()))
+ if (lastFeature != null && currentFeature.getStart() < lastFeature.getStart() && lastFeature.getContig().equals(currentFeature.getContig()))
throw new TribbleException.MalformedFeatureFile("Input file is not sorted by start position. \n" +
- "We saw a record with a start of " + currentFeature.getChr() + ":" + currentFeature.getStart() +
- " after a record with a start of " + lastFeature.getChr() + ":" + lastFeature.getStart(), inputFile.getAbsolutePath());
+ "We saw a record with a start of " + currentFeature.getContig() + ":" + currentFeature.getStart() +
+ " after a record with a start of " + lastFeature.getContig() + ":" + lastFeature.getStart(), inputFile.getAbsolutePath());
}
diff --git a/src/main/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java b/src/main/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java
index 854b05d..e826eda 100644
--- a/src/main/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java
+++ b/src/main/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java
@@ -64,13 +64,13 @@ public class IntervalIndexCreator extends TribbleIndexCreator {
public void addFeature(final Feature feature, final long filePosition) {
// if we don't have a chrIndex yet, or if the last one was for the previous contig, create a new one
- if (chrList.isEmpty() || !chrList.getLast().getName().equals(feature.getChr())) {
+ if (chrList.isEmpty() || !chrList.getLast().getName().equals(feature.getContig())) {
// if we're creating a new chrIndex (not the first), make sure to dump the intervals to the old chrIndex
if (!chrList.isEmpty())
addIntervalsToLastChr(filePosition);
// create a new chr index for the current contig
- chrList.add(new ChrIndex(feature.getChr()));
+ chrList.add(new ChrIndex(feature.getContig()));
intervals.clear();
}
diff --git a/src/main/java/htsjdk/tribble/index/linear/LinearIndexCreator.java b/src/main/java/htsjdk/tribble/index/linear/LinearIndexCreator.java
index 9e68072..1158fdf 100644
--- a/src/main/java/htsjdk/tribble/index/linear/LinearIndexCreator.java
+++ b/src/main/java/htsjdk/tribble/index/linear/LinearIndexCreator.java
@@ -66,14 +66,14 @@ public class LinearIndexCreator extends TribbleIndexCreator {
*/
public void addFeature(final Feature feature, final long filePosition) {
// fi we don't have a chrIndex yet, or if the last one was for the previous contig, create a new one
- if (chrList.isEmpty() || !chrList.getLast().getName().equals(feature.getChr())) {
+ if (chrList.isEmpty() || !chrList.getLast().getName().equals(feature.getContig())) {
// if we're creating a new chrIndex (not the first), make sure to dump the blocks to the old chrIndex
if (!chrList.isEmpty())
for (int x = 0; x < blocks.size(); x++) {
blocks.get(x).setEndPosition((x + 1 == blocks.size()) ? filePosition : blocks.get(x + 1).getStartPosition());
chrList.getLast().addBlock(blocks.get(x));
}
- chrList.add(new LinearIndex.ChrIndex(feature.getChr(),binWidth));
+ chrList.add(new LinearIndex.ChrIndex(feature.getContig(),binWidth));
blocks.clear();
// Add the first block
diff --git a/src/main/java/htsjdk/tribble/index/tabix/TabixIndexCreator.java b/src/main/java/htsjdk/tribble/index/tabix/TabixIndexCreator.java
index 9f502cb..001dabc 100644
--- a/src/main/java/htsjdk/tribble/index/tabix/TabixIndexCreator.java
+++ b/src/main/java/htsjdk/tribble/index/tabix/TabixIndexCreator.java
@@ -73,7 +73,7 @@ public class TabixIndexCreator implements IndexCreator {
@Override
public void addFeature(final Feature feature, final long filePosition) {
- final String sequenceName = feature.getChr();
+ final String sequenceName = feature.getContig();
final int referenceIndex;
if (sequenceName.equals(currentSequenceName)) {
referenceIndex = sequenceNames.size() - 1;
diff --git a/src/main/java/htsjdk/variant/variantcontext/CommonInfo.java b/src/main/java/htsjdk/variant/variantcontext/CommonInfo.java
index 88b02fa..e2f9083 100644
--- a/src/main/java/htsjdk/variant/variantcontext/CommonInfo.java
+++ b/src/main/java/htsjdk/variant/variantcontext/CommonInfo.java
@@ -37,6 +37,8 @@ import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
/**
@@ -243,18 +245,62 @@ public final class CommonInfo implements Serializable {
return defaultValue;
}
- /** returns the value as an empty list if the key was not found,
- as a java.util.List if the value is a List or an Array,
- as a Collections.singletonList if there is only one value */
+ /**
+ * Gets the attributes from a key as a list.
+ *
+ * Note: int[] and double[] arrays are boxed.
+ *
+ * @return empty list if the key was not found; {@link Collections#singletonList(Object)} if
+ * there is only one value; a list containing the values if the value is a {@link List} or array.
+ */
@SuppressWarnings("unchecked")
public List<Object> getAttributeAsList(String key) {
Object o = getAttribute(key);
if ( o == null ) return Collections.emptyList();
if ( o instanceof List ) return (List<Object>)o;
- if ( o.getClass().isArray() ) return Arrays.asList((Object[])o);
+ if ( o.getClass().isArray() ) {
+ if (o instanceof int[]) {
+ return Arrays.stream((int[])o).boxed().collect(Collectors.toList());
+ } else if (o instanceof double[]) {
+ return Arrays.stream((double[])o).boxed().collect(Collectors.toList());
+ }
+ return Arrays.asList((Object[])o);
+ }
return Collections.singletonList(o);
}
+ private <T> List<T> getAttributeAsList(String key, Function<Object, T> transformer) {
+ return getAttributeAsList(key).stream().map(transformer).collect(Collectors.toList());
+ }
+
+ public List<String> getAttributeAsStringList(String key, String defaultValue) {
+ return getAttributeAsList(key, x -> (x == null) ? defaultValue : String.valueOf(x));
+ }
+
+ public List<Integer> getAttributeAsIntList(String key, Integer defaultValue) {
+ return getAttributeAsList(key, x -> {
+ if (x == null || x == VCFConstants.MISSING_VALUE_v4) {
+ return defaultValue;
+ } else if (x instanceof Number) {
+ return ((Number) x).intValue();
+ } else {
+ return Integer.valueOf((String)x); // throws an exception if this isn't a string
+ }
+ });
+ }
+
+ public List<Double> getAttributeAsDoubleList(String key, Double defaultValue) {
+ return getAttributeAsList(key, x -> {
+ if (x == null || x == VCFConstants.MISSING_VALUE_v4) {
+ return defaultValue;
+ } else if (x instanceof Number) {
+ return ((Number) x).doubleValue();
+ } else {
+ return Double.valueOf((String)x); // throws an exception if this isn't a string
+ }
+ });
+ }
+
public String getAttributeAsString(String key, String defaultValue) {
Object x = getAttribute(key);
if ( x == null ) return defaultValue;
diff --git a/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java b/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java
index b8e13c7..33ec595 100644
--- a/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java
+++ b/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java
@@ -5,11 +5,7 @@ import org.apache.commons.jexl2.JexlContext;
import org.apache.commons.jexl2.JexlException;
import org.apache.commons.jexl2.MapContext;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
/**
* This is an implementation of a Map of {@link JexlVCMatchExp} to true or false values.
@@ -17,49 +13,87 @@ import java.util.Set;
*/
class JEXLMap implements Map<JexlVCMatchExp, Boolean> {
+ /**
+ * If a JEXL expression contains values that are not available in the given context, the default behavior is to
+ * treat that expression as a miss match.
+ */
+ public static final JexlMissingValueTreatment DEFAULT_MISSING_VALUE_TREATMENT = JexlMissingValueTreatment.TREAT_AS_MISMATCH;
+
// our variant context and/or Genotype
private final VariantContext vc;
private final Genotype g;
- // our context
- private JexlContext jContext = null;
+ private final JexlMissingValueTreatment howToTreatMissingValues;
/**
* our mapping from {@link JexlVCMatchExp} to {@link Boolean}s, which will be set to {@code NULL}
* for previously un-cached {@link JexlVCMatchExp}.
*/
- private Map<JexlVCMatchExp,Boolean> jexl;
+ private final Map<JexlVCMatchExp,Boolean> jexl;
- public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc, final Genotype g) {
- initialize(jexlCollection);
+ // our context
+ private JexlContext jContext = null;
+
+ /**
+ * Construct a new JEXLMap which can evaluate expressions against a specific genotype and variant context
+ * @param jexlCollection collection of expressions to be evaluated
+ * @param vc VariantContext to evaluate expressions against
+ * @param g genotype to evaluate expressions against, may be null
+ * @param howToTreatMissingValues how missing values in vc and g should be treated
+ */
+ public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc, final Genotype g, final JexlMissingValueTreatment howToTreatMissingValues) {
+ this.jexl = initializeMap(jexlCollection);
this.vc = vc;
this.g = g;
+ this.howToTreatMissingValues = howToTreatMissingValues;
}
+
+ /**
+ * Construct a new JEXLMap which can evaluate expressions against a specific genotype and variant context
+ * @param jexlCollection collection of expressions to be evaluated
+ * @param vc VariantContext to evaluate expressions against
+ * @param g genotype to evaluate expressions against, may be null
+ *
+ * missing values are treated as false
+ */
+ public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc, final Genotype g) {
+ this(jexlCollection, vc, g, DEFAULT_MISSING_VALUE_TREATMENT);
+ }
+
+ /**
+ * Construct a new JEXLMap which can evaluate expressions against a specific VariantContext
+ * @param jexlCollection collection of expressions to be evaluated
+ * @param vc VariantContext to evaluate expressions against
+ *
+ * missing values are treated as non-matches (false)
+ */
public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc) {
- this(jexlCollection, vc, null);
+ this(jexlCollection, vc, null, DEFAULT_MISSING_VALUE_TREATMENT);
}
/**
* Note: due to laziness, this accessor actually modifies the instance by possibly forcing evaluation of an Jexl expression.
*
- * @throws IllegalArgumentException when {@code o} is {@code null} or
+ * @throws IllegalArgumentException when {@code key} is {@code null} or
* when any of the JexlVCMatchExp (i.e. keys) contains invalid Jexl expressions.
*/
- public Boolean get(Object o) {
- if (o == null) {
+ public Boolean get(Object key) {
+ if (key == null) {
throw new IllegalArgumentException("Query key is null");
}
// if we've already determined the value, return it
- if (jexl.containsKey(o) && jexl.get(o) != null) {
- return jexl.get(o);
+ final Boolean value = jexl.get(key);
+ if (jexl.containsKey(key) && value != null) {
+ return value;
}
// otherwise cast the expression and try again
- final JexlVCMatchExp e = (JexlVCMatchExp) o;
- evaluateExpression(e);
- return jexl.get(e);
+ final JexlVCMatchExp exp = (JexlVCMatchExp) key;
+ final boolean matches = evaluateExpression(exp);
+ jexl.put(exp, matches);
+ return matches;
}
/**
@@ -87,9 +121,7 @@ class JEXLMap implements Map<JexlVCMatchExp, Boolean> {
*/
public Collection<Boolean> values() {
for (final JexlVCMatchExp exp : jexl.keySet()) {
- if (jexl.get(exp) == null) {
- evaluateExpression(exp);
- }
+ jexl.computeIfAbsent(exp, k -> evaluateExpression(exp));
}
return jexl.values();
}
@@ -112,38 +144,42 @@ class JEXLMap implements Map<JexlVCMatchExp, Boolean> {
}
/**
- * Initializes all keys with null values indicating that they have not yet been evaluated.
+ * Initializes a map and give all keys with null values indicating that they have not yet been evaluated.
* The actual value will be computed only when the key is requested via {@link #get(Object)} or {@link #values()}.
+ *
+ * @return an initialized map of jexlExpression -> null
*/
- private void initialize(Collection<JexlVCMatchExp> jexlCollection) {
- jexl = new HashMap<>();
+ private static Map<JexlVCMatchExp,Boolean> initializeMap(final Collection<JexlVCMatchExp> jexlCollection) {
+ final Map<JexlVCMatchExp,Boolean> jexlMap = new HashMap<>(jexlCollection.size());
for (final JexlVCMatchExp exp: jexlCollection) {
- jexl.put(exp, null);
+ jexlMap.put(exp, null);
}
+
+ return jexlMap;
}
/**
* Evaluates a {@link JexlVCMatchExp}'s expression, given the current context (and setup the context if it's {@code null}).
*
* @param exp the {@link JexlVCMatchExp} to evaluate
- *
+ * @return true if the expression matched the context
* @throws IllegalArgumentException when {@code exp} is {@code null}, or
* when the Jexl expression in {@code exp} fails to evaluate the JexlContext
* constructed with the input VC or genotype.
*/
- private void evaluateExpression(final JexlVCMatchExp exp) {
+ private boolean evaluateExpression(final JexlVCMatchExp exp) {
// if the context is null, we need to create it to evaluate the JEXL expression
if (this.jContext == null) {
- createContext();
+ jContext = createContext();
}
try {
+ //TODO figure out if this can ever evaluate to null or if that isn't actually possible
final Boolean value = (Boolean) exp.exp.evaluate(jContext);
- // treat errors as no match
- jexl.put(exp, value == null ? false : value);
+ return value == null ? howToTreatMissingValues.getMissingValueOrExplode() : value;
} catch (final JexlException.Variable e) {
- // if exception happens because variable is undefined (i.e. field in expression is not present), evaluate to FALSE
- jexl.put(exp,false);
+ //this occurs when the jexl expression contained a literal that didn't match anything in the given context
+ return howToTreatMissingValues.getMissingValueOrExplode();
} catch (final JexlException e) {
// todo - might be better if no exception is caught here but let's user decide how to deal with them; note this will propagate to get() and values()
throw new IllegalArgumentException(String.format("Invalid JEXL expression detected for %s", exp.name), e);
@@ -151,16 +187,17 @@ class JEXLMap implements Map<JexlVCMatchExp, Boolean> {
}
/**
- * Create the internal JexlContext, only when required.
+ * Create a new JexlContext
* This code is where new JEXL context variables should get added.
+ * @return a new jexl context initialized appropriately
*/
- private void createContext() {
+ private JexlContext createContext() {
if (vc == null) {
- jContext = new MapContext(Collections.emptyMap());
+ return new MapContext(Collections.emptyMap());
} else if (g == null) {
- jContext = new VariantJEXLContext(vc);
+ return new VariantJEXLContext(vc);
} else {
- jContext = new GenotypeJEXLContext(vc, g);
+ return new GenotypeJEXLContext(vc, g);
}
}
@@ -181,7 +218,7 @@ class JEXLMap implements Map<JexlVCMatchExp, Boolean> {
public Set<Entry<JexlVCMatchExp, Boolean>> entrySet() {
- throw new UnsupportedOperationException("clear() not supported on a JEXLMap");
+ throw new UnsupportedOperationException("entrySet() not supported on a JEXLMap");
}
// nope
diff --git a/src/main/java/htsjdk/variant/variantcontext/JexlMissingValueTreatment.java b/src/main/java/htsjdk/variant/variantcontext/JexlMissingValueTreatment.java
new file mode 100644
index 0000000..204cc3f
--- /dev/null
+++ b/src/main/java/htsjdk/variant/variantcontext/JexlMissingValueTreatment.java
@@ -0,0 +1,39 @@
+package htsjdk.variant.variantcontext;
+
+import java.util.function.Supplier;
+
+/**
+ * How to treat values that appear in a jexl expression but are missing in the context it's applied to
+ */
+public enum JexlMissingValueTreatment {
+ /**
+ * Treat expressions with a missing value as a mismatch and evaluate to false
+ */
+ TREAT_AS_MISMATCH(() -> false),
+
+ /**
+ * Treat expressions with a missing value as a match and evaluate to true
+ */
+ TREAT_AS_MATCH(() -> true),
+
+ /**
+ * Treat expressions with a missing value as an error and throw an {@link IllegalArgumentException}
+ */
+ THROW(() -> {throw new IllegalArgumentException("Jexl Expression couldn't be evaluated because there was a missing value.");});
+
+ private final Supplier<Boolean> resultSupplier;
+
+ JexlMissingValueTreatment(final Supplier<Boolean> resultSupplier){
+ this.resultSupplier = resultSupplier;
+ }
+
+ /**
+ * get the missing value that corresponds to this option or throw an exception
+ * @return the value that should be used in case of a missing value
+ * @throws IllegalArgumentException if this should be treated as an error
+ */
+ boolean getMissingValueOrExplode(){
+ return resultSupplier.get();
+ }
+
+}
diff --git a/src/main/java/htsjdk/tribble/Feature.java b/src/main/java/htsjdk/variant/variantcontext/StructuralVariantType.java
similarity index 59%
copy from src/main/java/htsjdk/tribble/Feature.java
copy to src/main/java/htsjdk/variant/variantcontext/StructuralVariantType.java
index 791986d..36b517a 100644
--- a/src/main/java/htsjdk/tribble/Feature.java
+++ b/src/main/java/htsjdk/variant/variantcontext/StructuralVariantType.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2013 The Broad Institute
+ * Copyright (c) 2016 Pierre Lindenbaum @yokofakun Institut du Thorax - Nantes - France
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,21 +21,27 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-package htsjdk.tribble;
-
-import htsjdk.samtools.util.Locatable;
+package htsjdk.variant.variantcontext;
/**
- * Represents a locus on a reference sequence. All Features are expected to return 1-based closed-ended intervals.
+ * Type of Structural Variant as defined in the VCF spec 4.2
+ *
*/
-public interface Feature extends Locatable {
-
- /**
- * Return the features reference sequence name, e.g chromosome or contig
- * @deprecated use getContig() instead
+public enum StructuralVariantType {
+ /** Deletion relative to the reference */
+ DEL,
+ /** Insertion of novel sequence relative to the reference */
+ INS,
+ /** Region of elevated copy number relative to the reference */
+ DUP,
+ /** Inversion of reference sequence */
+ INV,
+ /** Copy number variable region */
+ CNV,
+ /** breakend structural variation. VCF Specification : <cite>An arbitrary rearrangement
+ * event can be summarized as a set of novel adjacencies.
+ * Each adjacency ties together two breakends.</cite>
*/
- @Deprecated
- public String getChr();
-
+ BND
}
diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContext.java b/src/main/java/htsjdk/variant/variantcontext/VariantContext.java
index f64b0ff..55825fb 100644
--- a/src/main/java/htsjdk/variant/variantcontext/VariantContext.java
+++ b/src/main/java/htsjdk/variant/variantcontext/VariantContext.java
@@ -25,7 +25,6 @@
package htsjdk.variant.variantcontext;
-import htsjdk.samtools.util.Tuple;
import htsjdk.tribble.Feature;
import htsjdk.tribble.TribbleException;
import htsjdk.tribble.util.ParsingUtils;
@@ -37,18 +36,7 @@ import htsjdk.variant.vcf.VCFHeaderLineCount;
import htsjdk.variant.vcf.VCFHeaderLineType;
import java.io.Serializable;
-import java.util.AbstractMap;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.EnumSet;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.LinkedHashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
import java.util.stream.Collectors;
/**
@@ -274,7 +262,7 @@ public class VariantContext implements Feature, Serializable {
* @return an ordered list of genotype fields in use in VC. If vc has genotypes this will always include GT first
*/
public List<String> calcVCFGenotypeKeys(final VCFHeader header) {
- final Set<String> keys = new HashSet<String>();
+ final Set<String> keys = new HashSet<>();
boolean sawGoodGT = false;
boolean sawGoodQual = false;
@@ -298,11 +286,11 @@ public class VariantContext implements Feature, Serializable {
if ( sawPL ) keys.add(VCFConstants.GENOTYPE_PL_KEY);
if ( sawGenotypeFilter ) keys.add(VCFConstants.GENOTYPE_FILTER_KEY);
- List<String> sortedList = ParsingUtils.sortList(new ArrayList<String>(keys));
+ List<String> sortedList = ParsingUtils.sortList(new ArrayList<>(keys));
// make sure the GT is first
if (sawGoodGT) {
- final List<String> newList = new ArrayList<String>(sortedList.size()+1);
+ final List<String> newList = new ArrayList<>(sortedList.size() + 1);
newList.add(VCFConstants.GENOTYPE_KEY);
newList.addAll(sortedList);
sortedList = newList;
@@ -342,7 +330,7 @@ public class VariantContext implements Feature, Serializable {
* @param other the VariantContext to copy
*/
protected VariantContext(VariantContext other) {
- this(other.getSource(), other.getID(), other.getChr(), other.getStart(), other.getEnd(),
+ this(other.getSource(), other.getID(), other.getContig(), other.getStart(), other.getEnd(),
other.getAlleles(), other.getGenotypes(), other.getLog10PError(),
other.getFiltersMaybeNull(),
other.getAttributes(),
@@ -445,7 +433,7 @@ public class VariantContext implements Feature, Serializable {
Set<Allele> allelesFromGenotypes = allelesOfGenotypes(newGenotypes);
// ensure original order of genotypes
- List<Allele> rederivedAlleles = new ArrayList<Allele>(allelesFromGenotypes.size());
+ List<Allele> rederivedAlleles = new ArrayList<>(allelesFromGenotypes.size());
for (Allele allele : alleles)
if (allelesFromGenotypes.contains(allele))
rederivedAlleles.add(allele);
@@ -480,7 +468,7 @@ public class VariantContext implements Feature, Serializable {
* @return allele set
*/
private final Set<Allele> allelesOfGenotypes(Collection<Genotype> genotypes) {
- final Set<Allele> alleles = new HashSet<Allele>();
+ final Set<Allele> alleles = new HashSet<>();
boolean addedref = false;
for ( final Genotype g : genotypes ) {
@@ -747,7 +735,9 @@ public class VariantContext implements Feature, Serializable {
as a java.util.List if the value is a List or an Array,
as a Collections.singletonList if there is only one value */
public List<Object> getAttributeAsList(String key) { return commonInfo.getAttributeAsList(key); }
-
+ public List<String> getAttributeAsStringList(String key, String defaultValue) { return commonInfo.getAttributeAsStringList(key, defaultValue); }
+ public List<Integer> getAttributeAsIntList(String key, int defaultValue) { return commonInfo.getAttributeAsIntList(key, defaultValue); }
+ public List<Double> getAttributeAsDoubleList(String key, double defaultValue) { return commonInfo.getAttributeAsDoubleList(key, defaultValue); }
public CommonInfo getCommonInfo() {
return commonInfo;
}
@@ -872,7 +862,7 @@ public class VariantContext implements Feature, Serializable {
return null;
}
- List<Integer> lengths = new ArrayList<Integer>();
+ List<Integer> lengths = new ArrayList<>();
for ( Allele a : getAlternateAlleles() ) {
lengths.add(a.length() - getReference().length());
}
@@ -982,7 +972,7 @@ public class VariantContext implements Feature, Serializable {
* @throws IllegalArgumentException if sampleName isn't bound to a genotype
*/
protected GenotypesContext getGenotypes(Collection<String> sampleNames) {
- return getGenotypes().subsetToSamples(new HashSet<String>(sampleNames));
+ return getGenotypes().subsetToSamples(new HashSet<>(sampleNames));
}
public GenotypesContext getGenotypes(Set<String> sampleNames) {
@@ -1058,7 +1048,7 @@ public class VariantContext implements Feature, Serializable {
* @return chromosome count
*/
public int getCalledChrCount(Allele a) {
- return getCalledChrCount(a,new HashSet<String>(0));
+ return getCalledChrCount(a, new HashSet<>(0));
}
/**
@@ -1171,7 +1161,7 @@ public class VariantContext implements Feature, Serializable {
* Run all extra-strict validation tests on a Variant Context object
*
* @param reportedReference the reported reference allele
- * @param observedReference the actual reference allele
+ * @param observedReference the observed reference allele
* @param rsIDs the true dbSNP IDs
*/
public void extraStrictValidation(final Allele reportedReference, final Allele observedReference, final Set<String> rsIDs) {
@@ -1181,7 +1171,7 @@ public class VariantContext implements Feature, Serializable {
// validate the RS IDs
validateRSIDs(rsIDs);
- // validate the altenate alleles
+ // validate the alternate alleles
validateAlternateAlleles();
// validate the AN and AC fields
@@ -1210,7 +1200,7 @@ public class VariantContext implements Feature, Serializable {
if ( !hasGenotypes() )
return;
- // maintain a list of non-symbolic alleles reported in the REF and ALT fields of the record
+ // maintain a list of non-symbolic alleles expected in the REF and ALT fields of the record
// (we exclude symbolic alleles because it's commonly expected that they don't show up in the genotypes, e.g. with GATK gVCFs)
final List<Allele> reportedAlleles = new ArrayList<Allele>();
for ( final Allele allele : getAlleles() ) {
@@ -1219,7 +1209,7 @@ public class VariantContext implements Feature, Serializable {
}
// maintain a list of non-symbolic alleles observed in the genotypes
- final Set<Allele> observedAlleles = new HashSet<Allele>();
+ final Set<Allele> observedAlleles = new HashSet<>();
observedAlleles.add(getReference());
for ( final Genotype g : getGenotypes() ) {
if ( g.isCalled() ) {
@@ -1242,24 +1232,39 @@ public class VariantContext implements Feature, Serializable {
throw new TribbleException.InternalCodecException(String.format("one or more of the ALT allele(s) for the record at position %s:%d are not observed at all in the sample genotypes", getContig(), getStart()));
}
+ private void validateAttributeIsExpectedSize(final String attributeKey, final int numAlternateAlleles ) {
+ final List<Object> actualValues = getAttributeAsList(attributeKey);
+ if (!actualValues.isEmpty()) {
+ // always have at least one actual value
+ final int expectedValuesSize = numAlternateAlleles > 0 ? numAlternateAlleles : 1;
+ if (actualValues.size() != expectedValuesSize) {
+ throw new TribbleException.InternalCodecException(String.format("the %s tag has the incorrect number of records at position %s:%d, %d vs. %d", attributeKey, getContig(), getStart(), actualValues.size(), expectedValuesSize));
+ }
+ }
+ }
+
public void validateChromosomeCounts() {
+ final int numberOfAlternateAlleles = alleles.size() - 1;
+ validateAttributeIsExpectedSize(VCFConstants.ALLELE_COUNT_KEY, numberOfAlternateAlleles);
+ validateAttributeIsExpectedSize(VCFConstants.ALLELE_FREQUENCY_KEY, numberOfAlternateAlleles);
+
if ( !hasGenotypes() )
return;
// AN
if ( hasAttribute(VCFConstants.ALLELE_NUMBER_KEY) ) {
- int reportedAN = Integer.valueOf(getAttribute(VCFConstants.ALLELE_NUMBER_KEY).toString());
- int observedAN = getCalledChrCount();
+ final int reportedAN = Integer.valueOf(getAttribute(VCFConstants.ALLELE_NUMBER_KEY).toString());
+ final int observedAN = getCalledChrCount();
if ( reportedAN != observedAN )
throw new TribbleException.InternalCodecException(String.format("the Allele Number (AN) tag is incorrect for the record at position %s:%d, %d vs. %d", getContig(), getStart(), reportedAN, observedAN));
}
// AC
if ( hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
- ArrayList<Integer> observedACs = new ArrayList<Integer>();
+ final ArrayList<Integer> observedACs = new ArrayList<>();
// if there are alternate alleles, record the relevant tags
- if (!getAlternateAlleles().isEmpty()) {
+ if ( numberOfAlternateAlleles > 0 ) {
for ( Allele allele : getAlternateAlleles() ) {
observedACs.add(getCalledChrCount(allele));
}
@@ -1268,22 +1273,13 @@ public class VariantContext implements Feature, Serializable {
observedACs.add(0);
}
- if ( getAttribute(VCFConstants.ALLELE_COUNT_KEY) instanceof List ) {
- final List reportedACs = (List)getAttribute(VCFConstants.ALLELE_COUNT_KEY);
- if ( observedACs.size() != reportedACs.size() )
- throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag doesn't have the correct number of values for the record at position %s:%d, %d vs. %d", getContig(), getStart(), reportedACs.size(), observedACs.size()));
- for (int i = 0; i < observedACs.size(); i++) {
- // need to cast to int to make sure we don't have an issue below with object equals (earlier bug) - EB
- final int reportedAC = Integer.valueOf(reportedACs.get(i).toString());
- if ( reportedAC != observedACs.get(i) )
- throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag is incorrect for the record at position %s:%d, %s vs. %d", getContig(), getStart(), reportedAC, observedACs.get(i)));
- }
- } else {
- if ( observedACs.size() != 1 )
- throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag doesn't have enough values for the record at position %s:%d", getContig(), getStart()));
- int reportedAC = Integer.valueOf(getAttribute(VCFConstants.ALLELE_COUNT_KEY).toString());
- if ( reportedAC != observedACs.get(0) )
- throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag is incorrect for the record at position %s:%d, %d vs. %d", getContig(), getStart(), reportedAC, observedACs.get(0)));
+ final List<Object> reportedACs = getAttributeAsList(VCFConstants.ALLELE_COUNT_KEY);
+
+ for (int i = 0; i < observedACs.size(); i++) {
+ // need to cast to int to make sure we don't have an issue below with object equals (earlier bug) - EB
+ final int reportedAC = Integer.valueOf(reportedACs.get(i).toString());
+ if ( reportedAC != observedACs.get(i) )
+ throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag is incorrect for the record at position %s:%d, %s vs. %d", getContig(), getStart(), reportedAC, observedACs.get(i)));
}
}
}
@@ -1482,7 +1478,7 @@ public class VariantContext implements Feature, Serializable {
// protected basic manipulation routines
private static List<Allele> makeAlleles(Collection<Allele> alleles) {
- final List<Allele> alleleList = new ArrayList<Allele>(alleles.size());
+ final List<Allele> alleleList = new ArrayList<>(alleles.size());
boolean sawRef = false;
for ( final Allele a : alleles ) {
@@ -1553,7 +1549,7 @@ public class VariantContext implements Feature, Serializable {
private final Map<String, Object> fullyDecodeAttributes(final Map<String, Object> attributes,
final VCFHeader header,
final boolean lenientDecoding) {
- final Map<String, Object> newAttributes = new HashMap<String, Object>(10);
+ final Map<String, Object> newAttributes = new HashMap<>(10);
for ( final Map.Entry<String, Object> attr : attributes.entrySet() ) {
final String field = attr.getKey();
@@ -1591,7 +1587,7 @@ public class VariantContext implements Feature, Serializable {
final String string = (String)value;
if ( string.indexOf(',') != -1 ) {
final String[] splits = string.split(",");
- final List<Object> values = new ArrayList<Object>(splits.length);
+ final List<Object> values = new ArrayList<>(splits.length);
for ( int i = 0; i < splits.length; i++ )
values.add(decodeOne(field, splits[i], format));
return values;
@@ -1600,7 +1596,7 @@ public class VariantContext implements Feature, Serializable {
}
} else if ( value instanceof List && (((List) value).get(0)) instanceof String ) {
final List<String> asList = (List<String>)value;
- final List<Object> values = new ArrayList<Object>(asList.size());
+ final List<Object> values = new ArrayList<>(asList.size());
for ( final String s : asList )
values.add(decodeOne(field, s, format));
return values;
@@ -1653,10 +1649,6 @@ public class VariantContext implements Feature, Serializable {
// tribble integration routines -- not for public consumption
//
// ---------------------------------------------------------------------------------------------------------
- @Deprecated
- public String getChr() {
- return getContig();
- }
@Override
public String getContig() {
@@ -1699,10 +1691,8 @@ public class VariantContext implements Feature, Serializable {
return getAlternateAllele(0);
return getAlternateAlleles().stream()
- .map(allele -> new Tuple<>(allele, getCalledChrCount(allele)))
- .max((alleleAndCount1, alleleAndCount2) -> Integer.compare(alleleAndCount1.b, alleleAndCount2.b))
- .get()
- .a;
+ .max(Comparator.comparing(this::getCalledChrCount))
+ .orElse(null);
}
/**
@@ -1732,4 +1722,13 @@ public class VariantContext implements Feature, Serializable {
if ( index == -1 ) throw new IllegalArgumentException("Allele " + targetAllele + " not in this VariantContex " + this);
return GenotypeLikelihoods.getPLIndecesOfAlleles(0, index);
}
+
+ /**
+ * Search for the INFO=SVTYPE and return the type of Structural Variant
+ * @return the StructuralVariantType or null if there is no property SVTYPE
+ * */
+ public StructuralVariantType getStructuralVariantType() {
+ final String svType = this.getAttributeAsString(VCFConstants.SVTYPE, null);
+ return svType == null ? null : StructuralVariantType.valueOf(svType);
+ }
}
diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContextComparator.java b/src/main/java/htsjdk/variant/variantcontext/VariantContextComparator.java
index 5754349..d4e288f 100644
--- a/src/main/java/htsjdk/variant/variantcontext/VariantContextComparator.java
+++ b/src/main/java/htsjdk/variant/variantcontext/VariantContextComparator.java
@@ -84,7 +84,7 @@ public class VariantContextComparator implements Comparator<VariantContext>, Ser
// present. This error checking should already have been done in the constructor but it's left
// in as defence anyway.
final int contigCompare =
- this.contigIndexLookup.get(firstVariantContext.getChr()) - this.contigIndexLookup.get(secondVariantContext.getChr());
+ this.contigIndexLookup.get(firstVariantContext.getContig()) - this.contigIndexLookup.get(secondVariantContext.getContig());
return contigCompare != 0
? contigCompare
: firstVariantContext.getStart() - secondVariantContext.getStart();
diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java b/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java
index 96eaa64..face55b 100644
--- a/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java
+++ b/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java
@@ -307,6 +307,7 @@ public class VariantContextUtils {
* This the best way to apply JEXL expressions to {@link VariantContext} records.
* Use the various {@code initializeMatchExps()}'s to create the list of {@link JexlVCMatchExp} expressions.
*
+ * Expressions that contain literals not available in the VariantContext or Genotype will be treated as not matching
* @param vc variant context
* @param exps expressions
* @return true if there is a match
@@ -324,7 +325,20 @@ public class VariantContextUtils {
* @return true if there is a match
*/
public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp) {
- return match(vc,g, Collections.singletonList(exp)).get(exp);
+ return match(vc, g, Collections.singletonList(exp), JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT).get(exp);
+ }
+
+ /**
+ * Returns true if {@code exp} matches {@code vc}, {@code g}.
+ * See {@link #match(VariantContext, Genotype, Collection)} for full docs.
+ * @param vc variant context
+ * @param g genotype
+ * @param exp expression
+ * @param howToTreatMissingValues what to do if the jexl expression contains literals that aren't in the context
+ * @return true if there is a match
+ */
+ public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp, JexlMissingValueTreatment howToTreatMissingValues) {
+ return match(vc, g, Collections.singletonList(exp), howToTreatMissingValues).get(exp);
}
/**
@@ -333,13 +347,30 @@ public class VariantContextUtils {
* This the best way to apply JEXL expressions to {@link VariantContext} records.
* Use the various {@code initializeMatchExps()}'s to create the list of {@link JexlVCMatchExp} expressions.
*
+ * Expressions that contain literals not available in the VariantContext or Genotype will be treated as not matching
* @param vc variant context
* @param g genotype
* @param exps expressions
* @return true if there is a match
*/
public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Genotype g, Collection<JexlVCMatchExp> exps) {
- return new JEXLMap(exps,vc,g);
+ return match(vc, g, exps, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT);
+ }
+
+ /**
+ * Matches each {@link JexlVCMatchExp} exp against the data contained in {@code vc}, {@code g},
+ * and returns a map from these expressions to {@code true} (if they matched) or {@code false} (if they didn't).
+ * This the best way to apply JEXL expressions to {@link VariantContext} records.
+ * Use the various {@code initializeMatchExps()}'s to create the list of {@link JexlVCMatchExp} expressions.
+ *
+ * @param vc variant context
+ * @param g genotype
+ * @param exps expressions
+ * @param howToTreatMissingValues what to do if the jexl expression contains literals that aren't in the context
+ * @return true if there is a match
+ */
+ public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Genotype g, Collection<JexlVCMatchExp> exps, JexlMissingValueTreatment howToTreatMissingValues) {
+ return new JEXLMap(exps, vc, g, howToTreatMissingValues);
}
/**
diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantJEXLContext.java b/src/main/java/htsjdk/variant/variantcontext/VariantJEXLContext.java
index 493499e..34cde33 100644
--- a/src/main/java/htsjdk/variant/variantcontext/VariantJEXLContext.java
+++ b/src/main/java/htsjdk/variant/variantcontext/VariantJEXLContext.java
@@ -59,7 +59,7 @@ class VariantJEXLContext implements JexlContext {
static {
attributes.put("vc", (VariantContext vc) -> vc);
- attributes.put("CHROM", VariantContext::getChr);
+ attributes.put("CHROM", VariantContext::getContig);
attributes.put("POS", VariantContext::getStart);
attributes.put("TYPE", (VariantContext vc) -> vc.getType().toString());
attributes.put("QUAL", (VariantContext vc) -> -10 * vc.getLog10PError());
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriter.java b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriter.java
index 0776e4f..f9dd458 100644
--- a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriter.java
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2FieldWriter.java
@@ -255,7 +255,7 @@ public abstract class BCF2FieldWriter {
if ( vc.getNAlleles() > BCF2Utils.MAX_ALLELES_IN_GENOTYPES )
throw new IllegalStateException("Current BCF2 encoder cannot handle sites " +
"with > " + BCF2Utils.MAX_ALLELES_IN_GENOTYPES + " alleles, but you have "
- + vc.getNAlleles() + " at " + vc.getChr() + ":" + vc.getStart());
+ + vc.getNAlleles() + " at " + vc.getContig() + ":" + vc.getStart());
encodingType = BCF2Type.INT8;
buildAlleleMap(vc);
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
index 74a6298..8c16aac 100644
--- a/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/BCF2Writer.java
@@ -234,9 +234,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
//
// --------------------------------------------------------------------------------
private byte[] buildSitesData( VariantContext vc ) throws IOException {
- final int contigIndex = contigDictionary.get(vc.getChr());
+ final int contigIndex = contigDictionary.get(vc.getContig());
if ( contigIndex == -1 )
- throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getChr()));
+ throw new IllegalStateException(String.format("Contig %s not found in sequence dictionary from reference", vc.getContig()));
// note use of encodeRawValue to not insert the typing byte
encoder.encodeRawValue(contigIndex, BCF2Type.INT32);
@@ -391,7 +391,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
*/
private void errorUnexpectedFieldToWrite(final VariantContext vc, final String field, final String fieldType) {
throw new IllegalStateException("Found field " + field + " in the " + fieldType + " fields of VariantContext at " +
- vc.getChr() + ":" + vc.getStart() + " from " + vc.getSource() + " but this hasn't been defined in the VCFHeader");
+ vc.getContig() + ":" + vc.getStart() + " from " + vc.getSource() + " but this hasn't been defined in the VCFHeader");
}
// --------------------------------------------------------------------------------
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java b/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java
index 11d2f10..690a781 100644
--- a/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java
@@ -118,11 +118,11 @@ abstract class SortingVariantContextWriterBase implements VariantContextWriter {
since there is no implicit ordering of chromosomes:
*/
VCFRecord firstRec = queue.peek();
- if (firstRec != null && !vc.getChr().equals(firstRec.vc.getChr())) { // if we hit a new contig, flush the queue
- if (finishedChromosomes.contains(vc.getChr()))
- throw new IllegalArgumentException("Added a record at " + vc.getChr() + ":" + vc.getStart() + ", but already finished with chromosome" + vc.getChr());
+ if (firstRec != null && !vc.getContig().equals(firstRec.vc.getContig())) { // if we hit a new contig, flush the queue
+ if (finishedChromosomes.contains(vc.getContig()))
+ throw new IllegalArgumentException("Added a record at " + vc.getContig() + ":" + vc.getStart() + ", but already finished with chromosome" + vc.getContig());
- finishedChromosomes.add(firstRec.vc.getChr());
+ finishedChromosomes.add(firstRec.vc.getContig());
stopWaitingToSort();
}
@@ -159,7 +159,7 @@ abstract class SortingVariantContextWriterBase implements VariantContextWriter {
protected void noteCurrentRecord(VariantContext vc) {
// did the user break the contract by giving a record too late?
if (mostUpstreamWritableLoc != null && vc.getStart() < mostUpstreamWritableLoc) // went too far back, since may have already written anything that is <= mostUpstreamWritableLoc
- throw new IllegalArgumentException("Permitted to write any record upstream of position " + mostUpstreamWritableLoc + ", but a record at " + vc.getChr() + ":" + vc.getStart() + " was just added.");
+ throw new IllegalArgumentException("Permitted to write any record upstream of position " + mostUpstreamWritableLoc + ", but a record at " + vc.getContig() + ":" + vc.getStart() + " was just added.");
}
// --------------------------------------------------------------------------------
diff --git a/src/main/java/htsjdk/variant/vcf/VCFConstants.java b/src/main/java/htsjdk/variant/vcf/VCFConstants.java
index b05856d..6a52d1d 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFConstants.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFConstants.java
@@ -63,7 +63,10 @@ public final class VCFConstants {
public static final String SOMATIC_KEY = "SOMATIC";
public static final String VALIDATED_KEY = "VALIDATED";
public static final String THOUSAND_GENOMES_KEY = "1000G";
-
+
+ // reserved INFO for structural variants
+ /** INFO Type of structural variant */
+ public static final String SVTYPE = "SVTYPE";
// separators
public static final String FORMAT_FIELD_SEPARATOR = ":";
diff --git a/src/main/java/htsjdk/variant/vcf/VCFEncoder.java b/src/main/java/htsjdk/variant/vcf/VCFEncoder.java
index f65a038..a909066 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFEncoder.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFEncoder.java
@@ -72,7 +72,7 @@ public class VCFEncoder {
final StringBuilder stringBuilder = new StringBuilder();
// CHROM
- stringBuilder.append(context.getChr()).append(VCFConstants.FIELD_SEPARATOR)
+ stringBuilder.append(context.getContig()).append(VCFConstants.FIELD_SEPARATOR)
// POS
.append(String.valueOf(context.getStart())).append(VCFConstants.FIELD_SEPARATOR)
// ID
@@ -170,7 +170,7 @@ public class VCFEncoder {
private void fieldIsMissingFromHeaderError(final VariantContext vc, final String id, final String field) {
if ( ! allowMissingFieldsInHeader)
throw new IllegalStateException("Key " + id + " found in VariantContext field " + field
- + " at " + vc.getChr() + ":" + vc.getStart()
+ + " at " + vc.getContig() + ":" + vc.getStart()
+ " but this key isn't defined in the VCFHeader. We require all VCFs to have"
+ " complete VCF headers by default.");
}
diff --git a/src/main/java/htsjdk/variant/vcf/VCFFileReader.java b/src/main/java/htsjdk/variant/vcf/VCFFileReader.java
index 18a7b80..9024f34 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFFileReader.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFFileReader.java
@@ -116,7 +116,7 @@ public class VCFFileReader implements Closeable, Iterable<VariantContext> {
final Integer intervalEnd=vc.getCommonInfo().getAttributeAsInt("END",vc.getEnd());
if(".".equals(name) || name == null)
name = "interval-" + (++intervals);
- list.add(new Interval(vc.getChr(), vc.getStart(), intervalEnd, false, name));
+ list.add(new Interval(vc.getContig(), vc.getStart(), intervalEnd, false, name));
}
}
diff --git a/src/main/java/htsjdk/variant/vcf/VCFFilterHeaderLine.java b/src/main/java/htsjdk/variant/vcf/VCFFilterHeaderLine.java
index 6c71865..5130963 100644
--- a/src/main/java/htsjdk/variant/vcf/VCFFilterHeaderLine.java
+++ b/src/main/java/htsjdk/variant/vcf/VCFFilterHeaderLine.java
@@ -33,6 +33,8 @@ import java.util.Arrays;
* A class representing a key=value entry for FILTER fields in the VCF header
*/
public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
+
+ private static final long serialVersionUID = 1L;
/**
* create a VCF filter header line
@@ -40,7 +42,7 @@ public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
* @param name the name for this header line
* @param description the description for this header line
*/
- public VCFFilterHeaderLine(String name, String description) {
+ public VCFFilterHeaderLine(final String name, final String description) {
super("FILTER", name, description);
}
@@ -48,7 +50,7 @@ public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
* Convenience constructor for FILTER whose description is the name
* @param name
*/
- public VCFFilterHeaderLine(String name) {
+ public VCFFilterHeaderLine(final String name) {
super("FILTER", name, name);
}
@@ -58,7 +60,7 @@ public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
* @param line the header line
* @param version the vcf header version
*/
- public VCFFilterHeaderLine(String line, VCFHeaderVersion version) {
+ public VCFFilterHeaderLine(final String line, final VCFHeaderVersion version) {
super(line, version, "FILTER", Arrays.asList("ID", "Description"));
}
@@ -66,4 +68,12 @@ public class VCFFilterHeaderLine extends VCFSimpleHeaderLine {
public boolean shouldBeAddedToDictionary() {
return true;
}
-}
\ No newline at end of file
+
+ /**
+ * get the "Description" field
+ * @return the "Description" field
+ */
+ public String getDescription() {
+ return getGenericFieldValue("Description");
+ }
+}
diff --git a/src/test/java/htsjdk/samtools/SAMSequenceDictionaryCodecTest.java b/src/test/java/htsjdk/samtools/SAMSequenceDictionaryCodecTest.java
new file mode 100644
index 0000000..32de1cd
--- /dev/null
+++ b/src/test/java/htsjdk/samtools/SAMSequenceDictionaryCodecTest.java
@@ -0,0 +1,122 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.samtools;
+
+import htsjdk.samtools.util.LineReader;
+import htsjdk.samtools.util.StringLineReader;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import javax.sound.sampled.Line;
+import java.io.BufferedWriter;
+import java.io.StringWriter;
+import java.util.List;
+import java.util.Random;
+
+import static org.testng.Assert.*;
+
+/**
+ * @author Pavel_Silin at epam.com, EPAM Systems, Inc. <www.epam.com>
+ */
+public class SAMSequenceDictionaryCodecTest {
+
+ private static final Random random = new Random();
+ private SAMSequenceDictionary dictionary;
+ private StringWriter writer;
+ private SAMSequenceDictionaryCodec codec;
+ private BufferedWriter bufferedWriter;
+
+ @BeforeMethod
+ public void setUp() throws Exception {
+ String[] seqs = new String[]{"chr1", "chr2", "chr12", "chr16", "chrX"};
+ dictionary = new SAMSequenceDictionary();
+ for (String seq : seqs) {
+ dictionary.addSequence(new SAMSequenceRecord(seq, random.nextInt(10_000_000)));
+ }
+ writer = new StringWriter();
+ bufferedWriter = new BufferedWriter(writer);
+ codec = new SAMSequenceDictionaryCodec(bufferedWriter);
+ }
+
+ @Test
+ public void testEncodeDecodeDictionary() throws Exception {
+ LineReader readerOne = null;
+ LineReader readerTwo = null;
+ try {
+ codec.encode(dictionary);
+ bufferedWriter.close();
+ readerOne = new StringLineReader(writer.toString());
+ SAMSequenceDictionary actual = codec.decode(readerOne, null);
+ assertEquals(actual, dictionary);
+
+ readerTwo = new StringLineReader(writer.toString());
+
+ String line = readerTwo.readLine();
+ assertTrue(line.startsWith("@HD"));
+
+ line = readerTwo.readLine();
+ while (line != null) {
+ assertTrue(line.startsWith("@SQ"));
+ line = readerTwo.readLine();
+ }
+ } finally {
+ assert readerOne != null;
+ assert readerTwo != null;
+ readerOne.close();
+ readerTwo.close();
+ }
+ }
+
+ @Test
+ public void testEncodeDecodeListOfSeqs() throws Exception {
+ LineReader readerOne = null;
+ LineReader readerTwo = null;
+
+ try {
+ List<SAMSequenceRecord> sequences = dictionary.getSequences();
+ codec.encodeHeaderLine(false);
+ sequences.forEach(codec::encodeSequenceRecord);
+ bufferedWriter.close();
+ readerOne = new StringLineReader(writer.toString());
+ SAMSequenceDictionary actual = codec.decode(readerOne, null);
+ assertEquals(actual, dictionary);
+ readerTwo = new StringLineReader(writer.toString());
+
+ String line = readerTwo.readLine();
+ assertTrue(line.startsWith("@HD"));
+
+ line = readerTwo.readLine();
+ while (line != null) {
+ assertTrue(line.startsWith("@SQ"));
+ line = readerTwo.readLine();
+ }
+ } finally {
+ assert readerOne != null;
+ assert readerTwo != null;
+ readerOne.close();
+ readerTwo.close();
+ }
+ }
+}
diff --git a/src/test/java/htsjdk/samtools/SamReaderFactoryTest.java b/src/test/java/htsjdk/samtools/SamReaderFactoryTest.java
index ece91e2..31ad5c2 100644
--- a/src/test/java/htsjdk/samtools/SamReaderFactoryTest.java
+++ b/src/test/java/htsjdk/samtools/SamReaderFactoryTest.java
@@ -284,7 +284,43 @@ public class SamReaderFactoryTest {
}
reader.close();
}
-
+
+
+ /**
+ * A path that pretends it's not based upon a file. This helps in cases where we want to test branches
+ * that apply to non-file based paths without actually having to use non-file based resources (like cloud urls)
+ */
+ private static class NeverFilePathInputResource extends PathInputResource {
+ public NeverFilePathInputResource(Path pathResource) {
+ super(pathResource);
+ }
+
+ @Override
+ public File asFile() {
+ return null;
+ }
+ }
+
+ @Test
+ public void checkHasIndexForStreamingPathBamWithFileIndex() throws IOException {
+ InputResource bam = new NeverFilePathInputResource(localBam.toPath());
+ InputResource index = new FileInputResource(localBamIndex);
+
+ // ensure that the index is being used, not checked in queryInputResourcePermutation
+ try (final SamReader reader = SamReaderFactory.makeDefault().open(new SamInputResource(bam, index))) {
+ Assert.assertTrue(reader.hasIndex());
+ }
+ }
+
+ @Test
+ public void queryStreamingPathBamWithFileIndex() throws IOException {
+ InputResource bam = new NeverFilePathInputResource(localBam.toPath());
+ InputResource index = new FileInputResource(localBamIndex);
+
+ final SamInputResource resource = new SamInputResource(bam, index);
+ queryInputResourcePermutation(new SamInputResource(bam, index));
+ }
+
@Test
public void customReaderFactoryTest() throws IOException {
try {
diff --git a/src/test/java/htsjdk/samtools/ValidateSamFileTest.java b/src/test/java/htsjdk/samtools/ValidateSamFileTest.java
index 33a34da..4ce0b7a 100644
--- a/src/test/java/htsjdk/samtools/ValidateSamFileTest.java
+++ b/src/test/java/htsjdk/samtools/ValidateSamFileTest.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2009-2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -36,6 +36,8 @@ import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+import java.nio.channels.FileChannel;
+import java.nio.file.StandardOpenOption;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileReader;
@@ -45,6 +47,8 @@ import java.io.LineNumberReader;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
import java.util.Iterator;
/**
@@ -55,6 +59,8 @@ import java.util.Iterator;
*/
public class ValidateSamFileTest {
private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools/ValidateSamFileTest");
+ private static final int TERMINATION_GZIP_BLOCK_SIZE = 28;
+ private static final int RANDOM_NUMBER_TRUNC_BYTE = 128;
@Test
public void testValidSamFile() throws Exception {
@@ -412,21 +418,6 @@ public class ValidateSamFileTest {
}
- private Histogram<String> executeValidation(final SamReader samReader, final ReferenceSequenceFile reference, final IndexValidationStringency stringency) throws IOException {
- final File outFile = File.createTempFile("validation", ".txt");
- outFile.deleteOnExit();
- final PrintWriter out = new PrintWriter(outFile);
- new SamFileValidator(out, 8000).setIndexValidationStringency(stringency).validateSamFileSummary(samReader, reference);
- final LineNumberReader reader = new LineNumberReader(new FileReader(outFile));
- if (reader.readLine().equals("No errors found")) {
- return new Histogram<String>();
- }
- final MetricsFile<MetricBase, String> outputFile = new MetricsFile<MetricBase, String>();
- outputFile.read(new FileReader(outFile));
- Assert.assertNotNull(outputFile.getHistogram());
- return outputFile.getHistogram();
- }
-
private void testHeaderVersion(final String version, final boolean expectValid) throws Exception {
final File samFile = File.createTempFile("validateHeader.", ".sam");
samFile.deleteOnExit();
@@ -468,4 +459,89 @@ public class ValidateSamFileTest {
Assert.assertFalse(results.isEmpty());
Assert.assertEquals(results.get(SAMValidationError.Type.MATES_ARE_SAME_END.getHistogramString()).getValue(), 2.0);
}
+
+
+ @DataProvider(name = "TagCorrectlyProcessData")
+ public Object[][] tagCorrectlyProcessData() throws IOException {
+ final String E2TagCorrectlyProcessTestData =
+ "@HD\tVN:1.0\tSO:unsorted\n" +
+ "@SQ\tSN:chr1\tLN:101\n" +
+ "@RG\tID:0\tSM:Hi,Mom!\n" +
+ "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tE2:Z:CAA";
+
+ final String U2TagCorrectlyProcessTestData =
+ "@HD\tVN:1.0\tSO:unsorted\n" +
+ "@SQ\tSN:chr1\tLN:101\n" +
+ "@RG\tID:0\tSM:Hi,Mom!\n" +
+ "E\t147\tchr1\t15\t255\t10M\t=\t2\t-30\tCAACAGAAGC\t)'.*.+2,))\tU2:Z:CAA";
+
+ return new Object[][]{
+ {E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.E2_BASE_EQUALS_PRIMARY_BASE},
+ {E2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_E2_LENGTH},
+ {U2TagCorrectlyProcessTestData.getBytes(), SAMValidationError.Type.MISMATCH_READ_LENGTH_AND_U2_LENGTH}
+ };
+ }
+
+ @Test(dataProvider = "TagCorrectlyProcessData")
+ public void tagCorrectlyProcessTest(byte[] bytesFromFile,
+ SAMValidationError.Type errorType) throws Exception {
+ final SamReader samReader = SamReaderFactory
+ .makeDefault()
+ .validationStringency(ValidationStringency.SILENT)
+ .open(
+ SamInputResource.of(
+ new ByteArrayInputStream(bytesFromFile)
+ )
+ );
+ final Histogram<String> results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);
+ Assert.assertEquals(results.get(errorType.getHistogramString()).getValue(), 1.0);
+ }
+
+ @DataProvider(name = "validateBamFileTerminationData")
+ public Object[][] validateBamFileTerminationData() throws IOException {
+ return new Object[][]{
+ {getBrokenFile(TERMINATION_GZIP_BLOCK_SIZE), SAMValidationError.Type.BAM_FILE_MISSING_TERMINATOR_BLOCK},
+ {getBrokenFile(RANDOM_NUMBER_TRUNC_BYTE), SAMValidationError.Type.TRUNCATED_FILE}
+ };
+ }
+
+ @Test(dataProvider = "validateBamFileTerminationData")
+ public void validateBamFileTerminationTest(File file, SAMValidationError.Type errorType) throws IOException {
+ final SamFileValidator samFileValidator = new SamFileValidator(new PrintWriter(System.out), 8000);
+ samFileValidator.validateBamFileTermination(file);
+ Assert.assertEquals(samFileValidator.getErrorsByType().get(errorType).getValue(), 1.0);
+ }
+
+ private Histogram<String> executeValidation(final SamReader samReader, final ReferenceSequenceFile reference,
+ final IndexValidationStringency stringency) throws IOException {
+ return executeValidationWithErrorIgnoring(samReader, reference, stringency, Collections.EMPTY_LIST);
+ }
+
+ private Histogram<String> executeValidationWithErrorIgnoring(final SamReader samReader, final ReferenceSequenceFile reference,
+ final IndexValidationStringency stringency, Collection<SAMValidationError.Type> ignoringError) throws IOException {
+ final File outFile = File.createTempFile("validation", ".txt");
+ outFile.deleteOnExit();
+
+ final PrintWriter out = new PrintWriter(outFile);
+ final SamFileValidator samFileValidator = new SamFileValidator(out, 8000);
+ samFileValidator.setIndexValidationStringency(stringency).setErrorsToIgnore(ignoringError);
+ samFileValidator.validateSamFileSummary(samReader, reference);
+
+ final LineNumberReader reader = new LineNumberReader(new FileReader(outFile));
+ if (reader.readLine().equals("No errors found")) {
+ return new Histogram<>();
+ }
+ final MetricsFile<MetricBase, String> outputFile = new MetricsFile<>();
+ outputFile.read(new FileReader(outFile));
+ Assert.assertNotNull(outputFile.getHistogram());
+ return outputFile.getHistogram();
+ }
+
+ private File getBrokenFile(int truncByte) throws IOException {
+ final FileChannel stream = FileChannel.open(new File(TEST_DATA_DIR + "/test_samfile_version_1pt5.bam").toPath());
+ final File breakingFile = File.createTempFile("trunc", ".bam");
+ breakingFile.deleteOnExit();
+ FileChannel.open(breakingFile.toPath(), StandardOpenOption.WRITE).transferFrom(stream, 0, stream.size() - truncByte);
+ return breakingFile;
+ }
}
diff --git a/src/test/java/htsjdk/samtools/sra/AbstractSRATest.java b/src/test/java/htsjdk/samtools/sra/AbstractSRATest.java
index a0984d7..297b892 100644
--- a/src/test/java/htsjdk/samtools/sra/AbstractSRATest.java
+++ b/src/test/java/htsjdk/samtools/sra/AbstractSRATest.java
@@ -18,7 +18,7 @@ public abstract class AbstractSRATest {
@BeforeGroups(groups = "sra")
public final void checkIfCanResolve() {
- if (!SRAAccession.isSupported()) {
+ if (SRAAccession.checkIfInitialized() != null) {
return;
}
canResolveNetworkAccession = SRAAccession.isValid(checkAccession);
@@ -26,7 +26,7 @@ public abstract class AbstractSRATest {
@BeforeMethod
public final void assertSRAIsSupported() {
- if(!SRAAccession.isSupported()){
+ if(SRAAccession.checkIfInitialized() != null){
throw new SkipException("Skipping SRA Test because SRA native code is unavailable.");
}
}
diff --git a/src/test/java/htsjdk/samtools/util/AbstractLocusInfoTest.java b/src/test/java/htsjdk/samtools/util/AbstractLocusInfoTest.java
new file mode 100644
index 0000000..a21c743
--- /dev/null
+++ b/src/test/java/htsjdk/samtools/util/AbstractLocusInfoTest.java
@@ -0,0 +1,79 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.samtools.util;
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMSequenceRecord;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * @author Mariia_Zueva at epam.com, EPAM Systems, Inc. <www.epam.com>
+ */
+
+public class AbstractLocusInfoTest {
+ private final byte[] qualities = {30, 50, 50, 60, 60, 70, 70, 70, 80, 90, 30, 50, 50, 60, 60, 70, 70, 70, 80, 90};
+ private byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T', 'T', 'C', 'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T', 'T', 'C'};
+ private EdgingRecordAndOffset typedRecordAndOffset;
+ private EdgingRecordAndOffset typedRecordAndOffsetEnd;
+ private SAMSequenceRecord sequence = new SAMSequenceRecord("chrM", 100);
+
+ @BeforeTest
+ public void setUp() {
+ SAMRecord record = new SAMRecord(new SAMFileHeader());
+ record.setReadName("testRecord");
+ record.setReadBases(bases);
+ record.setBaseQualities(qualities);
+ typedRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 10, 10, 10);
+ typedRecordAndOffsetEnd = EdgingRecordAndOffset.createEndRecord(typedRecordAndOffset);
+ }
+
+ @Test
+ public void testConstructor() {
+ AbstractLocusInfo<EdgingRecordAndOffset> info = new AbstractLocusInfo<>(sequence, 1);
+ assertEquals("chrM", info.getSequenceName());
+ assertEquals(0, info.getRecordAndOffsets().size());
+ assertEquals(100, info.getSequenceLength());
+ assertEquals(1, info.getPosition());
+ }
+
+ @Test
+ public void testAdd() {
+ AbstractLocusInfo<EdgingRecordAndOffset> info = new AbstractLocusInfo<>(sequence, 10);
+ info.add(typedRecordAndOffset);
+ info.add(typedRecordAndOffsetEnd);
+ assertEquals(2, info.getRecordAndOffsets().size());
+ assertEquals(typedRecordAndOffset, info.getRecordAndOffsets().get(0));
+ assertEquals(typedRecordAndOffsetEnd, info.getRecordAndOffsets().get(1));
+ assertEquals(10, info.getPosition());
+ assertEquals('A', info.getRecordAndOffsets().get(0).getReadBase());
+ assertEquals('A', info.getRecordAndOffsets().get(1).getReadBase());
+ assertEquals(30, info.getRecordAndOffsets().get(0).getBaseQuality());
+ assertEquals(30, info.getRecordAndOffsets().get(1).getBaseQuality());
+ }
+}
diff --git a/src/test/java/htsjdk/samtools/util/AbstractLocusIteratorTestTemplate.java b/src/test/java/htsjdk/samtools/util/AbstractLocusIteratorTestTemplate.java
new file mode 100644
index 0000000..0c08436
--- /dev/null
+++ b/src/test/java/htsjdk/samtools/util/AbstractLocusIteratorTestTemplate.java
@@ -0,0 +1,68 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.samtools.util;
+
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMRecordSetBuilder;
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+
+/**
+ * Common template for testing classes, that extend AbstractLocusIterator.
+ *
+ * @author Mariia_Zueva at epam.com, EPAM Systems, Inc. <www.epam.com>
+ *
+ */
+public abstract class AbstractLocusIteratorTestTemplate {
+
+ /** Coverage for tests with the same reads */
+ final static int coverage = 2;
+
+ /** the read length for the tests */
+ final static int readLength = 36;
+
+ final static SAMFileHeader header = new SAMFileHeader();
+
+ static {
+ header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
+ SAMSequenceDictionary dict = new SAMSequenceDictionary();
+ dict.addSequence(new SAMSequenceRecord("chrM", 100000));
+ header.setSequenceDictionary(dict);
+ }
+
+ /** Get the record builder for the tests with the default parameters that are needed */
+ static SAMRecordSetBuilder getRecordBuilder() {
+ final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
+ builder.setHeader(header);
+ builder.setReadLength(readLength);
+ return builder;
+ }
+
+ public abstract void testBasicIterator();
+ public abstract void testEmitUncoveredLoci();
+ public abstract void testSimpleGappedAlignment();
+ public abstract void testOverlappingGappedAlignmentsWithoutIndels();
+}
\ No newline at end of file
diff --git a/src/test/java/htsjdk/samtools/util/AbstractRecordAndOffsetTest.java b/src/test/java/htsjdk/samtools/util/AbstractRecordAndOffsetTest.java
new file mode 100644
index 0000000..568c84c
--- /dev/null
+++ b/src/test/java/htsjdk/samtools/util/AbstractRecordAndOffsetTest.java
@@ -0,0 +1,63 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools.util;
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMRecord;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.Test;
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+
+/**
+ *
+ * @author Mariia_Zueva at epam.com, EPAM Systems, Inc. <www.epam.com>
+ *
+ */
+
+public class AbstractRecordAndOffsetTest {
+
+ private final byte[] qualities = {30, 40, 50, 60, 70, 80 ,90, 70, 80, 90};
+ private byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T', 'T', 'C'};
+ SAMRecord record;
+
+ @BeforeTest
+ public void setUp(){
+ record = new SAMRecord(new SAMFileHeader());
+ record.setReadName("testRecord");
+ record.setReadBases(bases);
+ record.setBaseQualities(qualities);
+ }
+
+ @Test
+ public void testConstructor(){
+ AbstractRecordAndOffset abstractRecordAndOffset = new AbstractRecordAndOffset(record, 0, 10, 3);
+ assertArrayEquals(qualities, abstractRecordAndOffset.getBaseQualities());
+ assertArrayEquals(bases, abstractRecordAndOffset.getRecord().getReadBases());
+ assertEquals('A', abstractRecordAndOffset.getReadBase());
+ assertEquals(30, abstractRecordAndOffset.getBaseQuality());
+ assertEquals(0, abstractRecordAndOffset.getOffset());
+ assertEquals(-1, abstractRecordAndOffset.getRefPos());
+ }
+}
diff --git a/src/test/java/htsjdk/samtools/util/BlockCompressedOutputStreamTest.java b/src/test/java/htsjdk/samtools/util/BlockCompressedOutputStreamTest.java
index b988415..8a0d97f 100644
--- a/src/test/java/htsjdk/samtools/util/BlockCompressedOutputStreamTest.java
+++ b/src/test/java/htsjdk/samtools/util/BlockCompressedOutputStreamTest.java
@@ -23,12 +23,16 @@
*/
package htsjdk.samtools.util;
+import htsjdk.samtools.FileTruncatedException;
import htsjdk.samtools.util.zip.DeflaterFactory;
import org.testng.Assert;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.BufferedReader;
import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
@@ -37,11 +41,13 @@ import java.util.zip.Deflater;
public class BlockCompressedOutputStreamTest {
+ private static final String HTSJDK_TRIBBLE_RESOURCES = "src/test/resources/htsjdk/tribble/";
+
@Test
public void testBasic() throws Exception {
final File f = File.createTempFile("BCOST.", ".gz");
f.deleteOnExit();
- final List<String> linesWritten = new ArrayList<String>();
+ final List<String> linesWritten = new ArrayList<>();
System.out.println("Creating file " + f);
final BlockCompressedOutputStream bcos = new BlockCompressedOutputStream(f);
String s = "Hi, Mom!\n";
@@ -76,11 +82,54 @@ public class BlockCompressedOutputStreamTest {
bcis2.close();
}
- @Test
- public void testOverflow() throws Exception {
+ @DataProvider(name = "seekReadExceptionsData")
+ private Object[][] seekReadExceptionsData()
+ {
+ return new Object[][]{
+ {HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.truncated.gz", FileTruncatedException.class,
+ BlockCompressedInputStream.PREMATURE_END_MSG + System.getProperty("user.dir") + "/" +
+ HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.truncated.gz", true, false, 0},
+ {HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.truncated.hdr.gz", IOException.class,
+ BlockCompressedInputStream.INCORRECT_HEADER_SIZE_MSG + System.getProperty("user.dir") + "/" +
+ HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.truncated.hdr.gz", true, false, 0},
+ {HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.gz", IOException.class,
+ BlockCompressedInputStream.CANNOT_SEEK_STREAM_MSG, false, true, 0},
+ {HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.gz", IOException.class,
+ BlockCompressedInputStream.INVALID_FILE_PTR_MSG + 1000 + " for " + System.getProperty("user.dir") + "/" +
+ HTSJDK_TRIBBLE_RESOURCES + "vcfexample.vcf.gz", true, true, 1000 }
+ };
+ }
+
+ @Test(dataProvider = "seekReadExceptionsData")
+ public void testSeekReadExceptions(final String filePath, final Class c, final String msg, final boolean isFile, final boolean isSeek, final int pos) throws Exception {
+
+ final BlockCompressedInputStream bcis = isFile ?
+ new BlockCompressedInputStream(new File(filePath)) :
+ new BlockCompressedInputStream(new FileInputStream(filePath));
+ boolean haveException = false;
+ try {
+ if ( isSeek ) {
+ bcis.seek(pos);
+ } else {
+ final BufferedReader reader = new BufferedReader(new InputStreamReader(bcis));
+ reader.readLine();
+ }
+ } catch (final Exception e) {
+ if ( e.getClass().equals(c) ) {
+ haveException = true;
+ Assert.assertEquals(e.getMessage(), msg);
+ }
+ }
+
+ if ( !haveException ) {
+ Assert.fail("Expected " + c.getSimpleName());
+ }
+ }
+
+ @Test public void testOverflow() throws Exception {
final File f = File.createTempFile("BCOST.", ".gz");
f.deleteOnExit();
- final List<String> linesWritten = new ArrayList<String>();
+ final List<String> linesWritten = new ArrayList<>();
System.out.println("Creating file " + f);
final BlockCompressedOutputStream bcos = new BlockCompressedOutputStream(f);
Random r = new Random(15555);
diff --git a/src/test/java/htsjdk/samtools/util/EdgeReadIteratorTest.java b/src/test/java/htsjdk/samtools/util/EdgeReadIteratorTest.java
new file mode 100644
index 0000000..a5459c6
--- /dev/null
+++ b/src/test/java/htsjdk/samtools/util/EdgeReadIteratorTest.java
@@ -0,0 +1,402 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools.util;
+
+import htsjdk.samtools.SAMRecordSetBuilder;
+import htsjdk.samtools.SamReader;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.InputStreamReader;
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Tests check that for each alignment block of processed reads, the iterator returns an <code>EdgingRecordAndOffset</code>
+ * with type <code>BEGIN</code> for the reference position of the read start and an <code>EdgingRecordAndOffset</code> with
+ * type <code>END</code> for the reference position + 1 of the read end.
+ */
+public class EdgeReadIteratorTest extends AbstractLocusIteratorTestTemplate {
+
+ @Override
+ @Test
+ public void testBasicIterator() {
+ final EdgeReadIterator sli = new EdgeReadIterator(createSamFileReader());
+ int pos = 1;
+ for (final AbstractLocusInfo<EdgingRecordAndOffset> li : sli) {
+ if (pos == 1 || pos == 37) {
+ assertEquals(pos++, li.getPosition());
+ assertEquals(2, li.getRecordAndOffsets().size());
+ } else {
+ assertEquals(pos++, li.getPosition());
+ assertEquals(0, li.getRecordAndOffsets().size());
+ }
+ }
+
+ }
+
+ /**
+ * Since EdgeReadIterator does not support emitting uncovered loci, this test just check that
+ * iterator return correctly aligned objects for start and end of a read.
+ */
+ @Override
+ @Test
+ public void testEmitUncoveredLoci() {
+ final SAMRecordSetBuilder builder = getRecordBuilder();
+ // add records up to coverage for the test in that position
+ final int startPosition = 165;
+ for (int i = 0; i < coverage; i++) {
+ // add a negative-strand fragment mapped on chrM with base quality of 10
+ builder.addFrag("record" + i, 0, startPosition, true, false, "36M", null, 10);
+ }
+ final int coveredEnd = CoordMath.getEnd(startPosition, readLength) +1;
+ final EdgeReadIterator sli = new EdgeReadIterator(builder.getSamReader());
+
+ int pos = 1;
+ final int coveredStart = 165;
+ for (final AbstractLocusInfo li : sli) {
+ Assert.assertEquals(li.getPosition(), pos++);
+ final int expectedReads;
+ if (li.getPosition() == coveredStart || li.getPosition() == coveredEnd) {
+ expectedReads = 2;
+ } else {
+ expectedReads = 0;
+ }
+ Assert.assertEquals(li.getRecordAndOffsets().size(), expectedReads);
+ }
+ Assert.assertEquals(pos, 100001);
+ }
+
+ /**
+ * Try all CIGAR operands (except H and P) and confirm that loci produced by EdgeReadIterator are as expected.
+ */
+ @Override
+ @Test
+ public void testSimpleGappedAlignment() {final SAMRecordSetBuilder builder = getRecordBuilder();
+ // add records up to coverage for the test in that position
+ final int startPosition = 165;
+ for (int i = 0; i < coverage; i++) {
+ // add a negative-strand fragment mapped on chrM with base quality of 10
+ builder.addFrag("record" + i, 0, startPosition, true, false, "3S3M3N3M3D3M3I1N18M3S", null, 10);
+ }
+ final EdgeReadIterator sli = new EdgeReadIterator(builder.getSamReader());
+ while (sli.hasNext()) {
+ AbstractLocusInfo<EdgingRecordAndOffset> info = sli.next();
+ int pos = info.getPosition();
+ if (pos == startPosition || pos == startPosition + 6 || pos == startPosition + 12 || pos == startPosition + 16) {
+ assertEquals(EdgingRecordAndOffset.Type.BEGIN, info.getRecordAndOffsets().get(0).getType());
+ assertEquals(EdgingRecordAndOffset.Type.BEGIN, info.getRecordAndOffsets().get(1).getType());
+ } else if (pos == startPosition + 3 || pos == startPosition + 9 || pos == startPosition + 15 || pos == startPosition + 34) {
+ assertEquals(EdgingRecordAndOffset.Type.END, info.getRecordAndOffsets().get(0).getType());
+ assertEquals(EdgingRecordAndOffset.Type.END, info.getRecordAndOffsets().get(1).getType());
+ }
+ }
+ }
+
+ /**
+ * Test two reads that overlap because one has a deletion in the middle of it.
+ */
+ @Override
+ @Test
+ public void testOverlappingGappedAlignmentsWithoutIndels() {
+ final SAMRecordSetBuilder builder = getRecordBuilder();
+ // add records up to coverage for the test in that position
+ final int startPosition = 1;
+ // Were it not for the gap, these two reads would not overlap
+
+ builder.addFrag("record1", 0, startPosition, true, false, "18M10D18M", null, 10);
+ builder.addFrag("record2", 0, 41, true, false, "36M", null, 10);
+
+ final EdgeReadIterator sli = new EdgeReadIterator(builder.getSamReader());
+ // 6-base overlap (positions 41-46) between the two reads
+ final int numBasesCovered = 81;
+ final int[] expectedReferencePositions = new int[numBasesCovered];
+ final int[] expectedDepths = new int[numBasesCovered];
+ final int[][] expectedReadOffsets = new int[numBasesCovered][];
+ List<Integer> start = Arrays.asList(0, 28, 40);
+ List<Integer> end = Arrays.asList(19, 47, 77);
+
+ int i;
+ // First 18 bases are from the first read
+ expectedDepths[0] = 1;
+ expectedReferencePositions[0] = 1;
+ expectedReadOffsets[0] = new int[]{0};
+
+ for (i = 1; i < 18; ++i) {
+ fillEmptyLocus(expectedReferencePositions, expectedDepths, expectedReadOffsets, i);
+ }
+ expectedDepths[i] = 1;
+ expectedReferencePositions[i] = 19;
+ expectedReadOffsets[i++] = new int[]{0};
+
+ for (; i < 28; ++i) {
+ fillEmptyLocus(expectedReferencePositions, expectedDepths, expectedReadOffsets, i);
+ }
+
+ // Gap of 10, then 13 bases from the first read
+ expectedDepths[i] = 1;
+ expectedReferencePositions[i] = 29;
+ expectedReadOffsets[i++] = new int[]{18};
+
+ for (; i < 40; ++i) {
+ fillEmptyLocus(expectedReferencePositions, expectedDepths, expectedReadOffsets, i);
+ }
+
+ expectedDepths[i] = 1;
+ expectedReferencePositions[i] = 41;
+ expectedReadOffsets[i++] = new int[]{0};
+
+ for (; i < 46; ++i) {
+ fillEmptyLocus(expectedReferencePositions, expectedDepths, expectedReadOffsets, i);
+ }
+
+ expectedDepths[i] = 1;
+ expectedReferencePositions[i] = 47;
+ expectedReadOffsets[i++] = new int[]{18};
+
+ // Last 5 bases of first read overlap first 5 bases of second read
+ for (; i < 76; ++i) {
+ fillEmptyLocus(expectedReferencePositions, expectedDepths, expectedReadOffsets, i);
+ }
+
+ expectedDepths[i] = 1;
+ expectedReferencePositions[i] = 77;
+ expectedReadOffsets[i++] = new int[]{0};
+
+ // Last 31 bases of 2nd read
+
+ for (; i <= 80; ++i) {
+ fillEmptyLocus(expectedReferencePositions, expectedDepths, expectedReadOffsets, i);
+ }
+
+ i = 0;
+ for (final AbstractLocusInfo<EdgingRecordAndOffset> li : sli) {
+ Assert.assertEquals(li.getRecordAndOffsets().size(), expectedDepths[i]);
+ Assert.assertEquals(li.getPosition(), expectedReferencePositions[i]);
+ Assert.assertEquals(li.getRecordAndOffsets().size(), expectedReadOffsets[i].length);
+ for (int j = 0; j < expectedReadOffsets[i].length; ++j) {
+ Assert.assertEquals(li.getRecordAndOffsets().get(j).getOffset(), expectedReadOffsets[i][j]);
+ if (start.contains(li.getPosition() - 1)) {
+ Assert.assertEquals(li.getRecordAndOffsets().get(j).getType(), EdgingRecordAndOffset.Type.BEGIN);
+ }
+ if (end.contains(li.getPosition() - 1)) {
+ Assert.assertEquals(li.getRecordAndOffsets().get(j).getType(), EdgingRecordAndOffset.Type.END);
+ }
+ }
+ ++i;
+ if (i == 80) {
+ break;
+ }
+ }
+ }
+
+ @Test(expectedExceptions = UnsupportedOperationException.class)
+ public void testSetQualityCutOff() {
+ final EdgeReadIterator sli = new EdgeReadIterator(createSamFileReader());
+
+ sli.setQualityScoreCutoff(10);
+ }
+
+ @Test(expectedExceptions = UnsupportedOperationException.class)
+ public void testSetMaxReadsToAccumulatePerLocus() {
+ final EdgeReadIterator sli = new EdgeReadIterator(createSamFileReader());
+
+ sli.setMaxReadsToAccumulatePerLocus(100);
+ }
+
+ @Test(expectedExceptions = UnsupportedOperationException.class)
+ public void testSetEmitUncoveredLoci() {
+ final EdgeReadIterator sli = new EdgeReadIterator(createSamFileReader());
+
+ sli.setEmitUncoveredLoci(false);
+ }
+
+ @Test(expectedExceptions = UnsupportedOperationException.class)
+ public void testSetIncludeIndels() {
+ final EdgeReadIterator sli = new EdgeReadIterator(createSamFileReader());
+
+ sli.setIncludeIndels(true);
+ }
+
+ /**
+ * Tests that reads, that don't intersect given interval list, are excluded from iterator
+ */
+ @Test
+ public void testNotIntersectingInterval() {
+ SamReader samReader = createSamFileReader();
+
+ IntervalList intervals = createIntervalList("@HD\tSO:coordinate\tVN:1.0\n" +
+ "@SQ\tSN:chrM\tLN:100\n" +
+ "chrM\t50\t60\t+\ttest");
+ EdgeReadIterator iterator = new EdgeReadIterator(samReader, intervals);
+ int locusPosition = 50;
+ while (iterator.hasNext()) {
+ AbstractLocusInfo<EdgingRecordAndOffset> next = iterator.next();
+ assertEquals(locusPosition++, next.getPosition());
+ assertEquals(0, next.getRecordAndOffsets().size());
+ }
+ assertEquals(61, locusPosition);
+ }
+
+ /**
+ * Tests that for reads, that intersect given interval list read start is shifted to the start of the interval and
+ * length is adjusted to the end of the interval.
+ */
+ @Test
+ public void testIntersectingInterval() {
+ SamReader samReader = createSamFileReader();
+ IntervalList intervals = createIntervalList("@HD\tSO:coordinate\tVN:1.0\n" +
+ "@SQ\tSN:chrM\tLN:100\n" +
+ "chrM\t5\t15\t+\ttest");
+ EdgeReadIterator iterator = new EdgeReadIterator(samReader, intervals);
+ int locusPosition = 5;
+ while (iterator.hasNext()) {
+ AbstractLocusInfo<EdgingRecordAndOffset> next = iterator.next();
+ int position = next.getPosition();
+ assertEquals(locusPosition++, position);
+ if (position == 5) {
+ assertEquals(2, next.getRecordAndOffsets().size());
+ for (EdgingRecordAndOffset record : next.getRecordAndOffsets()) {
+ assertEquals(11, record.getLength());
+ }
+ } else {
+ assertEquals(0, next.getRecordAndOffsets().size());
+ }
+ }
+ assertEquals(16, locusPosition);
+ }
+
+ /**
+ * Test for mixed reads: intersecting and not the interval
+ */
+ @Test
+ public void testIntersectingAndNotInterval() {
+
+ final SAMRecordSetBuilder builder = getRecordBuilder();
+ // add a single read that starts exactly at the interval start
+ final int startPosition = 40;
+ // the 36M read (positions 40-75) lies entirely within the [40, 80] interval
+ builder.addFrag("record2", 0, startPosition, true, false, "36M", null, 10);
+
+ IntervalList intervals = createIntervalList("@HD\tSO:coordinate\tVN:1.0\n" +
+ "@SQ\tSN:chrM\tLN:100\n" +
+ "chrM\t40\t80\t+\ttest");
+
+ EdgeReadIterator iterator = new EdgeReadIterator(builder.getSamReader(), intervals);
+ int locusPosition = 40;
+ while (iterator.hasNext()) {
+ AbstractLocusInfo<EdgingRecordAndOffset> next = iterator.next();
+ int position = next.getPosition();
+ assertEquals(locusPosition++, position);
+ if (position == 40) {
+ assertEquals(1, next.getRecordAndOffsets().size());
+ for (EdgingRecordAndOffset record : next.getRecordAndOffsets()) {
+ assertEquals(36, record.getLength());
+ assertEquals(EdgingRecordAndOffset.Type.BEGIN, record.getType());
+ }
+ } else if (position == 76) {
+ assertEquals(1, next.getRecordAndOffsets().size());
+ for (EdgingRecordAndOffset record : next.getRecordAndOffsets()) {
+ assertEquals(36, record.getLength());
+ assertEquals(EdgingRecordAndOffset.Type.END, record.getType());
+ }
+ } else {
+ assertEquals(0, next.getRecordAndOffsets().size());
+ }
+ }
+ assertEquals(81, locusPosition);
+ }
+
+
+ /**
+ * Test for intersecting interval for read with a deletion in the middle
+ */
+ @Test
+ public void testIntersectingIntervalWithComplicatedCigar() {
+
+ final SAMRecordSetBuilder builder = getRecordBuilder();
+ // add a single read with a 3-base deletion in the middle
+ final int startPosition = 1;
+ // the "10M3D26M" read (positions 1-39) extends past the [5, 20] interval on both sides
+ builder.addFrag("record", 0, startPosition, true, false, "10M3D26M", null, 10);
+
+ IntervalList intervals = createIntervalList("@HD\tSO:coordinate\tVN:1.0\n" +
+ "@SQ\tSN:chrM\tLN:100\n" +
+ "chrM\t5\t20\t+\ttest");
+
+ EdgeReadIterator iterator = new EdgeReadIterator(builder.getSamReader(), intervals);
+ int locusPosition = 5;
+ int[] expectedLength = new int[]{6, 7};
+ int i = 0;
+ while (iterator.hasNext()) {
+ AbstractLocusInfo<EdgingRecordAndOffset> next = iterator.next();
+ int position = next.getPosition();
+ assertEquals(locusPosition++, position);
+ if (position == 5 || position == 14) {
+ assertEquals(1, next.getRecordAndOffsets().size());
+ for (EdgingRecordAndOffset record : next.getRecordAndOffsets()) {
+ assertEquals(expectedLength[i], record.getLength());
+ assertEquals(EdgingRecordAndOffset.Type.BEGIN, record.getType());
+ }
+ } else if (position == 11) {
+ assertEquals(1, next.getRecordAndOffsets().size());
+ for (EdgingRecordAndOffset record : next.getRecordAndOffsets()) {
+ assertEquals(expectedLength[i], record.getLength());
+ assertEquals(EdgingRecordAndOffset.Type.END, record.getType());
+ }
+ i++;
+ } else {
+ assertEquals(0, next.getRecordAndOffsets().size());
+ }
+ }
+ assertEquals(21, locusPosition);
+ }
+
+
+ private void fillEmptyLocus(int[] expectedReferencePositions, int[] expectedDepths, int[][] expectedReadOffsets, int i) {
+ expectedReferencePositions[i] = i + 1;
+ expectedDepths[i] = 0;
+ expectedReadOffsets[i] = new int[]{};
+ }
+
+ private SamReader createSamFileReader() {
+ final SAMRecordSetBuilder builder = getRecordBuilder();
+ // add records up to coverage for the test in that position
+ final int startPosition = 1;
+ for (int i = 0; i < coverage; i++) {
+ // add a negative-strand fragment mapped on chrM with base quality of 10
+ builder.addFrag("record" + i, 0, startPosition, true, false, "36M", null, 10);
+ }
+ return builder.getSamReader();
+ }
+
+
+ private IntervalList createIntervalList(String s) {
+ return IntervalList.fromReader(new BufferedReader(new InputStreamReader(new ByteArrayInputStream(s.getBytes()))));
+ }
+}
diff --git a/src/test/java/htsjdk/samtools/util/EdgingRecordAndOffsetTest.java b/src/test/java/htsjdk/samtools/util/EdgingRecordAndOffsetTest.java
new file mode 100644
index 0000000..a4f6478
--- /dev/null
+++ b/src/test/java/htsjdk/samtools/util/EdgingRecordAndOffsetTest.java
@@ -0,0 +1,94 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools.util;
+
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMRecord;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.Test;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
+
+/**
+ *
+ * @author Mariia_Zueva at epam.com, EPAM Systems, Inc. <www.epam.com>
+ *
+ */
+
+public class EdgingRecordAndOffsetTest {
+ private final byte[] qualities = {30, 50, 50, 60, 60, 70 ,70, 70, 80, 90};
+ private final byte[] bases = {'A', 'C', 'G', 'T', 'A', 'C', 'G', 'T', 'T', 'C'};
+ private SAMRecord record;
+
+ @BeforeTest
+ public void setUp(){
+ record = new SAMRecord(new SAMFileHeader());
+ record.setReadName("testRecord");
+ record.setReadBases(bases);
+ record.setBaseQualities(qualities);
+ }
+
+ @Test
+ public void testConstructor(){
+ EdgingRecordAndOffset typedRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 3);
+ assertArrayEquals(qualities, typedRecordAndOffset.getBaseQualities());
+ assertArrayEquals(bases, typedRecordAndOffset.getRecord().getReadBases());
+ assertEquals('A', typedRecordAndOffset.getReadBase());
+ assertEquals(0, typedRecordAndOffset.getOffset());
+ assertEquals(3, typedRecordAndOffset.getRefPos());
+ assertEquals(EdgingRecordAndOffset.Type.BEGIN, typedRecordAndOffset.getType());
+ }
+
+ @Test
+ public void testGetSetStart(){
+ EdgingRecordAndOffset typedRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 3);
+ EdgingRecordAndOffset typedRecordAndOffsetEnd = EdgingRecordAndOffset.createEndRecord(typedRecordAndOffset);
+ assertEquals(typedRecordAndOffset, typedRecordAndOffsetEnd.getStart());
+ assertEquals(EdgingRecordAndOffset.Type.END, typedRecordAndOffsetEnd.getType());
+ }
+
+ @Test
+ public void testNotEqualsTypedRecords(){
+ EdgingRecordAndOffset typedRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 3);
+ EdgingRecordAndOffset secondEdgingRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 5, 10, 3);
+ assertNotSame(typedRecordAndOffset.getBaseQuality(), secondEdgingRecordAndOffset.getBaseQuality());
+ assertArrayEquals(typedRecordAndOffset.getBaseQualities(), secondEdgingRecordAndOffset.getBaseQualities());
+ }
+
+ @Test
+ public void testGetOffset(){
+ EdgingRecordAndOffset secondEdgingRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 5, 10, 3);
+ assertEquals(70, secondEdgingRecordAndOffset.getBaseQuality());
+ assertEquals('C', secondEdgingRecordAndOffset.getReadBase());
+ }
+
+ @Test
+ public void testGetQualityAtPosition(){
+ EdgingRecordAndOffset secondEdgingRecordAndOffset = EdgingRecordAndOffset.createBeginRecord(record, 0, 10, 1);
+ assertEquals(50, secondEdgingRecordAndOffset.getBaseQuality(2));
+ }
+}
diff --git a/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java b/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java
index 17e77b2..262b7c9 100644
--- a/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java
+++ b/src/test/java/htsjdk/samtools/util/SamLocusIteratorTest.java
@@ -23,40 +23,15 @@
*/
package htsjdk.samtools.util;
-import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecordSetBuilder;
-import htsjdk.samtools.SAMSequenceDictionary;
-import htsjdk.samtools.SAMSequenceRecord;
import org.testng.Assert;
import org.testng.annotations.Test;
/**
* @author alecw at broadinstitute.org
+ * @author Mariia_Zueva at epam.com, EPAM Systems, Inc. <www.epam.com>
*/
-public class SamLocusIteratorTest {
-
- /** Coverage for tests with the same reads */
- final static int coverage = 2;
-
- /** the read length for the testss */
- final static int readLength = 36;
-
- final static SAMFileHeader header = new SAMFileHeader();
-
- static {
- header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
- SAMSequenceDictionary dict = new SAMSequenceDictionary();
- dict.addSequence(new SAMSequenceRecord("chrM", 100000));
- header.setSequenceDictionary(dict);
- }
-
- /** Get the record builder for the tests with the default parameters that are needed */
- private static SAMRecordSetBuilder getRecordBuilder() {
- final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
- builder.setHeader(header);
- builder.setReadLength(readLength);
- return builder;
- }
+public class SamLocusIteratorTest extends AbstractLocusIteratorTestTemplate {
/** Create the SamLocusIterator with the builder*/
private SamLocusIterator createSamLocusIterator(final SAMRecordSetBuilder builder) {
@@ -68,6 +43,7 @@ public class SamLocusIteratorTest {
/**
* Test a simple with only matches, with both including or not indels
*/
+ @Override
@Test
public void testBasicIterator() {
final SAMRecordSetBuilder builder = getRecordBuilder();
@@ -118,6 +94,7 @@ public class SamLocusIteratorTest {
/**
* Test the emit uncovered loci, with both including or not indels
*/
+ @Override
@Test
public void testEmitUncoveredLoci() {
@@ -459,6 +436,7 @@ public class SamLocusIteratorTest {
* Try all CIGAR operands (except H and P) and confirm that loci produced by SamLocusIterator are as expected,
* with both including or not indels
*/
+ @Override
@Test
public void testSimpleGappedAlignment() {
final SAMRecordSetBuilder builder = getRecordBuilder();
@@ -551,6 +529,7 @@ public class SamLocusIteratorTest {
/**
* Test two reads that overlap because one has a deletion in the middle of it, without tracking indels
*/
+ @Override
@Test
public void testOverlappingGappedAlignmentsWithoutIndels() {
final SAMRecordSetBuilder builder = getRecordBuilder();
@@ -691,4 +670,4 @@ public class SamLocusIteratorTest {
}
}
-}
+}
\ No newline at end of file
diff --git a/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java b/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java
index 474a8a8..dbf23a0 100644
--- a/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java
+++ b/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java
@@ -52,17 +52,17 @@ public class BEDCodecTest {
BEDFeature feature;
feature = codec.decode("chr1 1");
- Assert.assertEquals(feature.getChr(), "chr1");
+ Assert.assertEquals(feature.getContig(), "chr1");
Assert.assertEquals(feature.getStart(), 2);
Assert.assertEquals(feature.getEnd(), 2);
feature = codec.decode("chr1 1 2");
- Assert.assertEquals(feature.getChr(), "chr1");
+ Assert.assertEquals(feature.getContig(), "chr1");
Assert.assertEquals(feature.getStart(), 2);
Assert.assertEquals(feature.getEnd(), 2);
feature = codec.decode("chr1 1 3");
- Assert.assertEquals(feature.getChr(), "chr1");
+ Assert.assertEquals(feature.getContig(), "chr1");
Assert.assertEquals(feature.getStart(), 2);
Assert.assertEquals(feature.getEnd(), 3);
}
@@ -77,7 +77,7 @@ public class BEDCodecTest {
// Borrowed samples from Example: on http://genome.ucsc.edu/FAQ/FAQformat#format1
feature = (FullBEDFeature) codec.decode("chr22 1000 5000 cloneA 960 + 1000 5000 0 2 567,488, 0,3512");
- Assert.assertEquals(feature.getChr(), "chr22");
+ Assert.assertEquals(feature.getContig(), "chr22");
Assert.assertEquals(feature.getStart(), 1001);
Assert.assertEquals(feature.getEnd(), 5000);
Assert.assertEquals(feature.getName(), "cloneA");
@@ -103,7 +103,7 @@ public class BEDCodecTest {
Assert.assertEquals(exons.get(1).getCodingLength(), 488);
feature = (FullBEDFeature) codec.decode("chr22 2000 6000 cloneB 900 - 2000 6000 0 2 433,399, 0,3601");
- Assert.assertEquals(feature.getChr(), "chr22");
+ Assert.assertEquals(feature.getContig(), "chr22");
Assert.assertEquals(feature.getStart(), 2001);
Assert.assertEquals(feature.getEnd(), 6000);
Assert.assertEquals(feature.getName(), "cloneB");
@@ -150,23 +150,23 @@ public class BEDCodecTest {
Iterable<Feature> iter = reader.iterator();
int count = 0;
for (Feature feat : iter) {
- Assert.assertTrue(feat.getChr().length() > 0);
+ Assert.assertTrue(feat.getContig().length() > 0);
Assert.assertTrue(feat.getEnd() >= feat.getStart());
if (count == 0) {
- Assert.assertEquals("1", feat.getChr());
+ Assert.assertEquals("1", feat.getContig());
Assert.assertEquals(25592413 + 1, feat.getStart());
Assert.assertEquals(25657872, feat.getEnd());
}
if (count == 3) {
- Assert.assertEquals("1", feat.getChr());
+ Assert.assertEquals("1", feat.getContig());
Assert.assertEquals(152555536 + 1, feat.getStart());
Assert.assertEquals(152587611, feat.getEnd());
}
if (count == 28) {
- Assert.assertEquals("14", feat.getChr());
+ Assert.assertEquals("14", feat.getContig());
Assert.assertEquals(73996607 + 1, feat.getStart());
Assert.assertEquals(74025282, feat.getEnd());
}
diff --git a/src/test/java/htsjdk/tribble/gelitext/GeliTextTest.java b/src/test/java/htsjdk/tribble/gelitext/GeliTextTest.java
index 681cc6b..c670bf1 100644
--- a/src/test/java/htsjdk/tribble/gelitext/GeliTextTest.java
+++ b/src/test/java/htsjdk/tribble/gelitext/GeliTextTest.java
@@ -77,7 +77,7 @@ public class GeliTextTest {
GeliTextFeature feat = iter.next();
// check the first records contents
// 22 14438070 A 0 0 GG 33.2618 33.2618 0 0 0 0 0 0 0 33.2618 0 0
- Assert.assertTrue("22".equals(feat.getChr()));
+ Assert.assertTrue("22".equals(feat.getContig()));
Assert.assertEquals(feat.getStart(), 14438070);
Assert.assertEquals('A', feat.getRefBase());
Assert.assertEquals(feat.getDepthOfCoverage(), 0.0, 0.0001);
diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java b/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java
index 868aacc..613dec5 100644
--- a/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java
+++ b/src/test/java/htsjdk/variant/variantcontext/VariantContextTestProvider.java
@@ -831,7 +831,7 @@ public class VariantContextTestProvider {
*/
public static void assertEquals( final VariantContext actual, final VariantContext expected ) {
Assert.assertNotNull(actual, "VariantContext expected not null");
- Assert.assertEquals(actual.getChr(), expected.getChr(), "chr");
+ Assert.assertEquals(actual.getContig(), expected.getContig(), "chr");
Assert.assertEquals(actual.getStart(), expected.getStart(), "start");
Assert.assertEquals(actual.getEnd(), expected.getEnd(), "end");
Assert.assertEquals(actual.getID(), expected.getID(), "id");
diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java
index be55f8f..14056f8 100644
--- a/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java
+++ b/src/test/java/htsjdk/variant/variantcontext/VariantContextUnitTest.java
@@ -26,6 +26,9 @@
package htsjdk.variant.variantcontext;
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.samtools.util.CloserUtil;
+
// the imports for unit testing.
import htsjdk.samtools.util.TestUtil;
@@ -35,8 +38,9 @@ import htsjdk.variant.VariantBaseTest;
import htsjdk.variant.bcf2.BCF2Codec;
import htsjdk.variant.vcf.VCFCodec;
import htsjdk.tribble.TribbleException;
-import htsjdk.variant.VariantBaseTest;
import htsjdk.variant.vcf.VCFConstants;
+import htsjdk.variant.vcf.VCFFileReader;
+
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeSuite;
@@ -184,7 +188,7 @@ public class VariantContextUnitTest extends VariantBaseTest {
List<Allele> alleles = Arrays.asList(Aref, T);
VariantContext vc = snpBuilder.alleles(alleles).make();
- Assert.assertEquals(vc.getChr(), snpLoc);
+ Assert.assertEquals(vc.getContig(), snpLoc);
Assert.assertEquals(vc.getStart(), snpLocStart);
Assert.assertEquals(vc.getEnd(), snpLocStop);
Assert.assertEquals(vc.getType(), VariantContext.Type.SNP);
@@ -212,7 +216,7 @@ public class VariantContextUnitTest extends VariantBaseTest {
List<Allele> alleles = Arrays.asList(Aref);
VariantContext vc = snpBuilder.alleles(alleles).make();
- Assert.assertEquals(vc.getChr(), snpLoc);
+ Assert.assertEquals(vc.getContig(), snpLoc);
Assert.assertEquals(vc.getStart(), snpLocStart);
Assert.assertEquals(vc.getEnd(), snpLocStop);
Assert.assertEquals(VariantContext.Type.NO_VARIATION, vc.getType());
@@ -239,7 +243,7 @@ public class VariantContextUnitTest extends VariantBaseTest {
List<Allele> alleles = Arrays.asList(ATCref, del);
VariantContext vc = new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
- Assert.assertEquals(vc.getChr(), delLoc);
+ Assert.assertEquals(vc.getContig(), delLoc);
Assert.assertEquals(vc.getStart(), delLocStart);
Assert.assertEquals(vc.getEnd(), delLocStop);
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
@@ -267,7 +271,7 @@ public class VariantContextUnitTest extends VariantBaseTest {
List<Allele> alleles = Arrays.asList(Tref, ATC);
VariantContext vc = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, alleles).make();
- Assert.assertEquals(vc.getChr(), insLoc);
+ Assert.assertEquals(vc.getContig(), insLoc);
Assert.assertEquals(vc.getStart(), insLocStart);
Assert.assertEquals(vc.getEnd(), insLocStop);
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
@@ -305,7 +309,7 @@ public class VariantContextUnitTest extends VariantBaseTest {
List<Allele> alleles = Arrays.asList(delRef, ATC);
VariantContext vc = insBuilder.alleles(alleles).make();
- Assert.assertEquals(vc.getChr(), insLoc);
+ Assert.assertEquals(vc.getContig(), insLoc);
Assert.assertEquals(vc.getStart(), insLocStart);
Assert.assertEquals(vc.getEnd(), insLocStop);
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
@@ -464,6 +468,8 @@ public class VariantContextUnitTest extends VariantBaseTest {
Assert.assertEquals(4, vc.getCalledChrCount(T));
Assert.assertEquals(3, vc.getCalledChrCount(ATC));
Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL));
+
+ Assert.assertEquals(T, vc.getAltAlleleWithHighestAlleleCount());
}
@Test
@@ -487,6 +493,16 @@ public class VariantContextUnitTest extends VariantBaseTest {
Assert.assertEquals(4, vc.getCalledChrCount(Aref));
Assert.assertEquals(0, vc.getCalledChrCount(T));
Assert.assertEquals(2, vc.getCalledChrCount(Allele.NO_CALL));
+
+ //bi allelic, only one alt allele
+ Allele expected;
+ if (alleles.size()>1) {
+ expected = alleles.get(1);
+ } else {
+ expected = null;
+ }
+
+ Assert.assertEquals( vc.getAltAlleleWithHighestAlleleCount(), expected);
}
}
@@ -600,6 +616,21 @@ public class VariantContextUnitTest extends VariantBaseTest {
Assert.assertEquals(4, vc125.getCalledChrCount(Aref));
}
+ @Test
+ public void testMonomorphicVariant() {
+ Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
+ Genotype g2 = GenotypeBuilder.create("BB", Arrays.asList(Aref, Allele.NO_CALL));
+ Genotype g3 = GenotypeBuilder.create("CC", Arrays.asList(Allele.NO_CALL,Allele.NO_CALL));
+ GenotypesContext gc = GenotypesContext.create(g1, g2, g3);
+ VariantContext vc = new VariantContextBuilder("genotypes", snpLoc, snpLocStart, snpLocStop, Collections.singletonList(Aref)).genotypes(gc).make();
+
+ Assert.assertEquals(vc.getType(), VariantContext.Type.NO_VARIATION);
+ Assert.assertNull(vc.getAltAlleleWithHighestAlleleCount());
+ Assert.assertEquals(vc.getCalledChrCount(Aref), 3);
+
+ }
+
+
public void testGetGenotypeMethods() {
Genotype g1 = GenotypeBuilder.create("AA", Arrays.asList(Aref, Aref));
Genotype g2 = GenotypeBuilder.create("AT", Arrays.asList(Aref, T));
@@ -732,7 +763,7 @@ public class VariantContextUnitTest extends VariantBaseTest {
@Test(dataProvider = "SitesAndGenotypesVC")
public void runModifyVCTests(SitesAndGenotypesVC cfg) {
VariantContext modified = new VariantContextBuilder(cfg.vc).loc("chr2", 123, 123).make();
- Assert.assertEquals(modified.getChr(), "chr2");
+ Assert.assertEquals(modified.getContig(), "chr2");
Assert.assertEquals(modified.getStart(), 123);
Assert.assertEquals(modified.getEnd(), 123);
@@ -752,7 +783,7 @@ public class VariantContextUnitTest extends VariantBaseTest {
Assert.assertEquals(modified.getAttribute("AC"), 1);
// test the behavior when the builder's attribute object is not initialized
- modified = new VariantContextBuilder(modified.getSource(), modified.getChr(), modified.getStart(), modified.getEnd(), modified.getAlleles()).attribute("AC", 1).make();
+ modified = new VariantContextBuilder(modified.getSource(), modified.getContig(), modified.getStart(), modified.getEnd(), modified.getAlleles()).attribute("AC", 1).make();
// test normal attribute modification
modified = new VariantContextBuilder(cfg.vc).attribute("AC", 1).make();
@@ -770,7 +801,7 @@ public class VariantContextUnitTest extends VariantBaseTest {
Assert.assertTrue(modified.getGenotypes().isEmpty());
// test that original hasn't changed
- Assert.assertEquals(cfg.vc.getChr(), cfg.copy.getChr());
+ Assert.assertEquals(cfg.vc.getContig(), cfg.copy.getContig());
Assert.assertEquals(cfg.vc.getStart(), cfg.copy.getStart());
Assert.assertEquals(cfg.vc.getEnd(), cfg.copy.getEnd());
Assert.assertEquals(cfg.vc.getAlleles(), cfg.copy.getAlleles());
@@ -832,7 +863,7 @@ public class VariantContextUnitTest extends VariantBaseTest {
VariantContext sub = vc.subContextFromSamples(cfg.samples, cfg.updateAlleles);
// unchanged attributes should be the same
- Assert.assertEquals(sub.getChr(), vc.getChr());
+ Assert.assertEquals(sub.getContig(), vc.getContig());
Assert.assertEquals(sub.getStart(), vc.getStart());
Assert.assertEquals(sub.getEnd(), vc.getEnd());
Assert.assertEquals(sub.getLog10PError(), vc.getLog10PError());
@@ -1228,13 +1259,34 @@ public class VariantContextUnitTest extends VariantBaseTest {
final VariantContext vcACSetTwoAlts =
createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesACTwoAlts, hetVarTC);
+ // with AC set, and two different ALTs (T and C), with no GT, we expect 2 count values.
+ final Map<String, Object> attributesACNoGtTwoAlts = new HashMap<String, Object>();
+ attributesACNoGtTwoAlts.put(VCFConstants.ALLELE_COUNT_KEY, Arrays.asList("1", "1"));
+ final VariantContext vcACNoGtSetTwoAlts =
+ createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesACNoGtTwoAlts, null);
+
+ // with AF set, and two different ALTs (T and C), with GT of 1/2, we expect two frequency values.
+ // With two ALTs, a list is expected, so we set the attribute as a list of 0.5,0.5
+ final Map<String, Object> attributesAFTwoAlts = new HashMap<String, Object>();
+ attributesAFTwoAlts.put(VCFConstants.ALLELE_FREQUENCY_KEY, Arrays.asList("0.5", "0.5"));
+ final VariantContext vcAFSetTwoAlts =
+ createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesAFTwoAlts, hetVarTC);
+
+ // with AF set, and two different ALTs (T and C), with no GT, we expect two frequency values.
+ final Map<String, Object> attributesAFNoGtTwoAlts = new HashMap<String, Object>();
+ attributesAFNoGtTwoAlts.put(VCFConstants.ALLELE_FREQUENCY_KEY, Arrays.asList("0.5", "0.5"));
+ final VariantContext vcAFNoGtSetTwoAlts =
+ createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesAFNoGtTwoAlts, null);
+
return new Object[][]{
{vcNoGenotypes},
{vcANSet},
{vcANSetNoCall},
{vcACSet},
{vcACSetNoAlts},
- {vcACSetTwoAlts}
+ {vcACNoGtSetTwoAlts},
+ {vcAFSetTwoAlts},
+ {vcAFNoGtSetTwoAlts}
};
}
@Test(dataProvider = "testValidateChromosomeCountsDataProvider")
@@ -1288,13 +1340,34 @@ public class VariantContextUnitTest extends VariantBaseTest {
final VariantContext vcACSetTwoAltsOneAltCount =
createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesACTwoAltsOneAltCount, hetVarTC);
+ // with AC set, no GT, two ALTs, but only count for one ALT (we expect two items in the list: 1,1)
+ final Map<String, Object> attributesACNoGtTwoAltsOneAltCount = new HashMap<String, Object>();
+ attributesACNoGtTwoAltsOneAltCount.put(VCFConstants.ALLELE_COUNT_KEY, Arrays.asList("1"));
+ final VariantContext vcACNoGtSetTwoAltsOneAltCount =
+ createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesACNoGtTwoAltsOneAltCount, null);
+
+ // with AF set, two ALTs, but only frequency for one ALT (we expect two items in the list)
+ final Map<String, Object> attributesAFTwoAltsWrongFreq = new HashMap<String, Object>();
+ attributesAFTwoAltsWrongFreq.put(VCFConstants.ALLELE_FREQUENCY_KEY, Arrays.asList("0.5"));
+ final VariantContext vcAFSetTwoAltsWrongFreq =
+ createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesAFTwoAltsWrongFreq, hetVarTC);
+
+ // with AF set, no GT, two ALTs, but only frequency for one ALT (we expect two items in the list)
+ final Map<String, Object> attributesAFNoGtTwoAltsWrongCount = new HashMap<String, Object>();
+ attributesAFNoGtTwoAltsWrongCount.put(VCFConstants.ALLELE_FREQUENCY_KEY, Arrays.asList("0.5"));
+ final VariantContext vcAFNoGtSetTwoAltsWrongFreq =
+ createValidateChromosomeCountsContext(Arrays.asList(Aref, T, C), attributesAFNoGtTwoAltsWrongCount, null);
+
return new Object[][]{
{vcANSet},
{vcANSetNoCall},
{vcACWrongCount},
{vcACSetTwoAlts},
{vcACSetTwoAltsWrongCount},
- {vcACSetTwoAltsOneAltCount}
+ {vcACSetTwoAltsOneAltCount},
+ {vcACNoGtSetTwoAltsOneAltCount},
+ {vcAFSetTwoAltsWrongFreq},
+ {vcAFNoGtSetTwoAltsWrongFreq}
};
}
@Test(dataProvider = "testValidateChromosomeCountsFailureDataProvider", expectedExceptions = TribbleException.class)
@@ -1447,4 +1520,102 @@ public class VariantContextUnitTest extends VariantBaseTest {
// extraStrictValidation throws exceptions if it fails, so no Asserts here...
vc.extraStrictValidation(reportedReference, observedReference, rsIDs);
}
+
+
+ @DataProvider(name = "structuralVariationsTestData")
+ public Object[][] getStructuralVariationsTestData() {
+ return new Object[][] {
+ {new File("src/test/resources/htsjdk/variant/structuralvariants.vcf")}
+ };
+ }
+
+ @Test(dataProvider = "structuralVariationsTestData")
+ public void testExtractStructuralVariationsData(final File vcfFile) {
+ VCFFileReader reader = null;
+ CloseableIterator<VariantContext> iter = null;
+ try {
+ reader = new VCFFileReader(vcfFile , false );
+ iter = reader.iterator();
+ while(iter.hasNext()) {
+ final VariantContext ctx = iter.next();
+ final StructuralVariantType st = ctx.getStructuralVariantType();
+ Assert.assertNotNull(st);
+ }
+ } finally {
+ CloserUtil.close(iter);
+ CloserUtil.close(reader);
+ }
+ }
+
+ @Test
+ public void testGetAttributeAsIntList() {
+ final VariantContext context = basicBuilder
+ .attribute("Empty", new int[0])
+ .attribute("DefaultIntegerList", new int[5])
+ .attribute("ListWithMissing", new Object[]{1, null, null})
+ .attribute("IntegerList", new int[]{0, 1, 2, 3})
+ .attribute("DoubleList", new double[]{1.8, 1.6, 2.1})
+ .attribute("StringList", new String[]{"1", "2"})
+ .attribute("NotNumeric", new String[]{"A", "B"})
+ .make();
+ // test an empty value
+ Assert.assertTrue(context.getAttributeAsIntList("Empty", 5).isEmpty());
+ // test as integer
+ Assert.assertEquals(context.getAttributeAsIntList("DefaultIntegerList", 5), Arrays.asList(0, 0, 0, 0, 0));
+ Assert.assertEquals(context.getAttributeAsIntList("ListWithMissing", 5), Arrays.asList(1, 5, 5));
+ Assert.assertEquals(context.getAttributeAsIntList("IntegerList", 5), Arrays.asList(0, 1, 2, 3));
+ Assert.assertEquals(context.getAttributeAsIntList("DoubleList", 5), Arrays.asList(1, 1, 2));
+ Assert.assertEquals(context.getAttributeAsIntList("StringList", 5), Arrays.asList(1, 2));
+ Assert.assertThrows(() -> context.getAttributeAsIntList("NotNumeric", 5));
+ // test the case of a missing key
+ Assert.assertTrue(context.getAttributeAsIntList("MissingList", 5).isEmpty());
+ }
+
+ @Test
+ public void testGetAttributeAsDoubleList() {
+ final VariantContext context = basicBuilder
+ .attribute("Empty", new int[0])
+ .attribute("DefaultIntegerList", new int[5])
+ .attribute("ListWithMissing", new Object[]{1, null, null})
+ .attribute("IntegerList", new int[]{0, 1, 2, 3})
+ .attribute("DoubleList", new double[]{1.8, 1.6, 2.1})
+ .attribute("StringList", new String[]{"1", "2"})
+ .attribute("NotNumeric", new String[]{"A", "B"})
+ .make();
+ // test an empty value
+ Assert.assertTrue(context.getAttributeAsDoubleList("Empty", 5).isEmpty());
+ // test as double
+ Assert.assertEquals(context.getAttributeAsDoubleList("DefaultIntegerList", 5), Arrays.asList(0d, 0d, 0d, 0d, 0d));
+ Assert.assertEquals(context.getAttributeAsDoubleList("ListWithMissing", 5), Arrays.asList(1d, 5d, 5d));
+ Assert.assertEquals(context.getAttributeAsDoubleList("IntegerList", 5), Arrays.asList(0d, 1d, 2d, 3d));
+ Assert.assertEquals(context.getAttributeAsDoubleList("DoubleList", 5), Arrays.asList(1.8, 1.6, 2.1));
+ Assert.assertEquals(context.getAttributeAsDoubleList("StringList", 5), Arrays.asList(1d, 2d));
+ Assert.assertThrows(() -> context.getAttributeAsDoubleList("NotNumeric", 5));
+ // test the case of a missing key
+ Assert.assertTrue(context.getAttributeAsDoubleList("MissingList", 5).isEmpty());
+ }
+
+ @Test
+ public void testGetAttributeAsStringList() {
+ final VariantContext context = basicBuilder
+ .attribute("Empty", new int[0])
+ .attribute("DefaultIntegerList", new int[5])
+ .attribute("ListWithMissing", new Object[]{1, null, null})
+ .attribute("IntegerList", new int[]{0, 1, 2, 3})
+ .attribute("DoubleList", new double[]{1.8, 1.6, 2.1})
+ .attribute("StringList", new String[]{"1", "2"})
+ .attribute("NotNumeric", new String[]{"A", "B"})
+ .make();
+ // test an empty value
+ Assert.assertTrue(context.getAttributeAsStringList("Empty", "empty").isEmpty());
+ // test as string
+ Assert.assertEquals(context.getAttributeAsStringList("DefaultIntegerList", "empty"), Arrays.asList("0", "0", "0", "0", "0"));
+ Assert.assertEquals(context.getAttributeAsStringList("ListWithMissing", "empty"), Arrays.asList("1", "empty", "empty"));
+ Assert.assertEquals(context.getAttributeAsStringList("IntegerList", "empty"), Arrays.asList("0", "1", "2", "3"));
+ Assert.assertEquals(context.getAttributeAsStringList("DoubleList", "empty"), Arrays.asList("1.8", "1.6", "2.1"));
+ Assert.assertEquals(context.getAttributeAsStringList("StringList", "empty"), Arrays.asList("1", "2"));
+ Assert.assertEquals(context.getAttributeAsStringList("NotNumeric", "empty"), Arrays.asList("A", "B"));
+ // test the case of a missing key
+ Assert.assertTrue(context.getAttributeAsStringList("MissingList", "empty").isEmpty());
+ }
}
diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java
index bebd393..78bf565 100644
--- a/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java
+++ b/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java
@@ -31,14 +31,10 @@ import htsjdk.variant.variantcontext.VariantContextUtils.JexlVCMatchExp;
import htsjdk.variant.vcf.VCFConstants;
import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
-import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
/**
@@ -55,6 +51,10 @@ public class VariantJEXLContextUnitTest extends VariantBaseTest {
private static final VariantContextUtils.JexlVCMatchExp exp
= new VariantContextUtils.JexlVCMatchExp("name", VariantContextUtils.engine.get().createExpression("QUAL > 500.0"));
+ private static final JexlVCMatchExp missingValueExpression = new VariantContextUtils.JexlVCMatchExp(
+ "Zis10", VariantContextUtils.engine.get().createExpression("Z==10"));
+
+
// SNP alleles: A[ref]/T[alt] at chr1:10. One (crappy) sample, one (bare minimum) VC.
private static final SimpleFeature eventLoc = new SimpleFeature("chr1", 10, 10);
private static final Allele Aref = Allele.create("A", true);
@@ -87,7 +87,45 @@ public class VariantJEXLContextUnitTest extends VariantBaseTest {
// eval our known expression
Assert.assertTrue(!jexlMap.get(exp));
}
-
+
+ @Test(dataProvider = "getMissingValueTestData")
+ public void testMissingBehaviorThroughMatch(VariantContext vc, JexlMissingValueTreatment missingValueTreatment, boolean expected, Class<? extends Exception> expectedException){
+ if(expectedException == null) {
+ Assert.assertEquals(VariantContextUtils.match(vc, null, missingValueExpression, missingValueTreatment), expected);
+ } else {
+ Assert.assertThrows(expectedException, () -> VariantContextUtils.match(vc, null, missingValueExpression, missingValueTreatment));
+ }
+ }
+
+ @Test(dataProvider = "getMissingValueTestData")
+ public void testMissingBehavior(VariantContext vc, JexlMissingValueTreatment missingValueTreatment, boolean expected, Class<? extends Exception> expectedException){
+ final JEXLMap jexlMap = new JEXLMap(Collections.singletonList(missingValueExpression), vc, null, missingValueTreatment);
+ if(expectedException == null) {
+ Assert.assertEquals((boolean) jexlMap.get(missingValueExpression), expected);
+ } else {
+ Assert.assertThrows(expectedException, () -> jexlMap.get(missingValueExpression));
+ }
+ }
+
+ @DataProvider
+ public Object[][] getMissingValueTestData(){
+ final List<Allele> alleles = Arrays.asList(Aref, Talt);
+ VariantContextBuilder vcb = new VariantContextBuilder("test", "chr1", 10, 10, alleles);
+ VariantContext noZ = vcb.make();
+ VariantContext hasZ = vcb.attribute("Z", 0).make();
+
+ return new Object[][]{
+ {noZ, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT, false, null},
+ {hasZ, JEXLMap.DEFAULT_MISSING_VALUE_TREATMENT, false, null}, //the value isn't missing but the expression is false
+ {noZ, JexlMissingValueTreatment.TREAT_AS_MATCH, true, null},
+ {hasZ, JexlMissingValueTreatment.TREAT_AS_MATCH, false, null}, //the value isn't missing but the expression is false
+ {noZ, JexlMissingValueTreatment.TREAT_AS_MISMATCH, false, null},
+ {hasZ, JexlMissingValueTreatment.TREAT_AS_MISMATCH, false, null},
+ {noZ, JexlMissingValueTreatment.THROW, false, IllegalArgumentException.class},
+ {hasZ, JexlMissingValueTreatment.THROW, false, null}
+ };
+ }
+
// Testing the new 'FT' and 'isPassFT' expressions in the JEXL map
@Test
public void testJEXLGenotypeFilters() {
diff --git a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
index af875fc..e9135cc 100644
--- a/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
+++ b/src/test/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
@@ -196,7 +196,9 @@ public class VCFHeaderUnitTest extends VariantBaseTest {
@Test
public void testVCFHeaderAddFilterLine() {
final VCFHeader header = getHiSeqVCFHeader();
- final VCFFilterHeaderLine filterLine = new VCFFilterHeaderLine("TestFilterLine");
+ final String filterDesc = "TestFilterLine Description";
+ final VCFFilterHeaderLine filterLine = new VCFFilterHeaderLine("TestFilterLine",filterDesc);
+ Assert.assertEquals(filterDesc,filterLine.getDescription());
header.addMetaDataLine(filterLine);
Assert.assertTrue(header.getFilterLines().contains(filterLine), "TestFilterLine not found in filter header lines");
diff --git a/src/test/resources/htsjdk/tribble/vcfexample.vcf.truncated.gz b/src/test/resources/htsjdk/tribble/vcfexample.vcf.truncated.gz
new file mode 100644
index 0000000..eaeb499
Binary files /dev/null and b/src/test/resources/htsjdk/tribble/vcfexample.vcf.truncated.gz differ
diff --git a/src/test/resources/htsjdk/tribble/vcfexample.vcf.truncated.hdr.gz b/src/test/resources/htsjdk/tribble/vcfexample.vcf.truncated.hdr.gz
new file mode 100644
index 0000000..bbea6c9
Binary files /dev/null and b/src/test/resources/htsjdk/tribble/vcfexample.vcf.truncated.hdr.gz differ
diff --git a/src/test/resources/htsjdk/variant/structuralvariants.vcf b/src/test/resources/htsjdk/variant/structuralvariants.vcf
new file mode 100644
index 0000000..5ffad2f
--- /dev/null
+++ b/src/test/resources/htsjdk/variant/structuralvariants.vcf
@@ -0,0 +1,22 @@
+##fileformat=VCFv4.2
+##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">
+##INFO=<ID=END_CHR,Number=A,Type=String,Description="End chromosome of the variant described in this record">
+##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
+##INFO=<ID=SVLEN,Number=A,Type=Integer,Description="Difference in length between REF and ALT alleles">
+##INFO=<ID=SVTYPE,Number=A,Type=String,Description="Type of structural variant">
+##INFO=<ID=STRAND_1,Number=1,Type=String,Description="Strand Orientation of SV Start">
+##INFO=<ID=STRAND_2,Number=1,Type=String,Description="Strand Orientation of SV End">
+##INFO=<ID=METHOD,Number=1,Type=String,Description="SV Caller used to predict">
+##INFO=<ID=DP,Number=1,Type=String,Description="combined depth across samples">
+##ALT=<ID=DEL,Description="Deletion">
+##ALT=<ID=DUP,Description="Duplication">
+##ALT=<ID=INS,Description="Insertion of novel sequence">
+##ALT=<ID=INV,Description="Inversion">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=AO,Number=1,Type=Integer,Description="Alternate Allele Observations">
+##contig=<ID=1,length=14640000>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1
+1 20 . N <DUP> 1 . IMPRECISE;SVTYPE=DUP;END=4641652;END_CHR=1;STRAND_1=-;STRAND_2=+;SVLEN=4641632;METHOD=LUMPY;DP=90 GT:AO 1/1:90
+1 33 . N <DUP> 1 . IMPRECISE;SVTYPE=DUP;END=2640388;END_CHR=1;STRAND_1=-;STRAND_2=+;SVLEN=2640355;METHOD=LUMPY;DP=3 GT:AO 1/1:3
+1 70 . N <DEL> 1 . IMPRECISE;SVTYPE=DEL;END=4641583;END_CHR=1;STRAND_1=+;STRAND_2=-;SVLEN=-4641513;METHOD=LUMPY;DP=1 GT:AO 1/1:1
+1 101 . N <INV> 1 . IMPRECISE;SVTYPE=INV;END=1988714;END_CHR=1;STRAND_1=-;STRAND_2=-;SVLEN=1988613;METHOD=LUMPY;DP=2 GT:AO 1/1:2
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/htsjdk.git
More information about the debian-med-commit
mailing list