[med-svn] [htsjdk] 02/06: New upstream version 2.7.0+dfsg
Vincent Danjean
vdanjean at debian.org
Sun Nov 27 20:35:35 UTC 2016
This is an automated email from the git hooks/post-receive script.
vdanjean pushed a commit to branch master
in repository htsjdk.
commit 1c8643b172d63ec6b4632a43379e7f39716103cd
Author: Vincent Danjean <Vincent.Danjean at ens-lyon.org>
Date: Fri Nov 11 20:45:47 2016 +0100
New upstream version 2.7.0+dfsg
---
.../java/htsjdk/samtools/AbstractBAMFileIndex.java | 3 +-
.../htsjdk/samtools/AbstractSAMHeaderRecord.java | 3 +-
src/main/java/htsjdk/samtools/BAMIndexer.java | 6 +-
src/main/java/htsjdk/samtools/CRAMFileReader.java | 66 +-
.../htsjdk/samtools/DefaultSAMRecordFactory.java | 2 +-
.../htsjdk/samtools/MergingSamRecordIterator.java | 3 +-
src/main/java/htsjdk/samtools/SAMFileHeader.java | 6 +
src/main/java/htsjdk/samtools/SAMFileReader.java | 751 ---------------------
.../htsjdk/samtools/SAMFileTruncatedReader.java | 69 --
src/main/java/htsjdk/samtools/SAMRecord.java | 154 ++++-
.../java/htsjdk/samtools/SAMRecordSetBuilder.java | 2 +-
src/main/java/htsjdk/samtools/SAMRecordUtil.java | 100 +--
.../htsjdk/samtools/SAMSequenceDictionary.java | 109 ++-
.../java/htsjdk/samtools/SAMSequenceRecord.java | 3 +-
src/main/java/htsjdk/samtools/SAMUtils.java | 63 +-
.../java/htsjdk/samtools/SamFileHeaderMerger.java | 30 +-
.../java/htsjdk/samtools/SamFileValidator.java | 7 +-
src/main/java/htsjdk/samtools/SamReader.java | 26 +-
src/main/java/htsjdk/samtools/TextTagCodec.java | 5 +-
.../java/htsjdk/samtools/sra/SRAAccession.java | 3 +-
.../java/htsjdk/samtools/util/IntervalList.java | 11 +-
.../samtools/util/PositionalOutputStream.java | 65 ++
.../samtools/util/QualityEncodingDetector.java | 10 +-
.../util/SamRecordIntervalIteratorFactory.java | 2 +-
.../java/htsjdk/samtools/util/SequenceUtil.java | 174 ++---
src/main/java/htsjdk/samtools/util/StringUtil.java | 55 ++
src/main/java/htsjdk/samtools/util/TestUtil.java | 3 +-
src/main/java/htsjdk/tribble/bed/BEDCodec.java | 12 +-
.../java/htsjdk/tribble/index/AbstractIndex.java | 13 +-
src/main/java/htsjdk/tribble/index/Index.java | 8 +
.../java/htsjdk/tribble/index/IndexCreator.java | 7 +
.../java/htsjdk/tribble/index/IndexFactory.java | 16 +-
.../htsjdk/tribble/index/TribbleIndexCreator.java | 16 +
.../htsjdk/tribble/index/tabix/TabixIndex.java | 9 +-
.../variantcontext/GenotypeJEXLContext.java | 25 +-
.../htsjdk/variant/variantcontext/JEXLMap.java | 174 ++---
.../variantcontext/VariantContextUtils.java | 54 +-
.../variant/variantcontext/VariantJEXLContext.java | 16 +-
.../writer/IndexingVariantContextWriter.java | 54 +-
.../java/htsjdk/samtools/BAMFileWriterTest.java | 2 +-
.../java/htsjdk/samtools/CRAMFileReaderTest.java | 96 ++-
.../java/htsjdk/samtools/SAMIntegerTagTest.java | 46 ++
.../java/htsjdk/samtools/SAMRecordUnitTest.java | 78 +++
.../java/htsjdk/samtools/SAMRecordUtilTest.java | 29 -
.../htsjdk/samtools/SAMSequenceDictionaryTest.java | 55 ++
src/test/java/htsjdk/samtools/SAMUtilsTest.java | 27 +-
.../java/htsjdk/samtools/SamReaderSortTest.java | 2 +-
.../{SAMFileReaderTest.java => SamReaderTest.java} | 2 +-
.../samtools/util/PositionalOutputStreamTest.java | 62 ++
.../samtools/util/RelativeIso8601DateTest.java | 10 +-
.../htsjdk/samtools/util/SequenceUtilTest.java | 33 +-
.../java/htsjdk/samtools/util/StringUtilTest.java | 52 ++
.../java/htsjdk/tribble/FeatureReaderTest.java | 2 +-
src/test/java/htsjdk/tribble/bed/BEDCodecTest.java | 11 +
.../htsjdk/tribble/index/IndexFactoryTest.java | 5 +
src/test/java/htsjdk/tribble/index/IndexTest.java | 47 ++
.../htsjdk/tribble/index/tabix/TabixIndexTest.java | 3 +-
.../variantcontext/VariantJEXLContextUnitTest.java | 276 +++++---
.../reference_with_lower_and_uppercase.dict | 3 +
.../reference_with_lower_and_uppercase.fasta | 4 +
.../reference_with_lower_and_uppercase.fasta.fai | 2 +
.../SequenceUtil/upper_and_lowercase_read.sam | 10 +
62 files changed, 1453 insertions(+), 1539 deletions(-)
diff --git a/src/main/java/htsjdk/samtools/AbstractBAMFileIndex.java b/src/main/java/htsjdk/samtools/AbstractBAMFileIndex.java
index 4475e00..6bf28ef 100644
--- a/src/main/java/htsjdk/samtools/AbstractBAMFileIndex.java
+++ b/src/main/java/htsjdk/samtools/AbstractBAMFileIndex.java
@@ -387,8 +387,9 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
}
/**
- * @deprecated Invoke htsjdk.samtools.Chunk#optimizeChunkList(java.util.List<htsjdk.samtools.Chunk>, long) directly.
+ * @deprecated Invoke {@link Chunk#optimizeChunkList} directly.
*/
+ @Deprecated
protected List<Chunk> optimizeChunkList(final List<Chunk> chunks, final long minimumOffset) {
return Chunk.optimizeChunkList(chunks, minimumOffset);
}
diff --git a/src/main/java/htsjdk/samtools/AbstractSAMHeaderRecord.java b/src/main/java/htsjdk/samtools/AbstractSAMHeaderRecord.java
index 42d09db..769a7a7 100644
--- a/src/main/java/htsjdk/samtools/AbstractSAMHeaderRecord.java
+++ b/src/main/java/htsjdk/samtools/AbstractSAMHeaderRecord.java
@@ -50,8 +50,9 @@ public abstract class AbstractSAMHeaderRecord implements Serializable {
* Otherwise, the value will be converted to a String with toString.
* @param key attribute name
* @param value attribute value
- * @deprecated Use the version that takes a String value instead
+ * @deprecated Use {@link #setAttribute(String, String)} instead
*/
+ @Deprecated
public void setAttribute(final String key, final Object value) {
setAttribute(key, value == null? null: value.toString());
}
diff --git a/src/main/java/htsjdk/samtools/BAMIndexer.java b/src/main/java/htsjdk/samtools/BAMIndexer.java
index 80b557a..f5b1558 100644
--- a/src/main/java/htsjdk/samtools/BAMIndexer.java
+++ b/src/main/java/htsjdk/samtools/BAMIndexer.java
@@ -282,7 +282,7 @@ public class BAMIndexer {
/**
* Generates a BAM index file from an input BAM file
*
- * @param reader SAMFileReader for input BAM file
+ * @param reader SamReader for input BAM file
* @param output File for output index file
*/
public static void createIndex(SamReader reader, File output) {
@@ -292,7 +292,7 @@ public class BAMIndexer {
/**
* Generates a BAM index file from an input BAM file
*
- * @param reader SAMFileReader for input BAM file
+ * @param reader SamReader for input BAM file
* @param output File for output index file
*/
public static void createIndex(SamReader reader, File output, Log log) {
@@ -310,4 +310,4 @@ public class BAMIndexer {
}
indexer.finish();
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/htsjdk/samtools/CRAMFileReader.java b/src/main/java/htsjdk/samtools/CRAMFileReader.java
index acdb8ba..9a29d36 100644
--- a/src/main/java/htsjdk/samtools/CRAMFileReader.java
+++ b/src/main/java/htsjdk/samtools/CRAMFileReader.java
@@ -26,7 +26,7 @@ import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
-import htsjdk.samtools.util.CoordMath;
+import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeEOFException;
import java.io.File;
@@ -57,6 +57,8 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
private ValidationStringency validationStringency;
+ private final static Log log = Log.getInstance(CRAMFileReader.class);
+
/**
* Create a CRAMFileReader from either a file or input stream using the reference source returned by
* {@link ReferenceSource#getDefaultCRAMReferenceSource() getDefaultCRAMReferenceSource}.
@@ -95,6 +97,9 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
this.cramFile = cramFile;
this.inputStream = inputStream;
this.referenceSource = referenceSource;
+ if (cramFile != null) {
+ mIndexFile = findIndexForFile(null, cramFile);
+ }
getIterator();
}
@@ -117,7 +122,7 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
}
this.cramFile = cramFile;
- this.mIndexFile = indexFile;
+ mIndexFile = findIndexForFile(indexFile, cramFile);
this.referenceSource = referenceSource;
getIterator();
@@ -140,6 +145,7 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
this.cramFile = cramFile;
this.referenceSource = referenceSource;
+ mIndexFile = findIndexForFile(null, cramFile);
getIterator();
}
@@ -164,21 +170,8 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
if (referenceSource == null) {
throw new IllegalArgumentException("A reference is required for CRAM readers");
}
-
- this.inputStream = inputStream;
this.referenceSource = referenceSource;
- this.validationStringency = validationStringency;
-
- iterator = new CRAMIterator(inputStream, referenceSource, validationStringency);
- if (indexInputStream != null) {
- SeekableStream baiStream = SamIndexes.asBaiSeekableStreamOrNull(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary());
- if (null != baiStream) {
- mIndex = new CachingBAMFileIndex(baiStream, iterator.getSAMFileHeader().getSequenceDictionary());
- }
- else {
- throw new IllegalArgumentException("CRAM index must be a BAI or CRAI stream");
- }
- }
+ initWithStreams(inputStream, indexInputStream, validationStringency);
}
/**
@@ -196,7 +189,7 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
public CRAMFileReader(final InputStream stream,
final File indexFile, final CRAMReferenceSource referenceSource,
final ValidationStringency validationStringency) throws IOException {
- this(stream, indexFile == null ? null: new SeekableFileStream(indexFile), referenceSource, validationStringency);
+ this(stream, indexFile == null ? null : new SeekableFileStream(indexFile), referenceSource, validationStringency);
}
/**
@@ -211,11 +204,44 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
*
* @throws IllegalArgumentException if the {@code cramFile} or the {@code CRAMReferenceSource} is null
*/
- public CRAMFileReader(final File cramFile,
- final File indexFile, final CRAMReferenceSource referenceSource,
+ public CRAMFileReader(final File cramFile, final File indexFile, final CRAMReferenceSource referenceSource,
final ValidationStringency validationStringency) throws IOException {
- this(new FileInputStream(cramFile), indexFile, referenceSource, validationStringency);
+ if (cramFile == null) {
+ throw new IllegalArgumentException("Input file can not be null for CRAM reader");
+ }
+ if (referenceSource == null) {
+ throw new IllegalArgumentException("A reference is required for CRAM readers");
+ }
this.cramFile = cramFile;
+ this.referenceSource = referenceSource;
+ this.mIndexFile = findIndexForFile(indexFile, cramFile);
+ final SeekableFileStream indexStream = this.mIndexFile == null ? null : new SeekableFileStream(this.mIndexFile);
+ initWithStreams(new FileInputStream(cramFile), indexStream, validationStringency);
+ }
+
+ private void initWithStreams(final InputStream inputStream, final SeekableStream indexInputStream,
+ final ValidationStringency validationStringency) throws IOException {
+ this.inputStream = inputStream;
+ this.validationStringency = validationStringency;
+ iterator = new CRAMIterator(inputStream, referenceSource, validationStringency);
+ if (indexInputStream != null) {
+ SeekableStream baiStream = SamIndexes.asBaiSeekableStreamOrNull(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary());
+ if (null != baiStream) {
+ mIndex = new CachingBAMFileIndex(baiStream, iterator.getSAMFileHeader().getSequenceDictionary());
+ }
+ else {
+ throw new IllegalArgumentException("CRAM index must be a BAI or CRAI stream");
+ }
+ }
+ }
+
+ private File findIndexForFile(File indexFile, final File cramFile) {
+ indexFile = indexFile == null ? SamFiles.findIndex(cramFile) : indexFile;
+ if (indexFile != null && indexFile.lastModified() < cramFile.lastModified()) {
+ log.warn("CRAM index file " + indexFile.getAbsolutePath() +
+ " is older than CRAM " + cramFile.getAbsolutePath());
+ }
+ return indexFile;
}
@Override
diff --git a/src/main/java/htsjdk/samtools/DefaultSAMRecordFactory.java b/src/main/java/htsjdk/samtools/DefaultSAMRecordFactory.java
index 8a6077a..7e3848e 100644
--- a/src/main/java/htsjdk/samtools/DefaultSAMRecordFactory.java
+++ b/src/main/java/htsjdk/samtools/DefaultSAMRecordFactory.java
@@ -1,7 +1,7 @@
package htsjdk.samtools;
/**
- * Default factory for creating SAM and BAM records used by the SAMFileReader classes.
+ * Default factory for creating SAM and BAM records used by the {@link SamReader} classes.
*
* @author Tim Fennell
*/
diff --git a/src/main/java/htsjdk/samtools/MergingSamRecordIterator.java b/src/main/java/htsjdk/samtools/MergingSamRecordIterator.java
index 6b790fe..a294752 100644
--- a/src/main/java/htsjdk/samtools/MergingSamRecordIterator.java
+++ b/src/main/java/htsjdk/samtools/MergingSamRecordIterator.java
@@ -50,8 +50,9 @@ public class MergingSamRecordIterator implements CloseableIterator<SAMRecord> {
*
* @param headerMerger The merged header and contents of readers.
* @param forcePresorted True to ensure that the iterator checks the headers of the readers for appropriate sort order.
- * @deprecated replaced by (SamFileHeaderMerger, Collection<SAMFileReader>, boolean)
+ * @deprecated replaced by {@link #MergingSamRecordIterator(SamFileHeaderMerger, Collection, boolean)}
*/
+ @Deprecated
public MergingSamRecordIterator(final SamFileHeaderMerger headerMerger, final boolean forcePresorted) {
this(headerMerger, headerMerger.getReaders(), forcePresorted);
}
diff --git a/src/main/java/htsjdk/samtools/SAMFileHeader.java b/src/main/java/htsjdk/samtools/SAMFileHeader.java
index 22d18a6..47543c2 100644
--- a/src/main/java/htsjdk/samtools/SAMFileHeader.java
+++ b/src/main/java/htsjdk/samtools/SAMFileHeader.java
@@ -120,6 +120,12 @@ public class SAMFileHeader extends AbstractSAMHeaderRecord
setAttribute(VERSION_TAG, CURRENT_VERSION);
}
+ /** Constructor that initializes the sequence dictionary with the provided one. */
+ public SAMFileHeader(final SAMSequenceDictionary dict) {
+ this();
+ setSequenceDictionary(dict);
+ }
+
public String getVersion() {
return (String) getAttribute("VN");
}
diff --git a/src/main/java/htsjdk/samtools/SAMFileReader.java b/src/main/java/htsjdk/samtools/SAMFileReader.java
deleted file mode 100644
index 07189f7..0000000
--- a/src/main/java/htsjdk/samtools/SAMFileReader.java
+++ /dev/null
@@ -1,751 +0,0 @@
-/*
- * The MIT License
- *
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-package htsjdk.samtools;
-
-
-import htsjdk.samtools.seekablestream.SeekableBufferedStream;
-import htsjdk.samtools.seekablestream.SeekableHTTPStream;
-import htsjdk.samtools.seekablestream.SeekableStream;
-import htsjdk.samtools.util.*;
-
-import java.io.*;
-import java.net.URL;
-import java.util.NoSuchElementException;
-import java.util.zip.GZIPInputStream;
-
-/**
- * Class for reading and querying SAM/BAM files. Delegates to appropriate concrete implementation.
- *
- * @see SamReaderFactory
- */
-@Deprecated
-public class SAMFileReader implements SamReader, SamReader.Indexing {
-
- private static ValidationStringency defaultValidationStringency = ValidationStringency.DEFAULT_STRINGENCY;
-
- public static ValidationStringency getDefaultValidationStringency() {
- return defaultValidationStringency;
- }
-
- /**
- * Set validation stringency for all subsequently-created SAMFileReaders. This is the only way to
- * change the validation stringency for SAM header.
- * NOTE: Programs that change this should make sure to have a try/finally clause wrapping the work that
- * they do, so that the original stringency can be restored after the program's work is done. This facilitates
- * calling a program that is usually run stand-alone from another program, without messing up the original
- * validation stringency.
- */
- public static void setDefaultValidationStringency(final ValidationStringency defaultValidationStringency) {
- SAMFileReader.defaultValidationStringency = defaultValidationStringency;
- }
-
- /**
- * Returns the SAMSequenceDictionary from the provided FASTA.
- */
- public static SAMSequenceDictionary getSequenceDictionary(final File dictionaryFile) {
- final SAMFileReader samFileReader = new SAMFileReader(dictionaryFile);
- final SAMSequenceDictionary dict = samFileReader.getFileHeader().getSequenceDictionary();
- CloserUtil.close(dictionaryFile);
- return dict;
- }
-
- private boolean mIsBinary = false;
- private BAMIndex mIndex = null;
- private SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
- private ReaderImplementation mReader = null;
- private boolean useAsyncIO = Defaults.USE_ASYNC_IO_READ_FOR_SAMTOOLS;
-
- private File samFile = null;
-
- private static class EmptySamIterator implements CloseableIterator<SAMRecord> {
- @Override
- public boolean hasNext() {
- return false;
- }
-
- @Override
- public SAMRecord next() {
- throw new NoSuchElementException("next called on empty iterator");
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException("Not supported: remove");
- }
-
- @Override
- public void close() {
- //no-op
- }
- }
-
-
- /**
- * Prepare to read a SAM or BAM file. Indexed lookup not allowed because reading from InputStream.
- */
- public SAMFileReader(final InputStream stream) {
- this(stream, false);
- }
-
- /**
- * Prepare to read a SAM or BAM file. If the given file is a BAM, and has a companion BAI index file
- * that is named according to the convention, it will be found and opened, and indexed query will be allowed.
- */
- public SAMFileReader(final File file) {
- this(file, null, false);
- }
-
- /**
- * Prepare to read a SAM or BAM file. If the given file is a BAM, and an index is present, indexed query
- * will be allowed.
- *
- * @param file SAM or BAM to read.
- * @param indexFile Index file that is companion to BAM, or null if no index file, or if index file
- * should be found automatically.
- */
- public SAMFileReader(final File file, final File indexFile) {
- this(file, indexFile, false);
- }
-
- /**
- * Read a SAM or BAM file. Indexed lookup not allowed because reading from InputStream.
- *
- * @param stream input SAM or BAM. This is buffered internally so caller need not buffer.
- * @param eagerDecode if true, decode SAM record entirely when reading it.
- */
- public SAMFileReader(final InputStream stream, final boolean eagerDecode) {
- init(stream, null, null, eagerDecode, defaultValidationStringency);
- }
-
- /**
- * Read a SAM or BAM file, possibly with an index file if present.
- * If the given file is a BAM, and an index is present, indexed query will be allowed.
- *
- * @param file SAM or BAM.
- * @param eagerDecode if true, decode SAM record entirely when reading it.
- */
- public SAMFileReader(final File file, final boolean eagerDecode) {
- this(file, null, eagerDecode);
- }
-
- /**
- * Read a SAM or BAM file, possibly with an index file. If the given file is a BAM, and an index is present,
- * indexed query will be allowed.
- *
- * @param file SAM or BAM.
- * @param indexFile Location of index file, or null in order to use the default index file (if present).
- * @param eagerDecode eagerDecode if true, decode SAM record entirely when reading it.
- */
- public SAMFileReader(final File file, final File indexFile, final boolean eagerDecode) {
- init(null, file, indexFile, eagerDecode, defaultValidationStringency);
- }
-
- /**
- * Read a BAM file by http
- * indexed query will be allowed.
- *
- * @param url BAM.
- * @param indexFile Location of index file, or null if indexed access not required.
- * @param eagerDecode eagerDecode if true, decode SAM record entirely when reading it.
- */
- public SAMFileReader(final URL url, final File indexFile, final boolean eagerDecode) {
- init(new SeekableBufferedStream(new SeekableHTTPStream(url)),
- indexFile, eagerDecode, defaultValidationStringency);
- }
-
- /**
- * Read a BAM file via caller-supplied mechanism. Indexed query will be allowed, but
- * index file must be provided in that case.
- *
- * @param strm BAM -- If the stream is not buffered, caller should wrap in SeekableBufferedStream for
- * better performance.
- * @param indexFile Location of index file, or null indexed access not required.
- * @param eagerDecode if true, decode SAM record entirely when reading it.
- */
- public SAMFileReader(final SeekableStream strm, final File indexFile, final boolean eagerDecode) {
- init(strm, indexFile, eagerDecode, defaultValidationStringency);
- }
-
- /**
- * @param strm BAM -- If the stream is not buffered, caller should wrap in SeekableBufferedStream for
- * better performance.
- */
- public SAMFileReader(final SeekableStream strm, final SeekableStream indexStream, final boolean eagerDecode) {
- init(strm, indexStream, eagerDecode, defaultValidationStringency);
- }
-
- public void close() {
- if (mReader != null) {
- mReader.close();
- }
- mReader = null;
- mIndex = null;
- }
-
- /**
- * If true, this reader will use asynchronous IO.
- */
- public void setUseAsyncIO(final boolean useAsyncIO) {
- this.useAsyncIO = useAsyncIO;
- }
-
- /**
- * If true, writes the source of every read into the source SAMRecords.
- *
- * @param enabled true to write source information into each SAMRecord.
- */
- public void enableFileSource(final boolean enabled) {
- mReader.enableFileSource(this, enabled);
- }
-
- /**
- * If true, uses the caching version of the index reader.
- *
- * @param enabled true to use the caching version of the reader.
- */
- public void enableIndexCaching(final boolean enabled) {
- if (mIndex != null)
- throw new SAMException("Unable to turn on index caching; index file has already been loaded.");
- mReader.enableIndexCaching(enabled);
- }
-
- /**
- * If false, disable the use of memory mapping for accessing index files (default behavior is to use memory mapping).
- * This is slower but more scalable when accessing large numbers of BAM files sequentially.
- *
- * @param enabled True to use memory mapping, false to use regular I/O.
- */
- public void enableIndexMemoryMapping(final boolean enabled) {
- if (mIndex != null) {
- throw new SAMException("Unable to change index memory mapping; index file has already been loaded.");
- }
- mReader.enableIndexMemoryMapping(enabled);
- }
-
- /**
- * Only meaningful for BAM file readers - enables or disables checking of checksums on uncompressed
- * data during decompression. Enabling this will increase decompression time by 15-30%.
- */
- public void enableCrcChecking(final boolean enabled) {
- this.mReader.enableCrcChecking(enabled);
- }
-
- /**
- * Override the default SAMRecordFactory class used to instantiate instances of SAMRecord and BAMRecord.
- */
- public void setSAMRecordFactory(final SAMRecordFactory factory) {
- this.samRecordFactory = factory;
- this.mReader.setSAMRecordFactory(factory);
- }
-
- /**
- * @return True if this is a BAM reader.
- */
- public boolean isBinary() {
- return mIsBinary;
- }
-
- /**
- * @return true if ths is a BAM file, and has an index
- */
- public boolean hasIndex() {
- return mReader.hasIndex();
- }
-
- @Override
- public Indexing indexing() {
- return this;
- }
-
- /**
- * Retrieves the index for the given file type. Ensure that the index is of the specified type.
- *
- * @return An index of the given type.
- */
- public BAMIndex getIndex() {
- return mReader.getIndex();
- }
-
- /**
- * Returns true if the supported index is browseable, meaning the bins in it can be traversed
- * and chunk data inspected and retrieved.
- *
- * @return True if the index supports the BrowseableBAMIndex interface. False otherwise.
- */
- public boolean hasBrowseableIndex() {
- return hasIndex() && getIndex() instanceof BrowseableBAMIndex;
- }
-
- /**
- * Gets an index tagged with the BrowseableBAMIndex interface. Throws an exception if no such
- * index is available.
- *
- * @return An index with a browseable interface, if possible.
- * @throws SAMException if no such index is available.
- */
- public BrowseableBAMIndex getBrowseableIndex() {
- final BAMIndex index = getIndex();
- if (!(index instanceof BrowseableBAMIndex))
- throw new SAMException("Cannot return index: index created by BAM is not browseable.");
- return BrowseableBAMIndex.class.cast(index);
- }
-
- public SAMFileHeader getFileHeader() {
- return mReader.getFileHeader();
- }
-
- @Override
- public Type type() {
- return mReader.type();
- }
-
- @Override
- public String getResourceDescription() {
- return this.toString();
- }
-
- /**
- * Control validation of SAMRecords as they are read from file.
- * In order to control validation stringency for SAM Header, call SAMFileReader.setDefaultValidationStringency
- * before constructing a SAMFileReader.
- */
- public void setValidationStringency(final ValidationStringency validationStringency) {
- mReader.setValidationStringency(validationStringency);
- }
-
- /**
- * Iterate through file in order. For a SAMFileReader constructed from an InputStream, and for any SAM file,
- * a 2nd iteration starts where the 1st one left off. For a BAM constructed from a File, each new iteration
- * starts at the first record.
- * <p/>
- * Only a single open iterator on a SAM or BAM file may be extant at any one time. If you want to start
- * a second iteration, the first one must be closed first.
- */
- public SAMRecordIterator iterator() {
- return new AssertingIterator(mReader.getIterator());
- }
-
- /**
- * Iterate through the given chunks in the file.
- *
- * @param chunks List of chunks for which to retrieve data.
- * @return An iterator over the given chunks.
- */
- public SAMRecordIterator iterator(final SAMFileSpan chunks) {
- return new AssertingIterator(mReader.getIterator(chunks));
- }
-
- /**
- * Gets a pointer spanning all reads in the BAM file.
- *
- * @return Unbounded pointer to the first record, in chunk format.
- */
- public SAMFileSpan getFilePointerSpanningReads() {
- return mReader.getFilePointerSpanningReads();
- }
-
- /**
- * Iterate over records that match the given interval. Only valid to call this if hasIndex() == true.
- * <p/>
- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
- * a second iteration, the first one must be closed first. You can use a second SAMFileReader to iterate
- * in parallel over the same underlying file.
- * <p/>
- * Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
- * and then discarded because they do not match the interval of interest.
- * <p/>
- * Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
- * is in the query region.
- *
- * @param sequence Reference sequence of interest.
- * @param start 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
- * @param end 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
- * @param contained If true, each SAMRecord returned is will have its alignment completely contained in the
- * interval of interest. If false, the alignment of the returned SAMRecords need only overlap the interval of interest.
- * @return Iterator over the SAMRecords matching the interval.
- */
- public SAMRecordIterator query(final String sequence, final int start, final int end, final boolean contained) {
- final int referenceIndex = getFileHeader().getSequenceIndex(sequence);
- final CloseableIterator<SAMRecord> currentIterator;
- if (referenceIndex == -1) {
- currentIterator = new EmptySamIterator();
- } else {
- final QueryInterval[] queryIntervals = {new QueryInterval(referenceIndex, start, end)};
- currentIterator = mReader.query(queryIntervals, contained);
- }
- return new AssertingIterator(currentIterator);
- }
-
- /**
- * Iterate over records that overlap the given interval. Only valid to call this if hasIndex() == true.
- * <p/>
- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
- * a second iteration, the first one must be closed first.
- * <p/>
- * Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
- * and then discarded because they do not match the interval of interest.
- * <p/>
- * Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
- * is in the query region.
- *
- * @param sequence Reference sequence of interest.
- * @param start 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
- * @param end 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
- * @return Iterator over the SAMRecords overlapping the interval.
- */
- public SAMRecordIterator queryOverlapping(final String sequence, final int start, final int end) {
- return query(sequence, start, end, false);
- }
-
- /**
- * Iterate over records that are contained in the given interval. Only valid to call this if hasIndex() == true.
- * <p/>
- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
- * a second iteration, the first one must be closed first.
- * <p/>
- * Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
- * and then discarded because they do not match the interval of interest.
- * <p/>
- * Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
- * is in the query region.
- *
- * @param sequence Reference sequence of interest.
- * @param start 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
- * @param end 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
- * @return Iterator over the SAMRecords contained in the interval.
- */
- public SAMRecordIterator queryContained(final String sequence, final int start, final int end) {
- return query(sequence, start, end, true);
- }
-
- /**
- * Iterate over records that match one of the given intervals. This may be more efficient than querying
- * each interval separately, because multiple reads of the same SAMRecords is avoided.
- * <p/>
- * Only valid to call this if hasIndex() == true.
- * <p/>
- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
- * a second iteration, the first one must be closed first. You can use a second SAMFileReader to iterate
- * in parallel over the same underlying file.
- * <p/>
- * Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
- * and then discarded because they do not match an interval of interest.
- * <p/>
- * Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
- * is in the query region.
- *
- * @param intervals Intervals to be queried. The intervals must be optimized, i.e. in order, with overlapping
- * and abutting intervals merged. This can be done with {@link htsjdk.samtools.QueryInterval#optimizeIntervals}
- * @param contained If true, each SAMRecord returned is will have its alignment completely contained in one of the
- * intervals of interest. If false, the alignment of the returned SAMRecords need only overlap one of
- * the intervals of interest.
- * @return Iterator over the SAMRecords matching the interval.
- */
- public SAMRecordIterator query(final QueryInterval[] intervals, final boolean contained) {
- return new AssertingIterator(mReader.query(intervals, contained));
- }
-
- /**
- * Iterate over records that overlap any of the given intervals. This may be more efficient than querying
- * each interval separately, because multiple reads of the same SAMRecords is avoided.
- * <p/>
- * Only valid to call this if hasIndex() == true.
- * <p/>
- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
- * a second iteration, the first one must be closed first.
- * <p/>
- * Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
- * and then discarded because they do not match the interval of interest.
- * <p/>
- * Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
- * is in the query region.
- *
- * @param intervals Intervals to be queried. The intervals must be optimized, i.e. in order, with overlapping
- * and abutting intervals merged. This can be done with {@link htsjdk.samtools.QueryInterval#optimizeIntervals}
- * @return Iterator over the SAMRecords overlapping any of the intervals.
- */
- public SAMRecordIterator queryOverlapping(final QueryInterval[] intervals) {
- return query(intervals, false);
- }
-
- /**
- * Iterate over records that are contained in the given interval. This may be more efficient than querying
- * each interval separately, because multiple reads of the same SAMRecords is avoided.
- * <p/>
- * Only valid to call this if hasIndex() == true.
- * <p/>
- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
- * a second iteration, the first one must be closed first.
- * <p/>
- * Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
- * and then discarded because they do not match the interval of interest.
- * <p/>
- * Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
- * is in the query region.
- *
- * @param intervals Intervals to be queried. The intervals must be optimized, i.e. in order, with overlapping
- * and abutting intervals merged. This can be done with {@link htsjdk.samtools.QueryInterval#optimizeIntervals}
- * @return Iterator over the SAMRecords contained in any of the intervals.
- */
- public SAMRecordIterator queryContained(final QueryInterval[] intervals) {
- return query(intervals, true);
- }
-
-
- public SAMRecordIterator queryUnmapped() {
- return new AssertingIterator(mReader.queryUnmapped());
- }
-
- /**
- * Iterate over records that map to the given sequence and start at the given position. Only valid to call this if hasIndex() == true.
- * <p/>
- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
- * a second iteration, the first one must be closed first.
- * <p/>
- * Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
- * and then discarded because they do not match the interval of interest.
- * <p/>
- * Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
- * matches the arguments.
- *
- * @param sequence Reference sequence of interest.
- * @param start Alignment start of interest.
- * @return Iterator over the SAMRecords with the given alignment start.
- */
- public SAMRecordIterator queryAlignmentStart(final String sequence, final int start) {
- return new AssertingIterator(mReader.queryAlignmentStart(sequence, start));
- }
-
- /**
- * Fetch the mate for the given read. Only valid to call this if hasIndex() == true.
- * This will work whether the mate has a coordinate or not, so long as the given read has correct
- * mate information. This method iterates over the SAM file, so there may not be an unclosed
- * iterator on the SAM file when this method is called.
- * <p/>
- * Note that it is not possible to call queryMate when iterating over the SAMFileReader, because queryMate
- * requires its own iteration, and there cannot be two simultaneous iterations on the same SAMFileReader. The
- * work-around is to open a second SAMFileReader on the same input file, and call queryMate on the second
- * reader.
- *
- * @param rec Record for which mate is sought. Must be a paired read.
- * @return rec's mate, or null if it cannot be found.
- */
- public SAMRecord queryMate(final SAMRecord rec) {
- if (!rec.getReadPairedFlag()) {
- throw new IllegalArgumentException("queryMate called for unpaired read.");
- }
- if (rec.getFirstOfPairFlag() == rec.getSecondOfPairFlag()) {
- throw new IllegalArgumentException("SAMRecord must be either first and second of pair, but not both.");
- }
- final boolean firstOfPair = rec.getFirstOfPairFlag();
- final CloseableIterator<SAMRecord> it;
- if (rec.getMateReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- it = queryUnmapped();
- } else {
- it = queryAlignmentStart(rec.getMateReferenceName(), rec.getMateAlignmentStart());
- }
- try {
- SAMRecord mateRec = null;
- while (it.hasNext()) {
- final SAMRecord next = it.next();
- if (!next.getReadPairedFlag()) {
- if (rec.getReadName().equals(next.getReadName())) {
- throw new SAMFormatException("Paired and unpaired reads with same name: " + rec.getReadName());
- }
- continue;
- }
- if (firstOfPair) {
- if (next.getFirstOfPairFlag()) continue;
- } else {
- if (next.getSecondOfPairFlag()) continue;
- }
- if (rec.getReadName().equals(next.getReadName())) {
- if (mateRec != null) {
- throw new SAMFormatException("Multiple SAMRecord with read name " + rec.getReadName() +
- " for " + (firstOfPair ? "second" : "first") + " end.");
- }
- mateRec = next;
- }
- }
- return mateRec;
- } finally {
- it.close();
- }
- }
-
-
- private void init(final SeekableStream strm, final File indexFile, final boolean eagerDecode,
- final ValidationStringency validationStringency) {
-
- try {
- if (streamLooksLikeBam(strm)) {
- mIsBinary = true;
- mReader = new BAMFileReader(strm, indexFile, eagerDecode, useAsyncIO, validationStringency, this.samRecordFactory);
- } else {
- throw new SAMFormatException("Unrecognized file format: " + strm);
- }
- setValidationStringency(validationStringency);
- } catch (final IOException e) {
- throw new RuntimeIOException(e);
- }
- }
-
- private void init(final SeekableStream strm, final SeekableStream indexStream, final boolean eagerDecode,
- final ValidationStringency validationStringency) {
-
- try {
- if (streamLooksLikeBam(strm)) {
- mIsBinary = true;
- mReader = new BAMFileReader(strm, indexStream, eagerDecode, useAsyncIO, validationStringency, this.samRecordFactory);
- } else {
- throw new SAMFormatException("Unrecognized file format: " + strm);
- }
- setValidationStringency(validationStringency);
- } catch (final IOException e) {
- throw new RuntimeIOException(e);
- }
- }
-
- // Its too expensive to examine the remote file to determine type.
- // Rely on file extension.
- private boolean streamLooksLikeBam(final SeekableStream strm) {
- String source = strm.getSource();
- if (source == null) return true;
- source = source.toLowerCase();
- //Source will typically be a file path or URL
- //If it's a URL we require one of the query parameters to be bam file
- return source.endsWith(".bam") || source.contains(".bam?") || source.contains(".bam&") || source.contains(".bam%26");
- }
-
- private void init(final InputStream stream, File file, final File indexFile, final boolean eagerDecode,
- final ValidationStringency validationStringency) {
- if (stream != null && file != null) throw new IllegalArgumentException("stream and file are mutually exclusive");
- this.samFile = file;
-
- try {
- BufferedInputStream bufferedStream;
- // Buffering is required because mark() and reset() are called on the input stream.
- final int bufferSize = Math.max(Defaults.BUFFER_SIZE, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE);
- if (file != null) bufferedStream = new BufferedInputStream(new FileInputStream(file), bufferSize);
- else bufferedStream = IOUtil.toBufferedStream(stream);
- if (SamStreams.isBAMFile(bufferedStream)) {
- mIsBinary = true;
- if (file == null || !file.isFile()) {
- // Handle case in which file is a named pipe, e.g. /dev/stdin or created by mkfifo
- mReader = new BAMFileReader(bufferedStream, indexFile, eagerDecode, useAsyncIO, validationStringency, this.samRecordFactory);
- } else {
- bufferedStream.close();
- mReader = new BAMFileReader(file, indexFile, eagerDecode, useAsyncIO, validationStringency, this.samRecordFactory);
- }
- } else if (BlockCompressedInputStream.isValidFile(bufferedStream)) {
- mIsBinary = false;
- mReader = new SAMTextReader(new BlockCompressedInputStream(bufferedStream), validationStringency, this.samRecordFactory);
- } else if (SamStreams.isGzippedSAMFile(bufferedStream)) {
- mIsBinary = false;
- mReader = new SAMTextReader(new GZIPInputStream(bufferedStream), validationStringency, this.samRecordFactory);
- } else if (SamStreams.isCRAMFile(bufferedStream)) {
- if (file == null || !file.isFile()) {
- file = null;
- } else {
- bufferedStream.close();
- bufferedStream = null;
- }
- mReader = new CRAMFileReader(file, bufferedStream);
- } else if (isSAMFile(bufferedStream)) {
- if (indexFile != null) {
- bufferedStream.close();
- throw new RuntimeException("Cannot use index file with textual SAM file");
- }
- mIsBinary = false;
- mReader = new SAMTextReader(bufferedStream, file, validationStringency, this.samRecordFactory);
- } else {
- bufferedStream.close();
- throw new SAMFormatException("Unrecognized file format");
- }
-
- setValidationStringency(validationStringency);
- mReader.setSAMRecordFactory(this.samRecordFactory);
- } catch (final IOException e) {
- throw new RuntimeIOException(e);
- }
- }
-
- private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length)
- throws IOException {
- int bytesRead = 0;
- while (bytesRead < length) {
- final int count = stream.read(buffer, offset + bytesRead, length - bytesRead);
- if (count <= 0) {
- break;
- }
- bytesRead += count;
- }
- return bytesRead;
- }
-
- private boolean isSAMFile(final InputStream stream) {
- // For now, assume every non-binary file is a SAM text file.
- return true;
- }
-
- @Override
- public String toString() {
- if (this.samFile == null) {
- return getClass().getSimpleName() + "{initialized with stream}";
- } else {
- return getClass().getSimpleName() + "{" + this.samFile.getAbsolutePath() + "}";
- }
- }
-
- /**
- * Convenience method to create a QueryInterval
- *
- * @param sequence sequence of interest, must exist in sequence dictionary
- * @param start 1-based start position, must be >= 1
- * @param end 1-based end position.
- * @throws java.lang.IllegalArgumentException if sequence not found in sequence dictionary, or start position < 1
- */
- public QueryInterval makeQueryInterval(final String sequence, int start, int end) {
- int referenceIndex = getFileHeader().getSequenceIndex(sequence);
- if (referenceIndex < 0) {
- throw new IllegalArgumentException(String.format("Sequence '%s' not found in sequence dictionary", sequence));
- }
- if (start < 1) {
- throw new IllegalArgumentException("Start position must be >= 1");
- }
- return new QueryInterval(referenceIndex, start, end);
- }
-
- /**
- * Convenience method to create a QueryInterval that goes from start to end of given sequence.
- *
- * @param sequence sequence of interest, must exist in sequence dictionary
- * @param start 1-based start position, must be >= 1
- * @throws java.lang.IllegalArgumentException if sequence not found in sequence dictionary, or start position < 1
- */
- public QueryInterval makeQueryInterval(final String sequence, int start) {
- return makeQueryInterval(sequence, start, 0);
- }
-
-}
diff --git a/src/main/java/htsjdk/samtools/SAMFileTruncatedReader.java b/src/main/java/htsjdk/samtools/SAMFileTruncatedReader.java
deleted file mode 100644
index c63dfb5..0000000
--- a/src/main/java/htsjdk/samtools/SAMFileTruncatedReader.java
+++ /dev/null
@@ -1,69 +0,0 @@
-package htsjdk.samtools;
-
-import java.io.File;
-import java.util.NoSuchElementException;
-
-/**
- * A truncated form of a SAMFileReader that iterates over a limited number of records.
- *
- * @author mccowan at broadinstitute.org
- */
-@Deprecated
-public class SAMFileTruncatedReader extends SAMFileReader {
- private class TruncatedIterator implements SAMRecordIterator {
- final SAMRecordIterator i;
- final long max;
- long currentRecord = 0;
-
- TruncatedIterator(final SAMRecordIterator i, final long max) {
- this.i = i;
- this.max = max;
- }
-
- public boolean hasNext() {
- return i.hasNext() && max != currentRecord;
- }
-
- public SAMRecord next() {
- if (this.hasNext()) {
- currentRecord += 1;
- return i.next();
- } else {
- throw new NoSuchElementException();
- }
- }
-
- public void remove() {
- i.remove();
- }
-
- public void close() {
- i.close();
- }
-
- public SAMRecordIterator assertSorted(final SAMFileHeader.SortOrder sortOrder) {
- return i.assertSorted(sortOrder);
- }
- }
-
- private final long maxRecordsToIterate;
-
- /**
- * @param input The SAM file
- * @param max The maximum number of records to read from the file via iterator() methods
- */
- public SAMFileTruncatedReader(final File input, final long max) {
- super(input);
- this.maxRecordsToIterate = max;
- }
-
- @Override
- public SAMRecordIterator iterator() {
- return new TruncatedIterator(super.iterator(), maxRecordsToIterate);
- }
-
- @Override
- public SAMRecordIterator iterator(final SAMFileSpan chunks) {
- return new TruncatedIterator(super.iterator(chunks), maxRecordsToIterate);
- }
-}
diff --git a/src/main/java/htsjdk/samtools/SAMRecord.java b/src/main/java/htsjdk/samtools/SAMRecord.java
index eb76c28..13ec386 100644
--- a/src/main/java/htsjdk/samtools/SAMRecord.java
+++ b/src/main/java/htsjdk/samtools/SAMRecord.java
@@ -26,17 +26,12 @@ package htsjdk.samtools;
import htsjdk.samtools.util.CoordMath;
import htsjdk.samtools.util.Locatable;
+import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
import java.io.Serializable;
import java.lang.reflect.Array;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
/**
@@ -161,6 +156,16 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
*/
public static final int MAX_INSERT_SIZE = 1<<29;
+ /**
+ * Tags that are known to need the reverse complement if the read is reverse complemented.
+ */
+ public static List<String> TAGS_TO_REVERSE_COMPLEMENT = Arrays.asList(SAMTag.E2.name(), SAMTag.SQ.name());
+
+ /**
+ * Tags that are known to need the reverse if the read is reverse complemented.
+ */
+ public static List<String> TAGS_TO_REVERSE = Arrays.asList(SAMTag.OQ.name(), SAMTag.U2.name());
+
private String mReadName = null;
private byte[] mReadBases = NULL_SEQUENCE;
private byte[] mBaseQualities = NULL_QUALS;
@@ -985,9 +990,10 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
/**
* the query sequence itself is unmapped. This method name is misspelled.
- * Use setReadUnmappedFlag instead.
+ * Use {@link #setReadUnmappedFlag} instead.
* @deprecated
*/
+ @Deprecated
public void setReadUmappedFlag(final boolean flag) {
setReadUnmappedFlag(flag);
}
@@ -1396,7 +1402,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* @deprecated
* The attribute type and value checks have been moved directly into
- * {@code SAMBinaryTagAndValue}.
+ * {@link SAMBinaryTagAndValue}.
*/
@Deprecated
protected static boolean isAllowedAttributeValue(final Object value) {
@@ -1648,8 +1654,9 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
*
* @return String representation of this.
* @deprecated This method is not guaranteed to return a valid SAM text representation of the SAMRecord.
- * To get standard SAM text representation, use htsjdk.samtools.SAMRecord#getSAMString().
+ * To get standard SAM text representation, use {@link SAMRecord#getSAMString}.
*/
+ @Deprecated
public String format() {
final StringBuilder buffer = new StringBuilder();
addField(buffer, getReadName(), null, null);
@@ -2045,7 +2052,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
/**
* Gets the source of this SAM record -- both the reader that retrieved the record and the position on disk from
* whence it came.
- * @return The file source. Note that the reader will be null if not activated using SAMFileReader.enableFileSource().
+ * @return The file source. Note that the reader will be null if the reader source has not been set.
*/
public SAMFileSource getFileSource() {
return mFileSource;
@@ -2110,7 +2117,8 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
/**
* Note that this does a shallow copy of everything, except for the attribute list, for which a copy of the list
* is made, but the attributes themselves are copied by reference. This should be safe because callers should
- * never modify a mutable value returned by any of the get() methods anyway.
+ * never modify a mutable value returned by any of the get() methods anyway. If one of the cloned record's SEQ or
+ * QUAL needs to be modified, a deeper copy should be made (e.g. Reverse Complement).
*/
@Override
public Object clone() throws CloneNotSupportedException {
@@ -2246,5 +2254,125 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
if (this.transientAttributes != null) return this.transientAttributes.remove(key);
else return null;
}
-}
+ /**
+ * Reverse-complement bases and reverse quality scores along with known optional attributes that
+ * need the same treatment. Changes are made after making a copy of the bases, qualities,
+ * and any attributes that will be altered. If in-place update is needed use
+ * {@link #reverseComplement(boolean)}.
+ * See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE}
+ * for the default set of tags that are handled.
+ */
+ public void reverseComplement() {
+ reverseComplement(false);
+ }
+
+ /**
+ * Reverse-complement bases and reverse quality scores along with known optional attributes that
+ * need the same treatment. Optionally makes a copy of the bases, qualities or attributes instead
+ * of altering them in-place. See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE}
+ * for the default set of tags that are handled.
+ *
+ * @param inplace Setting this to false will clone all attributes, bases and qualities before changing the values.
+ */
+ public void reverseComplement(boolean inplace) {
+ reverseComplement(TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE, inplace);
+ }
+
+ /**
+ * Reverse complement bases and reverse quality scores. In addition reverse complement any
+ * non-null attributes specified by tagsToRevcomp and reverse any non-null attributes
+ * specified by tagsToReverse.
+ */
+ public void reverseComplement(final Collection<String> tagsToRevcomp, final Collection<String> tagsToReverse, boolean inplace) {
+ final byte[] readBases = inplace ? getReadBases() : getReadBases().clone();
+ SequenceUtil.reverseComplement(readBases);
+ setReadBases(readBases);
+ final byte qualities[] = inplace ? getBaseQualities() : getBaseQualities().clone();
+ reverseArray(qualities);
+ setBaseQualities(qualities);
+
+ // Deal with tags that need to be reverse complemented
+ if (tagsToRevcomp != null) {
+ for (final String tag: tagsToRevcomp) {
+ Object value = getAttribute(tag);
+ if (value != null) {
+ if (value instanceof byte[]) {
+ value = inplace ? value : ((byte[]) value).clone();
+ SequenceUtil.reverseComplement((byte[]) value);
+ } else if (value instanceof String) {
+ //SequenceUtil.reverseComplement is in-place for bytes but copies Strings since they are immutable.
+ value = SequenceUtil.reverseComplement((String) value);
+ } else {
+ throw new UnsupportedOperationException("Don't know how to reverse complement: " + value);
+ }
+ setAttribute(tag, value);
+ }
+ }
+ }
+
+ // Deal with tags that needed to just be reversed
+ if (tagsToReverse != null) {
+ for (final String tag : tagsToReverse) {
+ Object value = getAttribute(tag);
+ if (value != null) {
+ if (value instanceof String) {
+ value = StringUtil.reverseString((String) value);
+ } else if (value.getClass().isArray()) {
+ if (value instanceof byte[]) {
+ value = inplace ? value : ((byte[]) value).clone();
+ reverseArray((byte[]) value);
+ } else if (value instanceof short[]) {
+ value = inplace ? value : ((short[]) value).clone();
+ reverseArray((short[]) value);
+ } else if (value instanceof int[]) {
+ value = inplace ? value : ((int[]) value).clone();
+ reverseArray((int[]) value);
+ } else if (value instanceof float[]) {
+ value = inplace ? value : ((float[]) value).clone();
+ reverseArray((float[]) value);
+ } else {
+ throw new UnsupportedOperationException("Reversing array attribute of type " + value.getClass().getComponentType() + " not supported.");
+ }
+ } else {
+ throw new UnsupportedOperationException("Don't know how to reverse: " + value);
+ }
+
+ setAttribute(tag, value);
+ }
+ }
+ }
+ }
+
+ private static void reverseArray(final byte[] array) {
+ for (int i=0, j=array.length-1; i<j; ++i, --j) {
+ final byte tmp = array[i];
+ array[i] = array[j];
+ array[j] = tmp;
+ }
+ }
+
+ private static void reverseArray(final short[] array) {
+ for (int i=0, j=array.length-1; i<j; ++i, --j) {
+ final short tmp = array[i];
+ array[i] = array[j];
+ array[j] = tmp;
+ }
+ }
+
+ private static void reverseArray(final int[] array) {
+ for (int i=0, j=array.length-1; i<j; ++i, --j) {
+ final int tmp = array[i];
+ array[i] = array[j];
+ array[j] = tmp;
+ }
+ }
+
+ private static void reverseArray(final float[] array) {
+ for (int i=0, j=array.length-1; i<j; ++i, --j) {
+ final float tmp = array[i];
+ array[i] = array[j];
+ array[j] = tmp;
+ }
+ }
+}
diff --git a/src/main/java/htsjdk/samtools/SAMRecordSetBuilder.java b/src/main/java/htsjdk/samtools/SAMRecordSetBuilder.java
index 714199f..2af91c3 100644
--- a/src/main/java/htsjdk/samtools/SAMRecordSetBuilder.java
+++ b/src/main/java/htsjdk/samtools/SAMRecordSetBuilder.java
@@ -541,7 +541,7 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
/**
* Creates samFileReader from the data in instance of this class
*
- * @return SAMFileReader
+ * @return SamReader
*/
public SamReader getSamReader() {
diff --git a/src/main/java/htsjdk/samtools/SAMRecordUtil.java b/src/main/java/htsjdk/samtools/SAMRecordUtil.java
index d290074..d778789 100644
--- a/src/main/java/htsjdk/samtools/SAMRecordUtil.java
+++ b/src/main/java/htsjdk/samtools/SAMRecordUtil.java
@@ -31,19 +31,39 @@ import java.util.Collection;
import java.util.List;
/**
+ *
+ * Use {@link SAMRecord#reverseComplement()} instead, which defaults to making a copy of attributes for reverse
+ * complement rather than changing them in-place.
+ *
* @author alecw at broadinstitute.org
*/
+@Deprecated
public class SAMRecordUtil {
public static List<String> TAGS_TO_REVERSE_COMPLEMENT = Arrays.asList(SAMTag.E2.name(), SAMTag.SQ.name());
public static List<String> TAGS_TO_REVERSE = Arrays.asList(SAMTag.OQ.name(), SAMTag.U2.name());
/**
* Reverse-complement bases and reverse quality scores along with known optional attributes that
- * need the same treatment. See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE}
+ * need the same treatment. Changes are made in-place, instead of making a copy of the bases, qualities,
+ * or attributes. If a copy is needed use {@link #reverseComplement(SAMRecord, boolean)}.
+ * See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE}
* for the default set of tags that are handled.
*/
public static void reverseComplement(final SAMRecord rec) {
- reverseComplement(rec, TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE);
+ rec.reverseComplement(TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE, true);
+ }
+
+ /**
+ * Reverse-complement bases and reverse quality scores along with known optional attributes that
+ * need the same treatment. Optionally makes a copy of the bases, qualities or attributes instead
+ * of altering them in-place. See {@link #TAGS_TO_REVERSE_COMPLEMENT} {@link #TAGS_TO_REVERSE}
+ * for the default set of tags that are handled.
+ *
+ * @param rec Record to reverse complement.
+ * @param inplace Setting this to false will clone all attributes, bases and qualities before changing the values.
+ */
+ public static void reverseComplement(final SAMRecord rec, boolean inplace) {
+ rec.reverseComplement(TAGS_TO_REVERSE_COMPLEMENT, TAGS_TO_REVERSE, inplace);
}
/**
@@ -51,79 +71,7 @@ public class SAMRecordUtil {
* non-null attributes specified by tagsToRevcomp and reverse and non-null attributes
* specified by tagsToReverse.
*/
- public static void reverseComplement(final SAMRecord rec, final Collection<String> tagsToRevcomp, final Collection<String> tagsToReverse) {
- final byte[] readBases = rec.getReadBases();
- SequenceUtil.reverseComplement(readBases);
- rec.setReadBases(readBases);
- final byte qualities[] = rec.getBaseQualities();
- reverseArray(qualities);
- rec.setBaseQualities(qualities);
-
- // Deal with tags that need to be reverse complemented
- if (tagsToRevcomp != null) {
- for (final String tag: tagsToRevcomp) {
- Object value = rec.getAttribute(tag);
- if (value != null) {
- if (value instanceof byte[]) SequenceUtil.reverseComplement((byte[]) value);
- else if (value instanceof String) value = SequenceUtil.reverseComplement((String) value);
- else throw new UnsupportedOperationException("Don't know how to reverse complement: " + value);
- rec.setAttribute(tag, value);
- }
- }
- }
-
- // Deal with tags that needed to just be reversed
- if (tagsToReverse != null) {
- for (final String tag : tagsToReverse) {
- Object value = rec.getAttribute(tag);
- if (value != null) {
- if (value instanceof String) {
- value = StringUtil.reverseString((String) value);
- }
- else if (value.getClass().isArray()) {
- if (value instanceof byte[]) reverseArray((byte[]) value);
- else if (value instanceof short[]) reverseArray((short[]) value);
- else if (value instanceof int[]) reverseArray((int[]) value);
- else if (value instanceof float[]) reverseArray((float[]) value);
- else throw new UnsupportedOperationException("Reversing array attribute of type " + value.getClass().getComponentType() + " not supported.");
- }
- else throw new UnsupportedOperationException("Don't know how to reverse: " + value);
-
- rec.setAttribute(tag, value);
- }
- }
- }
- }
-
- private static void reverseArray(final byte[] array) {
- for (int i=0, j=array.length-1; i<j; ++i, --j) {
- final byte tmp = array[i];
- array[i] = array[j];
- array[j] = tmp;
- }
- }
-
- private static void reverseArray(final short[] array) {
- for (int i=0, j=array.length-1; i<j; ++i, --j) {
- final short tmp = array[i];
- array[i] = array[j];
- array[j] = tmp;
- }
- }
-
- private static void reverseArray(final int[] array) {
- for (int i=0, j=array.length-1; i<j; ++i, --j) {
- final int tmp = array[i];
- array[i] = array[j];
- array[j] = tmp;
- }
- }
-
- private static void reverseArray(final float[] array) {
- for (int i=0, j=array.length-1; i<j; ++i, --j) {
- final float tmp = array[i];
- array[i] = array[j];
- array[j] = tmp;
- }
+ public static void reverseComplement(final SAMRecord rec, final Collection<String> tagsToRevcomp, final Collection<String> tagsToReverse, boolean inplace) {
+ rec.reverseComplement(tagsToRevcomp, tagsToReverse, inplace);
}
}
diff --git a/src/main/java/htsjdk/samtools/SAMSequenceDictionary.java b/src/main/java/htsjdk/samtools/SAMSequenceDictionary.java
index 2b8d18a..b7744d7 100644
--- a/src/main/java/htsjdk/samtools/SAMSequenceDictionary.java
+++ b/src/main/java/htsjdk/samtools/SAMSequenceDictionary.java
@@ -23,20 +23,22 @@
*/
package htsjdk.samtools;
+import htsjdk.samtools.util.Log;
+
import java.io.Serializable;
import java.math.BigInteger;
import java.security.MessageDigest;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
+import java.util.stream.Collector;
+import java.util.stream.Collectors;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
import javax.xml.bind.annotation.XmlTransient;
+import static htsjdk.samtools.SAMSequenceRecord.*;
+import static java.util.stream.Collectors.toList;
+
/**
* Collection of SAMSequenceRecords.
*/
@@ -64,6 +66,8 @@ public class SAMSequenceDictionary implements Serializable {
return Collections.unmodifiableList(mSequences);
}
+ private static Log log = Log.getInstance(SAMSequenceDictionary.class);
+
public SAMSequenceRecord getSequence(final String name) {
return mSequenceMap.get(name);
}
@@ -135,7 +139,7 @@ public class SAMSequenceDictionary implements Serializable {
}
return len;
}
-
+
/**
* @return true is the dictionary is empty
*/
@@ -146,7 +150,7 @@ public class SAMSequenceDictionary implements Serializable {
private static String DICT_MISMATCH_TEMPLATE = "SAM dictionaries are not the same: %s.";
/**
* Non-comprehensive {@link #equals(Object)}-assertion: instead of calling {@link SAMSequenceRecord#equals(Object)} on constituent
- * {@link SAMSequenceRecord}s in this dictionary against its pair in the target dictionary, in order, call
+ * {@link SAMSequenceRecord}s in this dictionary against its pair in the target dictionary, in order, call
* {@link SAMSequenceRecord#isSameSequence(SAMSequenceRecord)}.
* Aliases are ignored.
*
@@ -154,7 +158,7 @@ public class SAMSequenceDictionary implements Serializable {
*/
public void assertSameDictionary(final SAMSequenceDictionary that) {
if (this == that) return;
-
+
final Iterator<SAMSequenceRecord> thatSequences = that.mSequences.iterator();
for (final SAMSequenceRecord thisSequence : mSequences) {
if (!thatSequences.hasNext())
@@ -189,7 +193,7 @@ public class SAMSequenceDictionary implements Serializable {
* alternate names fo a given contig. e.g:
* <code>1,chr1,chr01,01,CM000663,NC_000001.10</code> e.g:
* <code>MT,chrM</code>
- *
+ *
* @param originalName
* existing contig name
* @param altName
@@ -219,11 +223,11 @@ public class SAMSequenceDictionary implements Serializable {
/**
* return a MD5 sum for ths dictionary, the checksum is re-computed each
* time this method is called.
- *
+ *
* <pre>
* md5( (seq1.md5_if_available) + ' '+(seq2.name+seq2.length) + ' '+...)
* </pre>
- *
+ *
* @return a MD5 checksum for this dictionary or the empty string if it is
* empty
*/
@@ -266,5 +270,86 @@ public class SAMSequenceDictionary implements Serializable {
" length:"+ getReferenceLength()+" "+
" md5:"+md5()+")";
}
+
+ /**
+ * Default list of tags that must agree when merging dictionaries. Wrapped as an unmodifiable list because
+ * a bare Arrays.asList view still permits set(), which would let any caller alter this shared constant.
+ */
+ public static final List<String> DEFAULT_DICTIONARY_EQUAL_TAG = Collections.unmodifiableList(Arrays.asList(
+ SAMSequenceRecord.MD5_TAG,
+ SAMSequenceRecord.SEQUENCE_LENGTH_TAG));
+
+ /**
+ * Merges the tags of two dictionaries into a new dictionary, focusing on merging the tags rather than the sequences.
+ *
+ * Requires that both dictionaries contain the same sequence names in the same order; otherwise an
+ * IllegalArgumentException is thrown.
+ * For each sequenceIndex, the union of the tags from both sequences is added to the new sequence. When a tag is
+ * present in both sequences with different values and is not listed in tagsToMatch, a warning is logged and the
+ * value from dict1 is used. When such a tag is listed in tagsToMatch, an error is logged and an
+ * IllegalArgumentException is thrown.
+ * Sequence lengths are merged separately from the tags: if both lengths are known (not UNKNOWN_SEQUENCE_LENGTH)
+ * and differ, an IllegalArgumentException is thrown; otherwise dict1's length is used unless it is unknown,
+ * in which case dict2's length is used.
+ *
+ * @param dict1 first dictionary
+ * @param dict2 second dictionary
+ * @param tagsToMatch list of tags that must be equal if present in both sequences. Must contain both
+ * {@code MD5_TAG} and {@code SEQUENCE_LENGTH_TAG} (see {@link #DEFAULT_DICTIONARY_EQUAL_TAG})
+ * @return a new dictionary consisting of the same sequences as the two inputs with the merged values of tags.
+ * @throws IllegalArgumentException if tagsToMatch lacks either required tag, if the sequence names or their
+ * order differ, or if a tag listed in tagsToMatch or a known sequence length has conflicting values
+ */
+ static public SAMSequenceDictionary mergeDictionaries(final SAMSequenceDictionary dict1,
+ final SAMSequenceDictionary dict2,
+ final List<String> tagsToMatch) {
+
+ // We require the MD5 and sequence-length tags to be among the tags that must match.
+ if (!tagsToMatch.contains(MD5_TAG) || !tagsToMatch.contains(SEQUENCE_LENGTH_TAG)) {
+ throw new IllegalArgumentException("Both " + MD5_TAG + " and " + SEQUENCE_LENGTH_TAG + " must be matched " +
+ "when merging dictionaries. Found: " + String.join(",", tagsToMatch));
+ }
+
+ if (!dict1.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(Collectors.toList()).equals(
+ dict2.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(Collectors.toList()))) {
+
+ throw new IllegalArgumentException(String.format("Do not use this function to merge dictionaries with " +
+ "different sequences in them. Sequences must be in the same order as well. Found [%s] and [%s].",
+ String.join(", ", dict1.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(toList())),
+ String.join(", ", dict2.getSequences().stream().map(SAMSequenceRecord::getSequenceName).collect(toList()))));
+ }
+
+ final SAMSequenceDictionary finalDict = new SAMSequenceDictionary();
+ for (int sequenceIndex = 0; sequenceIndex < dict1.getSequences().size(); sequenceIndex++) {
+ final SAMSequenceRecord s1 = dict1.getSequence(sequenceIndex);
+ final SAMSequenceRecord s2 = dict2.getSequence(sequenceIndex);
+
+ final String sName = s1.getSequenceName();
+ final SAMSequenceRecord sMerged = new SAMSequenceRecord(sName, UNKNOWN_SEQUENCE_LENGTH);
+ finalDict.addSequence(sMerged);
+
+ final Set<String> allTags = new HashSet<>();
+ s1.getAttributes().stream().forEach(a -> allTags.add(a.getKey()));
+ s2.getAttributes().stream().forEach(a -> allTags.add(a.getKey()));
+
+ for (final String tag : allTags) {
+ final String value1 = s1.getAttribute(tag);
+ final String value2 = s2.getAttribute(tag);
+
+ if (value1 != null && value2 != null && !value1.equals(value2)) {
+ String baseMessage = String.format("Found sequence entry for which " +
+ "tags differ: %s and tag %s has the two values: %s and %s.",
+ sName, tag, value1, value2);
+
+ if (tagsToMatch.contains(tag)) {
+ log.error("Cannot merge dictionaries. ", baseMessage);
+ throw new IllegalArgumentException("Cannot merge dictionaries. " + baseMessage);
+ } else {
+ log.warn(baseMessage, " Using ", value1);
+ }
+ }
+ sMerged.setAttribute(tag, value1 == null ? value2 : value1);
+ }
+
+ final int length1 = s1.getSequenceLength();
+ final int length2 = s2.getSequenceLength();
+
+ if (length1 != UNKNOWN_SEQUENCE_LENGTH && length2 != UNKNOWN_SEQUENCE_LENGTH && length1 != length2) {
+ throw new IllegalArgumentException(String.format("Cannot merge the two dictionaries. " +
+ "Found sequence entry for which " + "lengths differ: %s has lengths %s and %s", sName, length1, length2));
+ }
+ sMerged.setSequenceLength(length1 == UNKNOWN_SEQUENCE_LENGTH ? length2 : length1);
+ }
+ return finalDict;
+ }
}
diff --git a/src/main/java/htsjdk/samtools/SAMSequenceRecord.java b/src/main/java/htsjdk/samtools/SAMSequenceRecord.java
index cbc2b7a..6bca979 100644
--- a/src/main/java/htsjdk/samtools/SAMSequenceRecord.java
+++ b/src/main/java/htsjdk/samtools/SAMSequenceRecord.java
@@ -82,9 +82,10 @@ public class SAMSequenceRecord extends AbstractSAMHeaderRecord implements Clonea
}
/**
- * @deprecated Use SAMSequenceRecord(final String name, final int sequenceLength) instead.
+ * @deprecated Use {@link #SAMSequenceRecord(String, int)} instead.
* sequenceLength is required for the object to be considered valid.
*/
+ @Deprecated
public SAMSequenceRecord(final String name) {
this(name, UNKNOWN_SEQUENCE_LENGTH);
}
diff --git a/src/main/java/htsjdk/samtools/SAMUtils.java b/src/main/java/htsjdk/samtools/SAMUtils.java
index c0432ac..25b6799 100644
--- a/src/main/java/htsjdk/samtools/SAMUtils.java
+++ b/src/main/java/htsjdk/samtools/SAMUtils.java
@@ -421,8 +421,9 @@ public final class SAMUtils {
*
* @param beg 0-based start of read (inclusive)
* @param end 0-based end of read (exclusive)
- * @deprecated Use GenomicIndexUtil.regionToBin
+ * @deprecated Use {@link GenomicIndexUtil#regionToBin}
*/
+ @Deprecated
static int reg2bin(final int beg, final int end) {
return GenomicIndexUtil.regionToBin(beg, end);
}
@@ -1102,7 +1103,7 @@ public final class SAMUtils {
public static boolean isValidUnsignedIntegerAttribute(long value) {
return value >= 0 && value <= BinaryCodec.MAX_UINT;
}
-
+
/**
* Extract a List of 'other canonical alignments' from a SAM record. Those alignments are stored as a string in the 'SA' tag as defined
* in the SAM specification.
@@ -1118,40 +1119,40 @@ public final class SAMUtils {
if( saValue == null ) return Collections.emptyList();
if( ! (saValue instanceof String) ) throw new SAMException(
"Expected a String for attribute 'SA' but got " + saValue.getClass() );
-
+
final SAMRecordFactory samReaderFactory = new DefaultSAMRecordFactory();
-
- /* the spec says: "Other canonical alignments in a chimeric alignment, formatted as a
- * semicolon-delimited list: (rname,pos,strand,CIGAR,mapQ,NM;)+.
+
+ /* the spec says: "Other canonical alignments in a chimeric alignment, formatted as a
+ * semicolon-delimited list: (rname,pos,strand,CIGAR,mapQ,NM;)+.
* Each element in the list represents a part of the chimeric alignment.
* Conventionally, at a supplementary line, the 1rst element points to the primary line.
*/
-
+
/* break string using semicolon */
final String semiColonStrs[] = SEMICOLON_PAT.split((String)saValue);
-
+
/* the result list */
final List<SAMRecord> alignments = new ArrayList<>( semiColonStrs.length );
-
+
/* base SAM flag */
int record_flag = record.getFlags() ;
record_flag &= ~SAMFlag.PROPER_PAIR.flag;
record_flag &= ~SAMFlag.SUPPLEMENTARY_ALIGNMENT.flag;
record_flag &= ~SAMFlag.READ_REVERSE_STRAND.flag;
-
-
+
+
for(int i=0; i< semiColonStrs.length;++i ) {
final String semiColonStr = semiColonStrs[i];
/* ignore empty string */
if( semiColonStr.isEmpty() ) continue;
-
+
/* break string using comma */
final String commaStrs[] = COMMA_PAT.split(semiColonStr);
if( commaStrs.length != 6 ) throw new SAMException("Bad 'SA' attribute in " + semiColonStr);
-
+
/* create the new record */
final SAMRecord otherRec = samReaderFactory.createSAMRecord( record.getHeader() );
-
+
/* copy fields from the original record */
otherRec.setReadName( record.getReadName() );
otherRec.setReadBases( record.getReadBases() );
@@ -1160,13 +1161,13 @@ public final class SAMUtils {
otherRec.setMateReferenceIndex( record.getMateReferenceIndex() );
otherRec.setMateAlignmentStart( record.getMateAlignmentStart() );
}
-
-
+
+
/* get reference sequence */
final int tid = record.getHeader().getSequenceIndex( commaStrs[0] );
if( tid == -1 ) throw new SAMException("Unknown contig in " + semiColonStr);
otherRec.setReferenceIndex( tid );
-
+
/* fill POS */
final int alignStart;
try {
@@ -1174,47 +1175,49 @@ public final class SAMUtils {
} catch( final NumberFormatException err ) {
throw new SAMException("bad POS in "+semiColonStr, err);
}
-
- otherRec.setAlignmentStart( alignStart );
-
+
+ otherRec.setAlignmentStart( alignStart );
+
/* set TLEN */
- if( record.getReadPairedFlag() &&
- !record.getMateUnmappedFlag() &&
+ if( record.getReadPairedFlag() &&
+ !record.getMateUnmappedFlag() &&
record.getMateReferenceIndex() == tid ) {
otherRec.setInferredInsertSize( record.getMateAlignmentStart() - alignStart );
}
- /* set FLAG */
+ /* set FLAG */
int other_flag = record_flag;
other_flag |= (commaStrs[2].equals("+") ? 0 : SAMFlag.READ_REVERSE_STRAND.flag) ;
/* spec: Conventionally, at a supplementary line, the 1st element points to the primary line */
if( !( record.getSupplementaryAlignmentFlag() && i==0 ) ) {
other_flag |= SAMFlag.SUPPLEMENTARY_ALIGNMENT.flag;
- }
+ }
otherRec.setFlags(other_flag);
-
+
/* set CIGAR */
otherRec.setCigar( TextCigarCodec.decode( commaStrs[3] ) );
-
+
/* set MAPQ */
try {
otherRec.setMappingQuality( Integer.parseInt(commaStrs[4]) );
} catch (final NumberFormatException err) {
throw new SAMException("bad MAPQ in "+semiColonStr, err);
}
-
+
/* fill NM */
try {
- otherRec.setAttribute( SAMTagUtil.getSingleton().NM , Integer.parseInt(commaStrs[5]) );
+ if (!commaStrs[5].equals("*")) {
+ otherRec.setAttribute(SAMTagUtil.getSingleton().NM, Integer.parseInt(commaStrs[5]));
+ }
} catch (final NumberFormatException err) {
throw new SAMException("bad NM in "+semiColonStr, err);
}
-
+
/* if strand is not the same: reverse-complement */
if( otherRec.getReadNegativeStrandFlag() != record.getReadNegativeStrandFlag() ) {
SAMRecordUtil.reverseComplement(otherRec);
}
-
+
/* add the alignment */
alignments.add( otherRec );
}
diff --git a/src/main/java/htsjdk/samtools/SamFileHeaderMerger.java b/src/main/java/htsjdk/samtools/SamFileHeaderMerger.java
index b162cb2..b3f588c 100644
--- a/src/main/java/htsjdk/samtools/SamFileHeaderMerger.java
+++ b/src/main/java/htsjdk/samtools/SamFileHeaderMerger.java
@@ -122,8 +122,9 @@ public class SamFileHeaderMerger {
*
* @param readers sam file readers to combine
* @param sortOrder sort order new header should have
- * @deprecated replaced by SamFileHeaderMerger(Collection<SAMFileHeader>, SAMFileHeader.SortOrder, boolean)
+ * @deprecated replaced by {@link #SamFileHeaderMerger(SAMFileHeader.SortOrder, Collection, boolean)}
*/
+ @Deprecated
public SamFileHeaderMerger(final Collection<SamReader> readers, final SAMFileHeader.SortOrder sortOrder) {
this(readers, sortOrder, false);
}
@@ -135,8 +136,9 @@ public class SamFileHeaderMerger {
* @param sortOrder sort order new header should have
* @param mergeDictionaries If true, merge sequence dictionaries in new header. If false, require that
* all input sequence dictionaries be identical.
- * @deprecated replaced by SamFileHeaderMerger(Collection<SAMFileHeader>, SAMFileHeader.SortOrder, boolean)
+ * @deprecated replaced by {@link #SamFileHeaderMerger(SAMFileHeader.SortOrder, Collection, boolean)}
*/
+ @Deprecated
public SamFileHeaderMerger(final Collection<SamReader> readers, final SAMFileHeader.SortOrder sortOrder, final boolean mergeDictionaries) {
this(sortOrder, getHeadersFromReaders(readers), mergeDictionaries);
this.readers = readers;
@@ -188,7 +190,7 @@ public class SamFileHeaderMerger {
}
}
- // Utilility method to make use with old constructor
+ // Utility method to make use with old constructor
private static List<SAMFileHeader> getHeadersFromReaders(final Collection<SamReader> readers) {
final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(readers.size());
for (final SamReader reader : readers) {
@@ -585,7 +587,7 @@ public class SamFileHeaderMerger {
// Since sequenceRecord already exists in resultingDict, don't need to add it.
// Add in all the sequences prior to it that have been held in holder.
resultingDict.addAll(loc, holder);
- // Remember the index of sequenceRecord so can check for merge imcompatibility.
+ // Remember the index of sequenceRecord so can check for merge incompatibility.
prevloc = loc + holder.size();
previouslyMerged = sequenceRecord;
holder.clear();
@@ -622,12 +624,12 @@ public class SamFileHeaderMerger {
* @param masterDictionary the superset dictionary we've created.
*/
private void createSequenceMapping(final Collection<SAMFileHeader> headers, final SAMSequenceDictionary masterDictionary) {
- final LinkedList<String> resultingDictStr = new LinkedList<String>();
+ final LinkedList<String> resultingDictStr = new LinkedList<>();
for (final SAMSequenceRecord r : masterDictionary.getSequences()) {
resultingDictStr.add(r.getSequenceName());
}
for (final SAMFileHeader header : headers) {
- final Map<Integer, Integer> seqMap = new HashMap<Integer, Integer>();
+ final Map<Integer, Integer> seqMap = new HashMap<>();
final SAMSequenceDictionary dict = header.getSequenceDictionary();
for (final SAMSequenceRecord rec : dict.getSequences()) {
seqMap.put(rec.getSequenceIndex(), resultingDictStr.indexOf(rec.getSequenceName()));
@@ -640,8 +642,9 @@ public class SamFileHeaderMerger {
/**
* Returns the read group id that should be used for the input read and RG id.
*
- * @deprecated replaced by getReadGroupId(SAMFileHeader, String)
+ * @deprecated replaced by {@link #getReadGroupId(SAMFileHeader, String)}
*/
+ @Deprecated
public String getReadGroupId(final SamReader reader, final String originalReadGroupId) {
return getReadGroupId(reader.getFileHeader(), originalReadGroupId);
}
@@ -655,8 +658,9 @@ public class SamFileHeaderMerger {
* @param reader one of the input files
* @param originalProgramGroupId a program group ID from the above input file
* @return new ID from the merged list of program groups in the output file
- * @deprecated replaced by getProgramGroupId(SAMFileHeader, String)
+ * @deprecated replaced by {@link #getProgramGroupId(SAMFileHeader, String)}
*/
+ @Deprecated
public String getProgramGroupId(final SamReader reader, final String originalProgramGroupId) {
return getProgramGroupId(reader.getFileHeader(), originalProgramGroupId);
}
@@ -693,8 +697,9 @@ public class SamFileHeaderMerger {
/**
* Returns the collection of readers that this header merger is working with. May return null.
*
- * @deprecated replaced by getHeaders()
+ * @deprecated replaced by {@link #getHeaders()}
*/
+ @Deprecated
public Collection<SamReader> getReaders() {
return this.readers;
}
@@ -712,8 +717,9 @@ public class SamFileHeaderMerger {
* @param reader the reader
* @param oldReferenceSequenceIndex the old sequence (also called reference) index
* @return the new index value
- * @deprecated replaced by getMergedSequenceIndex(SAMFileHeader, Integer)
+ * @deprecated replaced by {@link #getMergedSequenceIndex(SAMFileHeader, Integer)}
*/
+ @Deprecated
public Integer getMergedSequenceIndex(final SamReader reader, final Integer oldReferenceSequenceIndex) {
return this.getMergedSequenceIndex(reader.getFileHeader(), oldReferenceSequenceIndex);
}
@@ -745,7 +751,7 @@ public class SamFileHeaderMerger {
* Implementations of this interface are used by mergeHeaderRecords(..) to instantiate
* specific subclasses of AbstractSAMHeaderRecord.
*/
- private static interface HeaderRecordFactory<RecordType extends AbstractSAMHeaderRecord> {
+ private interface HeaderRecordFactory<RecordType extends AbstractSAMHeaderRecord> {
/**
* Constructs a new instance of RecordType.
@@ -753,7 +759,7 @@ public class SamFileHeaderMerger {
* @param id The id of the new record.
* @param srcRecord Except for the id, the new record will be a copy of this source record.
*/
- public RecordType createRecord(final String id, RecordType srcRecord);
+ RecordType createRecord(final String id, RecordType srcRecord);
}
/**
diff --git a/src/main/java/htsjdk/samtools/SamFileValidator.java b/src/main/java/htsjdk/samtools/SamFileValidator.java
index 3a6deb0..cf18a7f 100644
--- a/src/main/java/htsjdk/samtools/SamFileValidator.java
+++ b/src/main/java/htsjdk/samtools/SamFileValidator.java
@@ -318,7 +318,7 @@ public class SamFileValidator {
addError(new SAMValidationError(Type.INVALID_QUALITY_FORMAT, e.getMessage(), null));
}
} catch (SAMFormatException e) {
- // increment record number because the iterator behind the SAMFileReader
+ // increment record number because the iterator behind the SamReader
// reads one record ahead so we will get this failure one record ahead
final String msg = "SAMFormatException on record " + progress.getCount() + 1;
out.println(msg);
@@ -588,10 +588,11 @@ public class SamFileValidator {
}
/**
- * @deprecated use setIndexValidationStringency instead
+ * @deprecated use {@link #setIndexValidationStringency} instead
*/
+ @Deprecated
public SamFileValidator setValidateIndex(final boolean validateIndex) {
- // The SAMFileReader must also have IndexCaching enabled to have the index validated,
+ // The SamReader must also have IndexCaching enabled to have the index validated,
return this.setIndexValidationStringency(validateIndex ? IndexValidationStringency.EXHAUSTIVE : IndexValidationStringency.NONE);
}
diff --git a/src/main/java/htsjdk/samtools/SamReader.java b/src/main/java/htsjdk/samtools/SamReader.java
index 9493593..6bd6c21 100644
--- a/src/main/java/htsjdk/samtools/SamReader.java
+++ b/src/main/java/htsjdk/samtools/SamReader.java
@@ -133,7 +133,7 @@ public interface SamReader extends Iterable<SAMRecord>, Closeable {
public Indexing indexing();
/**
- * Iterate through file in order. For a SAMFileReader constructed from an InputStream, and for any SAM file,
+ * Iterate through file in order. For a SamReader constructed from an InputStream, and for any SAM file,
* a 2nd iteration starts where the 1st one left off. For a BAM constructed from a SeekableStream or File, each new iteration
* starts at the first record.
* <p/>
@@ -145,8 +145,8 @@ public interface SamReader extends Iterable<SAMRecord>, Closeable {
/**
* Iterate over records that match the given interval. Only valid to call this if hasIndex() == true.
* <p/>
- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
- * a second iteration, the first one must be closed first. You can use a second SAMFileReader to iterate
+ * Only a single open iterator on a given SamReader may be extant at any one time. If you want to start
+ * a second iteration, the first one must be closed first. You can use a second SamReader to iterate
* in parallel over the same underlying file.
* <p/>
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
@@ -167,7 +167,7 @@ public interface SamReader extends Iterable<SAMRecord>, Closeable {
/**
* Iterate over records that overlap the given interval. Only valid to call this if hasIndex() == true.
* <p/>
- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
+ * Only a single open iterator on a given SamReader may be extant at any one time. If you want to start
* a second iteration, the first one must be closed first.
* <p/>
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
@@ -186,7 +186,7 @@ public interface SamReader extends Iterable<SAMRecord>, Closeable {
/**
* Iterate over records that are contained in the given interval. Only valid to call this if hasIndex() == true.
* <p/>
- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
+ * Only a single open iterator on a given SamReader may be extant at any one time. If you want to start
* a second iteration, the first one must be closed first.
* <p/>
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
@@ -208,8 +208,8 @@ public interface SamReader extends Iterable<SAMRecord>, Closeable {
* <p/>
* Only valid to call this if hasIndex() == true.
* <p/>
- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
- * a second iteration, the first one must be closed first. You can use a second SAMFileReader to iterate
+ * Only a single open iterator on a given SamReader may be extant at any one time. If you want to start
+ * a second iteration, the first one must be closed first. You can use a second SamReader to iterate
* in parallel over the same underlying file.
* <p/>
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
@@ -233,7 +233,7 @@ public interface SamReader extends Iterable<SAMRecord>, Closeable {
* <p/>
* Only valid to call this if hasIndex() == true.
* <p/>
- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
+ * Only a single open iterator on a given SamReader may be extant at any one time. If you want to start
* a second iteration, the first one must be closed first.
* <p/>
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
@@ -253,7 +253,7 @@ public interface SamReader extends Iterable<SAMRecord>, Closeable {
* <p/>
* Only valid to call this if hasIndex() == true.
* <p/>
- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
+ * Only a single open iterator on a given SamReader may be extant at any one time. If you want to start
* a second iteration, the first one must be closed first.
* <p/>
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
@@ -274,7 +274,7 @@ public interface SamReader extends Iterable<SAMRecord>, Closeable {
/**
* Iterate over records that map to the given sequence and start at the given position. Only valid to call this if hasIndex() == true.
* <p/>
- * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
+ * Only a single open iterator on a given SamReader may be extant at any one time. If you want to start
* a second iteration, the first one must be closed first.
* <p/>
* Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
@@ -295,9 +295,9 @@ public interface SamReader extends Iterable<SAMRecord>, Closeable {
* mate information. This method iterates over the SAM file, so there may not be an unclosed
* iterator on the SAM file when this method is called.
* <p/>
- * Note that it is not possible to call queryMate when iterating over the SAMFileReader, because queryMate
- * requires its own iteration, and there cannot be two simultaneous iterations on the same SAMFileReader. The
- * work-around is to open a second SAMFileReader on the same input file, and call queryMate on the second
+ * Note that it is not possible to call queryMate when iterating over the SamReader, because queryMate
+ * requires its own iteration, and there cannot be two simultaneous iterations on the same SamReader. The
+ * work-around is to open a second SamReader on the same input file, and call queryMate on the second
* reader.
*
* @param rec Record for which mate is sought. Must be a paired read.
diff --git a/src/main/java/htsjdk/samtools/TextTagCodec.java b/src/main/java/htsjdk/samtools/TextTagCodec.java
index 0fae202..60363e1 100644
--- a/src/main/java/htsjdk/samtools/TextTagCodec.java
+++ b/src/main/java/htsjdk/samtools/TextTagCodec.java
@@ -41,6 +41,7 @@ import java.util.Map;
* instance is used in multiple threads.
*/
public class TextTagCodec {
+ // 3 fields for non-empty strings 2 fields if the string is empty.
private static final int NUM_TAG_FIELDS = 3;
/**
@@ -149,12 +150,12 @@ public class TextTagCodec {
*/
public Map.Entry<String, Object> decode(final String tag) {
final int numFields = StringUtil.splitConcatenateExcessTokens(tag, fields, ':');
- if (numFields != TextTagCodec.NUM_TAG_FIELDS) {
+ if (numFields != TextTagCodec.NUM_TAG_FIELDS && numFields != TextTagCodec.NUM_TAG_FIELDS - 1) {
throw new SAMFormatException("Not enough fields in tag '" + tag + "'");
}
final String key = fields[0];
final String type = fields[1];
- final String stringVal = fields[2];
+ final String stringVal = numFields == TextTagCodec.NUM_TAG_FIELDS ? fields[2] : "";
final Object val = convertStringToObject(type, stringVal);
return new Map.Entry<String, Object>() {
public String getKey() {
diff --git a/src/main/java/htsjdk/samtools/sra/SRAAccession.java b/src/main/java/htsjdk/samtools/sra/SRAAccession.java
index 17180d7..9aeb10f 100644
--- a/src/main/java/htsjdk/samtools/sra/SRAAccession.java
+++ b/src/main/java/htsjdk/samtools/sra/SRAAccession.java
@@ -74,10 +74,11 @@ public class SRAAccession implements Serializable {
}
/**
- * @deprecated
* @return true if SRA successfully loaded native libraries and fully initialized,
* false otherwise
+ * @deprecated use {@link #checkIfInitialized} instead
*/
+ @Deprecated
public static boolean isSupported() {
return checkIfInitialized() == null;
}
diff --git a/src/main/java/htsjdk/samtools/util/IntervalList.java b/src/main/java/htsjdk/samtools/util/IntervalList.java
index 8b46a1c..32b7176 100644
--- a/src/main/java/htsjdk/samtools/util/IntervalList.java
+++ b/src/main/java/htsjdk/samtools/util/IntervalList.java
@@ -71,12 +71,15 @@ public class IntervalList implements Iterable<Interval> {
/** Constructs a new interval list using the supplied header information. */
public IntervalList(final SAMFileHeader header) {
- if (header == null) {
- throw new IllegalArgumentException("SAMFileHeader must be supplied.");
- }
+ if (header == null) throw new IllegalArgumentException("SAMFileHeader must be supplied.");
this.header = header;
}
+ /** Constructs a new interval list whose header is a new {@link SAMFileHeader} created from the supplied sequence dictionary. */
+ public IntervalList(final SAMSequenceDictionary dict) {
+ this(new SAMFileHeader(dict));
+ }
+
/** Gets the header (if there is one) for the interval list. */
public SAMFileHeader getHeader() { return header; }
@@ -769,4 +772,4 @@ class IntervalCoordinateComparator implements Comparator<Interval>, Serializable
return retval;
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/htsjdk/samtools/util/PositionalOutputStream.java b/src/main/java/htsjdk/samtools/util/PositionalOutputStream.java
new file mode 100644
index 0000000..ef28be6
--- /dev/null
+++ b/src/main/java/htsjdk/samtools/util/PositionalOutputStream.java
@@ -0,0 +1,65 @@
+/*
+* Copyright (c) 2012 The Broad Institute
+*
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation
+* files (the "Software"), to deal in the Software without
+* restriction, including without limitation the rights to use,
+* copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following
+* conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+package htsjdk.samtools.util;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * Wraps an output stream and keeps track of the position within it, i.e. the number of bytes
+ * that have been written through this wrapper. All writes, flushes and the final close are
+ * forwarded to the wrapped stream.
+ */
+public final class PositionalOutputStream extends OutputStream implements LocationAware
+{
+    private final OutputStream out;
+    private long position = 0;
+
+    /**
+     * @param out the stream that receives everything written to this wrapper
+     */
+    public PositionalOutputStream(final OutputStream out) {
+        this.out = out;
+    }
+
+    /** Writes the whole array; equivalent to {@code write(bytes, 0, bytes.length)}. */
+    @Override
+    public final void write(final byte[] bytes) throws IOException {
+        write(bytes, 0, bytes.length);
+    }
+
+    /** Advances the position by {@code numBytes} and forwards the write to the wrapped stream. */
+    @Override
+    public final void write(final byte[] bytes, final int startIndex, final int numBytes) throws IOException {
+        position += numBytes;
+        out.write(bytes, startIndex, numBytes);
+    }
+
+    /** Advances the position by one and forwards the single-byte write to the wrapped stream. */
+    @Override
+    public final void write(final int c) throws IOException {
+        position++;
+        out.write(c);
+    }
+
+    /** @return the number of bytes written through this wrapper so far */
+    public final long getPosition() { return position; }
+
+    /**
+     * Flushes the wrapped stream. Without this override the inherited {@link OutputStream#flush()}
+     * does nothing, so data buffered by the wrapped stream would never be pushed out on flush.
+     */
+    @Override
+    public void flush() throws IOException {
+        out.flush();
+    }
+
+    /** Closes this stream and the wrapped stream. */
+    @Override
+    public void close() throws IOException {
+        super.close();
+        out.close();
+    }
+}
diff --git a/src/main/java/htsjdk/samtools/util/QualityEncodingDetector.java b/src/main/java/htsjdk/samtools/util/QualityEncodingDetector.java
index 83999bc..b0a965c 100644
--- a/src/main/java/htsjdk/samtools/util/QualityEncodingDetector.java
+++ b/src/main/java/htsjdk/samtools/util/QualityEncodingDetector.java
@@ -23,9 +23,9 @@ import java.util.Set;
import static java.util.Arrays.asList;
/**
- * Utility for determining the type of quality encoding/format (see FastqQualityFormat) used in a SAM/BAM or Fastq.
+ * Utility for determining the type of quality encoding/format (see {@link FastqQualityFormat}) used in a SAM/BAM or Fastq.
* <p/>
- * To use this class, invoke the detect() method with a SAMFileReader or FastqReader, as appropriate. The consumer is
+ * To use this class, invoke the detect() method with a {@link SamReader} or {@link FastqReader}, as appropriate. The consumer is
* responsible for closing readers.
*
* @author mccowan at broadinstitute.org
@@ -116,10 +116,10 @@ public class QualityEncodingDetector {
* Adds the SAMRecord's quality scores.
* <p/>
* Does not assume Phred quality encoding (for obvious reasons); getBaseQualityString() is used to read the
- * unmodified ASCII score. To elaborate, SAMFileReader, which is generating these SAMRecords, builds the
+ * unmodified ASCII score. To elaborate, the {@link SamReader}, which is generating these {@link SAMRecord}s, builds the
* SAMRecord by subtracting a value from each quality score and storing that transformed value internally.
* Since we desire original scores here (whatever was in the file to begin with), we effectively undo this
- * transformation by asking SAMRecord to convert the quality back into the ASCII that was read in the file.
+ * transformation by asking {@link SAMRecord} to convert the quality back into the ASCII that was read in the file.
*/
public void add(final SAMRecord samRecord, final boolean useOriginalQualities) {
addAsciiQuality(useOriginalQualities && samRecord.getOriginalBaseQualities() != null
@@ -402,4 +402,4 @@ public class QualityEncodingDetector {
}
}
}
-}
\ No newline at end of file
+}
diff --git a/src/main/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java b/src/main/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java
index 7546a01..5d173a5 100644
--- a/src/main/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java
+++ b/src/main/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java
@@ -34,7 +34,7 @@ import java.util.List;
import java.util.NoSuchElementException;
/**
- * Create an iterator over a SAMFileReader that only returns reads that overlap one of the intervals
+ * Create an iterator over a {@link SamReader} that only returns reads that overlap one of the intervals
* in an interval list.
*
* @author alecw at broadinstitute.org
diff --git a/src/main/java/htsjdk/samtools/util/SequenceUtil.java b/src/main/java/htsjdk/samtools/util/SequenceUtil.java
index d2fb861..3108cee 100644
--- a/src/main/java/htsjdk/samtools/util/SequenceUtil.java
+++ b/src/main/java/htsjdk/samtools/util/SequenceUtil.java
@@ -442,33 +442,6 @@ public class SequenceUtil {
}
/**
- * Sadly, this is a duplicate of the method above, except that it takes char[] for referenceBases rather
- * than byte[]. This is because GATK needs it this way.
- * <p/>
- * TODO: Remove this method when GATK map method is changed to take refseq as byte[].
- * TODO: UPDATE: Seems to be removed from GATK. Deprecated now to be removed in a future version.
- */
- @Deprecated
- private static int countMismatches(final SAMRecord read, final char[] referenceBases, final int referenceOffset) {
- int mismatches = 0;
-
- final byte[] readBases = read.getReadBases();
-
- for (final AlignmentBlock block : read.getAlignmentBlocks()) {
- final int readBlockStart = block.getReadStart() - 1;
- final int referenceBlockStart = block.getReferenceStart() - 1 - referenceOffset;
- final int length = block.getLength();
-
- for (int i = 0; i < length; ++i) {
- if (!basesEqual(readBases[readBlockStart + i], StringUtil.charToByte(referenceBases[referenceBlockStart + i]))) {
- ++mismatches;
- }
- }
- }
- return mismatches;
- }
-
- /**
* Calculates the sum of qualities for mismatched bases in the read.
*
* @param referenceBases Array of ASCII bytes in which the 0th position in the array corresponds
@@ -537,41 +510,6 @@ public class SequenceUtil {
return qualities;
}
- /**
- * Sadly, this is a duplicate of the method above, except that it takes char[] for referenceBases rather
- * than byte[]. This is because GATK needs it this way.
- * <p/>
- * TODO: Remove this method when GATK map method is changed to take refseq as byte[].
- * TODO: UPDATE: Seems to be removed from GATK. Deprecated now to be removed in a future version.
- */
- @Deprecated
- public static int sumQualitiesOfMismatches(final SAMRecord read, final char[] referenceBases,
- final int referenceOffset) {
- int qualities = 0;
-
- final byte[] readBases = read.getReadBases();
- final byte[] readQualities = read.getBaseQualities();
-
- if (read.getAlignmentStart() <= referenceOffset) {
- throw new IllegalArgumentException("read.getAlignmentStart(" + read.getAlignmentStart() +
- ") <= referenceOffset(" + referenceOffset + ")");
- }
-
- for (final AlignmentBlock block : read.getAlignmentBlocks()) {
- final int readBlockStart = block.getReadStart() - 1;
- final int referenceBlockStart = block.getReferenceStart() - 1 - referenceOffset;
- final int length = block.getLength();
-
- for (int i = 0; i < length; ++i) {
- if (!basesEqual(readBases[readBlockStart + i], StringUtil.charToByte(referenceBases[referenceBlockStart + i]))) {
- qualities += readQualities[readBlockStart + i];
- }
- }
- }
-
- return qualities;
- }
-
public static int countInsertedBases(final Cigar cigar) {
int ret = 0;
for (final CigarElement element : cigar.getCigarElements()) {
@@ -658,26 +596,6 @@ public class SequenceUtil {
return samNm;
}
-
- /**
- * Sadly, this is a duplicate of the method above, except that it takes char[] for referenceBases rather
- * than byte[]. This is because GATK needs it this way.
- * <p/>
- * TODO: Remove this method when GATK map method is changed to take refseq as byte[].
- * TODO: UPDATE: Seems to be removed from GATK. Deprecated now to be removed in a future version.
- */
- @Deprecated
- public static int calculateSamNmTag(final SAMRecord read, final char[] referenceBases,
- final int referenceOffset) {
- int samNm = countMismatches(read, referenceBases, referenceOffset);
- for (final CigarElement el : read.getCigar().getCigarElements()) {
- if (el.getOperator() == CigarOperator.INSERTION || el.getOperator() == CigarOperator.DELETION) {
- samNm += el.getLength();
- }
- }
- return samNm;
- }
-
/** Returns the complement of a single byte. */
public static byte complement(final byte b) {
switch (b) {
@@ -972,10 +890,11 @@ public class SequenceUtil {
/**
* Calculate MD and NM similarly to Samtools, except that N->N is a match.
*
- * @param record
- * @param ref
- * @param calcMD
- * @return
+ * @param record Input record for which to calculate NM and MD.
+ * The appropriate tags will be added/updated in the record
+ * @param ref The reference bases for the sequence to which the record is mapped
+ * @param calcMD A flag indicating whether to update the MD tag in the record
+ * @param calcNM A flag indicating whether to update the NM tag in the record
*/
public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref,
final boolean calcMD, final boolean calcNM) {
@@ -985,66 +904,63 @@ public class SequenceUtil {
final Cigar cigar = record.getCigar();
final List<CigarElement> cigarElements = cigar.getCigarElements();
final byte[] seq = record.getReadBases();
- final int start = record.getAlignmentStart() - 1;
- int i, x, y, u = 0;
- int nm = 0;
- final StringBuilder str = new StringBuilder();
-
- final int size = cigarElements.size();
- for (i = y = 0, x = start; i < size; ++i) {
- final CigarElement ce = cigarElements.get(i);
- int j;
- final int length = ce.getLength();
+ final int alignmentStart = record.getAlignmentStart() - 1;
+ int cigarIndex, blockRefPos, blockReadStart, matchCount = 0;
+ int nmCount = 0;
+ final StringBuilder mdString = new StringBuilder();
+
+ final int nElements = cigarElements.size();
+ for (cigarIndex = blockReadStart = 0, blockRefPos = alignmentStart; cigarIndex < nElements; ++cigarIndex) {
+ final CigarElement ce = cigarElements.get(cigarIndex);
+ int inBlockOffset;
+ final int blockLength = ce.getLength();
final CigarOperator op = ce.getOperator();
if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ
|| op == CigarOperator.X) {
- for (j = 0; j < length; ++j) {
- final int z = y + j;
+ for (inBlockOffset = 0; inBlockOffset < blockLength; ++inBlockOffset) {
+ final int readOffset = blockReadStart + inBlockOffset;
- if (ref.length <= x + j) break; // out of boundary
+ if (ref.length <= blockRefPos + inBlockOffset) break; // out of boundary
- int c1 = 0;
- int c2 = 0;
- // try {
- c1 = seq[z];
- c2 = ref[x + j];
+ final byte readBase = seq[readOffset];
+ final byte refBase = ref[blockRefPos + inBlockOffset];
- if ((c1 == c2) || c1 == 0) {
+ if ((bases[readBase] == bases[refBase]) || readBase == 0) {
// a match
- ++u;
+ ++matchCount;
} else {
- str.append(u);
- str.appendCodePoint(ref[x + j]);
- u = 0;
- ++nm;
+ mdString.append(matchCount);
+ mdString.appendCodePoint(refBase);
+ matchCount = 0;
+ ++nmCount;
}
}
- if (j < length) break;
- x += length;
- y += length;
+ if (inBlockOffset < blockLength) break;
+ blockRefPos += blockLength;
+ blockReadStart += blockLength;
} else if (op == CigarOperator.DELETION) {
- str.append(u);
- str.append('^');
- for (j = 0; j < length; ++j) {
- if (ref[x + j] == 0) break;
- str.appendCodePoint(ref[x + j]);
+ mdString.append(matchCount);
+ mdString.append('^');
+ for (inBlockOffset = 0; inBlockOffset < blockLength; ++inBlockOffset) {
+ if (ref[blockRefPos + inBlockOffset] == 0) break;
+ mdString.appendCodePoint(ref[blockRefPos + inBlockOffset]);
}
- u = 0;
- if (j < length) break;
- x += length;
- nm += length;
+ matchCount = 0;
+ if (inBlockOffset < blockLength) break;
+ blockRefPos += blockLength;
+ nmCount += blockLength;
} else if (op == CigarOperator.INSERTION
|| op == CigarOperator.SOFT_CLIP) {
- y += length;
- if (op == CigarOperator.INSERTION) nm += length;
+ blockReadStart += blockLength;
+ if (op == CigarOperator.INSERTION) nmCount += blockLength;
} else if (op == CigarOperator.SKIPPED_REGION) {
- x += length;
+ blockRefPos += blockLength;
}
}
- str.append(u);
+ mdString.append(matchCount);
- if (calcMD) record.setAttribute(SAMTag.MD.name(), str.toString());
- if (calcNM) record.setAttribute(SAMTag.NM.name(), nm);
+ if (calcMD) record.setAttribute(SAMTag.MD.name(), mdString.toString());
+ if (calcNM) record.setAttribute(SAMTag.NM.name(), nmCount);
}
public static byte upperCase(final byte base) {
@@ -1059,7 +975,7 @@ public class SequenceUtil {
/** Generates all possible unambiguous kmers (upper-case) of length and returns them as byte[]s. */
public static List<byte[]> generateAllKmers(final int length) {
- final List<byte[]> sofar = new LinkedList<byte[]>();
+ final List<byte[]> sofar = new LinkedList<>();
if (sofar.isEmpty()) {
sofar.add(new byte[length]);
diff --git a/src/main/java/htsjdk/samtools/util/StringUtil.java b/src/main/java/htsjdk/samtools/util/StringUtil.java
index ecb1b3f..9049253 100644
--- a/src/main/java/htsjdk/samtools/util/StringUtil.java
+++ b/src/main/java/htsjdk/samtools/util/StringUtil.java
@@ -545,4 +545,59 @@ public class StringUtil {
return i;
}
+
+ /**
+ * Calculates the Hamming distance (number of character mismatches) between two strings s1 and s2.
+ * Since Hamming distance is not defined for strings of differing lengths, we throw an exception if
+ * the two strings are of different lengths. Hamming distance is case sensitive and does not have
+ * any special treatment for DNA.
+ *
+ * @param s1 The first string to compare
+ * @param s2 The second string to compare. Note that swapping s1 and s2 does not change the value returned.
+ * @return Hamming distance between s1 and s2.
+ * @throws IllegalArgumentException If the two strings have differing lengths.
+ */
+ public static int hammingDistance(final String s1, final String s2) {
+ if (s1.length() != s2.length()) {
+ throw new IllegalArgumentException("Attempted to determine Hamming distance of strings with differing lengths. " +
+ "The first string has length " + s1.length() + " and the second string has length " + s2.length() + ".");
+ }
+ int measuredDistance = 0;
+ for (int i = 0;i < s1.length();i++) {
+ if (s1.charAt(i) != s2.charAt(i)) {
+ measuredDistance++;
+ }
+ }
+ return measuredDistance;
+ }
+
+ /**
+ * Determines if two strings s1 and s2 are within maxHammingDistance of each other using the Hamming distance metric.
+ * Since Hamming distance is not defined for strings of differing lengths, we throw an exception if
+ * the two strings are of different lengths. Hamming distance is case sensitive and does not have any
+ * special treatment for DNA.
+ *
+ * @param s1 The first string to compare
+ * @param s2 The second string to compare. Note that swapping s1 and s2 does not change the value returned.
+ * @param maxHammingDistance The largest Hamming distance the strings can have for this function to return true.
+ * @return true if the two strings are within maxHammingDistance of each other, false otherwise.
+ * @throws IllegalArgumentException If the two strings have differing lengths.
+ */
+ public static boolean isWithinHammingDistance(final String s1, final String s2, final int maxHammingDistance) {
+ if (s1.length() != s2.length()) {
+ throw new IllegalArgumentException("Attempted to determine if two strings of different length were within a specified edit distance.");
+ }
+ int measuredDistance = 0;
+ for (int i = 0;i < s1.length();i++) {
+ if (s1.charAt(i) != s2.charAt(i)) {
+ measuredDistance++;
+ // If the measuredDistance is larger than the maxHammingDistance we can short circuit and return
+ // false, there is no need to continue evaluating the distance.
+ if (measuredDistance > maxHammingDistance) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
}
diff --git a/src/main/java/htsjdk/samtools/util/TestUtil.java b/src/main/java/htsjdk/samtools/util/TestUtil.java
index bbdf464..fd840d1 100644
--- a/src/main/java/htsjdk/samtools/util/TestUtil.java
+++ b/src/main/java/htsjdk/samtools/util/TestUtil.java
@@ -50,8 +50,9 @@ public class TestUtil {
}
/**
- * @deprecated Use properly spelled method.
+ * @deprecated Use the properly spelled method {@link #getTempDirectory} instead.
*/
+ @Deprecated
public static File getTempDirecory(final String prefix, final String suffix) {
return getTempDirectory(prefix, suffix);
}
diff --git a/src/main/java/htsjdk/tribble/bed/BEDCodec.java b/src/main/java/htsjdk/tribble/bed/BEDCodec.java
index 62d202c..ea1e889 100644
--- a/src/main/java/htsjdk/tribble/bed/BEDCodec.java
+++ b/src/main/java/htsjdk/tribble/bed/BEDCodec.java
@@ -23,6 +23,7 @@
*/
package htsjdk.tribble.bed;
+import htsjdk.tribble.AbstractFeatureReader;
import htsjdk.tribble.AsciiFeatureCodec;
import htsjdk.tribble.annotation.Strand;
import htsjdk.tribble.index.tabix.TabixFormat;
@@ -40,6 +41,9 @@ import java.util.regex.Pattern;
*/
public class BEDCodec extends AsciiFeatureCodec<BEDFeature> {
+ /** Default extension for BED files. */
+ public static final String BED_EXTENSION = ".bed";
+
private static final Pattern SPLIT_PATTERN = Pattern.compile("\\t|( +)");
private final int startOffsetValue;
@@ -197,7 +201,13 @@ public class BEDCodec extends AsciiFeatureCodec<BEDFeature> {
@Override
public boolean canDecode(final String path) {
- return path.toLowerCase().endsWith(".bed");
+ final String toDecode;
+ if (AbstractFeatureReader.hasBlockCompressedExtension(path)) {
+ toDecode = path.substring(0, path.lastIndexOf("."));
+ } else {
+ toDecode = path;
+ }
+ return toDecode.toLowerCase().endsWith(BED_EXTENSION);
}
public int getStartOffset() {
diff --git a/src/main/java/htsjdk/tribble/index/AbstractIndex.java b/src/main/java/htsjdk/tribble/index/AbstractIndex.java
index 42bce73..47e31cc 100644
--- a/src/main/java/htsjdk/tribble/index/AbstractIndex.java
+++ b/src/main/java/htsjdk/tribble/index/AbstractIndex.java
@@ -343,13 +343,16 @@ public abstract class AbstractIndex implements MutableIndex {
}
@Override
+ public void write(final File idxFile) throws IOException {
+ try(final LittleEndianOutputStream idxStream = new LittleEndianOutputStream(new BufferedOutputStream(new FileOutputStream(idxFile)))) {
+ write(idxStream);
+ }
+ }
+
+ @Override
public void writeBasedOnFeatureFile(final File featureFile) throws IOException {
if (!featureFile.isFile()) return;
- final LittleEndianOutputStream idxStream =
- new LittleEndianOutputStream(new BufferedOutputStream(new FileOutputStream(Tribble.indexFile(featureFile))));
- write(idxStream);
- idxStream.close();
-
+ write(Tribble.indexFile(featureFile));
}
public void read(final LittleEndianInputStream dis) throws IOException {
diff --git a/src/main/java/htsjdk/tribble/index/Index.java b/src/main/java/htsjdk/tribble/index/Index.java
index 252bc95..ca6cc60 100644
--- a/src/main/java/htsjdk/tribble/index/Index.java
+++ b/src/main/java/htsjdk/tribble/index/Index.java
@@ -70,6 +70,14 @@ public interface Index {
public void write(LittleEndianOutputStream stream) throws IOException;
/**
+ * Writes the index into a file.
+ *
+ * @param idxFile Where to write the index.
+ * @throws IOException if the index is unable to write to the specified file
+ */
+ public void write(final File idxFile) throws IOException;
+
+ /**
* Write an appropriately named and located Index file based on the name and location of the featureFile.
* If featureFile is not a normal file, the index will silently not be written.
* @param featureFile
diff --git a/src/main/java/htsjdk/tribble/index/IndexCreator.java b/src/main/java/htsjdk/tribble/index/IndexCreator.java
index 9b03d44..c90ec9f 100644
--- a/src/main/java/htsjdk/tribble/index/IndexCreator.java
+++ b/src/main/java/htsjdk/tribble/index/IndexCreator.java
@@ -23,6 +23,7 @@
*/
package htsjdk.tribble.index;
+import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.tribble.Feature;
/**
@@ -45,6 +46,12 @@ public interface IndexCreator {
* @return an index object
*/
public Index finalizeIndex(long finalFilePosition);
+
+ /**
+ * Set the sequence dictionary for the index. Default implementation does nothing.
+ * @param dict the dictionary to add to the index.
+ */
+ public default void setIndexSequenceDictionary(final SAMSequenceDictionary dict) { }
}
diff --git a/src/main/java/htsjdk/tribble/index/IndexFactory.java b/src/main/java/htsjdk/tribble/index/IndexFactory.java
index a588220..3cd1b79 100644
--- a/src/main/java/htsjdk/tribble/index/IndexFactory.java
+++ b/src/main/java/htsjdk/tribble/index/IndexFactory.java
@@ -41,16 +41,13 @@ import htsjdk.tribble.index.tabix.TabixIndex;
import htsjdk.tribble.index.tabix.TabixIndexCreator;
import htsjdk.tribble.readers.PositionalBufferedStream;
import htsjdk.tribble.util.LittleEndianInputStream;
-import htsjdk.tribble.util.LittleEndianOutputStream;
import htsjdk.tribble.util.ParsingUtils;
import htsjdk.tribble.util.TabixUtils;
import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Constructor;
@@ -288,18 +285,11 @@ public class IndexFactory {
* @param idx
* @param idxFile
* @throws IOException
+ * @deprecated use {@link Index#write(File)} instead
*/
+ @Deprecated
public static void writeIndex(final Index idx, final File idxFile) throws IOException {
- LittleEndianOutputStream stream = null;
- try {
- stream = new LittleEndianOutputStream(new BufferedOutputStream(new FileOutputStream(idxFile)));
- idx.write(stream);
- }
- finally {
- if(stream != null) {
- stream.close();
- }
- }
+ idx.write(idxFile);
}
/**
diff --git a/src/main/java/htsjdk/tribble/index/TribbleIndexCreator.java b/src/main/java/htsjdk/tribble/index/TribbleIndexCreator.java
index fc42818..f7385e8 100644
--- a/src/main/java/htsjdk/tribble/index/TribbleIndexCreator.java
+++ b/src/main/java/htsjdk/tribble/index/TribbleIndexCreator.java
@@ -23,15 +23,31 @@
*/
package htsjdk.tribble.index;
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+
import java.util.LinkedHashMap;
/**
* Base class for Tribble-specific index creators.
*/
public abstract class TribbleIndexCreator implements IndexCreator {
+ // a constant we use for marking sequence dictionary entries in the Tribble index property list
+ private static final String SEQUENCE_DICTIONARY_PROPERTY_PREDICATE = "DICT:";
+
protected LinkedHashMap<String, String> properties = new LinkedHashMap<String, String>();
public void addProperty(final String key, final String value) {
properties.put(key, value);
}
+
+ /** Set the sequence dictionary entries for the index property list. */
+ @Override
+ public void setIndexSequenceDictionary(final SAMSequenceDictionary dict) {
+ for (final SAMSequenceRecord seq : dict.getSequences()) {
+ final String contig = SEQUENCE_DICTIONARY_PROPERTY_PREDICATE + seq.getSequenceName();
+ final String length = String.valueOf(seq.getSequenceLength());
+ addProperty(contig,length);
+ }
+ }
}
diff --git a/src/main/java/htsjdk/tribble/index/tabix/TabixIndex.java b/src/main/java/htsjdk/tribble/index/tabix/TabixIndex.java
index 9ab05d4..044cefe 100644
--- a/src/main/java/htsjdk/tribble/index/tabix/TabixIndex.java
+++ b/src/main/java/htsjdk/tribble/index/tabix/TabixIndex.java
@@ -201,13 +201,10 @@ public class TabixIndex implements Index {
*
* @param tabixFile Where to write the index.
*/
- public void write(final File tabixFile) {
- final LittleEndianOutputStream los = new LittleEndianOutputStream(new BlockCompressedOutputStream(tabixFile));
- try {
+ @Override
+ public void write(final File tabixFile) throws IOException {
+ try(final LittleEndianOutputStream los = new LittleEndianOutputStream(new BlockCompressedOutputStream(tabixFile))) {
write(los);
- los.close();
- } catch (final IOException e) {
- throw new TribbleException("Exception writing " + tabixFile.getAbsolutePath(), e);
}
}
diff --git a/src/main/java/htsjdk/variant/variantcontext/GenotypeJEXLContext.java b/src/main/java/htsjdk/variant/variantcontext/GenotypeJEXLContext.java
index cda97ab..8d2cd10 100644
--- a/src/main/java/htsjdk/variant/variantcontext/GenotypeJEXLContext.java
+++ b/src/main/java/htsjdk/variant/variantcontext/GenotypeJEXLContext.java
@@ -26,16 +26,16 @@ public class GenotypeJEXLContext extends VariantJEXLContext {
attributes.put("g", (Genotype g) -> g);
attributes.put(VCFConstants.GENOTYPE_KEY, Genotype::getGenotypeString);
- attributes.put("isHom", (Genotype g) -> g.isHom() ? "1" : "0");
- attributes.put("isHomRef", (Genotype g) -> g.isHomRef() ? "1" : "0");
- attributes.put("isHet", (Genotype g) -> g.isHet() ? "1" : "0");
- attributes.put("isHomVar", (Genotype g) -> g.isHomVar() ? "1" : "0");
- attributes.put("isCalled", (Genotype g) -> g.isCalled() ? "1" : "0");
- attributes.put("isNoCall", (Genotype g) -> g.isNoCall() ? "1" : "0");
- attributes.put("isMixed", (Genotype g) -> g.isMixed() ? "1" : "0");
- attributes.put("isAvailable", (Genotype g) -> g.isAvailable() ? "1" : "0");
- attributes.put("isPassFT", (Genotype g) -> g.isFiltered() ? "0" : "1");
- attributes.put(VCFConstants.GENOTYPE_FILTER_KEY, (Genotype g) -> g.isFiltered()? g.getFilters() : "PASS");
+ attributes.put("isHom", (Genotype g) -> g.isHom() ? true_string : false_string);
+ attributes.put("isHomRef", (Genotype g) -> g.isHomRef() ? true_string : false_string);
+ attributes.put("isHet", (Genotype g) -> g.isHet() ? true_string : false_string);
+ attributes.put("isHomVar", (Genotype g) -> g.isHomVar() ? true_string : false_string);
+ attributes.put("isCalled", (Genotype g) -> g.isCalled() ? true_string : false_string);
+ attributes.put("isNoCall", (Genotype g) -> g.isNoCall() ? true_string : false_string);
+ attributes.put("isMixed", (Genotype g) -> g.isMixed() ? true_string : false_string);
+ attributes.put("isAvailable", (Genotype g) -> g.isAvailable() ? true_string : false_string);
+ attributes.put("isPassFT", (Genotype g) -> g.isFiltered() ? false_string : true_string);
+ attributes.put(VCFConstants.GENOTYPE_FILTER_KEY, (Genotype g) -> g.isFiltered()? g.getFilters() : VCFConstants.PASSES_FILTERS_v4);
attributes.put(VCFConstants.GENOTYPE_QUALITY_KEY, Genotype::getGQ);
}
@@ -44,14 +44,15 @@ public class GenotypeJEXLContext extends VariantJEXLContext {
this.g = g;
}
+ @Override
public Object get(String name) {
//should matching genotype attributes always supersede vc?
if ( attributes.containsKey(name) ) { // dynamic resolution of name -> value via map
return attributes.get(name).get(g);
} else if ( g.hasAnyAttribute(name) ) {
return g.getAnyAttribute(name);
- } else if ( g.getFilters().contains(name) ) {
- return "1";
+ } else if ( g.getFilters() != null && g.getFilters().contains(name) ) {
+ return true_string;
} else
return super.get(name);
}
diff --git a/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java b/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java
index a7a871f..b8e13c7 100644
--- a/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java
+++ b/src/main/java/htsjdk/variant/variantcontext/JEXLMap.java
@@ -2,6 +2,7 @@ package htsjdk.variant.variantcontext;
import htsjdk.variant.variantcontext.VariantContextUtils.JexlVCMatchExp;
import org.apache.commons.jexl2.JexlContext;
+import org.apache.commons.jexl2.JexlException;
import org.apache.commons.jexl2.MapContext;
import java.util.Collection;
@@ -11,12 +12,8 @@ import java.util.Map;
import java.util.Set;
/**
- * this is an implementation of a Map of JexlVCMatchExp to true or false values. It lazy initializes each value
- * as requested to save as much processing time as possible.
- *
- * Compatible with JEXL 1.1 (this code will be easier if we move to 2.0, all of the functionality can go into the
- * JexlContext's get()
- *
+ * This is an implementation of a Map of {@link JexlVCMatchExp} to true or false values.
+ * It lazily initializes each value as requested to save as much processing time as possible.
*/
class JEXLMap implements Map<JexlVCMatchExp, Boolean> {
@@ -27,137 +24,148 @@ class JEXLMap implements Map<JexlVCMatchExp, Boolean> {
// our context
private JexlContext jContext = null;
- // our mapping from JEXLVCMatchExp to Booleans, which will be set to NULL for previously uncached JexlVCMatchExp
+ /**
+ * Our mapping from {@link JexlVCMatchExp} to {@link Boolean}s, which will be set to {@code null}
+ * for previously un-cached {@link JexlVCMatchExp}.
+ */
private Map<JexlVCMatchExp,Boolean> jexl;
-
- public JEXLMap(Collection<JexlVCMatchExp> jexlCollection, VariantContext vc, Genotype g) {
+ public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc, final Genotype g) {
+ initialize(jexlCollection);
this.vc = vc;
this.g = g;
- initialize(jexlCollection);
}
- public JEXLMap(Collection<JexlVCMatchExp> jexlCollection, VariantContext vc) {
+ public JEXLMap(final Collection<JexlVCMatchExp> jexlCollection, final VariantContext vc) {
this(jexlCollection, vc, null);
}
- private void initialize(Collection<JexlVCMatchExp> jexlCollection) {
- jexl = new HashMap<JexlVCMatchExp,Boolean>();
- for (JexlVCMatchExp exp: jexlCollection) {
- jexl.put(exp, null);
- }
- }
-
/**
- * create the internal JexlContext, only when required. This code is where new JEXL context variables
- * should get added.
+ * Note: due to laziness, this accessor actually modifies the instance by possibly forcing evaluation of a Jexl expression.
*
+ * @throws IllegalArgumentException when {@code o} is {@code null} or
+ * when any of the JexlVCMatchExp (i.e. keys) contains invalid Jexl expressions.
*/
- private void createContext() {
- if ( vc == null ) {
- jContext = new MapContext(Collections.emptyMap());
- }
- else if (g == null) {
- jContext = new VariantJEXLContext(vc);
+ public Boolean get(Object o) {
+ if (o == null) {
+ throw new IllegalArgumentException("Query key is null");
}
- else {
- jContext = new GenotypeJEXLContext(vc, g);
+
+ // if we've already determined the value, return it
+ if (jexl.containsKey(o) && jexl.get(o) != null) {
+ return jexl.get(o);
}
- }
- /**
- * @return the size of the internal data structure
- */
- public int size() {
- return jexl.size();
+ // otherwise cast the expression and try again
+ final JexlVCMatchExp e = (JexlVCMatchExp) o;
+ evaluateExpression(e);
+ return jexl.get(e);
}
/**
- * @return true if we're empty
- */
- public boolean isEmpty() { return this.jexl.isEmpty(); }
-
- /**
* do we contain the specified key
* @param o the key
* @return true if we have a value for that key
*/
public boolean containsKey(Object o) { return jexl.containsKey(o); }
- public Boolean get(Object o) {
- // if we've already determined the value, return it
- if (jexl.containsKey(o) && jexl.get(o) != null) return jexl.get(o);
-
- // try and cast the expression
- JexlVCMatchExp e = (JexlVCMatchExp) o;
- evaluateExpression(e);
- return jexl.get(e);
- }
-
- /**
- * get the keyset of map
- * @return a set of keys of type JexlVCMatchExp
- */
public Set<JexlVCMatchExp> keySet() {
return jexl.keySet();
}
/**
- * get all the values of the map. This is an expensive call, since it evaluates all keys that haven't
- * been evaluated yet. This is fine if you truely want all the keys, but if you only want a portion, or know
+ * Get all the values of the map, i.e. the {@link Boolean} values.
+ * This is an expensive call, since it evaluates all keys that haven't been evaluated yet.
+ * This is fine if you truly want all the keys, but if you only want a portion, or know
* the keys you want, you would be better off using get() to get them by name.
+ *
+ * Note: due to laziness, this accessor actually modifies the instance by possibly forcing evaluation of a Jexl expression.
+ *
* @return a collection of boolean values, representing the results of all the variants evaluated
+ *
+ * @throws IllegalArgumentException when any of the JexlVCMatchExp (i.e. keys) contains invalid Jexl expressions.
*/
public Collection<Boolean> values() {
- // this is an expensive call
- for (JexlVCMatchExp exp : jexl.keySet())
- if (jexl.get(exp) == null)
+ for (final JexlVCMatchExp exp : jexl.keySet()) {
+ if (jexl.get(exp) == null) {
evaluateExpression(exp);
+ }
+ }
return jexl.values();
}
/**
- * evaulate a JexlVCMatchExp's expression, given the current context (and setup the context if it's null)
- * @param exp the JexlVCMatchExp to evaluate
+ * @return the number of keys, i.e. {@link JexlVCMatchExp}s, held by this mapping.
+ */
+ public int size() {
+ return jexl.size();
+ }
+
+ public boolean isEmpty() { return this.jexl.isEmpty(); }
+
+ public Boolean put(JexlVCMatchExp jexlVCMatchExp, Boolean aBoolean) {
+ return jexl.put(jexlVCMatchExp, aBoolean);
+ }
+
+ public void putAll(Map<? extends JexlVCMatchExp, ? extends Boolean> map) {
+ jexl.putAll(map);
+ }
+
+ /**
+ * Initializes all keys with null values indicating that they have not yet been evaluated.
+ * The actual value will be computed only when the key is requested via {@link #get(Object)} or {@link #values()}.
*/
- private void evaluateExpression(JexlVCMatchExp exp) {
+ private void initialize(Collection<JexlVCMatchExp> jexlCollection) {
+ jexl = new HashMap<>();
+ for (final JexlVCMatchExp exp: jexlCollection) {
+ jexl.put(exp, null);
+ }
+ }
+
+ /**
+ * Evaluates a {@link JexlVCMatchExp}'s expression, given the current context (setting up the context first if it's {@code null}).
+ *
+ * @param exp the {@link JexlVCMatchExp} to evaluate
+ *
+ * @throws IllegalArgumentException when {@code exp} is {@code null}, or
+ * when the Jexl expression in {@code exp} fails to evaluate against the JexlContext
+ * constructed with the input VC or genotype.
+ */
+ private void evaluateExpression(final JexlVCMatchExp exp) {
// if the context is null, we need to create it to evaluate the JEXL expression
- if (this.jContext == null) createContext();
+ if (this.jContext == null) {
+ createContext();
+ }
+
try {
final Boolean value = (Boolean) exp.exp.evaluate(jContext);
// treat errors as no match
jexl.put(exp, value == null ? false : value);
- } catch (Exception e) {
+ } catch (final JexlException.Variable e) {
// if exception happens because variable is undefined (i.e. field in expression is not present), evaluate to FALSE
- // todo - might be safer if we explicitly checked for an exception type, but Apache's API doesn't seem to have that ability
- if (e.getMessage() != null && e.getMessage().contains("undefined variable"))
- jexl.put(exp,false);
- else
- throw new IllegalArgumentException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, (e.getMessage() == null ? "no message" : e.getMessage())));
+ jexl.put(exp,false);
+ } catch (final JexlException e) {
+ // todo - might be better if no exception were caught here, letting the user decide how to deal with it; note this will propagate to get() and values()
+ throw new IllegalArgumentException(String.format("Invalid JEXL expression detected for %s", exp.name), e);
}
}
/**
- * helper function: adds the list of attributes to the information map we're building
- * @param infoMap the map
- * @param attributes the attributes
+ * Create the internal JexlContext, only when required.
+ * This code is where new JEXL context variables should get added.
*/
- private static void addAttributesToMap(Map<String, Object> infoMap, Map<String, ?> attributes ) {
- for (Entry<String, ?> e : attributes.entrySet()) {
- infoMap.put(e.getKey(), String.valueOf(e.getValue()));
+ private void createContext() {
+ if (vc == null) {
+ jContext = new MapContext(Collections.emptyMap());
+ } else if (g == null) {
+ jContext = new VariantJEXLContext(vc);
+ } else {
+ jContext = new GenotypeJEXLContext(vc, g);
}
}
- public Boolean put(JexlVCMatchExp jexlVCMatchExp, Boolean aBoolean) {
- return jexl.put(jexlVCMatchExp,aBoolean);
- }
-
- public void putAll(Map<? extends JexlVCMatchExp, ? extends Boolean> map) {
- jexl.putAll(map);
- }
-
// //////////////////////////////////////////////////////////////////////////////////////
- // The Following are unsupported at the moment
+ // The Following are unsupported at the moment (date: 2016/08/18)
// //////////////////////////////////////////////////////////////////////////////////////
// this doesn't make much sense to implement, boolean doesn't offer too much variety to deal
diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java b/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java
index ac4c43c..96eaa64 100644
--- a/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java
+++ b/src/main/java/htsjdk/variant/variantcontext/VariantContextUtils.java
@@ -202,7 +202,7 @@ public class VariantContextUtils {
}
/**
- * A simple but common wrapper for matching VariantContext objects using JEXL expressions
+ * A simple but common wrapper for matching {@link VariantContext} objects using JEXL expressions
*/
public static class JexlVCMatchExp {
public String name;
@@ -212,8 +212,12 @@ public class VariantContextUtils {
* Create a new matcher expression with name and JEXL expression exp
* @param name name
* @param exp expression
+ * @throws IllegalArgumentException if either argument is {@code null}
*/
public JexlVCMatchExp(String name, Expression exp) {
+ if (name == null) { throw new IllegalArgumentException("Cannot create JexlVCMatchExp with null name."); }
+ if (exp == null) { throw new IllegalArgumentException("Cannot create JexlVCMatchExp with null expression."); }
+
this.name = name;
this.exp = exp;
}
@@ -258,7 +262,6 @@ public class VariantContextUtils {
return initializeMatchExps(names.toArray(nameArray), exps.toArray(expArray));
}
-
/**
* Method for creating JexlVCMatchExp from input walker arguments mapping from names to exps. These two arrays contain
* the name associated with each JEXL expression. initializeMatchExps will parse each expression and return
@@ -288,51 +291,52 @@ public class VariantContextUtils {
}
/**
- * Returns true if exp match VC. See {@link #match(VariantContext, Collection)} for full docs.
+ * Returns true if {@code exp} matches {@code vc}.
+ * See {@link #match(VariantContext, Collection)} for full docs.
* @param vc variant context
* @param exp expression
- * @return true if there is a match
+ * @return true if there is a match
*/
public static boolean match(VariantContext vc, JexlVCMatchExp exp) {
return match(vc, Collections.singletonList(exp)).get(exp);
}
/**
- * Matches each JexlVCMatchExp exp against the data contained in vc, and returns a map from these
- * expressions to true (if they matched) or false (if they didn't). This the best way to apply JEXL
- * expressions to VariantContext records. Use initializeMatchExps() to create the list of JexlVCMatchExp
- * expressions.
+ * Matches each {@link JexlVCMatchExp} exp against the data contained in {@code vc},
+ * and returns a map from these expressions to {@code true} (if they matched) or {@code false} (if they didn't).
+ * This is the best way to apply JEXL expressions to {@link VariantContext} records.
+ * Use one of the {@code initializeMatchExps()} overloads to create the list of {@link JexlVCMatchExp} expressions.
*
- * @param vc variant context
- * @param exps expressions
- * @return true if there is a match
+ * @param vc variant context
+ * @param exps expressions
+ * @return a map from each expression to {@code true} if it matched, {@code false} otherwise
*/
public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Collection<JexlVCMatchExp> exps) {
return new JEXLMap(exps,vc);
-
}
/**
- * Returns true if exp match VC/g. See {@link #match(VariantContext, Collection)} for full docs.
- * @param vc variant context
- * @param g genotype
+ * Returns true if {@code exp} matches {@code vc}, {@code g}.
+ * See {@link #match(VariantContext, Genotype, Collection)} for full docs.
+ * @param vc variant context
+ * @param g genotype
* @param exp expression
- * @return true if there is a match
+ * @return true if there is a match
*/
public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp) {
return match(vc,g, Collections.singletonList(exp)).get(exp);
}
/**
- * Matches each JexlVCMatchExp exp against the data contained in vc/g, and returns a map from these
- * expressions to true (if they matched) or false (if they didn't). This the best way to apply JEXL
- * expressions to VariantContext records/genotypes. Use initializeMatchExps() to create the list of JexlVCMatchExp
- * expressions.
+ * Matches each {@link JexlVCMatchExp} exp against the data contained in {@code vc}, {@code g},
+ * and returns a map from these expressions to {@code true} (if they matched) or {@code false} (if they didn't).
+ * This is the best way to apply JEXL expressions to {@link VariantContext} records.
+ * Use one of the {@code initializeMatchExps()} overloads to create the list of {@link JexlVCMatchExp} expressions.
*
- * @param vc variant context
- * @param g genotype
- * @param exps expressions
- * @return true if there is a match
+ * @param vc variant context
+ * @param g genotype
+ * @param exps expressions
+ * @return a map from each expression to {@code true} if it matched, {@code false} otherwise
*/
public static Map<JexlVCMatchExp, Boolean> match(VariantContext vc, Genotype g, Collection<JexlVCMatchExp> exps) {
return new JEXLMap(exps,vc,g);
@@ -361,7 +365,6 @@ public class VariantContextUtils {
* @throws IllegalArgumentException if vc is monomorphic, not a SNP or not bi-allelic.
*/
-
static public boolean isTransition(final VariantContext vc) throws IllegalArgumentException {
final byte refAllele = vc.getReference().getBases()[0];
final Collection<Allele> altAlleles = vc.getAlternateAlleles();
@@ -386,7 +389,6 @@ public class VariantContextUtils {
|| (refAllele == 'T' && altAllele == 'C');
}
-
/**
* Returns a newly allocated VC that is the same as VC, but without genotypes
* @param vc variant context
diff --git a/src/main/java/htsjdk/variant/variantcontext/VariantJEXLContext.java b/src/main/java/htsjdk/variant/variantcontext/VariantJEXLContext.java
index ee23229..493499e 100644
--- a/src/main/java/htsjdk/variant/variantcontext/VariantJEXLContext.java
+++ b/src/main/java/htsjdk/variant/variantcontext/VariantJEXLContext.java
@@ -48,6 +48,9 @@ class VariantJEXLContext implements JexlContext {
// our stored variant context
private VariantContext vc;
+ static final String true_string = "1";
+ static final String false_string = "0";
+
private interface AttributeGetter {
public Object get(VariantContext vc);
}
@@ -62,7 +65,7 @@ class VariantJEXLContext implements JexlContext {
attributes.put("QUAL", (VariantContext vc) -> -10 * vc.getLog10PError());
attributes.put("ALLELES", VariantContext::getAlleles);
attributes.put("N_ALLELES", VariantContext::getNAlleles);
- attributes.put("FILTER", (VariantContext vc) -> vc.isFiltered() ? "1" : "0");
+ attributes.put("FILTER", (VariantContext vc) -> vc.isFiltered() ? true_string : false_string);
attributes.put("homRefCount", VariantContext::getHomRefCount);
attributes.put("hetCount", VariantContext::getHetCount);
@@ -80,11 +83,9 @@ class VariantJEXLContext implements JexlContext {
} else if ( vc.hasAttribute(name)) {
result = vc.getAttribute(name);
} else if ( vc.getFilters().contains(name) ) {
- result = "1";
+ result = true_string;
}
- //System.out.printf("dynamic lookup %s => %s%n", name, result);
-
return result;
}
@@ -92,11 +93,10 @@ class VariantJEXLContext implements JexlContext {
return get(name) != null;
}
+ /**
+ * @throws UnsupportedOperationException always; this method is not supported on this context
+ */
public void set(String name, Object value) {
throw new UnsupportedOperationException("remove() not supported on a VariantJEXLContext");
}
}
-
-
-
-
diff --git a/src/main/java/htsjdk/variant/variantcontext/writer/IndexingVariantContextWriter.java b/src/main/java/htsjdk/variant/variantcontext/writer/IndexingVariantContextWriter.java
index 5f07153..6a77f6b 100644
--- a/src/main/java/htsjdk/variant/variantcontext/writer/IndexingVariantContextWriter.java
+++ b/src/main/java/htsjdk/variant/variantcontext/writer/IndexingVariantContextWriter.java
@@ -26,14 +26,13 @@
package htsjdk.variant.variantcontext.writer;
import htsjdk.samtools.SAMSequenceDictionary;
-import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.util.LocationAware;
import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.tribble.index.DynamicIndexCreator;
import htsjdk.tribble.index.Index;
import htsjdk.tribble.index.IndexCreator;
import htsjdk.tribble.index.IndexFactory;
-import htsjdk.tribble.index.TribbleIndexCreator;
+import htsjdk.samtools.util.PositionalOutputStream;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeader;
@@ -139,9 +138,7 @@ abstract class IndexingVariantContextWriter implements VariantContextWriter {
// close the index stream (keep it separate to help debugging efforts)
if (indexer != null) {
- if (indexer instanceof TribbleIndexCreator) {
- setIndexSequenceDictionary((TribbleIndexCreator)indexer, refDict);
- }
+ indexer.setIndexSequenceDictionary(refDict);
final Index index = indexer.finalizeIndex(locationSource.getPosition());
index.writeBasedOnFeatureFile(location);
}
@@ -180,51 +177,4 @@ abstract class IndexingVariantContextWriter implements VariantContextWriter {
protected static final String writerName(final File location, final OutputStream stream) {
return location == null ? stream.toString() : location.getAbsolutePath();
}
-
- // a constant we use for marking sequence dictionary entries in the Tribble index property list
- private static final String SequenceDictionaryPropertyPredicate = "DICT:";
-
- private static void setIndexSequenceDictionary(final TribbleIndexCreator indexCreator, final SAMSequenceDictionary dict) {
- for (final SAMSequenceRecord seq : dict.getSequences()) {
- final String contig = SequenceDictionaryPropertyPredicate + seq.getSequenceName();
- final String length = String.valueOf(seq.getSequenceLength());
- indexCreator.addProperty(contig,length);
- }
- }
-}
-
-/**
- * Wraps output stream in a manner which keeps track of the position within the file and allowing writes
- * at arbitrary points
- */
-final class PositionalOutputStream extends OutputStream implements LocationAware
-{
- private final OutputStream out;
- private long position = 0;
-
- public PositionalOutputStream(final OutputStream out) {
- this.out = out;
- }
-
- public final void write(final byte[] bytes) throws IOException {
- write(bytes, 0, bytes.length);
- }
-
- public final void write(final byte[] bytes, final int startIndex, final int numBytes) throws IOException {
- position += numBytes;
- out.write(bytes, startIndex, numBytes);
- }
-
- public final void write(final int c) throws IOException {
- position++;
- out.write(c);
- }
-
- public final long getPosition() { return position; }
-
- @Override
- public void close() throws IOException {
- super.close();
- out.close();
- }
}
diff --git a/src/test/java/htsjdk/samtools/BAMFileWriterTest.java b/src/test/java/htsjdk/samtools/BAMFileWriterTest.java
index 235f23b..a8944d0 100644
--- a/src/test/java/htsjdk/samtools/BAMFileWriterTest.java
+++ b/src/test/java/htsjdk/samtools/BAMFileWriterTest.java
@@ -50,7 +50,7 @@ public class BAMFileWriterTest {
* Parse some SAM text into a SAM object, then write as BAM. If SAM text was presorted, then the BAM file can
* be read and compared with the SAM object.
*
- * @param samRecordSetBuilder source of input SAMFileReader to be written and compared with
+ * @param samRecordSetBuilder source of input {@link SamReader} to be written and compared with
* @param sortOrder How the BAM should be written
* @param presorted If true, samText is in the order specified by sortOrder
*/
diff --git a/src/test/java/htsjdk/samtools/CRAMFileReaderTest.java b/src/test/java/htsjdk/samtools/CRAMFileReaderTest.java
index 8c610bd..3fcb3bd 100644
--- a/src/test/java/htsjdk/samtools/CRAMFileReaderTest.java
+++ b/src/test/java/htsjdk/samtools/CRAMFileReaderTest.java
@@ -27,6 +27,7 @@ import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.reference.InMemoryReferenceSequenceFile;
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.util.Log;
+import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
@@ -42,13 +43,18 @@ import java.util.Arrays;
public class CRAMFileReaderTest {
private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools");
+ private static final File CRAM_WITH_CRAI = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
+ private static final File CRAM_WITHOUT_CRAI = new File(TEST_DATA_DIR, "cram_query_sorted.cram");
+ private static final ReferenceSource REFERENCE = createReferenceSource();
+ private static final File INDEX_FILE = new File(TEST_DATA_DIR, "cram_with_crai_index.cram.crai");
+
@BeforeClass
public void initClass() {
Log.setGlobalLogLevel(Log.LogLevel.ERROR);
}
- private ReferenceSource createReferenceSource() {
+ private static ReferenceSource createReferenceSource() {
byte[] refBases = new byte[10 * 10];
Arrays.fill(refBases, (byte) 'A');
InMemoryReferenceSequenceFile rsf = new InMemoryReferenceSequenceFile();
@@ -60,19 +66,17 @@ public class CRAMFileReaderTest {
@Test(description = "Test CRAMReader 1 reference required", expectedExceptions = IllegalStateException.class)
public void testCRAMReader1_ReferenceRequired() {
- File file = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
InputStream bis = null;
// assumes that reference_fasta property is not set and the download service is not enabled
- new CRAMFileReader(file, bis);
+ new CRAMFileReader(CRAM_WITH_CRAI, bis);
}
// constructor 2: CRAMFileReader(final File cramFile, final InputStream inputStream, final ReferenceSource referenceSource)
@Test(description = "Test CRAMReader 2 reference required", expectedExceptions = IllegalArgumentException.class)
public void testCRAMReader2ReferenceRequired() {
- File file = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
InputStream bis = null;
- new CRAMFileReader(file, bis, null);
+ new CRAMFileReader(CRAM_WITH_CRAI, bis, null);
}
@Test(description = "Test CRAMReader 2 input required", expectedExceptions = IllegalArgumentException.class)
@@ -82,31 +86,66 @@ public class CRAMFileReaderTest {
new CRAMFileReader(file, bis, createReferenceSource());
}
+ @Test
+ public void testCRAMReader2_ShouldAutomaticallyFindCRAMIndex() {
+ InputStream inputStream = null;
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, inputStream, REFERENCE);
+ reader.getIndex();
+ Assert.assertTrue(reader.hasIndex(), "Can't find CRAM existing index.");
+ }
+
+ @Test(expectedExceptions = SAMException.class)
+ public void testCRAMReader2_WithoutCRAMIndex() {
+ InputStream inputStream = null;
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITHOUT_CRAI, inputStream, REFERENCE);
+ reader.getIndex();
+ }
+
// constructor 3: CRAMFileReader(final File cramFile, final File indexFile, final ReferenceSource referenceSource)
@Test(description = "Test CRAMReader 3 reference required", expectedExceptions = IllegalArgumentException.class)
public void testCRAMReader3_RequiredReference() {
- File inputFile = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
File indexFile = null;
ReferenceSource refSource = null;
- new CRAMFileReader(inputFile, indexFile, refSource);
+ new CRAMFileReader(CRAM_WITH_CRAI, indexFile, refSource);
}
@Test(description = "Test CRAMReader 3 input required", expectedExceptions = IllegalArgumentException.class)
- public void testCRAMReader3_InputRequirted() {
+ public void testCRAMReader3_InputRequired() {
File inputFile = null;
File indexFile = null;
ReferenceSource refSource = null;
new CRAMFileReader(inputFile, indexFile, refSource);
}
+ @Test
+ public void testCRAMReader3_ShouldAutomaticallyFindCRAMIndex() {
+ File indexFile = null;
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, indexFile, REFERENCE);
+ reader.getIndex();
+ Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index.");
+ }
+
+ @Test
+ public void testCRAMReader3_ShouldUseCRAMIndex() {
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, INDEX_FILE, REFERENCE);
+ reader.getIndex();
+ Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index.");
+ }
+
+ @Test(expectedExceptions = SAMException.class)
+ public void testCRAMReader3_WithoutCRAMIndex() {
+ File indexFile = null;
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITHOUT_CRAI, indexFile, REFERENCE);
+ reader.getIndex();
+ }
+
// constructor 4: CRAMFileReader(final File cramFile, final ReferenceSource referenceSource)
@Test(description = "Test CRAMReader 4 reference required", expectedExceptions = IllegalArgumentException.class)
public void testCRAMReader4_ReferenceRequired() {
- File inputFile = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
ReferenceSource refSource = null;
- new CRAMFileReader(inputFile, refSource);
+ new CRAMFileReader(CRAM_WITH_CRAI, refSource);
}
@Test(description = "Test CRAMReader 4 input required", expectedExceptions = IllegalArgumentException.class)
@@ -115,6 +154,19 @@ public class CRAMFileReaderTest {
new CRAMFileReader(inputFile, createReferenceSource());
}
+ @Test
+ public void testCRAMReader4_ShouldAutomaticallyFindCRAMIndex() {
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, REFERENCE);
+ reader.getIndex();
+ Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index.");
+ }
+
+ @Test(expectedExceptions = SAMException.class)
+ public void testCRAMReader4_WithoutCRAMIndex() {
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITHOUT_CRAI, REFERENCE);
+ reader.getIndex();
+ }
+
// constructor 5: CRAMFileReader(final InputStream inputStream, final SeekableStream indexInputStream,
// final ReferenceSource referenceSource, final ValidationStringency validationStringency)
@Test(description = "Test CRAMReader 5 reference required", expectedExceptions = IllegalArgumentException.class)
@@ -146,7 +198,6 @@ public class CRAMFileReaderTest {
public void testCRAMReader6_InputRequired() throws IOException {
InputStream bis = null;
File file = null;
- ReferenceSource refSource = null;
new CRAMFileReader(bis, file, createReferenceSource(), ValidationStringency.STRICT);
}
@@ -154,10 +205,27 @@ public class CRAMFileReaderTest {
// final ValidationStringency validationStringency)
@Test(description = "Test CRAMReader 7 reference required", expectedExceptions = IllegalArgumentException.class)
public void testCRAMReader7_ReferenceRequired() throws IOException {
- InputStream bis = new ByteArrayInputStream(new byte[0]);
- File file = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
ReferenceSource refSource = null;
- new CRAMFileReader(file, file, refSource, ValidationStringency.STRICT);
+ new CRAMFileReader(CRAM_WITH_CRAI, CRAM_WITH_CRAI, refSource, ValidationStringency.STRICT);
}
+ @Test
+ public void testCRAMReader7_ShouldAutomaticallyFindCRAMIndex()throws IOException {
+ File indexFile = null;
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, indexFile, REFERENCE, ValidationStringency.STRICT);
+ Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index.");
+ }
+
+ @Test
+ public void testCRAMReader7_ShouldUseCRAMIndex() throws IOException {
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITH_CRAI, INDEX_FILE, REFERENCE, ValidationStringency.STRICT);
+ Assert.assertTrue(reader.hasIndex(), "Can't find existing CRAM index.");
+ }
+
+ @Test(expectedExceptions = SAMException.class)
+ public void testCRAMReader7_WithoutCRAMIndex() throws IOException {
+ File indexFile = null;
+ CRAMFileReader reader = new CRAMFileReader(CRAM_WITHOUT_CRAI, indexFile, REFERENCE, ValidationStringency.STRICT);
+ reader.getIndex();
+ }
}
diff --git a/src/test/java/htsjdk/samtools/SAMIntegerTagTest.java b/src/test/java/htsjdk/samtools/SAMIntegerTagTest.java
index 2d78a78..133062a 100644
--- a/src/test/java/htsjdk/samtools/SAMIntegerTagTest.java
+++ b/src/test/java/htsjdk/samtools/SAMIntegerTagTest.java
@@ -98,6 +98,52 @@ public class SAMIntegerTagTest {
Assert.assertEquals(rec.getIntegerAttribute(INTEGER_TAG).intValue(), 1);
}
+
+ @DataProvider
+ public Object[][] formatsAndValues(){
+ return new Object[][]{
+ new Object[]{"sam","Hello World!"},
+ new Object[]{"bam","Hello World!"},
+ new Object[]{"cram","Hello World!"},
+ new Object[]{"cram",""},
+ new Object[]{"bam",""},
+ new Object[]{"sam",""},
+ };
+ }
+ /**
+ * Should be able to write empty and non-empty strings
+ */
+ @Test(dataProvider = "formatsAndValues")
+ public void testWriteAndReadStrings(final String format,final String value) throws Exception {
+ final SAMRecord rec = createSamRecord();
+ rec.setAttribute(STRING_TAG, value);
+ writeAndReadSamRecord(format, rec);
+ Assert.assertEquals(rec.getStringAttribute(STRING_TAG),value);
+ }
+
+
+ @DataProvider
+ public Object[][] formatsAndValues2(){
+ return new Object[][]{
+ new Object[]{"sam",'a'},
+ new Object[]{"bam",'a'},
+ new Object[]{"cram",'a'},
+ new Object[]{"cram",null},
+ new Object[]{"bam",null},
+ new Object[]{"sam",null},
+ };
+ }
+ /**
+ * Should be able to write and read character values, including null
+ */
+ @Test(dataProvider = "formatsAndValues2")
+ public void testWriteAndReadCharacters(final String format,final Character value) throws Exception {
+ final SAMRecord rec = createSamRecord();
+ rec.setAttribute(STRING_TAG, value);
+ writeAndReadSamRecord(format, rec);
+ Assert.assertEquals(rec.getCharacterAttribute(STRING_TAG),value);
+ }
+
/**
* Should be an exception if a typed attribute call is made for the wrong type.
*/
diff --git a/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java b/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java
index a8f06e4..951ecee 100644
--- a/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java
+++ b/src/test/java/htsjdk/samtools/SAMRecordUnitTest.java
@@ -417,6 +417,19 @@ public class SAMRecordUnitTest {
Assert.assertFalse(SAMRecord.isAllowedAttributeValue(new Long(Integer.MIN_VALUE - 1L)));
}
+ @Test()
+ public void test_setAttribute_empty_string() {
+ final SAMFileHeader header = new SAMFileHeader();
+ final SAMRecord record = new SAMRecord(header);
+ Assert.assertNull(record.getStringAttribute(SAMTag.MD.name()));
+ record.setAttribute(SAMTag.MD.name(), "");
+ Assert.assertNotNull(record.getStringAttribute(SAMTag.MD.name()));
+ Assert.assertEquals(record.getStringAttribute(SAMTag.MD.name()),"");
+ record.setAttribute(SAMTag.MD.name(), null);
+ Assert.assertNull(record.getStringAttribute(SAMTag.MD.name()));
+ }
+
+
@Test(expectedExceptions = IllegalArgumentException.class)
public void test_setAttribute_unsigned_int_negative() {
SAMFileHeader header = new SAMFileHeader();
@@ -960,4 +973,69 @@ public class SAMRecordUnitTest {
SAMRecord.resolveNameFromIndex(1, null);
}
+ @Test
+ public void testReverseComplement() {
+ final SAMRecord rec = createTestSamRec();
+
+ rec.reverseComplement(Arrays.asList("Y1"), Arrays.asList("X1", "X2", "X3", "X4", "X5"), false);
+ Assert.assertEquals(rec.getReadString(), "GTGTGTGTGT");
+ Assert.assertEquals(rec.getBaseQualityString(), "IIIIIHHHHH");
+ Assert.assertEquals(rec.getByteArrayAttribute("X1"), new byte[] {5,4,3,2,1});
+ Assert.assertEquals(rec.getSignedShortArrayAttribute("X2"), new short[] {5,4,3,2,1});
+ Assert.assertEquals(rec.getSignedIntArrayAttribute("X3"), new int[] {5,4,3,2,1});
+ Assert.assertEquals(rec.getFloatArrayAttribute("X4"), new float[] {5.0f,4.0f,3.0f,2.0f,1.0f});
+ Assert.assertEquals(rec.getStringAttribute("Y1"), "GTTTTCTTTT");
+ }
+
+ /**
+ * Note that since strings are immutable the Y1 attribute, which is a String, is not reversed in the original even
+ * if an in-place reverse complement occurred. The bases and qualities are byte[] so they are reversed if in-place
+ * is true.
+ */
+ @DataProvider
+ public Object [][] reverseComplementData() {
+ return new Object[][]{
+ {false, "ACACACACAC", "HHHHHIIIII", "AAAAGAAAAC", new byte[] {1,2,3,4,5}, new short[] {1,2,3,4,5}, new int[] {1,2,3,4,5}, new float[] {1,2,3,4,5}},
+ {true, "GTGTGTGTGT", "IIIIIHHHHH", "AAAAGAAAAC", new byte[] {5,4,3,2,1}, new short[] {5,4,3,2,1}, new int[] {5,4,3,2,1}, new float[] {5,4,3,2,1}},
+ };
+ }
+
+ @Test(dataProvider = "reverseComplementData")
+ public void testSafeReverseComplement(boolean inplace, String bases, String quals, String y1, byte[] x1, short[] x2, int[] x3, float[] x4) throws CloneNotSupportedException {
+ final SAMRecord original = createTestSamRec();
+ final SAMRecord cloneOfOriginal = (SAMRecord) original.clone();
+ //Runs reverseComplement on the clone, as a copy or in-place depending on the inplace flag
+ cloneOfOriginal.reverseComplement(Arrays.asList("Y1"), Arrays.asList("X1", "X2", "X3", "X4", "X5"), inplace);
+
+ Assert.assertEquals(original.getReadString(), bases);
+ Assert.assertEquals(original.getBaseQualityString(), quals);
+ Assert.assertEquals(original.getByteArrayAttribute("X1"), x1);
+ Assert.assertEquals(original.getSignedShortArrayAttribute("X2"), x2);
+ Assert.assertEquals(original.getSignedIntArrayAttribute("X3"), x3);
+ Assert.assertEquals(original.getFloatArrayAttribute("X4"), x4);
+ Assert.assertEquals(original.getStringAttribute("Y1"), y1);
+
+ Assert.assertEquals(cloneOfOriginal.getReadString(), "GTGTGTGTGT");
+ Assert.assertEquals(cloneOfOriginal.getBaseQualityString(), "IIIIIHHHHH");
+ Assert.assertEquals(cloneOfOriginal.getByteArrayAttribute("X1"), new byte[] {5,4,3,2,1});
+ Assert.assertEquals(cloneOfOriginal.getSignedShortArrayAttribute("X2"), new short[] {5,4,3,2,1});
+ Assert.assertEquals(cloneOfOriginal.getSignedIntArrayAttribute("X3"), new int[] {5,4,3,2,1});
+ Assert.assertEquals(cloneOfOriginal.getFloatArrayAttribute("X4"), new float[] {5.0f,4.0f,3.0f,2.0f,1.0f});
+ Assert.assertEquals(cloneOfOriginal.getStringAttribute("Y1"), "GTTTTCTTTT");
+
+ }
+
+ public SAMRecord createTestSamRec() {
+ final SAMFileHeader header = new SAMFileHeader();
+ final SAMRecord rec = new SAMRecord(header);
+ rec.setReadString("ACACACACAC");
+ rec.setBaseQualityString("HHHHHIIIII");
+ rec.setAttribute("X1", new byte[] {1,2,3,4,5});
+ rec.setAttribute("X2", new short[] {1,2,3,4,5});
+ rec.setAttribute("X3", new int[] {1,2,3,4,5});
+ rec.setAttribute("X4", new float[] {1.0f,2.0f,3.0f,4.0f,5.0f});
+ rec.setAttribute("Y1", "AAAAGAAAAC");
+
+ return(rec);
+ }
}
diff --git a/src/test/java/htsjdk/samtools/SAMRecordUtilTest.java b/src/test/java/htsjdk/samtools/SAMRecordUtilTest.java
deleted file mode 100644
index eb3712f..0000000
--- a/src/test/java/htsjdk/samtools/SAMRecordUtilTest.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package htsjdk.samtools;
-
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-import java.util.Arrays;
-
-public class SAMRecordUtilTest {
- @Test public void testReverseComplement() {
- final SAMFileHeader header = new SAMFileHeader();
- final SAMRecord rec = new SAMRecord(header);
- rec.setReadString("ACACACACAC");
- rec.setBaseQualityString("HHHHHIIIII");
- rec.setAttribute("X1", new byte[] {1,2,3,4,5});
- rec.setAttribute("X2", new short[] {1,2,3,4,5});
- rec.setAttribute("X3", new int[] {1,2,3,4,5});
- rec.setAttribute("X4", new float[] {1.0f,2.0f,3.0f,4.0f,5.0f});
- rec.setAttribute("Y1", "AAAAGAAAAC");
-
- SAMRecordUtil.reverseComplement(rec, Arrays.asList("Y1"), Arrays.asList("X1", "X2", "X3", "X4", "X5"));
- Assert.assertEquals(rec.getReadString(), "GTGTGTGTGT");
- Assert.assertEquals(rec.getBaseQualityString(), "IIIIIHHHHH");
- Assert.assertEquals(rec.getByteArrayAttribute("X1"), new byte[] {5,4,3,2,1});
- Assert.assertEquals(rec.getSignedShortArrayAttribute("X2"), new short[] {5,4,3,2,1});
- Assert.assertEquals(rec.getSignedIntArrayAttribute("X3"), new int[] {5,4,3,2,1});
- Assert.assertEquals(rec.getFloatArrayAttribute("X4"), new float[] {5.0f,4.0f,3.0f,2.0f,1.0f});
- Assert.assertEquals(rec.getStringAttribute("Y1"), "GTTTTCTTTT");
- }
-}
diff --git a/src/test/java/htsjdk/samtools/SAMSequenceDictionaryTest.java b/src/test/java/htsjdk/samtools/SAMSequenceDictionaryTest.java
index 89e1b35..0b1a507 100644
--- a/src/test/java/htsjdk/samtools/SAMSequenceDictionaryTest.java
+++ b/src/test/java/htsjdk/samtools/SAMSequenceDictionaryTest.java
@@ -27,11 +27,15 @@
package htsjdk.samtools;
import org.testng.Assert;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.StringReader;
import java.io.StringWriter;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
@@ -89,4 +93,55 @@ public class SAMSequenceDictionaryTest {
Assert.assertEquals(dict1, dict2);
}
+ @DataProvider(name="testMergeDictionariesData")
+ public Object[][] testMergeDictionariesData(){
+
+ final SAMSequenceRecord rec1, rec2, rec3, rec4, rec5;
+ rec1 = new SAMSequenceRecord("chr1", 100);
+ rec2 = new SAMSequenceRecord("chr1", 101);
+ rec2.setMd5("dummy");
+ rec3 = new SAMSequenceRecord("chr1", SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH);
+ rec3.setMd5("dummy2");
+
+ rec4 = new SAMSequenceRecord("chr1", 100);
+ rec4.setAttribute(SAMSequenceRecord.URI_TAG,"file://some/file/name.ok");
+
+ rec5 = new SAMSequenceRecord("chr2", 200);
+ rec4.setAttribute(SAMSequenceRecord.URI_TAG,"file://some/file/name.ok");
+
+ return new Object[][]{
+ new Object[]{rec1, rec1, true},
+ new Object[]{rec2, rec2, true},
+ new Object[]{rec3, rec3, true},
+ new Object[]{rec4, rec4, true},
+ new Object[]{rec1, rec2, false},//since 100 != 101 in Length
+ new Object[]{rec1, rec3, true},
+ new Object[]{rec1, rec4, true},
+ new Object[]{rec2, rec3, false}, // since MD5 is not equal
+ new Object[]{rec2, rec4, false}, //length differs
+ new Object[]{rec3, rec4, true},
+ new Object[]{rec4, rec5, false}, // different name
+ };
+ }
+
+ @Test(dataProvider = "testMergeDictionariesData", expectedExceptions = IllegalArgumentException.class)
+ public void testMergeDictionaries(final SAMSequenceRecord rec1, final SAMSequenceRecord rec2, boolean canMerge) throws Exception {
+ final SAMSequenceDictionary dict1 = new SAMSequenceDictionary(Collections.singletonList(rec1));
+ final SAMSequenceDictionary dict2 = new SAMSequenceDictionary(Collections.singletonList(rec2));
+
+ try {
+ SAMSequenceDictionary.mergeDictionaries(dict1, dict2, SAMSequenceDictionary.DEFAULT_DICTIONARY_EQUAL_TAG);
+ } catch (final IllegalArgumentException e) {
+ if (canMerge) {
+ throw new Exception("Expected to be able to merge dictionaries, but wasn't:" , e);
+ } else {
+ throw e;
+ }
+ }
+ if (canMerge){
+ throw new IllegalArgumentException("Expected to be able to merge dictionaries, and was indeed able to do so.");
+ } else {
+ throw new Exception("Expected to not be able to merge dictionaries, but was able");
+ }
+ }
}
diff --git a/src/test/java/htsjdk/samtools/SAMUtilsTest.java b/src/test/java/htsjdk/samtools/SAMUtilsTest.java
index 0fa6b4a..3be7e39 100644
--- a/src/test/java/htsjdk/samtools/SAMUtilsTest.java
+++ b/src/test/java/htsjdk/samtools/SAMUtilsTest.java
@@ -173,7 +173,7 @@ public class SAMUtilsTest {
record.setSecondOfPairFlag(true);
Assert.assertEquals(SAMUtils.getNumOverlappingAlignedBasesToClip(record), 10);
}
-
+
@Test
public void testOtherCanonicalAlignments() {
// setup the record
@@ -190,26 +190,26 @@ public class SAMUtilsTest {
record.setMateAlignmentStart(1);
record.setReadPairedFlag(true);
record.setSupplementaryAlignmentFlag(true);//spec says first 'SA' record will be the primary record
-
+
record.setMateReferenceIndex(0);
record.setMateAlignmentStart(100);
record.setInferredInsertSize(99);
-
+
record.setReadBases("AAAAAAAAAA".getBytes());
record.setBaseQualities("##########".getBytes());
// check no alignments if no SA tag */
Assert.assertEquals(SAMUtils.getOtherCanonicalAlignments(record).size(),0);
-
-
+
+
record.setAttribute(SAMTagUtil.getSingleton().SA,
- "2,500,+,3S2=1X2=2S,60,1;" +
- "1,191,-,8M2S,60,0;");
-
+ "2,500,+,3S2=1X2=2S,60,1;" +
+ "1,191,-,8M2S,60,*;");
+
// extract suppl alignments
final List<SAMRecord> suppl = SAMUtils.getOtherCanonicalAlignments(record);
Assert.assertNotNull(suppl);
Assert.assertEquals(suppl.size(), 2);
-
+
for(final SAMRecord other: suppl) {
Assert.assertFalse(other.getReadUnmappedFlag());
Assert.assertTrue(other.getReadPairedFlag());
@@ -223,7 +223,7 @@ public class SAMUtilsTest {
Assert.assertEquals(other.getBaseQualityString(),record.getBaseQualityString());
}
}
-
+
SAMRecord other = suppl.get(0);
Assert.assertFalse(other.getSupplementaryAlignmentFlag());//1st of suppl and 'record' is supplementary
Assert.assertEquals(other.getReferenceName(),"2");
@@ -234,18 +234,17 @@ public class SAMUtilsTest {
Assert.assertEquals(other.getCigarString(),"3S2=1X2=2S");
Assert.assertEquals(other.getInferredInsertSize(),0);
-
+
other = suppl.get(1);
Assert.assertTrue(other.getSupplementaryAlignmentFlag());
Assert.assertEquals(other.getReferenceName(),"1");
Assert.assertEquals(other.getAlignmentStart(),191);
Assert.assertTrue(other.getReadNegativeStrandFlag());
Assert.assertEquals(other.getMappingQuality(), 60);
- Assert.assertEquals(other.getAttribute(SAMTagUtil.getSingleton().NM),0);
+ Assert.assertEquals(other.getAttribute(SAMTagUtil.getSingleton().NM),null);
Assert.assertEquals(other.getCigarString(),"8M2S");
Assert.assertEquals(other.getInferredInsertSize(),-91);//100(mate) - 191(other)
-
}
-
+
}
diff --git a/src/test/java/htsjdk/samtools/SamReaderSortTest.java b/src/test/java/htsjdk/samtools/SamReaderSortTest.java
index cc496db..584410f 100755
--- a/src/test/java/htsjdk/samtools/SamReaderSortTest.java
+++ b/src/test/java/htsjdk/samtools/SamReaderSortTest.java
@@ -31,7 +31,7 @@ import org.testng.annotations.Test;
import java.io.File;
/**
- * Tests for the implementation of SAMRecordIterator in SAMFileReader
+ * Tests for the implementation of {@link SAMRecordIterator} in {@link SamReader}
*
* @author ktibbett at broadinstitute.org
*/
diff --git a/src/test/java/htsjdk/samtools/SAMFileReaderTest.java b/src/test/java/htsjdk/samtools/SamReaderTest.java
similarity index 99%
rename from src/test/java/htsjdk/samtools/SAMFileReaderTest.java
rename to src/test/java/htsjdk/samtools/SamReaderTest.java
index b291011..093dffb 100644
--- a/src/test/java/htsjdk/samtools/SAMFileReaderTest.java
+++ b/src/test/java/htsjdk/samtools/SamReaderTest.java
@@ -32,7 +32,7 @@ import org.testng.annotations.Test;
import java.io.File;
-public class SAMFileReaderTest {
+public class SamReaderTest {
private static final File TEST_DATA_DIR = new File("src/test/resources/htsjdk/samtools");
@Test(dataProvider = "variousFormatReaderTestCases")
diff --git a/src/test/java/htsjdk/samtools/util/PositionalOutputStreamTest.java b/src/test/java/htsjdk/samtools/util/PositionalOutputStreamTest.java
new file mode 100644
index 0000000..49de11d
--- /dev/null
+++ b/src/test/java/htsjdk/samtools/util/PositionalOutputStreamTest.java
@@ -0,0 +1,62 @@
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015 Daniel Gómez-Sánchez
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+package htsjdk.samtools.util;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * @author Daniel Gomez-Sanchez (magicDGS)
+ */
+public class PositionalOutputStreamTest {
+
+ @Test
+ public void basicPositionTest() throws Exception {
+ // wrapped null output stream to check
+ final PositionalOutputStream wrapped = new PositionalOutputStream(new OutputStream() {
+ @Override
+ public void write(int b) throws IOException {}
+ });
+ int position = 0;
+ // check that we start at position 0
+ Assert.assertEquals(wrapped.getPosition(), position);
+ // check that writing one int adds just one
+ wrapped.write(100);
+ Assert.assertEquals(wrapped.getPosition(), ++position);
+ // check that writing a byte array adds its length
+ final byte[] bytes = new byte[]{1, 3, 5, 7};
+ wrapped.write(bytes);
+ position += bytes.length;
+ Assert.assertEquals(wrapped.getPosition(), position);
+ // check that writing just some bytes from an array adds only the number of bytes written
+ wrapped.write(bytes, 2, 2);
+ position += 2;
+ Assert.assertEquals(wrapped.getPosition(), position);
+ }
+
+}
\ No newline at end of file
diff --git a/src/test/java/htsjdk/samtools/util/RelativeIso8601DateTest.java b/src/test/java/htsjdk/samtools/util/RelativeIso8601DateTest.java
index 4fa56d5..e4e9ef9 100644
--- a/src/test/java/htsjdk/samtools/util/RelativeIso8601DateTest.java
+++ b/src/test/java/htsjdk/samtools/util/RelativeIso8601DateTest.java
@@ -10,13 +10,17 @@ import java.util.List;
/** @author mccowan */
public class RelativeIso8601DateTest {
+
+ // an ISO date has 1 second resolution
+ private final static double DELTA_FOR_TIME = 1000;
+
@Test
public void testLazyInstance() {
final RelativeIso8601Date lazy = RelativeIso8601Date.generateLazyNowInstance();
Assert.assertEquals(lazy.toString(), RelativeIso8601Date.LAZY_NOW_LABEL);
Assert.assertEquals(lazy.toString(), RelativeIso8601Date.LAZY_NOW_LABEL);
Assert.assertEquals(lazy.toString(), RelativeIso8601Date.LAZY_NOW_LABEL);
- Assert.assertEquals(lazy.getTime(), new Iso8601Date(new Date(System.currentTimeMillis())).getTime(), 1000); // 1 second resolution is ISO date
+ Assert.assertEquals(lazy.getTime(), new Iso8601Date(new Date(System.currentTimeMillis())).getTime(), DELTA_FOR_TIME);
// Assert no exception thrown; this should be valid, because toString should now return an iso-looking date.
new Iso8601Date(lazy.toString());
}
@@ -33,7 +37,7 @@ public class RelativeIso8601DateTest {
for (final RelativeIso8601Date nonLazy : testDates) {
Assert.assertFalse(nonLazy.toString().equals(RelativeIso8601Date.LAZY_NOW_LABEL));
- Assert.assertEquals((double) nonLazy.getTime(), (double) time);
+ Assert.assertEquals((double) nonLazy.getTime(), (double) time, DELTA_FOR_TIME);
// Assert no exception thrown; this should be valid, because toString return an iso-looking date.
new RelativeIso8601Date(nonLazy.toString());
}
@@ -44,6 +48,6 @@ public class RelativeIso8601DateTest {
final String s = new Iso8601Date(new Date(12345)).toString();
final Iso8601Date iso8601Date = new Iso8601Date(s);
final RelativeIso8601Date relativeIso8601Date = new RelativeIso8601Date(s);
- Assert.assertEquals(relativeIso8601Date.getTime(), iso8601Date.getTime());
+ Assert.assertEquals(relativeIso8601Date.getTime(), iso8601Date.getTime(), DELTA_FOR_TIME);
}
}
diff --git a/src/test/java/htsjdk/samtools/util/SequenceUtilTest.java b/src/test/java/htsjdk/samtools/util/SequenceUtilTest.java
index c5c797e..008cca5 100644
--- a/src/test/java/htsjdk/samtools/util/SequenceUtilTest.java
+++ b/src/test/java/htsjdk/samtools/util/SequenceUtilTest.java
@@ -23,16 +23,15 @@
*/
package htsjdk.samtools.util;
-import htsjdk.samtools.Cigar;
-import htsjdk.samtools.SAMRecord;
-import htsjdk.samtools.SAMSequenceDictionary;
-import htsjdk.samtools.SAMTag;
-import htsjdk.samtools.SAMTextHeaderCodec;
-import htsjdk.samtools.TextCigarCodec;
+import htsjdk.samtools.*;
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.reference.ReferenceSequenceFile;
+import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+import java.io.File;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
@@ -429,4 +428,26 @@ public class SequenceUtilTest {
{"Name/1/2", "Name"}
};
}
+
+ @Test
+ public void testCalculateNmTag() {
+ final File TEST_DIR = new File("src/test/resources/htsjdk/samtools/SequenceUtil");
+ final File referenceFile = new File(TEST_DIR, "reference_with_lower_and_uppercase.fasta");
+ final File samFile = new File(TEST_DIR, "upper_and_lowercase_read.sam");
+
+ SamReader reader = SamReaderFactory.makeDefault().open(samFile);
+ ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceFile);
+
+ reader.iterator().stream().forEach(r -> {
+ Integer nm = SequenceUtil.calculateSamNmTag(r, ref.getSequence(r.getContig()).getBases());
+ String md = r.getStringAttribute(SAMTag.MD.name());
+ Assert.assertEquals(r.getIntegerAttribute(SAMTag.NM.name()), nm, "problem with NM in read \'" + r.getReadName() + "\':");
+ SequenceUtil.calculateMdAndNmTags(r, ref.getSequence(r.getContig()).getBases(), true, true);
+
+ Assert.assertEquals(r.getIntegerAttribute(SAMTag.NM.name()), nm, "problem with NM in read \'" + r.getReadName() + "\':");
+ if (md != null) {
+ Assert.assertEquals(r.getStringAttribute(SAMTag.MD.name()), md, "problem with MD in read \'" + r.getReadName() + "\':");
+ }
+ });
+ }
}
diff --git a/src/test/java/htsjdk/samtools/util/StringUtilTest.java b/src/test/java/htsjdk/samtools/util/StringUtilTest.java
index 91e8792..dbb2a07 100644
--- a/src/test/java/htsjdk/samtools/util/StringUtilTest.java
+++ b/src/test/java/htsjdk/samtools/util/StringUtilTest.java
@@ -67,4 +67,56 @@ public class StringUtilTest {
{"A:BB:C:", new String[]{"A", "BB", "C:"}, true},
};
}
+
+ @DataProvider(name="withinHammingDistanceProvider")
+ public Object[][] isWithinHammingDistanceProvider() {
+ return new Object[][] {
+ {"ATAC", "GCAT", 3, true},
+ {"ATAC", "GCAT", 2, false},
+ {"ATAC", "GCAT", 1, false},
+ {"ATAC", "GCAT", 0, false}
+ };
+ }
+
+ @Test(dataProvider = "withinHammingDistanceProvider")
+ public void testIsWithinHammingDistance(final String s1, final String s2, final int maxHammingDistance, final boolean expectedResult) {
+ Assert.assertEquals(StringUtil.isWithinHammingDistance(s1, s2, maxHammingDistance), expectedResult);
+ }
+
+ @DataProvider(name="withinHammingDistanceExceptionProvider")
+ public Object[][] isWithinHammingDistanceException() {
+ return new Object[][] {
+ {"ATAC", "GCT" , 3},
+ {"ATAC", "AT" , 2},
+ {"ATAC", "T" , 1},
+ {"" , "GCAT", 0}
+ };
+ }
+
+ @Test(dataProvider = "withinHammingDistanceExceptionProvider", expectedExceptions = IllegalArgumentException.class)
+ public void testIsWithinHammingDistanceExceptions(final String s1, final String s2, final int maxHammingDistance) {
+ StringUtil.isWithinHammingDistance(s1, s2, maxHammingDistance);
+ }
+
+ @Test(dataProvider = "withinHammingDistanceExceptionProvider", expectedExceptions = IllegalArgumentException.class)
+ public void testHammingDistanceExceptions(final String s1, final String s2, final int maxHammingDistance) {
+ StringUtil.hammingDistance(s1, s2);
+ }
+
+ @DataProvider(name="hammingDistanceProvider")
+ public Object[][] hammingDistance() {
+ return new Object[][] {
+ {"ATAC" , "GCAT" , 3},
+ {"ATAGC", "ATAGC", 0},
+ {"ATAC" , "atac" , 4}, // Hamming distance is case sensitive.
+ {"" , "" , 0}, // Two empty strings should have Hamming distance of 0.
+ {"nAGTN", "nAGTN", 0} // Ensure that matching Ns are not counted as mismatches.
+ };
+ }
+
+ @Test(dataProvider = "hammingDistanceProvider")
+ public void testHammingDistance(final String s1, final String s2, final int expectedResult) {
+ Assert.assertEquals(StringUtil.hammingDistance(s1, s2), expectedResult);
+ }
+
}
diff --git a/src/test/java/htsjdk/tribble/FeatureReaderTest.java b/src/test/java/htsjdk/tribble/FeatureReaderTest.java
index ac34059..d62693c 100644
--- a/src/test/java/htsjdk/tribble/FeatureReaderTest.java
+++ b/src/test/java/htsjdk/tribble/FeatureReaderTest.java
@@ -134,7 +134,7 @@ public class FeatureReaderTest {
idxFile.delete();
}
final Index idx = IndexFactory.createIndex(featureFile, codec, indexType);
- IndexFactory.writeIndex(idx, idxFile);
+ idx.write(idxFile);
idxFile.deleteOnExit();
} // else let's just hope the index exists, and if so use it
diff --git a/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java b/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java
index c7b2193..474a8a8 100644
--- a/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java
+++ b/src/test/java/htsjdk/tribble/bed/BEDCodecTest.java
@@ -226,4 +226,15 @@ public class BEDCodecTest {
public void testGetTabixFormat() {
Assert.assertEquals(new BEDCodec().getTabixFormat(), TabixFormat.BED);
}
+
+ @Test
+ public void testCanDecode() {
+ final BEDCodec codec = new BEDCodec();
+ final String pattern = "filename.%s%s";
+ for(final String bcExt: AbstractFeatureReader.BLOCK_COMPRESSED_EXTENSIONS) {
+ Assert.assertTrue(codec.canDecode(String.format(pattern, "bed", bcExt)));
+ Assert.assertFalse(codec.canDecode(String.format(pattern, "vcf", bcExt)));
+ Assert.assertFalse(codec.canDecode(String.format(pattern, "bed.gzip", bcExt)));
+ }
+ }
}
diff --git a/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java b/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java
index ba64998..016049f 100644
--- a/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java
+++ b/src/test/java/htsjdk/tribble/index/IndexFactoryTest.java
@@ -25,11 +25,14 @@ package htsjdk.tribble.index;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.util.IOUtil;
import htsjdk.tribble.TestUtils;
import htsjdk.tribble.TribbleException;
import htsjdk.tribble.bed.BEDCodec;
+import htsjdk.tribble.index.linear.LinearIndex;
import htsjdk.tribble.index.tabix.TabixFormat;
import htsjdk.tribble.index.tabix.TabixIndex;
+import htsjdk.tribble.util.LittleEndianOutputStream;
import htsjdk.variant.vcf.VCFCodec;
import htsjdk.variant.vcf.VCFFileReader;
import org.testng.Assert;
@@ -37,6 +40,8 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
import java.util.List;
/**
diff --git a/src/test/java/htsjdk/tribble/index/IndexTest.java b/src/test/java/htsjdk/tribble/index/IndexTest.java
index 8104a08..aa179a9 100644
--- a/src/test/java/htsjdk/tribble/index/IndexTest.java
+++ b/src/test/java/htsjdk/tribble/index/IndexTest.java
@@ -1,13 +1,23 @@
package htsjdk.tribble.index;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.tribble.FeatureCodec;
import htsjdk.tribble.TestUtils;
+import htsjdk.tribble.Tribble;
+import htsjdk.tribble.bed.BEDCodec;
import htsjdk.tribble.index.linear.LinearIndex;
+import htsjdk.tribble.index.tabix.TabixFormat;
+import htsjdk.tribble.index.tabix.TabixIndex;
+import htsjdk.tribble.util.LittleEndianOutputStream;
+import htsjdk.tribble.util.TabixUtils;
+import htsjdk.variant.vcf.VCFCodec;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
+import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
@@ -47,4 +57,41 @@ public class IndexTest {
Assert.assertTrue(allSize >= Math.max(leftSize,rightSize), "Expected size of joint query " + allSize + " to be at least >= max of left " + leftSize + " and right queries " + rightSize);
}
+
+
+ @DataProvider(name = "writeIndexData")
+ public Object[][] writeIndexData() {
+ return new Object[][]{
+ {new File("src/test/resources/htsjdk/tribble/tabix/testTabixIndex.vcf"), IndexFactory.IndexType.LINEAR, new VCFCodec()},
+ {new File("src/test/resources/htsjdk/tribble/tabix/testTabixIndex.vcf.gz"), IndexFactory.IndexType.TABIX, new VCFCodec()},
+ {new File("src/test/resources/htsjdk/tribble/test.bed"), IndexFactory.IndexType.LINEAR, new BEDCodec()}
+ };
+ }
+
+ private final static OutputStream nullOutputStrem = new OutputStream() {
+ @Override
+ public void write(int b) throws IOException { }
+ };
+
+ @Test(dataProvider = "writeIndexData")
+ public void testWriteIndex(final File inputFile, final IndexFactory.IndexType type, final FeatureCodec codec) throws Exception {
+ // temp index file for this test
+ final File tempIndex = File.createTempFile("index", (type == IndexFactory.IndexType.TABIX) ? TabixUtils.STANDARD_INDEX_EXTENSION : Tribble.STANDARD_INDEX_EXTENSION);
+ tempIndex.delete();
+ tempIndex.deleteOnExit();
+ // create the index
+ final Index index = IndexFactory.createIndex(inputFile, codec, type);
+ Assert.assertFalse(tempIndex.exists());
+ // write the index to a file
+ index.write(tempIndex);
+ Assert.assertTrue(tempIndex.exists());
+ // load the generated index
+ final Index loadedIndex = IndexFactory.loadIndex(tempIndex.getAbsolutePath());
+ // test that the sequences and properties are the same
+ Assert.assertEquals(loadedIndex.getSequenceNames(), index.getSequenceNames());
+ Assert.assertEquals(loadedIndex.getProperties(), index.getProperties());
+ // test that writing to a stream does not blow up
+ index.write(new LittleEndianOutputStream(nullOutputStrem));
+ }
+
}
diff --git a/src/test/java/htsjdk/tribble/index/tabix/TabixIndexTest.java b/src/test/java/htsjdk/tribble/index/tabix/TabixIndexTest.java
index 557a398..6981b87 100644
--- a/src/test/java/htsjdk/tribble/index/tabix/TabixIndexTest.java
+++ b/src/test/java/htsjdk/tribble/index/tabix/TabixIndexTest.java
@@ -37,6 +37,7 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
+import java.io.IOException;
import java.util.Iterator;
public class TabixIndexTest {
@@ -71,7 +72,7 @@ public class TabixIndexTest {
}
@Test
- public void testQueryProvidedItemsAmount() {
+ public void testQueryProvidedItemsAmount() throws IOException {
final String VCF = "src/test/resources/htsjdk/tribble/tabix/YRI.trio.2010_07.indel.sites.vcf";
// Note that we store only compressed files
final File plainTextVcfInputFile = new File(VCF);
diff --git a/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java b/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java
index bd00b75..bebd393 100644
--- a/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java
+++ b/src/test/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java
@@ -25,16 +25,18 @@
package htsjdk.variant.variantcontext;
-import htsjdk.samtools.util.Log;
+import htsjdk.tribble.SimpleFeature;
import htsjdk.variant.VariantBaseTest;
import htsjdk.variant.variantcontext.VariantContextUtils.JexlVCMatchExp;
+import htsjdk.variant.vcf.VCFConstants;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.util.Arrays;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -43,178 +45,230 @@ import java.util.Map;
*
* @author aaron
* @author bimber
- *
- * Class VariantJEXLContextUnitTest
+ * @author hyq
+ *
*
* Test out parts of the VariantJEXLContext and GenotypeJEXLContext
*/
public class VariantJEXLContextUnitTest extends VariantBaseTest {
- private static String expression = "QUAL > 500.0";
- private static VariantContextUtils.JexlVCMatchExp exp;
-
- Allele A, Aref, T, Tref;
-
- Allele ATC, ATCref;
- // A [ref] / T at 10
-
- // - / ATC [ref] from 20-23
-
- @BeforeClass
- public void beforeClass() {
- try {
- exp = new VariantContextUtils.JexlVCMatchExp("name", VariantContextUtils.engine.get().createExpression(expression));
- } catch (Exception e) {
- Assert.fail("Unable to create expression" + e.getMessage());
- }
- }
-
- @BeforeMethod
- public void before() {
- A = Allele.create("A");
- Aref = Allele.create("A", true);
- T = Allele.create("T");
- Tref = Allele.create("T", true);
-
- ATC = Allele.create("ATC");
- ATCref = Allele.create("ATC", true);
- }
-
-
+ private static final VariantContextUtils.JexlVCMatchExp exp
+ = new VariantContextUtils.JexlVCMatchExp("name", VariantContextUtils.engine.get().createExpression("QUAL > 500.0"));
+
+ // SNP alleles: A[ref]/T[alt] at chr1:10. One (crappy) sample, one (bare minimum) VC.
+ private static final SimpleFeature eventLoc = new SimpleFeature("chr1", 10, 10);
+ private static final Allele Aref = Allele.create("A", true);
+ private static final Allele Talt = Allele.create("T");
+ private static final Genotype gt = new GenotypeBuilder("DummySample", Arrays.asList(Aref, Talt))
+ .phased(false)
+ .DP(2)
+ .noGQ()
+ .noAD()
+ .noPL()
+ .filter("lowDP")
+ .attribute("WA", "whatEver")
+ .make();
+ private static final VariantContext vc = new VariantContextBuilder("test", eventLoc.getContig(), eventLoc.getStart(), eventLoc.getEnd(), Arrays.asList(Aref, Talt))
+ .genotypes(gt)
+ .noID()
+ .filter("q10")
+ .attribute("attr", "notEmpty")
+ .make();
+
+ //////////////////////// testing JEXLMap ////////////////////////
@Test
public void testGetValue() {
- Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = getVarContext();
+ final Map<VariantContextUtils.JexlVCMatchExp, Boolean> jexlMap = getJEXLMap();
// make sure the context has a value
- Assert.assertTrue(!map.isEmpty());
- Assert.assertEquals(map.size(), 1);
+ Assert.assertTrue(!jexlMap.isEmpty());
+ Assert.assertEquals(jexlMap.size(), 1);
// eval our known expression
- Assert.assertTrue(!map.get(exp));
+ Assert.assertTrue(!jexlMap.get(exp));
}
// Testing the new 'FT' and 'isPassFT' expressions in the JEXL map
@Test
public void testJEXLGenotypeFilters() {
- JexlVCMatchExp passFlag = new VariantContextUtils.JexlVCMatchExp(
+ final JexlVCMatchExp passFlag = new VariantContextUtils.JexlVCMatchExp(
"passFlag", VariantContextUtils.engine.get().createExpression("isPassFT==1"));
- JexlVCMatchExp passFT = new VariantContextUtils.JexlVCMatchExp(
+ final JexlVCMatchExp passFT = new VariantContextUtils.JexlVCMatchExp(
"FTPASS", VariantContextUtils.engine.get().createExpression("FT==\"PASS\""));
- JexlVCMatchExp failFT = new VariantContextUtils.JexlVCMatchExp(
+ final JexlVCMatchExp failFT = new VariantContextUtils.JexlVCMatchExp(
"FTBadCall", VariantContextUtils.engine.get().createExpression("FT==\"BadCall\""));
- JexlVCMatchExp AD1 = new VariantContextUtils.JexlVCMatchExp(
+ final JexlVCMatchExp AD1 = new VariantContextUtils.JexlVCMatchExp(
"AD1", VariantContextUtils.engine.get().createExpression("g.hasAD() && g.getAD().0==1"));
- JexlVCMatchExp AD2 = new VariantContextUtils.JexlVCMatchExp(
+ final JexlVCMatchExp AD2 = new VariantContextUtils.JexlVCMatchExp(
"AD2", VariantContextUtils.engine.get().createExpression("g.hasAD() && g.getAD().1==2"));
- List<JexlVCMatchExp> jexlTests = Arrays.asList(passFlag, passFT, failFT, AD1, AD2);
- Map<VariantContextUtils.JexlVCMatchExp, Boolean> map;
-
- List<Allele> alleles = Arrays.asList(Aref, T);
- VariantContextBuilder vcb = new VariantContextBuilder("test", "chr1", 10, 10, alleles);
- VariantContext vcPass = vcb.filters("PASS").make();
- VariantContext vcFail = vcb.filters("BadVariant").make();
- GenotypeBuilder gb = new GenotypeBuilder("SAMPLE", alleles);
+ final List<JexlVCMatchExp> jexlTests = Arrays.asList(passFlag, passFT, failFT, AD1, AD2);
+
+ final List<Allele> alleles = Arrays.asList(Aref, Talt);
+ final VariantContextBuilder vcb = new VariantContextBuilder("test", "chr1", 10, 10, alleles);
+ final VariantContext vcPass = vcb.filters("PASS").make();
+ final VariantContext vcFail = vcb.filters("BadVariant").make();
+ final GenotypeBuilder gb = new GenotypeBuilder("SAMPLE", alleles);
- Genotype genoNull = gb.make();
- Genotype genoPass = gb.filters("PASS").AD(new int[]{1,2}).DP(3).make();
- Genotype genoFail = gb.filters("BadCall").AD(null).DP(0).make();
+ final Genotype genoNull = gb.make();
+ final Genotype genoPass = gb.filters("PASS").AD(new int[]{1,2}).DP(3).make();
+ final Genotype genoFail = gb.filters("BadCall").AD(null).DP(0).make();
+
+ Map<VariantContextUtils.JexlVCMatchExp, Boolean> jexlMap;
// Create the JEXL Maps using the combinations above of vc* and geno*
- map = new JEXLMap(jexlTests,vcPass, genoPass);
+ jexlMap = new JEXLMap(jexlTests, vcPass, genoPass);
// make sure the context has a value
- Assert.assertTrue(!map.isEmpty());
- Assert.assertEquals(map.size(), 5);
- Assert.assertTrue(map.get(passFlag));
- Assert.assertTrue(map.get(passFT));
- Assert.assertFalse(map.get(failFT));
- Assert.assertTrue(map.get(AD1));
- Assert.assertTrue(map.get(AD2));
-
- map = new JEXLMap(jexlTests, vcPass, genoFail);
+ Assert.assertTrue(!jexlMap.isEmpty());
+ Assert.assertEquals(jexlMap.size(), 5);
+ Assert.assertTrue(jexlMap.get(passFlag));
+ Assert.assertTrue(jexlMap.get(passFT));
+ Assert.assertFalse(jexlMap.get(failFT));
+ Assert.assertTrue(jexlMap.get(AD1));
+ Assert.assertTrue(jexlMap.get(AD2));
+
+ jexlMap = new JEXLMap(jexlTests, vcPass, genoFail);
// make sure the context has a value
- Assert.assertTrue(!map.isEmpty());
- Assert.assertEquals(map.size(), 5);
- Assert.assertFalse(map.get(passFlag));
- Assert.assertFalse(map.get(passFT));
- Assert.assertTrue(map.get(failFT));
- Assert.assertFalse(map.get(AD1));
- Assert.assertFalse(map.get(AD2));
+ Assert.assertTrue(!jexlMap.isEmpty());
+ Assert.assertEquals(jexlMap.size(), 5);
+ Assert.assertFalse(jexlMap.get(passFlag));
+ Assert.assertFalse(jexlMap.get(passFT));
+ Assert.assertTrue(jexlMap.get(failFT));
+ Assert.assertFalse(jexlMap.get(AD1));
+ Assert.assertFalse(jexlMap.get(AD2));
// Null genotype filter is equivalent to explicit "FT==PASS"
- map = new JEXLMap(jexlTests, vcPass, genoNull);
+ jexlMap = new JEXLMap(jexlTests, vcPass, genoNull);
// make sure the context has a value
- Assert.assertTrue(!map.isEmpty());
- Assert.assertEquals(map.size(), 5);
- Assert.assertTrue(map.get(passFlag));
- Assert.assertTrue(map.get(passFT));
- Assert.assertFalse(map.get(failFT));
- Assert.assertFalse(map.get(AD1));
- Assert.assertFalse(map.get(AD2));
+ Assert.assertTrue(!jexlMap.isEmpty());
+ Assert.assertEquals(jexlMap.size(), 5);
+ Assert.assertTrue(jexlMap.get(passFlag));
+ Assert.assertTrue(jexlMap.get(passFT));
+ Assert.assertFalse(jexlMap.get(failFT));
+ Assert.assertFalse(jexlMap.get(AD1));
+ Assert.assertFalse(jexlMap.get(AD2));
// Variant-level filters should have no effect here
- map = new JEXLMap(jexlTests,vcFail, genoPass);
+ jexlMap = new JEXLMap(jexlTests, vcFail, genoPass);
// make sure the context has a value
- Assert.assertTrue(!map.isEmpty());
- Assert.assertEquals(map.size(), 5);
- Assert.assertTrue(map.get(passFlag));
- Assert.assertTrue(map.get(passFT));
- Assert.assertFalse(map.get(failFT));
+ Assert.assertTrue(!jexlMap.isEmpty());
+ Assert.assertEquals(jexlMap.size(), 5);
+ Assert.assertTrue(jexlMap.get(passFlag));
+ Assert.assertTrue(jexlMap.get(passFT));
+ Assert.assertFalse(jexlMap.get(failFT));
- map = new JEXLMap(jexlTests,vcFail, genoFail);
+ jexlMap = new JEXLMap(jexlTests, vcFail, genoFail);
// make sure the context has a value
- Assert.assertTrue(!map.isEmpty());
- Assert.assertEquals(map.size(), 5);
- Assert.assertFalse(map.get(passFlag));
- Assert.assertFalse(map.get(passFT));
- Assert.assertTrue(map.get(failFT));
+ Assert.assertTrue(!jexlMap.isEmpty());
+ Assert.assertEquals(jexlMap.size(), 5);
+ Assert.assertFalse(jexlMap.get(passFlag));
+ Assert.assertFalse(jexlMap.get(passFT));
+ Assert.assertTrue(jexlMap.get(failFT));
- map = new JEXLMap(jexlTests,vcFail, genoNull);
+ jexlMap = new JEXLMap(jexlTests, vcFail, genoNull);
// make sure the context has a value
- Assert.assertTrue(!map.isEmpty());
- Assert.assertEquals(map.size(), 5);
- Assert.assertTrue(map.get(passFlag));
- Assert.assertTrue(map.get(passFT));
- Assert.assertFalse(map.get(failFT));
+ Assert.assertTrue(!jexlMap.isEmpty());
+ Assert.assertEquals(jexlMap.size(), 5);
+ Assert.assertTrue(jexlMap.get(passFlag));
+ Assert.assertTrue(jexlMap.get(passFT));
+ Assert.assertFalse(jexlMap.get(failFT));
}
@Test(expectedExceptions=UnsupportedOperationException.class)
public void testContainsValue() {
- Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = getVarContext();
+ final Map<VariantContextUtils.JexlVCMatchExp, Boolean> jexlMap = getJEXLMap();
- map.containsValue(exp);
+ jexlMap.containsValue(exp);
}
@Test(expectedExceptions=UnsupportedOperationException.class)
public void testRemove() {
- Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = getVarContext();
+ final Map<VariantContextUtils.JexlVCMatchExp, Boolean> jexlMap = getJEXLMap();
- map.remove(exp);
+ jexlMap.remove(exp);
}
@Test(expectedExceptions=UnsupportedOperationException.class)
public void testEntrySet() {
- Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = getVarContext();
+ final Map<VariantContextUtils.JexlVCMatchExp, Boolean> jexlMap = getJEXLMap();
- map.entrySet();
+ jexlMap.entrySet();
}
@Test(expectedExceptions=UnsupportedOperationException.class)
public void testClear() {
- Map<VariantContextUtils.JexlVCMatchExp, Boolean> map = getVarContext();
+ final Map<VariantContextUtils.JexlVCMatchExp, Boolean> jexlMap = getJEXLMap();
- map.clear();
+ jexlMap.clear();
}
/**
- * helper method
- * @return a VariantJEXLContext
+ * @return a JEXLMap for use by actual tests
*/
- private JEXLMap getVarContext() {
- List<Allele> alleles = Arrays.asList(Aref, T);
+ private JEXLMap getJEXLMap() {
+ return new JEXLMap(Collections.singletonList(exp), vc);
+ }
- VariantContext vc = new VariantContextBuilder("test", "chr1", 10, 10, alleles).make();
- return new JEXLMap(Arrays.asList(exp),vc);
+ //////////////////////// testing GenotypeJEXLContext and its base VariantJEXLContext ////////////////////////
+
+ /**
+ * Test the various if-else cases in {@link GenotypeJEXLContext#get(String)} and {@link VariantJEXLContext#get(String)}
+ * {@link GenotypeJEXLContext#has(String)} is not tested because it simply checks if get() will return null.
+ */
+ @Test
+ public void testVariantJEXLContextGetMethod() {
+
+ final VariantJEXLContext jEXLContext = getJEXLContext();
+
+ // This is not tested because there's no simple test for equality for VariantContext,
+ // except exhaustive attributes testing, which is what is happening below.
+// Assert.assertEquals(jEXLContext.get("vc"), new VariantContextBuilder("test", "chr1", 10, 10, Arrays.asList(Aref, Talt)).make());
+
+ // GenotypeJEXLContext
+ Assert.assertTrue( ((Genotype) jEXLContext.get("g")).sameGenotype(gt, false));
+ Assert.assertEquals(jEXLContext.get("isHom"), VariantJEXLContext.false_string);
+ Assert.assertEquals(jEXLContext.get("isHomRef"), VariantJEXLContext.false_string);
+ Assert.assertEquals(jEXLContext.get("isHomVar"), VariantJEXLContext.false_string);
+ Assert.assertEquals(jEXLContext.get("isHet"), VariantJEXLContext.true_string);
+ Assert.assertEquals(jEXLContext.get("isCalled"), VariantJEXLContext.true_string);
+ Assert.assertEquals(jEXLContext.get("isNoCall"), VariantJEXLContext.false_string);
+ Assert.assertEquals(jEXLContext.get("isMixed"), VariantJEXLContext.false_string);
+ Assert.assertEquals(jEXLContext.get("isAvailable"), VariantJEXLContext.true_string);
+ Assert.assertEquals(jEXLContext.get("isPassFT"), VariantJEXLContext.false_string);
+ Assert.assertEquals(jEXLContext.get(VCFConstants.GENOTYPE_KEY), gt.getGenotypeString());
+ Assert.assertEquals(jEXLContext.get(VCFConstants.GENOTYPE_FILTER_KEY),"lowDP");
+ Assert.assertEquals(jEXLContext.get(VCFConstants.GENOTYPE_QUALITY_KEY),Integer.valueOf(VCFConstants.MISSING_GENOTYPE_QUALITY_v3));
+ Assert.assertEquals(jEXLContext.get("WA"),"whatEver"); // hasAnyAttribute->getAnyAttribute
+ Assert.assertEquals(jEXLContext.get("lowDP"),VariantJEXLContext.true_string); // getFilters()!=null
+
+ // VariantJEXLContext
+ Assert.assertEquals(jEXLContext.get("CHROM"), eventLoc.getContig());
+ Assert.assertEquals(jEXLContext.get("POS"), eventLoc.getStart());
+ Assert.assertEquals(jEXLContext.get("TYPE"), VariantContext.Type.SNP.name());
+ Assert.assertEquals(jEXLContext.get("QUAL"), -10.0); // because of noGQ() when building the genotype
+ Assert.assertEquals(jEXLContext.get("ALLELES"), vc.getAlleles());
+ Assert.assertEquals(jEXLContext.get("N_ALLELES"), vc.getNAlleles());
+ Assert.assertEquals(jEXLContext.get("FILTER"), VariantJEXLContext.true_string);
+ Assert.assertEquals(jEXLContext.get("homRefCount"), 0);
+ Assert.assertEquals(jEXLContext.get("homVarCount"), 0);
+ Assert.assertEquals(jEXLContext.get("hetCount"), 1);
+ Assert.assertEquals(jEXLContext.get("attr"), "notEmpty"); // hasAnyAttribute->getAnyAttribute
+ Assert.assertEquals(jEXLContext.get("q10"), VariantJEXLContext.true_string); // getFilters()!=null
+
+ // all if-else fall through
+ Assert.assertNull(jEXLContext.get("mustBeNull"));
+ }
+
+ @Test(expectedExceptions=UnsupportedOperationException.class)
+ public void testVariantJEXLContextSetMethodException(){
+ getJEXLContext().set("noMatterWhat", "willBlowup");
+ }
+
+ /**
+ * @return a GenotypeJEXLContext for use by actual tests
+ */
+ private VariantJEXLContext getJEXLContext(){
+ return new GenotypeJEXLContext(vc, gt);
}
}
diff --git a/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.dict b/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.dict
new file mode 100644
index 0000000..db5b251
--- /dev/null
+++ b/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.dict
@@ -0,0 +1,3 @@
+@HD VN:1.5 SO:unsorted
+@SQ SN:chr1 LN:16 M5:56b74a652b3ed2f610263b8bb423167c UR:file:src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta
+@SQ SN:chr2 LN:16 M5:b835d2c026aa66c52a05838dcc0b59d4 UR:file:src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta
diff --git a/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta b/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta
new file mode 100644
index 0000000..0b446ca
--- /dev/null
+++ b/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta
@@ -0,0 +1,4 @@
+>chr1
+ACGTACGTacgtacgt
+>chr2
+TCGATCGAtcgatcga
\ No newline at end of file
diff --git a/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta.fai b/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta.fai
new file mode 100644
index 0000000..9314c8f
--- /dev/null
+++ b/src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta.fai
@@ -0,0 +1,2 @@
+chr1 16 6 16 17
+chr2 16 29 16 16
diff --git a/src/test/resources/htsjdk/samtools/SequenceUtil/upper_and_lowercase_read.sam b/src/test/resources/htsjdk/samtools/SequenceUtil/upper_and_lowercase_read.sam
new file mode 100644
index 0000000..82efe85
--- /dev/null
+++ b/src/test/resources/htsjdk/samtools/SequenceUtil/upper_and_lowercase_read.sam
@@ -0,0 +1,10 @@
+@HD VN:1.5 SO:coordinate
+@SQ SN:chr1 LN:16 M5:56b74a652b3ed2f610263b8bb423167c UR:file:src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta
+@SQ SN:chr2 LN:16 M5:b835d2c026aa66c52a05838dcc0b59d4 UR:file:src/test/resources/htsjdk/samtools/SequenceUtil/reference_with_lower_and_uppercase.fasta
+@CO chr1 value is ACGTACGTacgtacgt
+@CO chr2 value is TCGATCGAtcgatcga
+read1 0 chr1 1 0 16M * 0 0 AcGtAcGTaCGtAcGt AAAAAAAAAAAAAAAA NM:i:0
+read2 0 chr1 1 0 16M * 0 0 AcGtAcGTaCGtAcGt AAAAAAAAAAAAAAAA NM:i:0
+read3 0 chr2 1 0 16M * 0 0 AcGtAcGTaCGtAcGt AAAAAAAAAAAAAAAA NM:i:8 MD:Z:0T2A0T2A0t2a0t2a0
+read4 0 chr2 1 0 8M * 0 0 TCGATCGA AAAAAAAA NM:i:0
+read5 0 chr2 1 0 4M1D2M1S * 0 0 TCGACGAA AAAAAAAA NM:i:1 MD:Z:4^T2
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/htsjdk.git
More information about the debian-med-commit
mailing list