[med-svn] [htsjdk] 02/06: Imported Upstream version 2.0.1+dfsg.1
Andreas Tille
tille at debian.org
Sat Dec 19 21:26:56 UTC 2015
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository htsjdk.
commit ad6800cf21471c4d01fc8a0f6736dbf0cc16974f
Author: Andreas Tille <tille at debian.org>
Date: Sat Dec 19 21:26:08 2015 +0100
Imported Upstream version 2.0.1+dfsg.1
.idea/modules/htsjdk.iml | 3 +-
.travis.yml | 5 +-
README.md | 2 +
build.sbt | 8 +-
build.xml | 9 +-
htsjdk.iml | 9 +
src/java/htsjdk/samtools/AbstractBAMFileIndex.java | 27 +-
.../{SAMFileSpan.java => BAMFileSpan.java} | 59 +-
src/java/htsjdk/samtools/BAMRecord.java | 7 +-
src/java/htsjdk/samtools/BAMRecordCodec.java | 6 +-
src/java/htsjdk/samtools/BinaryTagCodec.java | 11 +-
src/java/htsjdk/samtools/CRAMFileReader.java | 64 +-
src/java/htsjdk/samtools/CRAMFileWriter.java | 99 +-
src/java/htsjdk/samtools/CRAMIndexer.java | 36 +-
src/java/htsjdk/samtools/CRAMIterator.java | 34 +-
.../samtools/ChainedDownsamplingIterator.java | 90 ++
.../ConstantMemoryDownsamplingIterator.java | 88 ++
.../htsjdk/samtools/DefaultSAMRecordFactory.java | 6 +-
src/java/htsjdk/samtools/DownsamplingIterator.java | 164 ++-
.../samtools/DownsamplingIteratorFactory.java | 118 +++
.../htsjdk/samtools/DuplicateScoringStrategy.java | 39 +-
src/java/htsjdk/samtools/DuplicateSet.java | 70 +-
.../samtools/HighAccuracyDownsamplingIterator.java | 196 ++++
.../htsjdk/samtools/MergingSamRecordIterator.java | 12 +-
.../SAMBinaryTagAndUnsignedArrayValue.java | 14 +-
src/java/htsjdk/samtools/SAMBinaryTagAndValue.java | 43 +-
src/java/htsjdk/samtools/SAMFileSpan.java | 244 -----
src/java/htsjdk/samtools/SAMFileWriterFactory.java | 149 ++-
src/java/htsjdk/samtools/SAMFileWriterImpl.java | 20 +-
src/java/htsjdk/samtools/SAMRecord.java | 680 ++++++++++---
.../samtools/SAMRecordCoordinateComparator.java | 6 +
.../samtools/SAMRecordDuplicateComparator.java | 46 +-
.../samtools/SAMRecordQueryHashComparator.java | 68 ++
src/java/htsjdk/samtools/SAMTag.java | 3 +
src/java/htsjdk/samtools/SAMUtils.java | 174 +++-
src/java/htsjdk/samtools/SRAFileReader.java | 306 ++++++
src/java/htsjdk/samtools/SRAIndex.java | 257 +++++
src/java/htsjdk/samtools/SRAIterator.java | 248 +++++
src/java/htsjdk/samtools/SamFileValidator.java | 21 +-
src/java/htsjdk/samtools/SamFiles.java | 23 +-
src/java/htsjdk/samtools/SamIndexes.java | 94 ++
src/java/htsjdk/samtools/SamInputResource.java | 68 +-
src/java/htsjdk/samtools/SamPairUtil.java | 55 +-
src/java/htsjdk/samtools/SamReader.java | 1 +
src/java/htsjdk/samtools/SamReaderFactory.java | 47 +-
src/java/htsjdk/samtools/SamStreams.java | 1 +
src/java/htsjdk/samtools/TextTagCodec.java | 19 +-
src/java/htsjdk/samtools/cram/CRAIEntry.java | 148 +++
src/java/htsjdk/samtools/cram/CRAIIndex.java | 164 +++
src/java/htsjdk/samtools/cram/CRAMException.java | 22 +
.../samtools/cram/build/ContainerParser.java | 13 +-
src/java/htsjdk/samtools/cram/build/CramIO.java | 10 +-
.../samtools/cram/build/Sam2CramRecordFactory.java | 13 +
.../cram/encoding/reader/CramRecordReader.java | 22 +-
.../htsjdk/samtools/cram/ref/ReferenceSource.java | 9 +-
.../cram/structure/CramCompressionRecord.java | 6 +-
.../htsjdk/samtools/cram/structure/ReadTag.java | 30 +-
.../htsjdk/samtools/filter/FilteringIterator.java | 2 +-
.../htsjdk/samtools/filter/IntervalFilter.java | 2 +-
.../samtools/filter/OverclippedReadFilter.java | 76 ++
src/java/htsjdk/samtools/metrics/MetricsFile.java | 32 +-
.../reference/AbstractFastaSequenceFile.java | 74 +-
.../samtools/reference/FastaSequenceFile.java | 18 +-
.../samtools/reference/FastaSequenceIndex.java | 21 +-
.../reference/IndexedFastaSequenceFile.java | 96 +-
.../reference/ReferenceSequenceFileFactory.java | 45 +-
.../seekablestream/SeekableMemoryStream.java | 64 ++
src/java/htsjdk/samtools/sra/ReferenceCache.java | 79 ++
src/java/htsjdk/samtools/sra/SRAAccession.java | 108 ++
.../htsjdk/samtools/sra/SRAAlignmentIterator.java | 194 ++++
.../samtools/sra/SRAIndexedSequenceFile.java | 121 +++
src/java/htsjdk/samtools/sra/SRALazyRecord.java | 1056 ++++++++++++++++++++
.../samtools/sra/SRAUnalignmentIterator.java | 181 ++++
src/java/htsjdk/samtools/sra/SRAUtils.java | 83 ++
.../htsjdk/samtools/util/AbstractAsyncWriter.java | 2 +-
.../samtools/util/AbstractProgressLogger.java | 2 +-
src/java/htsjdk/samtools/util/BinaryCodec.java | 10 +-
src/java/htsjdk/samtools/util/DiskBackedQueue.java | 12 +-
src/java/htsjdk/samtools/util/Histogram.java | 20 +-
src/java/htsjdk/samtools/util/IOUtil.java | 62 +-
src/java/htsjdk/samtools/util/Murmur3.java | 115 +++
src/java/htsjdk/samtools/util/ProgressLogger.java | 2 +-
src/java/htsjdk/samtools/util/SequenceUtil.java | 57 +-
src/java/htsjdk/samtools/util/StringUtil.java | 4 +-
.../variant/variantcontext/VariantContext.java | 8 +-
.../variantcontext/filter/CompoundFilter.java | 74 ++
.../variantcontext/filter/FilteringIterator.java | 127 +++
.../filter/GenotypeQualityFilter.java | 79 ++
.../filter/HeterozygosityFilter.java | 84 ++
.../filter/PassingVariantFilter.java} | 71 +-
.../variantcontext/filter/SnpFilter.java} | 71 +-
.../filter/VariantContextFilter.java} | 70 +-
.../variantcontext/writer/BCF2FieldEncoder.java | 35 +-
.../writer/VariantContextWriterBuilder.java | 25 +-
src/java/htsjdk/variant/vcf/VCFRecordCodec.java | 18 +-
.../java/htsjdk/samtools/BAMFileWriterTest.java | 133 ++-
.../java/htsjdk/samtools/CRAMComplianceTest.java | 7 +-
.../java/htsjdk/samtools/CRAMEdgeCasesTest.java | 33 +-
.../java/htsjdk/samtools/CRAMFileIndexTest.java | 66 ++
.../samtools/CRAMFileWriterWithIndexTest.java | 3 +-
src/tests/java/htsjdk/samtools/CigarTest.java | 81 +-
.../java/htsjdk/samtools/CramFileWriterTest.java | 122 ++-
.../htsjdk/samtools/DownsamplingIteratorTests.java | 82 ++
.../htsjdk/samtools/DuplicateSetIteratorTest.java | 5 +-
.../samtools/MergingSamRecordIteratorTest.java | 41 +
.../java/htsjdk/samtools/SAMFileReaderTest.java | 69 +-
.../htsjdk/samtools/SAMFileWriterFactoryTest.java | 159 ++-
.../java/htsjdk/samtools/SAMIntegerTagTest.java | 167 +++-
.../samtools/SAMRecordDuplicateComparatorTest.java | 15 +
.../java/htsjdk/samtools/SAMRecordUnitTest.java | 797 ++++++++++++++-
.../java/htsjdk/samtools/SAMTextWriterTest.java | 12 +
src/tests/java/htsjdk/samtools/SAMUtilsTest.java | 106 ++
src/tests/java/htsjdk/samtools/SamFilesTest.java | 60 ++
src/tests/java/htsjdk/samtools/SamIndexesTest.java | 192 ++++
.../java/htsjdk/samtools/SamReaderFactoryTest.java | 25 +
src/tests/java/htsjdk/samtools/SamSpecIntTest.java | 4 +-
.../java/htsjdk/samtools/cram/CRAIEntryTest.java | 145 +++
.../java/htsjdk/samtools/cram/CRAIIndexTest.java | 133 +++
.../htsjdk/samtools/cram/build/CramIOTest.java | 82 ++
.../cram/structure/CramCompressionRecordTest.java | 68 ++
.../samtools/cram/structure/ReadTagTest.java | 21 +-
.../samtools/filter/OverclippedReadFilterTest.java | 83 ++
.../htsjdk/samtools/metrics/MetricsFileTest.java | 21 +
.../samtools/reference/FastaSequenceIndexTest.java | 8 +-
.../reference/IndexedFastaSequenceFileTest.java | 10 +-
.../java/htsjdk/samtools/sra/SRAIndexTest.java | 150 +++
.../htsjdk/samtools/sra/SRALazyRecordTest.java | 51 +
.../java/htsjdk/samtools/sra/SRAQueryTest.java | 116 +++
.../java/htsjdk/samtools/sra/SRAReferenceTest.java | 25 +
src/tests/java/htsjdk/samtools/sra/SRATest.java | 464 +++++++++
.../java/htsjdk/samtools/util/CodeUtilTest.java | 2 +-
.../htsjdk/samtools/util/DiskBackedQueueTest.java | 30 +-
.../htsjdk/samtools/util/SequenceUtilTest.java | 22 +
.../samtools/util/SortingCollectionTest.java | 39 +-
.../htsjdk/variant/bcf2/BCF2UtilsUnitTest.java | 25 +-
.../variantcontext/filter/AllFailFilter.java} | 68 +-
.../variantcontext/filter/AllPassFilter.java} | 68 +-
.../variantcontext/filter/CompoundFilterTest.java | 78 ++
.../filter/FilteringIteratorTest.java | 88 ++
.../filter/GenotypeQualityFilterTest.java | 105 ++
.../filter/HeterozygosityFilterTest.java | 128 +++
.../filter/PassingVariantFilterTest.java | 46 +
.../variantcontext/filter/SnpFilterTest.java | 54 +
.../VariantContextWriterBuilderUnitTest.java | 10 +
.../java/htsjdk/variant/vcf/VCFHeaderUnitTest.java | 34 +-
.../cram/CRAMException/testContigNotInRef.cram | Bin 0 -> 3433 bytes
.../cram/CRAMException/testContigNotInRef.cram.bai | Bin 0 -> 96 bytes
.../cram/CRAMException/testContigNotInRef.dict | 2 +
.../cram/CRAMException/testContigNotInRef.fa | 2 +
.../cram/CRAMException/testContigNotInRef.fa.fai | 1 +
.../cram/CRAMException/testContigNotInRef.fasta | 2 +
testdata/htsjdk/samtools/cram_tlen.fasta | 41 +
testdata/htsjdk/samtools/cram_tlen.fasta.fai | 8 +
.../htsjdk/samtools/cram_tlen_reads.sorted.sam | 19 +
testdata/htsjdk/samtools/cram_with_bai_index.cram | Bin 0 -> 4213 bytes
.../htsjdk/samtools/cram_with_bai_index.cram.bai | Bin 0 -> 336 bytes
testdata/htsjdk/samtools/cram_with_crai_index.cram | Bin 0 -> 4213 bytes
.../htsjdk/samtools/cram_with_crai_index.cram.crai | Bin 0 -> 77 bytes
testdata/htsjdk/samtools/hg19mini.fasta | 804 +++++++++++++++
testdata/htsjdk/samtools/hg19mini.fasta.fai | 4 +
.../htsjdk/samtools/metrics/metricsOne.metrics | 13 +
.../htsjdk/samtools/metrics/metricsOneCopy.metrics | 13 +
.../metrics/metricsOneModifiedHistogram.metrics | 14 +
.../metrics/metricsOneModifiedMetrics.metrics | 13 +
testdata/htsjdk/samtools/sra/test_archive.sra | Bin 0 -> 1099831 bytes
165 files changed, 11274 insertions(+), 1401 deletions(-)
diff --git a/.idea/modules/htsjdk.iml b/.idea/modules/htsjdk.iml
index b3d5fa4..57d35da 100644
--- a/.idea/modules/htsjdk.iml
+++ b/.idea/modules/htsjdk.iml
@@ -34,10 +34,11 @@
<root url="jar://$MODULE_DIR$/../../lib/commons-logging-1.1.1.jar!/" />
<root url="jar://$MODULE_DIR$/../../lib/snappy-java-1.0.3-rc3.jar!/" />
<root url="jar://$MODULE_DIR$/../../lib/commons-jexl-2.1.1.jar!/" />
+ <root url="jar://$MODULE_DIR$/../../lib/ngs-java-1.2.2.jar!/" />
\ No newline at end of file
diff --git a/.travis.yml b/.travis.yml
index f168684..75df51d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,16 +1,13 @@
language: java
sudo: true
- - oraclejdk7
- oraclejdk8
- - openjdk7
- - openjdk6
install: ant
script: ant all test
- if [ "$TRAVIS_BRANCH" == "master" ] && [ "$JAVA_HOME" == "/usr/lib/jvm/java-7-oracle" ]; then
+ if [ "$TRAVIS_BRANCH" == "master" ] && [ "$JAVA_HOME" == "/usr/lib/jvm/java-8-oracle" ]; then
sbt \
'set buildSnapshot := true' \
'set javacOptions in (Compile, doc) ++= Seq("-quiet")' \
diff --git a/README.md b/README.md
index 12b185f..f634cec 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,8 @@ Not all sub-packages of htsjdk are subject to the same license, so a license not
#### Java Minimum Version Support Policy
+> **NOTE: _Effective November 24th 2015, HTSJDK has ended support of Java 7 and previous versions. Java 8 is now required_.**
We will support all Java SE versions supported by Oracle until at least six months after Oracle's Public Updates period has ended ([see this link](http://www.oracle.com/technetwork/java/eol-135779.html)).
Java SE Major Release | End of Java SE Oracle Public Updates | Proposed End of Support in HTSJDK | Actual End of Support in HTSJDK
diff --git a/build.sbt b/build.sbt
index 1c6b96a..c437ca0 100644
--- a/build.sbt
+++ b/build.sbt
@@ -4,10 +4,12 @@ import sbt.Package.ManifestAttributes
name := "htsjdk"
-val buildVersion = "1.138"
+val buildVersion = "2.0.1"
organization := "com.github.samtools"
+libraryDependencies += "gov.nih.nlm.ncbi" % "ngs-java" % "1.2.2"
libraryDependencies += "org.apache.commons" % "commons-jexl" % "2.1.1"
libraryDependencies += "commons-logging" % "commons-logging" % "1.1.1"
@@ -87,9 +89,7 @@ artifactName := { (sv: ScalaVersion, module: ModuleID, artifact: Artifact) =>
crossPaths := false
-javacOptions in Compile ++= Seq("-source", "1.6")
-javacOptions in(Compile, compile) ++= Seq("-target", "1.6")
+javacOptions in (Compile,doc) ++= Seq("-Xdoclint:none")
packageOptions := Seq(ManifestAttributes(
("Implementation-Version", s"${implementationVersion.value}"),
diff --git a/build.xml b/build.xml
index b0980fe..44c2ef4 100755
--- a/build.xml
+++ b/build.xml
@@ -35,13 +35,13 @@
<property name="scripts" value="src/scripts"/>
<property name="test.output" value="dist/test"/>
- <property name="javac.target" value="1.6"/>
+ <property name="javac.target" value="1.8"/>
<property name="javac.debug" value="true"/>
<!-- Get GIT hash, if available, otherwise leave it blank. -->
<property name="repository.revision" value=""/>
- <property name="htsjdk-version" value="1.138"/>
+ <property name="htsjdk-version" value="2.0.1"/>
<property name="htsjdk-version-file" value="htsjdk.version.properties"/>
<property name="testng.verbosity" value="2"/>
<property name="test.debug.port" value="5005" /> <!-- override on the command line if desired -->
@@ -157,7 +157,7 @@
<testng suitename="htsjdk-single-test" classpathref="classpath" outputdir="${test.output}"
- <jvmarg line="-Xmx512M ${debug.jvm.args}"/>
+ <jvmarg line="-Xmx512m ${debug.jvm.args}"/>
<pathelement path="${classes}"/>
<pathelement path="${classes.test}"/>
@@ -177,7 +177,7 @@
<fileset dir="${classes}" includes="htsjdk/tribble/**/*.*"/>
<fileset dir="${classes}" includes="htsjdk/variant/**/*.*"/>
- <attribute name="Implementation-Version" value="${hts-version}(${repository.revision})"/>
+ <attribute name="Implementation-Version" value="${htsjdk-version}(${repository.revision})"/>
<attribute name="Implementation-Vendor" value="Broad Institute"/>
@@ -197,6 +197,7 @@
+ additionalparam="-Xdoclint:none -notimestamp"
<pathelement location="${java.home}/../lib/tools.jar" />
diff --git a/htsjdk.iml b/htsjdk.iml
index 59a3114..3c722e4 100644
--- a/htsjdk.iml
+++ b/htsjdk.iml
@@ -52,6 +52,15 @@
+ <orderEntry type="module-library">
+ <library>
+ <root url="jar://$MODULE_DIR$/lib/ngs-java-1.2.2.jar!/" />
+ </library>
+ </orderEntry>
diff --git a/src/java/htsjdk/samtools/AbstractBAMFileIndex.java b/src/java/htsjdk/samtools/AbstractBAMFileIndex.java
index a2a1d03..4475e00 100644
--- a/src/java/htsjdk/samtools/AbstractBAMFileIndex.java
+++ b/src/java/htsjdk/samtools/AbstractBAMFileIndex.java
@@ -64,8 +64,9 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
mBamDictionary = dictionary;
mIndexBuffer = new IndexStreamBuffer(stream);
- seek(4);
+ verifyBAMMagicNumber(stream.getSource());
sequenceIndexes = new int[readInteger() + 1];
Arrays.fill(sequenceIndexes, -1);
@@ -78,15 +79,8 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
mBamDictionary = dictionary;
mIndexBuffer = (useMemoryMapping ? new MemoryMappedFileBuffer(file) : new RandomAccessFileBuffer(file));
- // Verify the magic number.
- seek(0);
- final byte[] buffer = new byte[4];
- readBytes(buffer);
- if (!Arrays.equals(buffer, BAMFileConstants.BAM_INDEX_MAGIC)) {
- throw new RuntimeIOException("Invalid file header in BAM index " + file +
- ": " + new String(buffer));
- }
+ verifyBAMMagicNumber(file.getName());
sequenceIndexes = new int[readInteger() + 1];
Arrays.fill(sequenceIndexes, -1);
@@ -399,6 +393,17 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
return Chunk.optimizeChunkList(chunks, minimumOffset);
+ private void verifyBAMMagicNumber(final String sourceName) {
+ // Verify the magic number.
+ seek(0);
+ final byte[] buffer = new byte[4];
+ readBytes(buffer);
+ if (!Arrays.equals(buffer, BAMFileConstants.BAM_INDEX_MAGIC)) {
+ throw new RuntimeIOException("Invalid file header in BAM index " + sourceName +
+ ": " + new String(buffer));
+ }
+ }
private void skipToSequence(final int sequenceIndex) {
//Use sequence position cache if available
if(sequenceIndexes[sequenceIndex] != -1){
diff --git a/src/java/htsjdk/samtools/SAMFileSpan.java b/src/java/htsjdk/samtools/BAMFileSpan.java
similarity index 83%
copy from src/java/htsjdk/samtools/SAMFileSpan.java
copy to src/java/htsjdk/samtools/BAMFileSpan.java
index 9da77bc..193e443 100644
--- a/src/java/htsjdk/samtools/SAMFileSpan.java
+++ b/src/java/htsjdk/samtools/BAMFileSpan.java
@@ -31,31 +31,6 @@ import java.util.Collections;
import java.util.List;
- * A interface representing a collection of (possibly) discontinuous segments in the
- * BAM file, possibly representing the results of an index query.
- */
-public interface SAMFileSpan extends Cloneable {
- /**
- * Gets a pointer over the data immediately following this span.
- * @return The a pointer to data immediately following this span.
- */
- public SAMFileSpan getContentsFollowing();
- /**
- * Remove all pointers in this file span before the given file span starts.
- * @param fileSpan The filespan before which to eliminate.
- * @return The portion of the chunk list after the given chunk.
- */
- public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan);
- /**
- * Does this file span point to any data, or is it completely empty?
- * @return True if the file span is empty, false otherwise.
- */
- public boolean isEmpty();
* An ordered list of chunks, capable of representing a set of discontiguous
* regions in the BAM file. FileSpans are mutable within the package, but perceived
* as immutable outside the package.
@@ -66,8 +41,8 @@ public interface SAMFileSpan extends Cloneable {
* @author mhanna
* @version 0.1
-class BAMFileSpan implements SAMFileSpan, Serializable {
- private static final long serialVersionUID = 1L;
+public class BAMFileSpan implements SAMFileSpan, Serializable {
+ private static final long serialVersionUID = 1L;
* The constituent chunks of this list.
@@ -77,7 +52,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* Create a new empty list of chunks.
- protected BAMFileSpan() {
+ public BAMFileSpan() {
this.chunks = new ArrayList<Chunk>();
@@ -86,7 +61,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* a single chunk.
* @param chunk Chunk to use as the sole region in this span.
- protected BAMFileSpan(final Chunk chunk) {
+ public BAMFileSpan(final Chunk chunk) {
this.chunks = new ArrayList<Chunk>();
@@ -95,7 +70,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* Create a new chunk list from the given list of chunks.
* @param chunks Constituent chunks.
- protected BAMFileSpan(final List<Chunk> chunks) {
+ public BAMFileSpan(final List<Chunk> chunks) {
this.chunks = new ArrayList<Chunk>(chunks);
@@ -104,7 +79,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* @return True iff the ChunkList points to any data within the BAM.
public boolean isEmpty() {
- return chunks.isEmpty();
+ return chunks.isEmpty();
@@ -119,11 +94,11 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
- * Remove all chunks in this file span before the given file span starts.
+ * Creates a new file span by removing all chunks before the given file span starts.
* If a chunk in the chunk list starts before and ends after the given
* chunk, the first portion of the chunk will be deleted.
* @param fileSpan The filespan before which to eliminate.
- * @return The portion of the chunk list after the given chunk.
+ * @return A new BAMFileSpan which contains the portion of the chunk list after the given chunk.
public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan) {
if(fileSpan == null)
@@ -147,7 +122,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
else {
- // This chunk from the list partially overlaps the filtering chunk and must be trimmed.
+ // This chunk from the list partially overlaps the filtering chunk and must be trimmed.
trimmedChunkList.add(new Chunk(bamFileSpan.chunks.get(0).getChunkStart(),chunkToTrim.getChunkEnd()));
@@ -171,7 +146,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* @param span - span with chunks to add to this one
- public void add(final BAMFileSpan span) {
+ protected void add(final BAMFileSpan span) {
for (final Chunk c : span.chunks) {
@@ -184,12 +159,12 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
protected void add(final Chunk chunk) {
* Convert the chunk list to an array of offsets, paired in [start,end) format.
* @return Array of offsets.
- protected long[] toCoordinateArray() {
+ public long[] toCoordinateArray() {
final int count = chunks.size() * 2;
if (count == 0) {
return null;
@@ -207,7 +182,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* Find the first offset in the chunk list
* @return The first offset in the span
- protected long getFirstOffset() {
+ public long getFirstOffset() {
final long result = 0;
if (chunks == null){
return result;
@@ -222,7 +197,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* Gets the constituent chunks stored in this span.
* @return An unmodifiable list of chunks.
- protected List<Chunk> getChunks() {
+ public List<Chunk> getChunks() {
return Collections.unmodifiableList(chunks);
@@ -284,9 +259,9 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
public static BAMFileSpan merge(final BAMFileSpan[] spans) {
final ArrayList<Chunk> inputChunks = new ArrayList<Chunk>();
for (final BAMFileSpan span : spans) {
- if(span != null){
- inputChunks.addAll(span.chunks);
- }
+ if(span != null){
+ inputChunks.addAll(span.chunks);
+ }
return new BAMFileSpan(Chunk.optimizeChunkList(inputChunks, 0));
diff --git a/src/java/htsjdk/samtools/BAMRecord.java b/src/java/htsjdk/samtools/BAMRecord.java
index f27fe20..c45566f 100644
--- a/src/java/htsjdk/samtools/BAMRecord.java
+++ b/src/java/htsjdk/samtools/BAMRecord.java
@@ -65,6 +65,11 @@ public class BAMRecord extends SAMRecord {
private boolean mBinaryDataStale;
+ /**
+ * Create a new BAM Record. If the reference sequence index or mate reference sequence index are any value other
+ * than NO_ALIGNMENT_REFERENCE_INDEX (-1), then the specified index values must exist in the sequence dictionary
+ * in the header argument.
+ */
protected BAMRecord(final SAMFileHeader header,
final int referenceID,
final int coordinate,
@@ -242,7 +247,7 @@ public class BAMRecord extends SAMRecord {
mCigarDecoded = true;
- if (getValidationStringency() != ValidationStringency.SILENT && !this.getReadUnmappedFlag()) {
+ if (null != getHeader() && getValidationStringency() != ValidationStringency.SILENT && !this.getReadUnmappedFlag()) {
// Don't know line number, and don't want to force read name to be decoded.
SAMUtils.processValidationErrors(validateCigar(-1L), -1, getValidationStringency());
diff --git a/src/java/htsjdk/samtools/BAMRecordCodec.java b/src/java/htsjdk/samtools/BAMRecordCodec.java
index 25c2b27..dc1ca81 100644
--- a/src/java/htsjdk/samtools/BAMRecordCodec.java
+++ b/src/java/htsjdk/samtools/BAMRecordCodec.java
@@ -200,7 +200,11 @@ public class BAMRecordCodec implements SortingCollection.Codec<SAMRecord> {
final BAMRecord ret = this.samRecordFactory.createBAMRecord(
header, referenceID, coordinate, readNameLength, mappingQuality,
bin, cigarLen, flags, readLen, mateReferenceID, mateCoordinate, insertSize, restOfRecord);
- ret.setHeader(header);
+ if (null != header) {
+ // don't reset a null header as this will clobber the reference and mate reference indices
+ ret.setHeader(header);
+ }
return ret;
diff --git a/src/java/htsjdk/samtools/BinaryTagCodec.java b/src/java/htsjdk/samtools/BinaryTagCodec.java
index 902e3ba..5603cfc 100644
--- a/src/java/htsjdk/samtools/BinaryTagCodec.java
+++ b/src/java/htsjdk/samtools/BinaryTagCodec.java
@@ -320,12 +320,15 @@ public class BinaryTagCodec {
return (char)byteBuffer.get();
case 'I':
final long val = byteBuffer.getInt() & 0xffffffffL;
- if (val <= Integer.MAX_VALUE) {
+ if ( val <= Integer.MAX_VALUE ) {
return (int)val;
- SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.TAG_VALUE_TOO_LARGE,
- "Tag value " + val + " too large to store as signed integer.", null), validationStringency);
- // convert to unsigned int stored in a long
+ // If it won't fit into a signed integer, but is within range for an unsigned 32-bit integer,
+ // return it directly as a long
+ if (! SAMUtils.isValidUnsignedIntegerAttribute(val)) {
+ SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.TAG_VALUE_TOO_LARGE,
+ "Unsigned integer is out of range for a 32-bit unsigned value: " + val, null), validationStringency);
+ }
return val;
case 'i':
return byteBuffer.getInt();
diff --git a/src/java/htsjdk/samtools/CRAMFileReader.java b/src/java/htsjdk/samtools/CRAMFileReader.java
index 79b1f5f..04521ba 100644
--- a/src/java/htsjdk/samtools/CRAMFileReader.java
+++ b/src/java/htsjdk/samtools/CRAMFileReader.java
@@ -17,6 +17,7 @@ package htsjdk.samtools;
import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.SamReader.Type;
+import htsjdk.samtools.cram.CRAIIndex;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.ContainerIO;
@@ -126,10 +127,30 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
this.referenceSource = referenceSource;
this.validationStringency = validationStringency;
- iterator = new CRAMIterator(inputStream, referenceSource);
- iterator.setValidationStringency(validationStringency);
- if (indexInputStream != null)
- mIndex = new CachingBAMFileIndex(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary());
+ iterator = new CRAMIterator(inputStream, referenceSource, validationStringency);
+ if (indexInputStream != null) {
+ try {
+ mIndex = new CachingBAMFileIndex(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary());
+ } catch (Exception e) {
+ // try CRAI instead:
+ indexInputStream.seek(0);
+ final SeekableStream baiStream = CRAIIndex.openCraiFileAsBaiStream(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary());
+ mIndex = new CachingBAMFileIndex(baiStream, iterator.getSAMFileHeader().getSequenceDictionary());
+ }
+ }
+ }
+ public CRAMFileReader(final InputStream stream,
+ final File indexFile, final ReferenceSource referenceSource,
+ final ValidationStringency validationStringency) throws IOException {
+ this(stream, indexFile == null ? null: new SeekableFileStream(indexFile), referenceSource, validationStringency);
+ }
+ public CRAMFileReader(final File cramFile,
+ final File indexFile, final ReferenceSource referenceSource,
+ final ValidationStringency validationStringency) throws IOException {
+ this(new FileInputStream(cramFile), indexFile, referenceSource, validationStringency);
+ this.cramFile = cramFile;
@@ -165,10 +186,25 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
if (mIndex == null) {
final SAMSequenceDictionary dictionary = getFileHeader()
- mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile,
- dictionary, mEnableIndexMemoryMapping)
- : new DiskBasedBAMFileIndex(mIndexFile, dictionary,
- mEnableIndexMemoryMapping);
+ if (mIndexFile.getName().endsWith(BAMIndex.BAMIndexSuffix)) {
+ mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile,
+ dictionary, mEnableIndexMemoryMapping)
+ : new DiskBasedBAMFileIndex(mIndexFile, dictionary,
+ mEnableIndexMemoryMapping);
+ return mIndex;
+ }
+ if (!mIndexFile.getName().endsWith(CRAIIndex.CRAI_INDEX_SUFFIX)) return null;
+ // convert CRAI into BAI:
+ final SeekableStream baiStream;
+ try {
+ baiStream = CRAIIndex.openCraiFileAsBaiStream(mIndexFile, iterator.getSAMFileHeader().getSequenceDictionary());
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(baiStream, getFileHeader().getSequenceDictionary()) :
+ new DiskBasedBAMFileIndex(baiStream, getFileHeader().getSequenceDictionary());
return mIndex;
@@ -191,7 +227,7 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
try {
// create an input stream that reads the source cram stream only within the coordinate pairs:
final SeekableStream seekableStream = getSeekableStreamOrFailWithRTE();
- return new CRAMIterator(seekableStream, referenceSource, coordinateArray);
+ return new CRAMIterator(seekableStream, referenceSource, coordinateArray, validationStringency);
} catch (final IOException e) {
throw new RuntimeException(e);
@@ -210,11 +246,10 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
final CRAMIterator newIterator;
if (cramFile != null) {
newIterator = new CRAMIterator(new FileInputStream(cramFile),
- referenceSource);
+ referenceSource, validationStringency);
} else
- newIterator = new CRAMIterator(inputStream, referenceSource);
+ newIterator = new CRAMIterator(inputStream, referenceSource, validationStringency);
- newIterator.setValidationStringency(validationStringency);
iterator = newIterator;
return iterator;
} catch (final Exception e) {
@@ -322,7 +357,7 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
if (filePointers == null || filePointers.length == 0)
return emptyIterator;
- final CRAMIterator newIterator = new CRAMIterator(getSeekableStreamOrFailWithRTE(), referenceSource, filePointers);
+ final CRAMIterator newIterator = new CRAMIterator(getSeekableStreamOrFailWithRTE(), referenceSource, filePointers, validationStringency);
return new IntervalIterator(newIterator, new QueryInterval(referenceIndex, start, end), overlap);
@@ -334,8 +369,7 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
final CRAMIterator newIterator;
try {
- newIterator = new CRAMIterator(seekableStream, referenceSource);
- newIterator.setValidationStringency(validationStringency);
+ newIterator = new CRAMIterator(seekableStream, referenceSource, validationStringency);
seekableStream.seek(startOfLastLinearBin >>> 16);
final Container container = ContainerIO.readContainerHeader(newIterator.getCramHeader().getVersion().major, seekableStream);
seekableStream.seek(seekableStream.position() + container.containerByteSize);
diff --git a/src/java/htsjdk/samtools/CRAMFileWriter.java b/src/java/htsjdk/samtools/CRAMFileWriter.java
index dc83bc3..20347a0 100644
--- a/src/java/htsjdk/samtools/CRAMFileWriter.java
+++ b/src/java/htsjdk/samtools/CRAMFileWriter.java
@@ -32,6 +32,7 @@ import htsjdk.samtools.cram.structure.CramCompressionRecord;
import htsjdk.samtools.cram.structure.CramHeader;
import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.StringLineReader;
import java.io.IOException;
@@ -72,22 +73,75 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
private CRAMIndexer indexer;
private long offset;
- public CRAMFileWriter(final OutputStream outputStream, final ReferenceSource source, final SAMFileHeader samFileHeader, final String fileName) {
- this(outputStream, null, source, samFileHeader, fileName);
+ /**
+ * Create a CRAMFileWriter on an output stream. Requires input records to be presorted to match the
+ * sort order defined by the input {@code samFileHeader}.
+ *
+ * @param outputStream where to write the output.
+ * @param source reference source
+ * @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg.
+ * @param fileName used for display in error messages
+ */
+ public CRAMFileWriter(
+ final OutputStream outputStream,
+ final ReferenceSource source,
+ final SAMFileHeader samFileHeader,
+ final String fileName)
+ {
+ this(outputStream, null, source, samFileHeader, fileName); // defaults to presorted == true
- public CRAMFileWriter(final OutputStream outputStream, final OutputStream indexOS, final ReferenceSource source, final SAMFileHeader samFileHeader, final String fileName) {
+ /**
+ * Create a CRAMFileWriter and index on output streams. Requires input records to be presorted to match the
+ * sort order defined by the input {@code samFileHeader}.
+ *
+ * @param outputStream where to write the output.
+ * @param indexOS where to write the output index. Can be null if no index is required.
+ * @param source reference source
+ * @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg.
+ * @param fileName used for display in error messages
+ */
+ public CRAMFileWriter(
+ final OutputStream outputStream,
+ final OutputStream indexOS,
+ final ReferenceSource source,
+ final SAMFileHeader samFileHeader,
+ final String fileName)
+ {
+ this(outputStream, indexOS, true, source, samFileHeader, fileName); // defaults to presorted==true
+ }
+ /**
+ * Create a CRAMFileWriter and index on output streams.
+ *
+ * @param outputStream where to write the output.
+ * @param indexOS where to write the output index. Can be null if no index is required.
+ * @param presorted if true records written to this writer must already be sorted in the order specified by the header
+ * @param source reference source
+ * @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg.
+ * @param fileName used for display in error message display
+ */
+ public CRAMFileWriter(final OutputStream outputStream, final OutputStream indexOS, final boolean presorted,
+ final ReferenceSource source, final SAMFileHeader samFileHeader, final String fileName) {
this.outputStream = outputStream;
- this.source = source;
this.samFileHeader = samFileHeader;
this.fileName = fileName;
- setSortOrder(samFileHeader.getSortOrder(), true);
+ initCRAMWriter(indexOS, source, samFileHeader, presorted);
+ }
+ private void initCRAMWriter(final OutputStream indexOS, final ReferenceSource source, final SAMFileHeader samFileHeader, final boolean preSorted) {
+ this.source = source;
+ setSortOrder(samFileHeader.getSortOrder(), preSorted);
- if (this.source == null) this.source = new ReferenceSource(Defaults.REFERENCE_FASTA);
+ if (this.source == null) {
+ this.source = new ReferenceSource(Defaults.REFERENCE_FASTA);
+ }
containerFactory = new ContainerFactory(samFileHeader, recordsPerSlice);
- if (indexOS != null) indexer = new CRAMIndexer(indexOS, samFileHeader);
+ if (indexOS != null) {
+ indexer = new CRAMIndexer(indexOS, samFileHeader);
+ }
@@ -99,7 +153,6 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
protected boolean shouldFlushContainer(final SAMRecord nextRecord) {
return samRecords.size() >= containerSize || refSeqIndex != REF_SEQ_INDEX_NOT_INITIALIZED && refSeqIndex != nextRecord.getReferenceIndex();
private static void updateTracks(final List<SAMRecord> samRecords, final ReferenceTracks tracks) {
@@ -250,7 +303,7 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
last = last.next;
if (last.templateSize != -templateLength) detach(cramRecord);
- }
+ }else detach(cramRecord);
} else detach(cramRecord);
@@ -331,12 +384,20 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
while ((cramRecord = cramRecord.next) != null);
+ /**
+ * Write an alignment record.
+ * @param alignment must not be null and must have a valid SAMFileHeader.
+ */
protected void writeAlignment(final SAMRecord alignment) {
- if (shouldFlushContainer(alignment)) try {
- flushContainer();
- } catch (final Exception e) {
- throw new RuntimeException(e);
+ if (shouldFlushContainer(alignment)) {
+ try {
+ flushContainer();
+ } catch (IOException e) {
+ throw new RuntimeIOException(e);
+ } catch (IllegalAccessException e) {
+ throw new RuntimeException(e);
+ }
@@ -374,12 +435,18 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
protected void finish() {
try {
- if (!samRecords.isEmpty()) flushContainer();
+ if (!samRecords.isEmpty()) {
+ flushContainer();
+ }
CramIO.issueEOF(cramVersion, outputStream);
- if (indexer != null)
+ if (indexer != null) {
- } catch (final Exception e) {
+ }
+ outputStream.close();
+ } catch (final IOException e) {
+ throw new RuntimeIOException(e);
+ } catch (final IllegalAccessException e) {
throw new RuntimeException(e);
diff --git a/src/java/htsjdk/samtools/CRAMIndexer.java b/src/java/htsjdk/samtools/CRAMIndexer.java
index 338874f..eec8c31 100755
--- a/src/java/htsjdk/samtools/CRAMIndexer.java
+++ b/src/java/htsjdk/samtools/CRAMIndexer.java
@@ -46,7 +46,6 @@ import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BlockCompressedFilePointerUtil;
import htsjdk.samtools.util.Log;
-import org.testng.Assert;
import java.io.File;
import java.io.IOException;
@@ -145,39 +144,6 @@ public class CRAMIndexer {
- * Generates a BAM index file, either textual or binary, from an input BAI file.
- * Only used for testing, but located here for visibility into CachingBAMFileIndex.
- *
- * @param output BAM Index (.bai) file (or bai.txt file when text)
- * @param textOutput Whether to create text output or binary
- */
- static public void createAndWriteIndex(final File input, final File output, final boolean textOutput) {
- // content is from an existing bai file.
- final CachingBAMFileIndex existingIndex = new CachingBAMFileIndex(input, null);
- final int nRef = existingIndex.getNumberOfReferences();
- final BAMIndexWriter outputWriter;
- if (textOutput) {
- outputWriter = new TextualBAMIndexWriter(nRef, output);
- } else {
- outputWriter = new BinaryBAMIndexWriter(nRef, output);
- }
- // write the content one reference at a time
- try {
- for (int i = 0; i < nRef; i++) {
- outputWriter.writeReference(existingIndex.getQueryResults(i));
- }
- outputWriter.writeNoCoordinateRecordCount(existingIndex.getNoCoordinateCount());
- outputWriter.close();
- } catch (final Exception e) {
- throw new SAMException("Exception creating BAM index", e);
- }
- }
- /**
* Class for constructing BAM index files.
* One instance is used to construct an entire index.
* processAlignment is called for each alignment until a new reference is encountered, then
@@ -408,7 +374,7 @@ public class CRAMIndexer {
} catch (final IOException e) {
- Assert.fail("Failed to read cram container", e);
+ throw new RuntimeException("Failed to read cram container", e);
} while (!container.isEOF());
diff --git a/src/java/htsjdk/samtools/CRAMIterator.java b/src/java/htsjdk/samtools/CRAMIterator.java
index fc8915f..6e08f05 100644
--- a/src/java/htsjdk/samtools/CRAMIterator.java
+++ b/src/java/htsjdk/samtools/CRAMIterator.java
@@ -41,6 +41,8 @@ import java.util.Collections;
import java.util.Iterator;
import java.util.List;
+import htsjdk.samtools.cram.CRAMException;
public class CRAMIterator implements SAMRecordIterator {
private static final Log log = Log.getInstance(CRAMIterator.class);
private final CountingInputStream countingInputStream;
@@ -78,10 +80,14 @@ public class CRAMIterator implements SAMRecordIterator {
private long samRecordIndex;
private ArrayList<CramCompressionRecord> cramRecords;
- public CRAMIterator(final InputStream inputStream, final ReferenceSource referenceSource)
+ public CRAMIterator(final InputStream inputStream, final ReferenceSource referenceSource, final ValidationStringency validationStringency)
throws IOException {
+ if (null == referenceSource) {
+ throw new CRAMException("A reference source is required for CRAM files");
+ }
this.countingInputStream = new CountingInputStream(inputStream);
this.referenceSource = referenceSource;
+ this.validationStringency = validationStringency;
final CramContainerIterator containerIterator = new CramContainerIterator(this.countingInputStream);
cramHeader = containerIterator.getCramHeader();
this.containerIterator = containerIterator;
@@ -93,10 +99,14 @@ public class CRAMIterator implements SAMRecordIterator {
parser = new ContainerParser(cramHeader.getSamFileHeader());
- public CRAMIterator(final SeekableStream seekableStream, final ReferenceSource referenceSource, final long[] coordinates)
+ public CRAMIterator(final SeekableStream seekableStream, final ReferenceSource referenceSource, final long[] coordinates, final ValidationStringency validationStringency)
throws IOException {
+ if (null == referenceSource) {
+ throw new CRAMException("A reference source is required for CRAM files");
+ }
this.countingInputStream = new CountingInputStream(seekableStream);
this.referenceSource = referenceSource;
+ this.validationStringency = validationStringency;
final CramSpanContainerIterator containerIterator = CramSpanContainerIterator.fromFileSpan(seekableStream, coordinates);
cramHeader = containerIterator.getCramHeader();
this.containerIterator = containerIterator;
@@ -108,12 +118,18 @@ public class CRAMIterator implements SAMRecordIterator {
parser = new ContainerParser(cramHeader.getSamFileHeader());
+ @Deprecated
+ public CRAMIterator(final SeekableStream seekableStream, final ReferenceSource referenceSource, final long[] coordinates)
+ throws IOException {
+ this(seekableStream, referenceSource, coordinates, ValidationStringency.DEFAULT_STRINGENCY);
+ }
public CramHeader getCramHeader() {
return cramHeader;
private void nextContainer() throws IOException, IllegalArgumentException,
- IllegalAccessException {
+ IllegalAccessException, CRAMException {
if (containerIterator != null) {
if (!containerIterator.hasNext()) {
@@ -145,7 +161,7 @@ public class CRAMIterator implements SAMRecordIterator {
- parser.getRecords(container, cramRecords);
+ parser.getRecords(container, cramRecords, validationStringency);
if (container.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
refs = new byte[]{};
@@ -156,6 +172,9 @@ public class CRAMIterator implements SAMRecordIterator {
final SAMSequenceRecord sequence = cramHeader.getSamFileHeader()
refs = referenceSource.getReferenceBases(sequence, true);
+ if (refs == null) {
+ throw new CRAMException(String.format("Contig %s not found in the reference file.", sequence.getSequenceName()));
+ }
prevSeqId = container.sequenceId;
@@ -242,7 +261,12 @@ public class CRAMIterator implements SAMRecordIterator {
if (!iterator.hasNext()) {
try {
- } catch (final Exception e) {
+ } catch (CRAMException ce) {
+ throw ce;
+ } catch (SAMFormatException se) {
+ throw se;
+ }
+ catch (final Exception e) {
throw new RuntimeEOFException(e);
diff --git a/src/java/htsjdk/samtools/ChainedDownsamplingIterator.java b/src/java/htsjdk/samtools/ChainedDownsamplingIterator.java
new file mode 100644
index 0000000..4fa3a7d
--- /dev/null
+++ b/src/java/htsjdk/samtools/ChainedDownsamplingIterator.java
@@ -0,0 +1,90 @@
+ * The MIT License
+ *
+ * Copyright (c) 2015 Tim Fennell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ */
+package htsjdk.samtools;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+ * A DownsamplingIterator that combines the ConstantMemory and HighAccuracy downsampling techniques to provide an
+ * iterator that has accuracy approaching that of HighAccuracy, but with more limited memory usage. Instead of
+ * requiring memory proportional to number of read names in the incoming stream of reads, requires memory
+ * approximately proportional to the number of output reads.
+ *
+ * @author Tim Fennell
+ */
+class ChainedDownsamplingIterator extends HighAccuracyDownsamplingIterator {
+ public static final int MIN_ACCURATE_INPUT_READS = 50000;
+ /**
+ * Constructs a chained iterator that will read from the provided iterator and attempt to downsampling to the provided proportion.
+ */
+ ChainedDownsamplingIterator(final Iterator<SAMRecord> iterator, final double proportion, final int seed) {
+ super(new ConstantMemoryDownsamplingIterator(iterator, adjustProportion(proportion), seed), proportion, seed);
+ // Deal with the fact that the iterator will advance and discard some reads at construction
+ final long discarded = ((ConstantMemoryDownsamplingIterator) getUnderlyingIterator()).getDiscardedCount();
+ recordDiscardRecords(discarded);
+ }
+ /**
+ * Calculates the upper bound of 99.9% CI given the proportion, that is used to "buffer" the proportion on
+ * the constant memory downsampler, to make sure it leaves enough reads for us to downsample.
+ *
+ * Uses an assumed number of reads tested as this is often not known until after the fact.
+ */
+ private static double adjustProportion(final double p) {
+ final double ciAdjustment99_9 = 3.3 * Math.sqrt(p/MIN_ACCURATE_INPUT_READS);
+ return Math.min(1, p + ciAdjustment99_9);
+ }
+ /**
+ * Resets statistics before reading from the underlying iterator.
+ */
+ @Override
+ protected void readFromUnderlyingIterator(final List<SAMRecord> recs, final Set<String> names, final int templatesToRead) {
+ // Reset the stats on the underlying iterator
+ ((ConstantMemoryDownsamplingIterator) getUnderlyingIterator()).resetStatistics();
+ // Read from the underlying iterator
+ super.readFromUnderlyingIterator(recs, names, templatesToRead);
+ }
+ @Override
+ protected int calculateTemplatesToKeep(final int templatesRead, final double overallProportion) {
+ // Calculate an adjusted proportion to keep, knowing what proportion the underlying iterator discarded
+ final ConstantMemoryDownsamplingIterator iter = (ConstantMemoryDownsamplingIterator) getUnderlyingIterator();
+ final double priorProportion = iter.getAcceptedFraction();
+ final double p = Math.max(0, Math.min(1, overallProportion / priorProportion));
+ final int retval = super.calculateTemplatesToKeep(templatesRead, p);
+ // Record all the discarded records to keep the overall statistics accurate, but do it after
+ // the call to super() so it doesn't affect the proportion calculation.
+ recordDiscardRecords(iter.getDiscardedCount());
+ return retval;
+ }
diff --git a/src/java/htsjdk/samtools/ConstantMemoryDownsamplingIterator.java b/src/java/htsjdk/samtools/ConstantMemoryDownsamplingIterator.java
new file mode 100644
index 0000000..c6e0de4
--- /dev/null
+++ b/src/java/htsjdk/samtools/ConstantMemoryDownsamplingIterator.java
@@ -0,0 +1,88 @@
+ * The MIT License
+ *
+ * Copyright (c) 2015 Tim Fennell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ */
+package htsjdk.samtools;
+import htsjdk.samtools.util.Murmur3;
+import htsjdk.samtools.util.PeekableIterator;
+import java.util.Iterator;
+ * A DownsamplingIterator that runs in constant (and very small) memory. For each read the read name is hashed
+ * using the Murmur3_32 hash algorithm to obtain an integer value that is, enough for our purposes, uniformly
+ * distributed between the min and max int values even for highly similar inputs. The proportion is used to
+ * calculate a maximum acceptable hash value within the range. Records whose hash value is below the limit
+ * are emitted, records whose hash value is above the limit are discarded.
+ *
+ * Does not make any attempt to be accurate (have actual proportion == requested proportion) beyond what would
+ * be expected for a random process and so may become quite inaccurate when downsampling to small numbers of
+ * reads.
+ *
+ * @author Tim Fennell
+ */
+class ConstantMemoryDownsamplingIterator extends DownsamplingIterator {
+ private final PeekableIterator<SAMRecord> underlyingIterator;
+ private final int maxHashValue;
+ private final Murmur3 hasher;
+ /** Constructs a downsampling iterator upon the supplied iterator, using the Random as the source of randomness. */
+ ConstantMemoryDownsamplingIterator(final Iterator<SAMRecord> iterator, final double proportion, final int seed) {
+ super(proportion);
+ this.hasher = new Murmur3(seed);
+ this.underlyingIterator = new PeekableIterator<SAMRecord>(iterator);
+ final long range = (long) Integer.MAX_VALUE - (long) Integer.MIN_VALUE;
+ this.maxHashValue = Integer.MIN_VALUE + (int) Math.round(range * proportion);
+ advanceToNextAcceptedRead();
+ }
+ /** Returns true if there is another record available post-downsampling, false otherwise. */
+ @Override public boolean hasNext() {
+ // The underlying iterator is always left at the next return-able read, so if it has a next read, so do we
+ return this.underlyingIterator.hasNext();
+ }
+ /**
+ * Advances the underlying, peekable, iterator until the next records is one that is to be emitted.
+ * @return true if there is at least one emittable record ready for emission, false otherwise
+ */
+ private boolean advanceToNextAcceptedRead() {
+ while (this.underlyingIterator.hasNext() && this.hasher.hashUnencodedChars(this.underlyingIterator.peek().getReadName()) > this.maxHashValue) {
+ this.underlyingIterator.next();
+ recordDiscardedRecord();
+ }
+ return this.underlyingIterator.hasNext();
+ }
+ /** Returns the next record from the iterator, or throws an exception if there is no next record. */
+ @Override public SAMRecord next() {
+ final SAMRecord rec = this.underlyingIterator.next();
+ recordAcceptedRecord();
+ advanceToNextAcceptedRead();
+ return rec;
+ }
diff --git a/src/java/htsjdk/samtools/DefaultSAMRecordFactory.java b/src/java/htsjdk/samtools/DefaultSAMRecordFactory.java
index 2f23a48..8a6077a 100644
--- a/src/java/htsjdk/samtools/DefaultSAMRecordFactory.java
+++ b/src/java/htsjdk/samtools/DefaultSAMRecordFactory.java
@@ -18,7 +18,11 @@ public class DefaultSAMRecordFactory implements SAMRecordFactory {
return new SAMRecord(header);
- /** Create a new BAM Record. */
+ /**
+ * Create a new BAM Record. If the reference sequence index or mate reference sequence index are
+ * any value other than NO_ALIGNMENT_REFERENCE_INDEX, the values must be resolvable against the sequence
+ * dictionary in the header argument.
+ */
public BAMRecord createBAMRecord (final SAMFileHeader header,
final int referenceSequenceIndex,
final int alignmentStart,
diff --git a/src/java/htsjdk/samtools/DownsamplingIterator.java b/src/java/htsjdk/samtools/DownsamplingIterator.java
index 4ae8ffb..8ca0d84 100644
--- a/src/java/htsjdk/samtools/DownsamplingIterator.java
+++ b/src/java/htsjdk/samtools/DownsamplingIterator.java
@@ -25,127 +25,89 @@ package htsjdk.samtools;
import htsjdk.samtools.util.CloseableIterator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.NoSuchElementException;
-import java.util.Random;
- * An iterator of SAMRecords that can downsample on the fly. Allows for inclusion of secondary and/or
- * supplemental records (off by default), though this will cause memory use to increase as the decisions
- * for each read name must be cached permanently.
+ * Abstract base class for all DownsamplingIterators that provides a uniform interface for recording
+ * and reporting statistics bout how many records have been kept and discarded.
+ *
+ * A DownsamplingIterator is an iterator that takes another iterator of SAMRecords and filters out a
+ * subset of those records in a random way, while ensuring that all records for a template (i.e. record name)
+ * are either retained or discarded. Strictly speaking the proportion parameter applies to templates,
+ * though in most instances it is safe to think about it being applied to records.
* @author Tim Fennell
-public class DownsamplingIterator implements CloseableIterator<SAMRecord>, Iterable<SAMRecord> {
- private final Iterator<SAMRecord> underlyingIterator;
- private final Random random;
- private final double probabilityOfKeeping;
- private SAMRecord nextRecord;
- private long totalReads, keptReads;
- private final Map<String, Boolean> decisions = new HashMap<String, Boolean>();
- private boolean allowSecondaryAlignments = false;
- private boolean allowSupplementalAlignments = false;
- private boolean includeNoRefReads = true;
- /** Constructs a downsampling iterator upon the supplied iterator, using the Random as the source of randomness. */
- public DownsamplingIterator(final Iterator<SAMRecord> iterator, final Random random, final double probabilityOfKeeping) {
- this.underlyingIterator = iterator;
- this.random = random;
- this.probabilityOfKeeping = probabilityOfKeeping;
+public abstract class DownsamplingIterator implements CloseableIterator<SAMRecord> {
+ private long recordsSeen;
+ private long recordsAccepted;
+ private double targetProportion;
+ /** Constructs a downsampling iterator that aims to retain the targetProportion of reads. */
+ public DownsamplingIterator(final double targetProportion) {
+ if (targetProportion < 0) throw new IllegalArgumentException("targetProportion must be >= 0");
+ if (targetProportion > 1) throw new IllegalArgumentException("targetProportion must be <= 1");
+ this.targetProportion = targetProportion;
- /** Sets whether or not secondary alignments are allowed (true) or all discarded (false). */
- public DownsamplingIterator setAllowSecondaryAlignments(final boolean allowSecondaryAlignments) {
- this.allowSecondaryAlignments = allowSecondaryAlignments;
- return this;
- }
+ /** Does nothing. */
+ @Override public void close() { /** No Op. */ }
- /** Sets whether or not supplemental alignments are allowed (true) or all discarded (false). */
- public DownsamplingIterator setAllowSupplementalAlignments(final boolean allowSupplementalAlignments) {
- this.allowSupplementalAlignments = allowSupplementalAlignments;
- return this;
- }
+ /** Returns the number of records seen, including accepted and discarded, since creation of the last call to resetStatistics. */
+ public long getSeenCount() { return this.recordsSeen; }
- /** Sets whether the iterator will stop when no-ref reads are encountered, or keep downsampling through them. */
- public DownsamplingIterator setIncludeNoRefReads(final boolean includeNoRefReads) {
- this.includeNoRefReads = includeNoRefReads;
- return this;
- }
+ /** Returns the number of records returned since creation of the last call to resetStatistics. */
+ public long getAcceptedCount() { return this.recordsAccepted; }
+ /** Returns the number of records discarded since creation of the last call to resetStatistics. */
+ public long getDiscardedCount() { return this.recordsSeen - this.recordsAccepted; }
- /** Returns the total number of reads/records considered up to the point when the method is called. */
- public long getTotalReads() { return totalReads; }
+ /** Gets the fraction of records discarded since creation or the last call to resetStatistics(). */
+ public double getDiscardedFraction() { return getDiscardedCount() / (double) getSeenCount(); }
- /** Returns the number of reads/records kept post-downsampling up to the point when the method is called. */
- public long getKeptReads() { return keptReads; }
+ /** Gets the fraction of records accepted since creation or the last call to resetStatistics(). */
+ public double getAcceptedFraction() { return getAcceptedCount() / (double) getSeenCount(); }
- /** Simple implementation of iterable that returns this iterator. */
- @Override public Iterator<SAMRecord> iterator() { return this; }
+ /** Resets the statistics for records seen/accepted/discarded. */
+ public void resetStatistics() {
+ this.recordsSeen = 0;
+ this.recordsAccepted = 0;
+ }
+ /** Gets the target proportion of records that should be retained during downsampling. */
+ public double getTargetProportion() {
+ return targetProportion;
+ }
+ /** Method for subclasses to record a record as being discarded. */
+ protected final void recordDiscardedRecord() { this.recordsSeen++; }
- * Clears the current record and attempts to advance through the underlying iterator until a
- * record is kept during downsampling. If no more records are kept and the end of the input
- * is reached this.nextRecord will be null.
- *
- * @return true if a record is available after advancing, false otherwise
+ * Method for subclasses to record a specific record as being accepted. Null may be passed if a record
+ * was discarded but access to the object is no longer available.
- private boolean advance() {
- this.nextRecord = null;
- final boolean oneRecPerRead = !allowSecondaryAlignments && !allowSupplementalAlignments;
- while (this.nextRecord == null && this.underlyingIterator.hasNext()) {
- final SAMRecord rec = this.underlyingIterator.next();
- if (!this.allowSecondaryAlignments && rec.getNotPrimaryAlignmentFlag()) continue;
- if (!this.allowSupplementalAlignments && rec.getSupplementaryAlignmentFlag()) continue;
- if (!this.includeNoRefReads && rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) break;
- ++totalReads;
- final String key = rec.getReadName();
- final Boolean previous = oneRecPerRead ? decisions.remove(key) : decisions.get(key);
- final boolean keeper;
- if (previous == null) {
- keeper = this.random.nextDouble() <= this.probabilityOfKeeping;
- if (rec.getReadPairedFlag() || this.allowSecondaryAlignments || this.allowSupplementalAlignments) decisions.put(key, keeper);
- }
- else {
- keeper = previous;
- }
- if (keeper) {
- this.nextRecord = rec;
- ++keptReads;
- }
- }
- return this.nextRecord != null;
- }
+ protected final void recordAcceptedRecord() { this.recordsSeen++; this.recordsAccepted++; }
- /** Returns true if there is another record available post-downsampling, false otherwise. */
- @Override public boolean hasNext() {
- return this.nextRecord != null || advance();
+ /** Record one or more records as having been discarded. */
+ protected final void recordDiscardRecords(final long n) {
+ this.recordsSeen += n;
- /** Returns the next record from the iterator, or throws an exception if there is no next record. */
- @Override public SAMRecord next() {
- if (this.nextRecord == null) {
- throw new NoSuchElementException("Call to next() when hasNext() == false");
- }
- else {
- final SAMRecord retval = this.nextRecord;
- advance();
- return retval;
- }
+ /** Record one or more records as having been discarded. */
+ protected final void recordAcceptedRecords(final long n) {
+ this.recordsSeen += n;
+ this.recordsAccepted += n;
- /** Unsupported operation. */
- @Override public void remove() {
- throw new UnsupportedOperationException("remove() is not supported.");
+ /**
+ * Indicates whether or not the strategy implemented by this DownsamplingIterator makes any effort to
+ * increase accuracy beyond random sampling (i.e. to reduce the delta between the requested proportion
+ * of reads and the actually emitted proportion of reads).
+ */
+ public boolean isHigherAccuracy() {
+ return false;
- @Override public void close() {
- // Do nothing.
+ /** Not supported. */
+ @Override public void remove() {
+ throw new UnsupportedOperationException("remove() not supported in DownsamplingIterators");
diff --git a/src/java/htsjdk/samtools/DownsamplingIteratorFactory.java b/src/java/htsjdk/samtools/DownsamplingIteratorFactory.java
new file mode 100644
index 0000000..d54e706
--- /dev/null
+++ b/src/java/htsjdk/samtools/DownsamplingIteratorFactory.java
@@ -0,0 +1,118 @@
+ * The MIT License
+ *
+ * Copyright (c) 2015 Tim Fennell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ */
+package htsjdk.samtools;
+import htsjdk.samtools.util.IOUtil;
+import java.io.File;
+import java.util.Iterator;
+ * A factory for creating DownsamplingIterators that uses a number of different strategies to achieve downsampling while
+ * meeting various criteria.
+ *
+ * @author Tim Fennell
+ */
+public class DownsamplingIteratorFactory {
+ public static final String HIGH_ACCURACY_DESCRIPTION =
+ "Attempts (but does not guarantee) to provide accuracy up to a specified limit. Accuracy is defined as emitting " +
+ "a proportion of reads as close to the requested proportion as possible. In order to do so this strategy requires " +
+ "memory that is proportional to the number of template names in the incoming stream of reads, and will thus require " +
+ "large amounts of memory when running on large input files.";
+ public static final String CONSTANT_MEMORY_DESCRPTION =
+ "Downsamples a stream or file of SAMRecords using a hash-projection strategy such that it can run in constant memory. " +
+ "The downsampling is stochastic, and therefore the actual retained proportion will vary around the requested proportion. Due " +
+ "to working in fixed memory this strategy is good for large inputs, and due to the stochastic nature the accuracy of this strategy " +
+ "is highest with a high number of output records, and diminishes at low output volumes.";
+ public static final String CHAINED_DESCRIPTION =
+ "Attempts to provide a compromise strategy that offers some of the advantages of both the ConstantMemory and HighAccuracy strategies. " +
+ "Uses a ConstantMemory strategy to downsample the incoming stream to approximately the desired proportion, and then a HighAccuracy " +
+ "strategy to finish. Works in a single pass, and will provide accuracy close to (but often not as good as) HighAccuracy while requiring " +
+ "memory proportional to the set of reads emitted from the ConstantMemory strategy to the HighAccuracy strategy. Works well when downsampling " +
+ "large inputs to small proportions (e.g. downsampling hundreds of millions of reads and retaining only 2%. Should be accurate 99.9% of the time " +
+ "when the input contains >= 50,000 templates (read names). For smaller inputs, HighAccuracy is recommended instead.";
+ /** Describes the available downsampling strategies. */
+ public enum Strategy {
+ public final String description;
+ Strategy(final String description) {
+ this.description = description;
+ }
+ /** Gets the description of the strategy. */
+ public String getDescription() {
+ return description;
+ }
+ }
+ /**
+ * Creates a new DownsamplingIterator using the supplied Strategy that attempts to read from the provided iterator and return
+ * approximately proportion of the records read.
+ *
+ * @param iterator The iterator from which to consume SAMRecords
+ * @param strategy The downsampling strategy to use
+ * @param proportion The proportion of records the downsampling strategy should attempt to emit
+ * @param accuracy If supported by the downsampling strategy, the accuracy goal for the downsampler. Higher accuracy will generally
+ * require higher memory usage. An accuracy value of 0.0001 tells the strategy to try and ensure the emitted proportion
+ * is within proportion +/0 0.0001.
+ * @param seed The seed value to use for any random process used in down-sampling.
+ */
+ public static DownsamplingIterator make(final Iterator<SAMRecord> iterator, final Strategy strategy, final double proportion, final double accuracy, final int seed) {
+ if (strategy == null) throw new IllegalArgumentException("strategy may not be null");
+ if (iterator == null) throw new IllegalArgumentException("iterator may not be null");
+ if (proportion < 0) throw new IllegalArgumentException("proportion must be greater than 0");
+ if (proportion > 1) throw new IllegalArgumentException("proportion must be less than 1");
+ switch (strategy) {
+ case HighAccuracy: return new HighAccuracyDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy);
+ case ConstantMemory: return new ConstantMemoryDownsamplingIterator(iterator, proportion, seed);
+ case Chained: return new ChainedDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy);
+ default: throw new IllegalStateException("Unexpected value for Strategy enum in switch statement. Bug!!");
+ }
+ }
+ /**
+ * Convenience method that constructs a downsampling iterator for all the reads in a SAM file.
+ * See {@link DownsamplingIteratorFactory#make(Iterator, Strategy, double, double, int)} for detailed parameter information.
+ */
+ public static DownsamplingIterator make(final File samFile, final Strategy strategy, final double proportion, final double accuracy, final int seed) {
+ IOUtil.assertFileIsReadable(samFile);
+ return make(SamReaderFactory.makeDefault().open(samFile), strategy, proportion, accuracy, seed);
+ }
+ /**
+ * Convenience method that constructs a downsampling iterator for all the reads available from a SamReader.
+ * See {@link DownsamplingIteratorFactory#make(Iterator, Strategy, double, double, int)} for detailed parameter information.
+ */
+ public static DownsamplingIterator make(final SamReader reader, final Strategy strategy, final double proportion, final double accuracy, final int seed) {
+ return make(reader.iterator(), strategy, proportion, accuracy, seed);
+ }
diff --git a/src/java/htsjdk/samtools/DuplicateScoringStrategy.java b/src/java/htsjdk/samtools/DuplicateScoringStrategy.java
index c6e0884..9d0bed5 100644
--- a/src/java/htsjdk/samtools/DuplicateScoringStrategy.java
+++ b/src/java/htsjdk/samtools/DuplicateScoringStrategy.java
@@ -36,6 +36,9 @@ public class DuplicateScoringStrategy {
+ /** An enum to use for storing temporary attributes on SAMRecords. */
+ private static enum Attr { DuplicateScore }
/** Calculates a score for the read which is the sum of scores over Q15. */
private static short getSumOfBaseQualities(final SAMRecord rec) {
short score = 0;
@@ -60,22 +63,30 @@ public class DuplicateScoringStrategy {
* computed on both ends.
public static short computeDuplicateScore(final SAMRecord record, final ScoringStrategy scoringStrategy, final boolean assumeMateCigar) {
- short score = 0;
+ Short storedScore = (Short) record.getTransientAttribute(Attr.DuplicateScore);
+ if (storedScore == null) {
+ short score = 0;
- switch (scoringStrategy) {
- score += getSumOfBaseQualities(record);
- break;
- if (!record.getReadUnmappedFlag()) {
- score += record.getCigar().getReferenceLength();
- }
- if (assumeMateCigar && record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
- score += SAMUtils.getMateCigar(record).getReferenceLength();
- }
- break;
+ switch (scoringStrategy) {
+ score += getSumOfBaseQualities(record);
+ break;
+ if (!record.getReadUnmappedFlag()) {
+ score += record.getCigar().getReferenceLength();
+ }
+ if (assumeMateCigar && record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
+ score += SAMUtils.getMateCigar(record).getReferenceLength();
+ }
+ break;
+ }
+ storedScore = score;
+ record.setTransientAttribute(Attr.DuplicateScore, storedScore);
- return score;
+ return storedScore;
diff --git a/src/java/htsjdk/samtools/DuplicateSet.java b/src/java/htsjdk/samtools/DuplicateSet.java
index df34526..8333069 100644
--- a/src/java/htsjdk/samtools/DuplicateSet.java
+++ b/src/java/htsjdk/samtools/DuplicateSet.java
@@ -32,21 +32,23 @@ import java.util.List;
* considered the representative of the duplicate, and typically does not have it's duplicate flag set.
* The records' duplicate flag will be set appropriately as records are added. This behavior can be
* turned off.
- *
+ *
* At this time, this set does not track optical duplicates.
* @author nhomer
public class DuplicateSet {
private final List<SAMRecord> records;
private static final SAMRecordDuplicateComparator defaultComparator = new SAMRecordDuplicateComparator();
private final SAMRecordDuplicateComparator comparator;
+ private SAMRecord representative = null;
private boolean needsSorting = false;
private boolean setDuplicateFlag = false;
/** Sets the duplicate flag by default */
@@ -67,7 +69,7 @@ public class DuplicateSet {
this.setDuplicateFlag = setDuplicateFlag;
this.comparator = comparator;
* Adds a record to the set and returns zero if either the set is empty, or it is a duplicate of the records already in the set. Otherwise,
* it does not add the record and returns non-zero.
@@ -77,24 +79,32 @@ public class DuplicateSet {
public int add(final SAMRecord record) {
if (!this.records.isEmpty()) {
- final int cmp = this.comparator.duplicateSetCompare(this.getRepresentative(), record);
+ final int cmp = this.comparator.duplicateSetCompare(this.representative, record);
if (0 != cmp) {
return cmp;
+ // update representative
+ if (0 < this.comparator.compare(this.representative, record)) {
+ this.representative = record;
+ }
+ }
+ else {
+ this.representative = record;
needsSorting = true;
return 0;
private void sort() {
if (!records.isEmpty()) {
- Collections.sort(records, this.comparator);
- final SAMRecord representative = records.get(0);
+ if (1 < records.size()) {
+ Collections.sort(records, this.comparator);
+ }
if (setDuplicateFlag) {
// reset duplicate flags
for (final SAMRecord record : records) {
@@ -104,30 +114,42 @@ public class DuplicateSet {
+ if (!records.get(0).equals(this.representative)) {
+ throw new SAMException("BUG: the representative was not the first record after sorting."
+ + "\nFIRST: " + records.get(0).getSAMString() + "\nSECOND: " + this.representative.getSAMString());
+ }
needsSorting = false; // this could be in the if above if you think hard about it
* Gets the list of records from this set.
+ *
+ * Setting sort to false likely will not yield records in duplicate order within the set.
+ *
+ * @param sort true if we want the records in the duplicate set sorted by duplicate order, false if we do not care about the order.
- public List<SAMRecord> getRecords() {
- if (needsSorting) {
+ public List<SAMRecord> getRecords(final boolean sort) {
+ if (sort && needsSorting) {
return this.records;
+ * Gets the list of records from this set.
+ */
+ public List<SAMRecord> getRecords() {
+ return getRecords(true);
+ }
+ /**
* Gets the representative record according to the duplicate comparator.
public SAMRecord getRepresentative() {
- if (needsSorting) {
- sort();
- }
- return records.get(0);
+ return this.representative;
@@ -149,8 +171,8 @@ public class DuplicateSet {
return n;
- }
+ }
public boolean isEmpty() {
return this.records.isEmpty();
@@ -158,5 +180,5 @@ public class DuplicateSet {
* Controls if we should update the duplicate flag of the records in this set.
- public void setDuplicateFlag(boolean setDuplicateFlag) { this.setDuplicateFlag = setDuplicateFlag; }
+ public void setDuplicateFlag(final boolean setDuplicateFlag) { this.setDuplicateFlag = setDuplicateFlag; }
\ No newline at end of file
diff --git a/src/java/htsjdk/samtools/HighAccuracyDownsamplingIterator.java b/src/java/htsjdk/samtools/HighAccuracyDownsamplingIterator.java
new file mode 100644
index 0000000..f8561b6
--- /dev/null
+++ b/src/java/htsjdk/samtools/HighAccuracyDownsamplingIterator.java
@@ -0,0 +1,196 @@
+ * The MIT License
+ *
+ * Copyright (c) 2015 Tim Fennell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ */
+package htsjdk.samtools;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Random;
+import java.util.Set;
+ * A DownsamplingIterator that attempts to provide very high accuracy (minimizing the difference between emitted proportion
+ * and requested proportion) at the expense of using memory proportional to the number of reads in the incoming stream.
+ *
+ * @author Tim Fennell
+ */
+class HighAccuracyDownsamplingIterator extends DownsamplingIterator {
+ private final Iterator<SAMRecord> underlyingIterator;
+ private final Random random;
+ private SAMRecord nextRecord;
+ private final Map<String, Boolean> decisions = new HashMap<String, Boolean>();
+ private double targetAccuracy = 0.0001;
+ private long totalTemplates, keptTemplates;
+ private Iterator<SAMRecord> bufferedRecords = new ArrayList<SAMRecord>().iterator();
+ private Set<String> bufferedRecordsToKeep;
+ /** Override method to make it clear that this iterator attempts to provide a higher accuracy of downsampling. */
+ @Override public boolean isHigherAccuracy() {
+ return true;
+ }
+ /** Constructs a downsampling iterator upon the supplied iterator, using the Random as the source of randomness. */
+ HighAccuracyDownsamplingIterator(final Iterator<SAMRecord> iterator, final double proportion, final int seed) {
+ super(proportion);
+ this.underlyingIterator = iterator;
+ this.random = new Random(seed);
+ }
+ /**
+ * Sets the target accuracy of the downsampling iterator. The value should be thought of as
+ * probability +/- accuracy. So a value of 0.001 would instruct the downsampling iterator to
+ * attempt to guarantee at accuracy to within 0.1%. The downsampler will need to buffer reads
+ * for 1/accuracy templates, so setting this to extremely small numbers is not advisable.
+ */
+ public DownsamplingIterator setTargetAccuracy(final double accuracy) {
+ if (accuracy >= 1 || accuracy <= 1d/Integer.MAX_VALUE) throw new IllegalArgumentException("Illegal value. Must be 1/MAX_INT < accuracy < 1");
+ this.targetAccuracy = accuracy;
+ return this;
+ }
+ /** Returns true if there is another record available post-downsampling, false otherwise. */
+ @Override public boolean hasNext() {
+ return this.nextRecord != null || advance();
+ }
+ /** Returns the next record from the iterator, or throws an exception if there is no next record. */
+ @Override public SAMRecord next() {
+ if (this.nextRecord == null) {
+ throw new NoSuchElementException("Call to next() when hasNext() == false");
+ }
+ else {
+ final SAMRecord retval = this.nextRecord;
+ advance();
+ return retval;
+ }
+ }
+ /** Returns the underlying iterator so that subclasses may manipulate it. */
+ protected Iterator<SAMRecord> getUnderlyingIterator() {
+ return this.underlyingIterator;
+ }
+ /**
+ * Clears the current record and attempts to advance through the underlying iterator until a
+ * record is kept during downsampling. If no more records are kept and the end of the input
+ * is reached this.nextRecord will be null.
+ *
+ * @return true if a record is available after advancing, false otherwise
+ */
+ protected boolean advance() {
+ this.nextRecord = null;
+ while (this.nextRecord == null && (this.bufferedRecords.hasNext() || bufferNextChunkOfRecords(getTargetProportion(), this.targetAccuracy))) {
+ final SAMRecord rec = this.bufferedRecords.next();
+ final String key = rec.getReadName();
+ final Boolean previous = decisions.get(key);
+ final boolean keepThisRecord;
+ if (previous == null) {
+ keepThisRecord = this.bufferedRecordsToKeep.contains(rec.getReadName());
+ decisions.put(key, keepThisRecord);
+ }
+ else {
+ keepThisRecord = previous;
+ }
+ if (keepThisRecord) {
+ this.nextRecord = rec;
+ recordAcceptedRecord();
+ }
+ else {
+ recordDiscardedRecord();
+ }
+ }
+ return this.nextRecord != null;
+ }
+ /**
+ * Buffers reads until either the end of the file is reached or enough reads have been buffered such
+ * that downsampling can be performed to the desired target accuracy. Once reads have been buffered,
+ * template names are randomly sampled out for discarding until the desired number of reads have
+ * been discarded.
+ *
+ * @return True if one or more reads have been buffered, false otherwise
+ */
+ protected boolean bufferNextChunkOfRecords(final double proportion, final double accuracy) {
+ final int templatesToRead = (int) Math.ceil(1 / accuracy);
+ final Set<String> names = new HashSet<String>();
+ final List<SAMRecord> recs = new ArrayList<SAMRecord>(templatesToRead);
+ readFromUnderlyingIterator(recs, names, templatesToRead);
+ // Determine how many templates to keep/discard
+ final int templatesRead = names.size();
+ final int templatesToKeep = calculateTemplatesToKeep(templatesRead, proportion);
+ // Randomly shuffle a list of all the template names, and then remove some from the set
+ final int templatesToDiscard = templatesRead - templatesToKeep;
+ final List<String> tmp = new ArrayList<String>(names);
+ Collections.shuffle(tmp, this.random);
+ for (int i = 0; i < templatesToDiscard; ++i) names.remove(tmp.get(i));
+ // Set all the instance state so that advance()/next() get what they need
+ this.bufferedRecordsToKeep = names;
+ this.bufferedRecords = recs.iterator();
+ this.totalTemplates += templatesRead;
+ this.keptTemplates += names.size();
+ return recs.size() > 0;
+ }
+ /**
+ * Calculates the number of templates to keep in a specific batch of reads having just read templatesRead reads
+ * and wanting to keep proportion of them. Rounds the final number up or down based on whether, to this point,
+ * the iterator is under or over it's goal proportion.
+ *
+ * Implemented as second method to allow ChainedDownsamplingIterator to tamper with the strategy!
+ */
+ protected int calculateTemplatesToKeep(final int templatesRead, final double proportion) {
+ final double rawTemplatesToKeep = templatesRead * proportion;
+ return (keptTemplates / (double) totalTemplates < proportion)
+ ? (int) Math.ceil(rawTemplatesToKeep) : (int) Math.floor(rawTemplatesToKeep);
+ }
+ /**
+ * Reads from the underlying iterator until it has observed templatesToRead templates (i.e. read names) that it has not yet
+ * observed, so that templatesToRead new keep/reject decisions can be made. The records that are read are placed into recs
+ * and _novel_ template names are placed into names.
+ */
+ protected void readFromUnderlyingIterator(final List<SAMRecord> recs, final Set<String> names, final int templatesToRead) {
+ while (this.underlyingIterator.hasNext() && names.size() < templatesToRead) {
+ final SAMRecord rec = this.underlyingIterator.next();
+ recs.add(rec);
+ if (this.decisions.containsKey(rec.getReadName())) continue;
+ names.add(rec.getReadName());
+ }
+ }
diff --git a/src/java/htsjdk/samtools/MergingSamRecordIterator.java b/src/java/htsjdk/samtools/MergingSamRecordIterator.java
index 245ab64..63d0d26 100644
--- a/src/java/htsjdk/samtools/MergingSamRecordIterator.java
+++ b/src/java/htsjdk/samtools/MergingSamRecordIterator.java
@@ -124,6 +124,7 @@ public class MergingSamRecordIterator implements CloseableIterator<SAMRecord> {
final ComparableSamRecordIterator iterator = this.pq.poll();
final SAMRecord record = iterator.next();
+ // this will resolve the reference indices against the new, merged header
// Fix the read group if needs be
@@ -144,17 +145,6 @@ public class MergingSamRecordIterator implements CloseableIterator<SAMRecord> {
- // Fix up the sequence indexes if needs be
- if (this.samHeaderMerger.hasMergedSequenceDictionary()) {
- if (record.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- record.setReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader().getFileHeader(), record.getReferenceIndex()));
- }
- if (record.getReadPairedFlag() && record.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- record.setMateReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader().getFileHeader(), record.getMateReferenceIndex()));
- }
- }
return record;
diff --git a/src/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java b/src/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java
index ae53b8c..e74e106 100644
--- a/src/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java
+++ b/src/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java
@@ -34,13 +34,25 @@ public class SAMBinaryTagAndUnsignedArrayValue extends SAMBinaryTagAndValue {
super(tag, value);
- /** Creates and returns a deep copy of the list of tag/values. */
+ /** Creates and returns a shallow copy of the list of tag/values. */
+ @Override
public SAMBinaryTagAndValue copy() {
final SAMBinaryTagAndValue retval = new SAMBinaryTagAndUnsignedArrayValue(this.tag, this.value);
if (next != null) retval.next = next.copy();
return retval;
+ /** Creates and returns a deep copy of the list of tag/values. */
+ @Override
+ public SAMBinaryTagAndValue deepCopy() {
+ final SAMBinaryTagAndValue retval = new SAMBinaryTagAndUnsignedArrayValue(this.tag, cloneValue());
+ if (next != null) {
+ retval.next = next.deepCopy();
+ }
+ return retval;
+ }
public boolean isUnsignedArray() {
return true;
diff --git a/src/java/htsjdk/samtools/SAMBinaryTagAndValue.java b/src/java/htsjdk/samtools/SAMBinaryTagAndValue.java
index 7a409e8..70011f9 100644
--- a/src/java/htsjdk/samtools/SAMBinaryTagAndValue.java
+++ b/src/java/htsjdk/samtools/SAMBinaryTagAndValue.java
@@ -45,8 +45,12 @@ public class SAMBinaryTagAndValue implements Serializable {
* @param tag tagname (in binary form) for this attribute
* @param value value for this attribute (must be of a type that implements {@link Serializable} or else serialization will fail)
+ * Cannot be null.
public SAMBinaryTagAndValue(final short tag, final Object value) {
+ if (null == value) {
+ throw new IllegalArgumentException("SAMBinaryTagAndValue value may not be null");
+ }
this.tag = tag;
this.value = value;
@@ -76,13 +80,48 @@ public class SAMBinaryTagAndValue implements Serializable {
return result;
- /** Creates and returns a deep copy of the list of tag/values. */
+ /** Creates and returns a shallow copy of the list of tag/values. */
public SAMBinaryTagAndValue copy() {
final SAMBinaryTagAndValue retval = new SAMBinaryTagAndValue(this.tag, this.value);
- if (next != null) retval.next = next.copy();
+ if (next != null) {
+ retval.next = next.copy();
+ }
return retval;
+ /** Creates and returns a deep copy of the list of tag/values. */
+ public SAMBinaryTagAndValue deepCopy() {
+ final SAMBinaryTagAndValue retval = new SAMBinaryTagAndValue(this.tag, cloneValue());
+ if (next != null) {
+ retval.next = next.deepCopy();
+ }
+ return retval;
+ }
+ /* Create and return a clone of value object */
+ protected Object cloneValue() {
+ Object valueClone;
+ if (value instanceof byte[]) {
+ valueClone = ((byte[]) value).clone();
+ }
+ else if (value instanceof short[]) {
+ valueClone = ((short[]) value).clone();
+ }
+ else if (value instanceof int[]) {
+ valueClone = ((int[]) value).clone();
+ }
+ else if (value instanceof float[]) {
+ valueClone = ((float[]) value).clone();
+ }
+ else {
+ // otherwise, the api limits the remaining possible value types to
+ // immutable (String or boxed primitive) types
+ valueClone = value;
+ }
+ return valueClone;
+ }
// The methods below are for implementing a light-weight, single-direction linked list
public SAMBinaryTagAndValue getNext() { return this.next; }
diff --git a/src/java/htsjdk/samtools/SAMFileSpan.java b/src/java/htsjdk/samtools/SAMFileSpan.java
index 9da77bc..4122b24 100644
--- a/src/java/htsjdk/samtools/SAMFileSpan.java
+++ b/src/java/htsjdk/samtools/SAMFileSpan.java
@@ -23,13 +23,6 @@
package htsjdk.samtools;
-import htsjdk.samtools.util.StringUtil;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
* A interface representing a collection of (possibly) discontinuous segments in the
* BAM file, possibly representing the results of an index query.
@@ -54,240 +47,3 @@ public interface SAMFileSpan extends Cloneable {
public boolean isEmpty();
- * An ordered list of chunks, capable of representing a set of discontiguous
- * regions in the BAM file. FileSpans are mutable within the package, but perceived
- * as immutable outside the package.
- *
- * Some operations on FileSpans assume that the spans are sorted. In these cases,
- * sort order will be validated.
- *
- * @author mhanna
- * @version 0.1
- */
-class BAMFileSpan implements SAMFileSpan, Serializable {
- private static final long serialVersionUID = 1L;
- /**
- * The constituent chunks of this list.
- */
- private final List<Chunk> chunks;
- /**
- * Create a new empty list of chunks.
- */
- protected BAMFileSpan() {
- this.chunks = new ArrayList<Chunk>();
- }
- /**
- * Convenience constructor to construct a BAM file span from
- * a single chunk.
- * @param chunk Chunk to use as the sole region in this span.
- */
- protected BAMFileSpan(final Chunk chunk) {
- this.chunks = new ArrayList<Chunk>();
- chunks.add(chunk);
- }
- /**
- * Create a new chunk list from the given list of chunks.
- * @param chunks Constituent chunks.
- */
- protected BAMFileSpan(final List<Chunk> chunks) {
- this.chunks = new ArrayList<Chunk>(chunks);
- }
- /**
- * Does this chunk list map to any position within the BAM file?
- * @return True iff the ChunkList points to any data within the BAM.
- */
- public boolean isEmpty() {
- return chunks.isEmpty();
- }
- /**
- * Deep clone the given chunk list.
- * @return A copy of the chunk list.
- */
- public BAMFileSpan clone() {
- final BAMFileSpan clone = new BAMFileSpan();
- for(final Chunk chunk: chunks)
- clone.chunks.add(chunk.clone());
- return clone;
- }
- /**
- * Remove all chunks in this file span before the given file span starts.
- * If a chunk in the chunk list starts before and ends after the given
- * chunk, the first portion of the chunk will be deleted.
- * @param fileSpan The filespan before which to eliminate.
- * @return The portion of the chunk list after the given chunk.
- */
- public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan) {
- if(fileSpan == null)
- return clone();
- if(!(fileSpan instanceof BAMFileSpan))
- throw new SAMException("Unable to compare ");
- final BAMFileSpan bamFileSpan = (BAMFileSpan)fileSpan;
- if(bamFileSpan.isEmpty())
- return clone();
- validateSorted();
- final BAMFileSpan trimmedChunkList = new BAMFileSpan();
- for(final Chunk chunkToTrim: chunks) {
- if(chunkToTrim.getChunkEnd() > chunkToTrim.getChunkStart()) {
- if(chunkToTrim.getChunkStart() >= bamFileSpan.chunks.get(0).getChunkStart()) {
- // This chunk from the list is completely beyond the start of the filtering chunk.
- trimmedChunkList.add(chunkToTrim.clone());
- }
- else {
- // This chunk from the list partially overlaps the filtering chunk and must be trimmed.
- trimmedChunkList.add(new Chunk(bamFileSpan.chunks.get(0).getChunkStart(),chunkToTrim.getChunkEnd()));
- }
- }
- }
- return trimmedChunkList;
- }
- /**
- * Gets a file span over the data immediately following this span.
- * @return The a pointer to data immediately following this span.
- */
- public SAMFileSpan getContentsFollowing() {
- if(chunks.isEmpty())
- throw new SAMException("Unable to get the file pointer following this one: no data present.");
- validateSorted();
- return new BAMFileSpan(new Chunk(chunks.get(chunks.size()-1).getChunkEnd(),Long.MAX_VALUE));
- }
- /**
- * Merge one span into another
- *
- * @param span - span with chunks to add to this one
- */
- public void add(final BAMFileSpan span) {
- for (final Chunk c : span.chunks) {
- chunks.add(c);
- }
- }
- /**
- * Adds a new chunk to this list. Visible only within the BAm.
- * @param chunk Chunk to add.
- */
- protected void add(final Chunk chunk) {
- chunks.add(chunk);
- }
- /**
- * Convert the chunk list to an array of offsets, paired in [start,end) format.
- * @return Array of offsets.
- */
- protected long[] toCoordinateArray() {
- final int count = chunks.size() * 2;
- if (count == 0) {
- return null;
- }
- int index = 0;
- final long[] result = new long[count];
- for (final Chunk chunk : chunks) {
- result[index++] = chunk.getChunkStart();
- result[index++] = chunk.getChunkEnd();
- }
- return result;
- }
- /**
- * Find the first offset in the chunk list
- * @return The first offset in the span
- */
- protected long getFirstOffset() {
- final long result = 0;
- if (chunks == null){
- return result;
- }
- for (final Chunk chunk : chunks) {
- return chunk.getChunkStart();
- }
- return result;
- }
- /**
- * Gets the constituent chunks stored in this span.
- * @return An unmodifiable list of chunks.
- */
- protected List<Chunk> getChunks() {
- return Collections.unmodifiableList(chunks);
- }
- /**
- * Checks that there is only a single chunk for this span and returns it.
- * @return The single chunk stored in this span
- */
- protected Chunk getSingleChunk() {
- if (chunks.size() != 1){
- throw new SAMException("Expecting a single chunk for span. Found " + chunks.size());
- }
- return chunks.get(0);
- }
- /**
- * The list of chunks is often represented as an array of
- * longs where every even-numbered index is a start coordinate
- * and every odd-numbered index is a stop coordinate. Convert
- * from that format back to a list of chunks.
- * @param coordinateArray List of chunks to convert.
- * @return A list of chunks.
- */
- protected static SAMFileSpan toChunkList(final long[] coordinateArray) {
- if(coordinateArray.length % 2 != 0)
- throw new SAMException("Data supplied does not appear to be in coordinate array format.");
- final BAMFileSpan chunkList = new BAMFileSpan();
- for(int i = 0; i < coordinateArray.length; i += 2)
- chunkList.add(new Chunk(coordinateArray[i],coordinateArray[i+1]));
- chunkList.validateSorted();
- return chunkList;
- }
- /**
- * Validates the list of chunks to ensure that they appear in sorted order.
- */
- private void validateSorted() {
- for(int i = 1; i < chunks.size(); i++) {
- if(chunks.get(i).getChunkStart() < chunks.get(i-1).getChunkEnd())
- throw new SAMException(String.format("Chunk list is unsorted; chunk %s is before chunk %s",chunks.get(i-1),chunks.get(i)));
- }
- }
- /**
- * Creates a string representation of this chunk list.
- */
- @Override
- public String toString() {
- return StringUtil.join(";", chunks);
- }
- /**
- *
- * @return A single BAMFileSpan that is an intelligent merge of the input spans, i.e. contiguous, overlapping
- * and contained chunks are intelligently merged, and the chunks are sorted.
- */
- public static BAMFileSpan merge(final BAMFileSpan[] spans) {
- final ArrayList<Chunk> inputChunks = new ArrayList<Chunk>();
- for (final BAMFileSpan span : spans) {
- if(span != null){
- inputChunks.addAll(span.chunks);
- }
- }
- return new BAMFileSpan(Chunk.optimizeChunkList(inputChunks, 0));
- }
diff --git a/src/java/htsjdk/samtools/SAMFileWriterFactory.java b/src/java/htsjdk/samtools/SAMFileWriterFactory.java
index c32cf97..0566df1 100644
--- a/src/java/htsjdk/samtools/SAMFileWriterFactory.java
+++ b/src/java/htsjdk/samtools/SAMFileWriterFactory.java
@@ -26,20 +26,20 @@ package htsjdk.samtools;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.util.BlockCompressedOutputStream;
import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.Md5CalculatingOutputStream;
import htsjdk.samtools.util.RuntimeIOException;
-import java.io.BufferedOutputStream;
import java.io.File;
-import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
- * Create a SAMFileWriter for writing SAM or BAM.
+ * Create a writer for writing SAM, BAM, or CRAM files.
public class SAMFileWriterFactory {
+ private final static Log log = Log.getInstance(SAMFileWriterFactory.class);
private static boolean defaultCreateIndexWhileWriting = Defaults.CREATE_INDEX;
private boolean createIndex = defaultCreateIndexWhileWriting;
private static boolean defaultCreateMd5File = Defaults.CREATE_MD5;
@@ -82,7 +82,7 @@ public class SAMFileWriterFactory {
* Convenience method allowing newSAMFileWriterFactory().setCreateIndex(true);
* Equivalent to SAMFileWriterFactory.setDefaultCreateIndexWhileWriting(true); newSAMFileWriterFactory();
- * If a BAM (not SAM) file is created, the setting is true, and the file header specifies coordinate order,
+ * If a BAM or CRAM (not SAM) file is created, the setting is true, and the file header specifies coordinate order,
* then a BAM index file will be written along with the BAM file.
* @param setting whether to attempt to create a BAM index while creating the BAM file.
@@ -170,14 +170,14 @@ public class SAMFileWriterFactory {
try {
final boolean createMd5File = this.createMd5File && IOUtil.isRegularPath(outputFile);
if (this.createMd5File && !createMd5File) {
- System.err.println("Cannot create MD5 file for BAM because output file is not a regular file: " + outputFile.getAbsolutePath());
+ log.warn("Cannot create MD5 file for BAM because output file is not a regular file: " + outputFile.getAbsolutePath());
OutputStream os = IOUtil.maybeBufferOutputStream(new FileOutputStream(outputFile, false), bufferSize);
if (createMd5File) os = new Md5CalculatingOutputStream(os, new File(outputFile.getAbsolutePath() + ".md5"));
final BAMFileWriter ret = new BAMFileWriter(os, outputFile, compressionLevel);
final boolean createIndex = this.createIndex && IOUtil.isRegularPath(outputFile);
if (this.createIndex && !createIndex) {
- System.err.println("Cannot create index for BAM because output file is not a regular file: " + outputFile.getAbsolutePath());
+ log.warn("Cannot create index for BAM because output file is not a regular file: " + outputFile.getAbsolutePath());
if (this.tmpDir != null) ret.setTempDirectory(this.tmpDir);
initializeBAMWriter(ret, header, presorted, createIndex);
@@ -293,46 +293,133 @@ public class SAMFileWriterFactory {
return makeBAMWriter(header, presorted, outputFile);
+ /**
+ *
+ * Create a SAM, BAM or CRAM writer based on examination of the outputFile extension.
+ *
+ * @param header header. Sort order is determined by the sortOrder property of this arg.
+ * @param presorted if true, SAMRecords must be added to the SAMFileWriter in order that agrees with header.sortOrder.
+ * @param outputFile where to write the output. Must end with .sam, .bam or .cram.
+ * @param referenceFasta reference sequence file
+ * @return SAMFileWriter appropriate for the file type specified in outputFile
+ *
+ */
public SAMFileWriter makeWriter(final SAMFileHeader header, final boolean presorted, final File outputFile, final File referenceFasta) {
- if (outputFile.getName().endsWith(SamReader.Type.CRAM_TYPE.fileExtension()))
- try {
- return makeCRAMWriter(header, new FileOutputStream(outputFile), referenceFasta);
- } catch (final FileNotFoundException e) {
- throw new RuntimeIOException(e);
- }
- return makeSAMOrBAMWriter(header, presorted, outputFile);
+ if (outputFile.getName().endsWith(SamReader.Type.CRAM_TYPE.fileExtension())) {
+ return makeCRAMWriter(header, presorted, outputFile, referenceFasta);
+ }
+ else {
+ return makeSAMOrBAMWriter(header, presorted, outputFile);
+ }
+ /**
+ * Create a CRAMFileWriter on an output stream. Requires the input to be presorted to match the sort order defined
+ * by the input header.
+ *
+ * Note: does not honor factory settings for CREATE_MD5, CREATE_INDEX, USE_ASYNC_IO.
+ *
+ * @param header entire header. Sort order is determined by the sortOrder property of this arg.
+ * @param stream where to write the output.
+ * @param referenceFasta reference sequence file
+ * @return CRAMFileWriter
+ */
public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final OutputStream stream, final File referenceFasta) {
+ // create the CRAMFileWriter directly without propagating factory settings
final CRAMFileWriter writer = new CRAMFileWriter(stream, new ReferenceSource(referenceFasta), header, null);
- writer.setPreserveReadNames(true);
- writer.setCaptureAllTags(true);
+ setCRAMWriterDefaults(writer);
return writer;
+ /**
+ * Create a CRAMFileWriter on an output file. Requires input record to be presorted to match the
+ * sort order defined by the input header.
+ *
+ * Note: does not honor factory settings for USE_ASYNC_IO.
+ *
+ * @param header entire header. Sort order is determined by the sortOrder property of this arg.
+ * @param outputFile where to write the output. Must end with .sam, .bam or .cram.
+ * @param referenceFasta reference sequence file
+ * @return CRAMFileWriter
+ *
+ */
public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final File outputFile, final File referenceFasta) {
+ return createCRAMWriterWithSettings(header, true, outputFile, referenceFasta);
+ }
- final boolean createIndex = this.createIndex && IOUtil.isRegularPath(outputFile);
- if (this.createIndex && !createIndex) {
- System.err.println("Cannot create index for CAM because output file is not a regular file: " + outputFile.getAbsolutePath());
+ /**
+ * Create a CRAMFileWriter on an output file.
+ *
+ * Note: does not honor factory setting for USE_ASYNC_IO.
+ *
+ * @param header entire header. Sort order is determined by the sortOrder property of this arg.
+ * @param presorted if true, SAMRecords must be added to the SAMFileWriter in order that agrees with header.sortOrder.
+ * @param outputFile where to write the output. Must end with .sam, .bam or .cram.
+ * @param referenceFasta reference sequence file
+ * @return CRAMFileWriter
+ *
+ */
+ public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final boolean presorted, final File outputFile, final File referenceFasta) {
+ return createCRAMWriterWithSettings(header, presorted, outputFile, referenceFasta);
+ }
+ /**
+ * Create a CRAMFileWriter on an output file based on factory settings.
+ *
+ * Note: does not honor the factory setting for USE_ASYNC_IO.
+ *
+ * @param header entire header. Sort order is determined by the sortOrder property of this arg.
+ * @param presorted if true, SAMRecords must be added to the SAMFileWriter in order that agrees with header.sortOrder.
+ * @param outputFile where to write the output. Must end with .sam, .bam or .cram.
+ * @param referenceFasta reference sequence file
+ * @return CRAMFileWriter
+ */
+ private CRAMFileWriter createCRAMWriterWithSettings(
+ final SAMFileHeader header,
+ final boolean presorted,
+ final File outputFile,
+ final File referenceFasta) {
+ OutputStream cramOS = null;
+ OutputStream indexOS = null ;
+ if (createIndex) {
+ if (!IOUtil.isRegularPath(outputFile)) {
+ log.warn("Cannot create index for CRAM because output file is not a regular file: " + outputFile.getAbsolutePath());
+ }
+ else {
+ try {
+ final File indexFile = new File(outputFile.getAbsolutePath() + BAMIndex.BAMIndexSuffix) ;
+ indexOS = new FileOutputStream(indexFile) ;
+ }
+ catch (final IOException ioe) {
+ throw new RuntimeIOException("Error creating index file for: " + outputFile.getAbsolutePath()+ BAMIndex.BAMIndexSuffix);
+ }
+ }
try {
- OutputStream indexOS = null ;
- if (createIndex) {
- File indexFile = new File(outputFile.getAbsolutePath() + ".bai") ;
- indexOS = new FileOutputStream(indexFile) ;
- }
- final CRAMFileWriter writer = new CRAMFileWriter(new FileOutputStream(outputFile), indexOS, new ReferenceSource(referenceFasta), header, null);
- writer.setPreserveReadNames(true);
- writer.setCaptureAllTags(true);
- return writer;
- } catch (final IOException ioe) {
- throw new RuntimeIOException("Error opening file: " + outputFile.getAbsolutePath());
+ cramOS = IOUtil.maybeBufferOutputStream(new FileOutputStream(outputFile, false), bufferSize);
+ }
+ catch (final IOException ioe) {
+ throw new RuntimeIOException("Error creating CRAM file: " + outputFile.getAbsolutePath());
+ CRAMFileWriter writer = new CRAMFileWriter(
+ createMd5File ? new Md5CalculatingOutputStream(cramOS, new File(outputFile.getAbsolutePath() + ".md5")) : cramOS,
+ indexOS,
+ presorted,
+ new ReferenceSource(referenceFasta),
+ header,
+ outputFile.getAbsolutePath());
+ setCRAMWriterDefaults(writer);
+ return writer;
+ // Set the default CRAM writer preservation parameters
+ private void setCRAMWriterDefaults(CRAMFileWriter writer) {
+ writer.setPreserveReadNames(true);
+ writer.setCaptureAllTags(true);
+ }
diff --git a/src/java/htsjdk/samtools/SAMFileWriterImpl.java b/src/java/htsjdk/samtools/SAMFileWriterImpl.java
index 82282fe..219f64c 100644
--- a/src/java/htsjdk/samtools/SAMFileWriterImpl.java
+++ b/src/java/htsjdk/samtools/SAMFileWriterImpl.java
@@ -123,10 +123,13 @@ public abstract class SAMFileWriterImpl implements SAMFileWriter
- * Must be called before addAlignment.
+ * Must be called before addAlignment. Header cannot be null.
public void setHeader(final SAMFileHeader header)
+ if (null == header) {
+ throw new IllegalArgumentException("A non-null SAMFileHeader is required for a writer");
+ }
this.header = header;
if (sortOrder == null) {
sortOrder = SAMFileHeader.SortOrder.unsorted;
@@ -168,12 +171,19 @@ public abstract class SAMFileWriterImpl implements SAMFileWriter
throw new IllegalStateException("sortOrder should not be null");
+ /**
+ * Add an alignment record to be emitted by the writer.
+ *
+ * @param alignment Must not be null. If the alignment record's SAMFileHeader is null, the record will be
+ * updated to the header used by this writer, which will in turn cause any unresolved reference and
+ * mate reference indices to be resolved against the new header's sequence dictionary.
+ */
public void addAlignment(final SAMRecord alignment)
+ if (null == alignment.getHeader()) {
+ alignment.setHeader(header); // re-establish the record header and attempt to resolve reference index values
+ }
if (sortOrder.equals(SAMFileHeader.SortOrder.unsorted)) {
- if (!header.getGroupOrder().equals(SAMFileHeader.GroupOrder.none)) {
- throw new UnsupportedOperationException("GroupOrder " + header.getGroupOrder() + " is not supported");
- }
} else if (presorted) {
@@ -213,7 +223,7 @@ public abstract class SAMFileWriterImpl implements SAMFileWriter
* Writes the record to disk. Sort order has been taken care of by the time
- * this method is called.
+ * this method is called. The record must hava a non-null SAMFileHeader.
* @param alignment
abstract protected void writeAlignment(SAMRecord alignment);
diff --git a/src/java/htsjdk/samtools/SAMRecord.java b/src/java/htsjdk/samtools/SAMRecord.java
index 8eb9fd3..cfa922f 100644
--- a/src/java/htsjdk/samtools/SAMRecord.java
+++ b/src/java/htsjdk/samtools/SAMRecord.java
@@ -32,54 +32,77 @@ import java.io.Serializable;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Set;
* Java binding for a SAM file record. c.f. http://samtools.sourceforge.net/SAM1.pdf
- *
+ * <p>
* The presence of reference name/reference index and alignment start
* do not necessarily mean that a read is aligned. Those values may merely be set to force a SAMRecord
* to appear in a certain place in the sort order. The readUnmappedFlag must be checked to determine whether
* or not a read is mapped. Only if the readUnmappedFlag is false can the reference name/index and alignment start
* be interpreted as indicating an actual alignment position.
- *
+ * <p>
* Likewise, presence of mate reference name/index and mate alignment start do not necessarily mean that the
* mate is aligned. These may be set for an unaligned mate if the mate has been forced into a particular place
* in the sort order per the above paragraph. Only if the mateUnmappedFlag is false can the mate reference name/index
* and mate alignment start be interpreted as indicating the actual alignment position of the mate.
- *
+ * <p>
* Note also that there are a number of getters & setters that are linked, i.e. they present different representations
* of the same underlying data. In these cases there is typically a representation that is preferred because it
* ought to be faster than some other representation. The following are the preferred representations:
- *
- * getReadNameLength() is preferred to getReadName().length()
- * get/setReadBases() is preferred to get/setReadString()
- * get/setBaseQualities() is preferred to get/setBaseQualityString()
- * get/setReferenceIndex() is preferred to get/setReferenceName()
- * get/setMateReferenceIndex() is preferred to get/setMateReferenceName()
- * getCigarLength() is preferred to getCigar().getNumElements()
- * get/setCigar() is preferred to get/setCigarString()
- *
- * Note that setIndexingBin() need not be called when writing SAMRecords. It will be computed as necessary. It is only
- * present as an optimization in the event that the value is already known and need not be computed.
- *
- * setHeader() need not be called when writing SAMRecords. It may be convenient to call it, however, because
- * get/setReferenceIndex() and get/setMateReferenceIndex() must have access to the SAM header, either as an argument
- * or previously passed to setHeader().
- *
+ * </p><ul>
+ * <li>getReadNameLength() is preferred to getReadName().length()</li>
+ * <li>get/setReadBases() is preferred to get/setReadString()</li>
+ * <li>get/setBaseQualities() is preferred to get/setBaseQualityString()</li>
+ * <li>get/setReferenceIndex() is preferred to get/setReferenceName() for records with valid SAMFileHeaders</li>
+ * <li>get/setMateReferenceIndex() is preferred to get/setMateReferenceName() for records with valid SAMFileHeaders</li>
+ * <li>getCigarLength() is preferred to getCigar().getNumElements()</li>
+ * <li>get/setCigar() is preferred to get/setCigarString()</li>
+ * </ul>
+ * <p>
* setHeader() is called by the SAM reading code, so the get/setReferenceIndex() and get/setMateReferenceIndex()
- * methods will have access to the sequence dictionary.
- *
+ * methods will have access to the sequence dictionary to resolve reference and mate reference names to dictionary
+ * indices.
+ * <p>
+ * setHeader() need not be called explicitly when writing SAMRecords, however the writers require a record
+ * in order to call get/setReferenceIndex() and get/setMateReferenceIndex(). Therefore adding records to a writer
+ * has a side effect: any record that does not have an assigned header at the time it is added to a writer will be
+ * updated and assigned the header associated with the writer.
+ * <p>
* Some of the get() methods return values that are mutable, due to the limitations of Java. A caller should
* never change the value returned by a get() method. If you want to change the value of some attribute of a
* SAMRecord, create a new value object and call the appropriate set() method.
- *
+ * </p>
+ * Note that setIndexingBin() need not be called when writing SAMRecords. It will be computed as necessary. It is only
+ * present as an optimization in the event that the value is already known and need not be computed.
+ * <p>
* By default, extensive validation of SAMRecords is done when they are read. Very limited validation is done when
* values are set onto SAMRecords.
- */
+ * <p>
+ * <h3>Notes on Headerless SAMRecords</h3>
+ * <p>
+ * If the header is null, the following SAMRecord methods may throw exceptions:
+ * <ul>
+ * <li>getReferenceIndex</li>
+ * <li>setReferenceIndex</li>
+ * <li>getMateReferenceIndex</li>
+ * <li>setMateReferenceIndex</li>
+ * </ul><p>
+ * Record comparators (i.e. SAMRecordCoordinateComparator and SAMRecordDuplicateComparator) require records with
+ * non-null header values.
+ * <p>
+ * A record with null a header may be validated by the isValid method, but the reference and mate reference indices,
+ * read group, sequence dictionary, and alignment start will not be fully validated unless a header is present.
+ * <p>
+ * Also, SAMTextWriter, BAMFileWriter, and CRAMFileWriter all require records to have a valid header in order to be
+ * written. Any record that does not have a header at the time it is added to the writer will be updated to use the
+ * header associated with the writer.
+ * <p>
* @author alecw at broadinstitute.org
* @author mishali.naik at intel.com
@@ -168,6 +191,9 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
private transient SAMFileSource mFileSource;
private SAMFileHeader mHeader = null;
+ /** Transient Map of attributes for use by anyone. */
+ private transient Map<Object,Object> transientAttributes;
public SAMRecord(final SAMFileHeader header) {
mHeader = header;
@@ -286,8 +312,8 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
private static boolean hasReferenceName(final Integer referenceIndex, final String referenceName) {
- return (referenceIndex != null && referenceIndex != NO_ALIGNMENT_REFERENCE_INDEX) ||
- !NO_ALIGNMENT_REFERENCE_NAME.equals(referenceName);
+ return (referenceIndex != null && !referenceIndex.equals(NO_ALIGNMENT_REFERENCE_INDEX)) ||
+ (!NO_ALIGNMENT_REFERENCE_NAME.equals(referenceName));
@@ -305,119 +331,208 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
- * @return Reference name, or null if record has no reference.
+ * @return Reference name, or NO_ALIGNMENT_REFERENCE_NAME (*) if the record has no reference name
- public String getReferenceName() {
- return mReferenceName;
- }
+ public String getReferenceName() { return mReferenceName; }
- public void setReferenceName(final String value) {
- /* String.intern() is surprisingly expensive, so avoid it by looking up in sequence dictionary if possible */
- if (NO_ALIGNMENT_REFERENCE_NAME.equals(value)) {
+ /**
+ * Sets the reference name for this record. If the record has a valid SAMFileHeader and the reference
+ * name is present in the associated sequence dictionary, the record's reference index will also be
+ * updated with the corresponding sequence index. If referenceName is NO_ALIGNMENT_REFERENCE_NAME, sets
+ * the reference index to NO_ALIGNMENT_REFERENCE_INDEX.
+ *
+ * @param referenceName - must not be null
+ */
+ public void setReferenceName(final String referenceName) {
+ if (null == referenceName) {
+ throw new IllegalArgumentException(
+ "Reference name must not be null. Use SAMRecord.NO_ALIGNMENT_REFERENCE_NAME to reset the reference name.");
+ }
+ else if (NO_ALIGNMENT_REFERENCE_NAME.equals(referenceName)) {
- return;
- } else if (mHeader != null) {
- final int referenceIndex = mHeader.getSequenceIndex(value);
- if (referenceIndex != -1) {
- setReferenceIndex(referenceIndex);
- return;
+ }
+ else if (null != mHeader) {
+ // String.intern() is surprisingly expensive, so avoid it by looking up in sequence dictionary if possible
+ final int referenceIndex = mHeader.getSequenceIndex(referenceName);
+ if (-1 != referenceIndex) {
+ setReferenceIndex(referenceIndex); // sets reference name and index
+ else {
+ mReferenceName = referenceName.intern();
+ mReferenceIndex = null;
+ }
+ }
+ else {
+ mReferenceName = referenceName.intern();
+ mReferenceIndex = null;
- // Drop through from above if nothing done.
- mReferenceName = value.intern();
- mReferenceIndex = null;
- * @return index of the reference sequence for this read in the sequence dictionary, or -1
- * if read has no reference sequence set, or if a String reference name is not found in the sequence index..
+ * Returns the reference index for this record.
+ *
+ * If the reference name for this record has previously been resolved against the sequence dictionary, the corresponding
+ * index is returned directly. Otherwise, the record must have a non-null SAMFileHeader that can be used to
+ * resolve the index for the record's current reference name, unless the reference name is NO_ALIGNMENT_REFERENCE_NAME.
+ * If the record has a header, and the name does not appear in the header's sequence dictionary, the value
+ * NO_ALIGNMENT_REFERENCE_INDEX (-1) will be returned. If the record does not have a header, an IllegalStateException
+ * is thrown.
+ *
+ * @return Index in the sequence dictionary of the reference sequence. If the read has no reference sequence, or if
+ * the reference name is not found in the sequence index, NO_ALIGNMENT_REFERENCE_INDEX (-1) is returned.
+ *
+ * @throws IllegalStateException if the reference index cannot be resolved because the SAMFileHeader for the
+ * record is null.
public Integer getReferenceIndex() {
- if (mReferenceIndex == null) {
- if (mReferenceName == null) {
+ if (null == mReferenceIndex) {
+ // try to resolve the reference index
+ if (NO_ALIGNMENT_REFERENCE_NAME.equals(mReferenceName)) {
- } else if (NO_ALIGNMENT_REFERENCE_NAME.equals(mReferenceName)) {
- } else {
+ }
+ else if (null != mHeader) {
mReferenceIndex = mHeader.getSequenceIndex(mReferenceName);
+ else {
+ throw new IllegalStateException("A non-null SAMFileHeader is required to resolve the reference index");
+ }
return mReferenceIndex;
- * @param referenceIndex Must either equal -1 (indicating no reference), or exist in the sequence dictionary
- * in the header associated with this record.
+ * Updates the reference index. The record must have a valid SAMFileHeader unless the referenceIndex parameter equals
+ * NO_ALIGNMENT_REFERENCE_INDEX, and the reference index must appear in the header's sequence dictionary. If the
+ * reference index is valid, the reference name will also be resolved and updated to the name for the sequence
+ * dictionary entry corresponding to the index.
+ *
+ * @param referenceIndex Must either equal NO_ALIGNMENT_REFERENCE_INDEX (-1) indicating no reference, or the
+ * record must have a SAMFileHeader and the index must exist in the associated sequence
+ * dictionary.
+ * @throws IllegalStateException if the SAMFileHeader is null for this record or the reference index is not
+ * found in the sequence dictionary for this record.
public void setReferenceIndex(final int referenceIndex) {
- mReferenceIndex = referenceIndex;
- if (mReferenceIndex == NO_ALIGNMENT_REFERENCE_INDEX) {
+ if (referenceIndex == NO_ALIGNMENT_REFERENCE_INDEX) {
- } else {
- try {
- mReferenceName = mHeader.getSequence(referenceIndex).getSequenceName();
- } catch (final NullPointerException e) {
- throw new IllegalArgumentException("Reference index " + referenceIndex + " not found in sequence dictionary.", e);
+ }
+ else if (null == mHeader) {
+ throw new IllegalStateException("A non-null SAMFileHeader is required to resolve the reference index");
+ }
+ else {
+ SAMSequenceRecord samSequence = mHeader.getSequence(referenceIndex);
+ if (null != samSequence) {
+ mReferenceIndex = referenceIndex;
+ mReferenceName = samSequence.getSequenceName();
+ }
+ else {
+ throw new IllegalArgumentException("Reference index " + referenceIndex + " not found in sequence dictionary.");
- * @return Mate reference name, or null if one is not assigned.
+ * @return Mate reference name, or NO_ALIGNMENT_REFERENCE_NAME (*) if the record has no mate reference name
public String getMateReferenceName() {
return mMateReferenceName;
+ /**
+ * Sets the mate reference name for this record. If the record has a valid SAMFileHeader and the mate reference
+ * name is present in the associated sequence dictionary, the record's mate reference index will also be
+ * updated with the corresponding sequence index. If mateReferenceName is NO_ALIGNMENT_REFERENCE_NAME, sets the
+ * mate reference index to NO_ALIGNMENT_REFERENCE_INDEX.
+ *
+ * @param mateReferenceName - must not be null
+ */
public void setMateReferenceName(final String mateReferenceName) {
- /* String.intern() is surprisingly expensive, so avoid it by looking up in sequence dictionary if possible */
- if (NO_ALIGNMENT_REFERENCE_NAME.equals(mateReferenceName)) {
+ if (null == mateReferenceName) {
+ throw new IllegalArgumentException("Mate reference name must not be null");
+ }
+ else if (NO_ALIGNMENT_REFERENCE_NAME.equals(mateReferenceName)) {
- return;
- } else if (mHeader != null) {
- final int referenceIndex = mHeader.getSequenceIndex(mateReferenceName);
- if (referenceIndex != -1) {
- setMateReferenceIndex(referenceIndex);
- return;
+ }
+ else if (null != mHeader) {
+ final int mateReferenceIndex = mHeader.getSequenceIndex(mateReferenceName);
+ if (-1 != mateReferenceIndex) {
+ setMateReferenceIndex(mateReferenceIndex); // sets mate reference name and index
+ }
+ else {
+ mMateReferenceName = mateReferenceName.intern();
+ mMateReferenceIndex = null;
- // Drop through from above if nothing done.
- this.mMateReferenceName = mateReferenceName.intern();
- mMateReferenceIndex = null;
+ else {
+ mMateReferenceName = mateReferenceName.intern();
+ mMateReferenceIndex = null;
+ }
- * @return index of the reference sequence for this read's mate in the sequence dictionary, or -1
- * if mate has no reference sequence set.
+ * Returns the mate reference index for this record.
+ *
+ * If the mate reference name for this record has previously been resolved against the sequence dictionary, the
+ * corresponding index is returned directly. Otherwise, the record must have a non-null SAMFileHeader that can be
+ * used to resolve the index for the record's current mate reference name, unless the mate reference name is
+ * NO_ALIGNMENT_REFERENCE_NAME. If the record has a header, and the name does not appear in the header's
+ * sequence dictionary, the value NO_ALIGNMENT_REFERENCE_INDEX (-1) will be returned. If the record does not have
+ * a header, an IllegalStateException is thrown.
+ *
+ * @return Index in the sequence dictionary of the mate reference sequence. If the read has no mate reference
+ * sequence, or if the mate reference name is not found in the sequence index, NO_ALIGNMENT_REFERENCE_INDEX (-1)
+ * is returned.
+ *
+ * @throws IllegalStateException if the mate reference index cannot be resolved because the SAMFileHeader for the
+ * record is null.
public Integer getMateReferenceIndex() {
- if (mMateReferenceIndex == null) {
- if (mMateReferenceName == null) {
+ if (null == mMateReferenceIndex) {
+ // try to resolve the reference index
+ if (NO_ALIGNMENT_REFERENCE_NAME.equals(mMateReferenceName)) {
- } else if (NO_ALIGNMENT_REFERENCE_NAME.equals(mMateReferenceName)){
- } else {
+ }
+ else if (null != mHeader) {
mMateReferenceIndex = mHeader.getSequenceIndex(mMateReferenceName);
+ else {
+ throw new IllegalStateException("A non-null SAMFileHeader is required to resolve the mate reference index");
+ }
return mMateReferenceIndex;
- * @param referenceIndex Must either equal -1 (indicating no reference), or exist in the sequence dictionary
- * in the header associated with this record.
- */
- public void setMateReferenceIndex(final int referenceIndex) {
- mMateReferenceIndex = referenceIndex;
- if (mMateReferenceIndex == NO_ALIGNMENT_REFERENCE_INDEX) {
+ * Updates the mate reference index. The record must have a valid SAMFileHeader, and the mate reference index must appear in
+ * the header's sequence dictionary, unless the mateReferenceIndex parameter equals NO_ALIGNMENT_REFERENCE_INDEX. If the mate
+ * reference index is valid, the mate reference name will also be resolved and updated to the name for the sequence dictionary
+ * entry corresponding to the index.
+ *
+ * @param mateReferenceIndex Must either equal NO_ALIGNMENT_REFERENCE_INDEX (-1) indicating no reference, or the
+ * record must have a SAMFileHeader and the index must exist in the associated sequence
+ * dictionary.
+ * @throws IllegalStateException if the SAMFileHeader is null for this record or the mate reference index is not
+ * found in the sequence dictionary for this record.
+ */
+ public void setMateReferenceIndex(final int mateReferenceIndex) {
+ if (mateReferenceIndex == NO_ALIGNMENT_REFERENCE_INDEX) {
- } else {
- try {
- mMateReferenceName = mHeader.getSequence(referenceIndex).getSequenceName();
- } catch (final NullPointerException e) {
- throw new IllegalArgumentException("Reference index " + referenceIndex + " not found in sequence dictionary.", e);
+ }
+ else if (null == mHeader) {
+ throw new IllegalStateException("A non-null SAMFileHeader is required to resolve the mate reference index");
+ }
+ else {
+ SAMSequenceRecord samSequence = mHeader.getSequence(mateReferenceIndex);
+ if (null != samSequence) {
+ mMateReferenceIndex = mateReferenceIndex;
+ mMateReferenceName = samSequence.getSequenceName();
+ }
+ else {
+ throw new IllegalArgumentException("Reference index " + mateReferenceIndex + " not found in sequence dictionary.");
@@ -478,21 +593,32 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
+ * @param offset 1-based location within the unclipped sequence or 0 if there is no position.
+ * <p/>
+ * Non static version of the static function with the same name.
* @return 1-based inclusive reference position of the unclipped sequence at a given offset,
- * or 0 if there is no position.
- * For example, given the sequence NNNAAACCCGGG, cigar 3S9M, and an alignment start of 1,
- * and a (1-based)offset 10 (start of GGG) it returns 7 (1-based offset starting after the soft clip.
- * For example: given the sequence AAACCCGGGTTT, cigar 4M1D6M, an alignment start of 1,
- * an offset of 4 returns reference position 4, an offset of 5 returns reference position 6.
- * Another example: given the sequence AAACCCGGGTTT, cigar 4M1I6M, an alignment start of 1,
- * an offset of 4 returns reference position 4, an offset of 5 returns 0.
- * @offset 1-based location within the unclipped sequence
public int getReferencePositionAtReadPosition(final int offset) {
+ return getReferencePositionAtReadPosition(this, offset);
+ }
+ /**
+ * @param rec record to use
+ * @param offset 1-based location within the unclipped sequence
+ * @return 1-based inclusive reference position of the unclipped sequence at a given offset,
+ * or 0 if there is no position.
+ * For example, given the sequence NNNAAACCCGGG, cigar 3S9M, and an alignment start of 1,
+ * and a (1-based)offset 10 (start of GGG) it returns 7 (1-based offset starting after the soft clip.
+ * For example: given the sequence AAACCCGGGTTT, cigar 4M1D6M, an alignment start of 1,
+ * an offset of 4 returns reference position 4, an offset of 5 returns reference position 6.
+ * Another example: given the sequence AAACCCGGGTTT, cigar 4M1I6M, an alignment start of 1,
+ * an offset of 4 returns reference position 4, an offset of 5 returns 0.
+ */
+ public static int getReferencePositionAtReadPosition(final SAMRecord rec, final int offset) {
if (offset == 0) return 0;
- for (final AlignmentBlock alignmentBlock : getAlignmentBlocks()) {
+ for (final AlignmentBlock alignmentBlock : rec.getAlignmentBlocks()) {
if (CoordMath.getEnd(alignmentBlock.getReadStart(), alignmentBlock.getLength()) < offset) {
} else if (offset < alignmentBlock.getReadStart()) {
@@ -504,6 +630,77 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
return 0; // offset not located in an alignment block
+ /**
+ * @param pos 1-based reference position
+ * return the offset
+ * @return 1-based (to match getReferencePositionAtReadPosition behavior) inclusive position into the
+ * unclipped sequence at a given reference position, or 0 if there is no such position.
+ *
+ * See examples in the static version below
+ */
+ public int getReadPositionAtReferencePosition(final int pos) {
+ return getReadPositionAtReferencePosition(this, pos, false);
+ }
+ /**
+ * @param pos 1-based reference position
+ * @param returnLastBaseIfDeleted if positive, and reference position matches a deleted base in the read, function will
+ * return the offset
+ * @return 1-based (to match getReferencePositionAtReadPosition behavior) inclusive position into the
+ * unclipped sequence at a given reference position,
+ * or 0 if there is no such position. If returnLastBaseIfDeleted is true deletions are assumed to "live" on the last read base
+ * in the preceding block.
+ *
+ * Non-static version of static function with the same name. See examples below.
+ */
+ public int getReadPositionAtReferencePosition(final int pos, final boolean returnLastBaseIfDeleted) {
+ return getReadPositionAtReferencePosition(this, pos, returnLastBaseIfDeleted);
+ }
+ /**
+ * @param rec record to use
+ * @param pos 1-based reference position
+ * @param returnLastBaseIfDeleted if positive, and reference position matches a deleted base in the read, function will
+ * return the offset
+ * @return 1-based (to match getReferencePositionAtReadPosition behavior) inclusive position into the
+ * unclipped sequence at a given reference position,
+ * or 0 if there is no such position. If returnLastBaseIfDeleted is true deletions are assumed to "live" on the last read base
+ * in the preceding block.
+ * For example, given the sequence NNNAAACCCGGG, cigar 3S9M, and an alignment start of 1,
+ * and a (1-based)pos of 7 (start of GGG) it returns 10 (1-based offset including the soft clip.
+ * For example: given the sequence AAACCCGGGT, cigar 4M1D6M, an alignment start of 1,
+ * a reference position of 4 returns offset of 4, a reference of 5 also returns an offset 4 (using "left aligning") if returnLastBaseIfDeleted
+ * and 0 otherwise.
+ * For example: given the sequence AAACtCGGGTT, cigar 4M1I6M, an alignment start of 1,
+ * a position 4 returns an offset 5, a position of 5 returns 6 (the inserted base is the 5th offset), a position of 11 returns 0 since
+ * that position in the reference doesn't overlap the read at all.
+ *
+ */
+ public static int getReadPositionAtReferencePosition(final SAMRecord rec, final int pos, final boolean returnLastBaseIfDeleted) {
+ if (pos <= 0) {
+ return 0;
+ }
+ int lastAlignmentOffset = 0;
+ for (final AlignmentBlock alignmentBlock : rec.getAlignmentBlocks()) {
+ if (CoordMath.getEnd(alignmentBlock.getReferenceStart(), alignmentBlock.getLength()) >= pos) {
+ if (pos < alignmentBlock.getReferenceStart()) {
+ //There must have been a deletion block that skipped
+ return returnLastBaseIfDeleted ? lastAlignmentOffset : 0;
+ } else {
+ return pos - alignmentBlock.getReferenceStart() + alignmentBlock.getReadStart() ;
+ }
+ } else {
+ // record the offset to the last base in the current block, in case the next block starts too late
+ lastAlignmentOffset = alignmentBlock.getReadStart() + alignmentBlock.getLength() - 1 ;
+ }
+ }
+ // if we are here, the reference position was not overlapping the read at all
+ return 0;
+ }
* @return 1-based inclusive leftmost position of the clipped mate sequence, or 0 if there is no position.
@@ -563,7 +760,9 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
public Cigar getCigar() {
if (mCigar == null && mCigarString != null) {
mCigar = TextCigarCodec.decode(mCigarString);
- if (getValidationStringency() != ValidationStringency.SILENT && !this.getReadUnmappedFlag()) {
+ if (null != getHeader() &&
+ getValidationStringency() != ValidationStringency.SILENT &&
+ !this.getReadUnmappedFlag()) {
// Don't know line number, and don't want to force read name to be decoded.
SAMUtils.processValidationErrors(this.validateCigar(-1L), -1L, getValidationStringency());
@@ -601,16 +800,16 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* Get the SAMReadGroupRecord for this SAMRecord.
* @return The SAMReadGroupRecord from the SAMFileHeader for this SAMRecord, or null if
* 1) this record has no RG tag, or 2) the header doesn't contain the read group with
- * the given ID.
- * @throws NullPointerException if this.getHeader() returns null.
+ * the given ID.or 3) this record has no SAMFileHeader
* @throws ClassCastException if RG tag does not have a String value.
public SAMReadGroupRecord getReadGroup() {
final String rgId = (String)getAttribute(SAMTagUtil.getSingleton().RG);
- if (rgId == null) {
+ if (rgId == null || getHeader() == null) {
return null;
+ } else {
+ return getHeader().getReadGroup(rgId);
- return getHeader().getReadGroup(rgId);
@@ -879,8 +1078,8 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* Get the tag value and attempt to coerce it into the requested type.
* @param tag The requested tag.
- * @return The value of a tag, converted into an Integer if possible.
- * @throws RuntimeException If the value is not an integer type, or will not fit in an Integer.
+ * @return The value of a tag, converted into a signed Integer if possible.
+ * @throws RuntimeException If the value is not an integer type, or will not fit in a signed Integer.
public Integer getIntegerAttribute(final String tag) {
final Object val = getAttribute(tag);
@@ -899,6 +1098,46 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
+ * A convenience method that will return a valid unsigned integer as a Long,
+ * or fail with an exception if the tag value is invalid.
+ *
+ * @param tag Two-character tag name.
+ * @return valid unsigned integer associated with the tag, as a Long
+ * @throws {@link htsjdk.samtools.SAMException} if the value is out of range for a 32-bit unsigned value, or not a Number
+ */
+ public Long getUnsignedIntegerAttribute(final String tag) throws SAMException {
+ return getUnsignedIntegerAttribute(SAMTagUtil.getSingleton().makeBinaryTag(tag));
+ }
+ /**
+ * A convenience method that will return a valid unsigned integer as a Long,
+ * or fail with an exception if the tag value is invalid.
+ *
+ * @param tag Binary representation of a 2-char String tag as created by SAMTagUtil.
+ * @return valid unsigned integer associated with the tag, as a Long
+ * @throws {@link htsjdk.samtools.SAMException} if the value is out of range for a 32-bit unsigned value, or not a Number
+ */
+ public Long getUnsignedIntegerAttribute(final short tag) throws SAMException {
+ final Object value = getAttribute(tag);
+ if (value == null) {
+ return null;
+ }
+ if (value instanceof Number) {
+ final long lValue = ((Number)value).longValue();
+ if (SAMUtils.isValidUnsignedIntegerAttribute(lValue)) {
+ return lValue;
+ } else {
+ throw new SAMException("Unsigned integer value of tag " +
+ SAMTagUtil.getSingleton().makeStringTag(tag) + " is out of bounds for a 32-bit unsigned integer: " + lValue);
+ }
+ } else {
+ throw new SAMException("Unexpected attribute value data type " + value.getClass() + " for tag " +
+ SAMTagUtil.getSingleton().makeStringTag(tag));
+ }
+ }
+ /**
* Get the tag value and attempt to coerce it into the requested type.
* @param tag The requested tag.
* @return The value of a tag, converted into a Short if possible.
@@ -1074,14 +1313,15 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* Set a named attribute onto the SAMRecord. Passing a null value causes the attribute to be cleared.
* @param tag two-character tag name. See http://samtools.sourceforge.net/SAM1.pdf for standard and user-defined tags.
- * @param value Supported types are String, Char, Integer, Float, byte[], short[]. int[], float[].
+ * @param value Supported types are String, Char, Integer, Float,
+ * Long (for values that fit into a signed or unsigned 32-bit integer only),
+ * byte[], short[], int[], float[].
* If value == null, tag is cleared.
* Byte and Short are allowed but discouraged. If written to a SAM file, these will be converted to Integer,
* whereas if written to BAM, getAttribute() will return as Byte or Short, respectively.
- * Long with value between 0 and MAX_UINT is allowed for BAM but discouraged. Attempting to write such a value
- * to SAM will cause an exception to be thrown.
+ * Long is allowed for values that fit into a signed or unsigned 32-bit integer only, but discouraged.
* To set unsigned byte[], unsigned short[] or unsigned int[] (which is discouraged because of poor Java language
* support), setUnsignedArrayAttribute() must be used instead of this method.
@@ -1119,24 +1359,43 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
setAttribute(tag, value, false);
- protected void setAttribute(final short tag, final Object value, final boolean isUnsignedArray) {
- if (value != null &&
- !(value instanceof Byte || value instanceof Short || value instanceof Integer ||
- value instanceof String || value instanceof Character || value instanceof Float ||
- value instanceof byte[] || value instanceof short[] || value instanceof int[] ||
- value instanceof float[])) {
- throw new SAMException("Attribute type " + value.getClass() + " not supported. Tag: " +
- SAMTagUtil.getSingleton().makeStringTag(tag));
+ /**
+ * Checks if the value is allowed as an attribute value.
+ *
+ * @param value the value to be checked
+ * @return true if the value is valid and false otherwise
+ */
+ protected static boolean isAllowedAttributeValue(final Object value) {
+ if (value instanceof Byte || value instanceof Short || value instanceof Integer ||
+ value instanceof String || value instanceof Character || value instanceof Float ||
+ value instanceof byte[] || value instanceof short[] || value instanceof int[] ||
+ value instanceof float[]) {
+ return true;
+ // A special case for Longs: we require Long values to fit into either a uint32_t or an int32_t,
+ // as that is what the BAM spec allows.
+ if (value instanceof Long) {
+ return SAMUtils.isValidUnsignedIntegerAttribute((Long) value)
+ || ((Long) value >= Integer.MIN_VALUE && (Long) value <= Integer.MAX_VALUE);
+ }
+ return false;
+ }
+ protected void setAttribute(final short tag, final Object value, final boolean isUnsignedArray) {
if (value == null) {
- if (this.mAttributes != null) this.mAttributes = this.mAttributes.remove(tag);
+ // setting a tag value to null removes the tag:
+ if (this.mAttributes != null) {
+ this.mAttributes = this.mAttributes.remove(tag);
+ }
+ return;
- else {
+ if (isAllowedAttributeValue(value)) {
final SAMBinaryTagAndValue tmp;
- if(!isUnsignedArray) {
+ if (!isUnsignedArray) {
tmp = new SAMBinaryTagAndValue(tag, value);
- }
- else {
+ } else {
if (!value.getClass().isArray() || value instanceof float[]) {
throw new SAMException("Attribute type " + value.getClass() +
" cannot be encoded as an unsigned array. Tag: " +
@@ -1144,8 +1403,15 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
tmp = new SAMBinaryTagAndUnsignedArrayValue(tag, value);
- if (this.mAttributes == null) this.mAttributes = tmp;
- else this.mAttributes = this.mAttributes.insert(tmp);
+ if (this.mAttributes == null) {
+ this.mAttributes = tmp;
+ } else {
+ this.mAttributes = this.mAttributes.insert(tmp);
+ }
+ } else {
+ throw new SAMException("Attribute type " + value.getClass() + " not supported. Tag: " +
+ SAMTagUtil.getSingleton().makeStringTag(tag));
@@ -1255,16 +1521,70 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
return GenomicIndexUtil.reg2bin(alignmentStart, alignmentEnd);
+ /**
+ * @return the SAMFileHeader for this record. If the header is null, the following SAMRecord methods may throw
+ * exceptions:
+ * <p><ul>
+ * <li>getReferenceIndex</li>
+ * <li>setReferenceIndex</li>
+ * <li>getMateReferenceIndex</li>
+ * <li>setMateReferenceIndex</li>
+ * </ul><p>
+ * Record comparators (i.e. SAMRecordCoordinateComparator and SAMRecordDuplicateComparator) require records with
+ * non-null header values.
+ * <p>
+ * A record with null a header may be validated by the isValid method, but the reference and mate reference indices,
+ * read group, sequence dictionary, and alignment start will not be fully validated unless a header is present.
+ * <p>
+ * SAMTextWriter, BAMFileWriter, and CRAMFileWriter all require records to have a valid header in order to be
+ * written. Any record that does not have a header at the time it is added to the writer will be updated to use the
+ * header associated with the writer.
+ */
public SAMFileHeader getHeader() {
return mHeader;
- * Setting header into SAMRecord facilitates conversion btw reference sequence names and indices
+ * Sets the SAMFileHeader for this record. Setting the header into SAMRecord facilitates conversion between reference
+ * sequence names and indices.
+ * <p>
+ * <b>NOTE:</b> If the record has a reference or mate reference name, the corresponding reference and mate reference
+ * indices are resolved and updated using the sequence dictionary in the new header. setHeader does not throw an
+ * exception if either the reference or mate reference name does not appear in the new header's sequence dictionary.
+ * <p>
+ * When the SAMFileHeader is set to null, the reference and mate reference indices are cleared. Therefore, calls to
+ * the following SAMRecord methods on records with a null header may throw IllegalArgumentExceptions:
+ * <ul>
+ * <li>getReferenceIndex</li>
+ * <li>setReferenceIndex</li>
+ * <li>getMateReferenceIndex</li>
+ * <li>setMateReferenceIndex</li>
+ * </ul><p>
+ * Record comparators (i.e. SAMRecordCoordinateComparator and SAMRecordDuplicateComparator) require records with
+ * non-null header values.
+ * <p>
+ * A record with null a header may be validated by the isValid method, but the reference and mate reference indices,
+ * read group, sequence dictionary, and alignment start will not be fully validated unless a header is present.
+ * <p>
+ * SAMTextWriter, BAMFileWriter, and CRAMFileWriter all require records to have a valid header in order to be
+ * written. Any record that does not have a header at the time it is added to the writer will be updated to use the
+ * header associated with the writer.
+ *
* @param header contains sequence dictionary for this SAMRecord
public void setHeader(final SAMFileHeader header) {
this.mHeader = header;
+ if (null == header) {
+ // mark the reference indices as unresolved
+ mReferenceIndex = null;
+ mMateReferenceIndex = null;
+ }
+ else {
+ // attempt to resolve the existing reference names and indices against the new sequence dictionary, but
+ // don't throw if the names don't appear in the dictionary
+ setReferenceName(mReferenceName);
+ setMateReferenceName(mMateReferenceName);
+ }
@@ -1391,7 +1711,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
public List<SAMValidationError> validateCigar(final long recordNumber) {
List<SAMValidationError> ret = null;
- if (getValidationStringency() != ValidationStringency.SILENT && !this.getReadUnmappedFlag()) {
+ if (null != getHeader() && getValidationStringency() != ValidationStringency.SILENT && !this.getReadUnmappedFlag()) {
ret = SAMUtils.validateCigar(this, getCigar(), getReferenceIndex(), getAlignmentBlocks(), recordNumber, "Read CIGAR");
return ret;
@@ -1460,7 +1780,12 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* Perform various validations of SAMRecord.
* Note that this method deliberately returns null rather than Collections.emptyList() if there
* are no validation errors, because callers tend to assume that if a non-null list is returned, it is modifiable.
+ *
+ * A record with null a header may be validated by the isValid method, but the reference and mate reference indices,
+ * read group, sequence dictionary, and alignment start will not be fully validated unless a header is present.
+ *
* @return null if valid. If invalid, returns a list of error messages.
+ *
public List<SAMValidationError> isValid() {
return isValid(false);
@@ -1470,6 +1795,10 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* Perform various validations of SAMRecord.
* Note that this method deliberately returns null rather than Collections.emptyList() if there
* are no validation errors, because callers tend to assume that if a non-null list is returned, it is modifiable.
+ *
+ * A record with null a header may be validated by the isValid method, but the reference and mate reference indices,
+ * read group, sequence dictionary, and alignment start will not be fully validated unless a header is present.
+ *
* @param firstOnly return only the first error if true, false otherwise
* @return null if valid. If invalid, returns a list of error messages.
@@ -1503,7 +1832,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_SECOND_OF_PAIR, "Second of pair flag should not be set for unpaired read.", getReadName()));
if (firstOnly) return ret;
- if (getMateReferenceIndex() != NO_ALIGNMENT_REFERENCE_INDEX) {
+ if (null != getHeader() && getMateReferenceIndex() != NO_ALIGNMENT_REFERENCE_INDEX) {
if (ret == null) ret = new ArrayList<SAMValidationError>();
ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_MATE_REF_INDEX, "MRNM should not be set for unpaired read.", getReadName()));
if (firstOnly) return ret;
@@ -1583,7 +1912,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
if (firstOnly) return ret;
- if (getHeader().getSequenceDictionary().size() == 0) {
+ if (getHeader() != null && getHeader().getSequenceDictionary().size() == 0) {
if (ret == null) ret = new ArrayList<SAMValidationError>();
ret.add(new SAMValidationError(SAMValidationError.Type.MISSING_SEQUENCE_DICTIONARY, "Empty sequence dictionary.", getReadName()));
if (firstOnly) return ret;
@@ -1606,11 +1935,11 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
// Validate the RG ID is found in header
final String rgId = (String)getAttribute(SAMTagUtil.getSingleton().RG);
- if (rgId != null && getHeader().getReadGroup(rgId) == null) {
- if (ret == null) ret = new ArrayList<SAMValidationError>();
- ret.add(new SAMValidationError(SAMValidationError.Type.READ_GROUP_NOT_FOUND,
- "RG ID on SAMRecord not found in header: " + rgId, getReadName()));
- if (firstOnly) return ret;
+ if (rgId != null && getHeader() != null && getHeader().getReadGroup(rgId) == null) {
+ if (ret == null) ret = new ArrayList<SAMValidationError>();
+ ret.add(new SAMValidationError(SAMValidationError.Type.READ_GROUP_NOT_FOUND,
+ "RG ID on SAMRecord not found in header: " + rgId, getReadName()));
+ if (firstOnly) return ret;
final List<SAMValidationError> errors = isValidReferenceIndexAndPosition(mReferenceIndex, mReferenceName, getAlignmentStart(), false);
if (errors != null) {
@@ -1710,8 +2039,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_ALIGNMENT_START, buildMessage("Alignment start should != 0 because reference name != *.", isMate), getReadName()));
if (firstOnly) return ret;
- if (getHeader().getSequenceDictionary().size() > 0) {
+ if (getHeader() != null && getHeader().getSequenceDictionary().size() > 0) {
final SAMSequenceRecord sequence =
(referenceIndex != null? getHeader().getSequence(referenceIndex): getHeader().getSequence(referenceName));
if (sequence == null) {
@@ -1750,6 +2078,58 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
return newRecord;
+ /**
+ * Returns a deep copy of the SAM record, with the following exceptions:
+ *
+ * - The header field, which shares the reference with the original record
+ * - The file source field, which will always always be set to null in the copy
+ *
+ * Note that some fields, i.e. the cigar elements, alignment blocks, and
+ * indexing bin, are not explicitly populated in the copy since they are lazily
+ * generated on demand.
+ *
+ * Also note that this fails:
+ *
+ * original.deepCopy().equals(original)
+ *
+ * due to the fact that SAMBinaryTagAndValue.equals winds up calling object.equals on the
+ * value field, which uses reference equality.
+ *
+ */
+ public SAMRecord deepCopy() {
+ final SAMRecord newSAM = new SAMRecord(getHeader());
+ newSAM.setReadName(getReadName());
+ newSAM.setReadBases(Arrays.copyOf(getReadBases(), getReadLength()));
+ final byte baseQualities[] = getBaseQualities();
+ newSAM.setBaseQualities(Arrays.copyOf(baseQualities, baseQualities.length));
+ newSAM.setReferenceName(getReferenceName());
+ newSAM.setAlignmentStart(getAlignmentStart()); // clears mAlignmentEnd
+ newSAM.setMappingQuality(getMappingQuality());
+ newSAM.setCigarString(getCigarString()); // clears Cigar element and alignmentBlocks
+ newSAM.setFileSource(null);
+ newSAM.setFlags(getFlags());
+ newSAM.setMateReferenceName(getMateReferenceName());
+ newSAM.setMateAlignmentStart(getMateAlignmentStart());
+ newSAM.setInferredInsertSize(getInferredInsertSize());
+ if (null != getHeader()) {
+ newSAM.setReferenceIndex(getReferenceIndex());
+ newSAM.setMateReferenceIndex(getMateReferenceIndex());
+ }
+ else {
+ newSAM.mReferenceIndex = null;
+ newSAM.mMateReferenceIndex = null;
+ }
+ newSAM.setValidationStringency(getValidationStringency());
+ SAMBinaryTagAndValue attributes = getBinaryAttributes();
+ if (null != attributes) {
+ newSAM.setAttributes(attributes.deepCopy());
+ }
+ return newSAM;
+ }
/** Simple toString() that gives a little bit of useful info about the read. */
public String toString() {
@@ -1803,7 +2183,39 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* shortcut to <pre>SAMFlag.getFlags( this.getFlags() );</pre>
* @returns a set of SAMFlag associated to this sam record */
public final Set<SAMFlag> getSAMFlags() {
- return SAMFlag.getFlags( this.getFlags() );
+ return SAMFlag.getFlags(this.getFlags());
+ }
+ /**
+ * Fetches the value of a transient attribute on the SAMRecord, of null if not set.
+ *
+ * The intended use for transient attributes is to store values that are 1-to-1 with the SAMRecord,
+ * may be needed many times and are expensive to compute. These values can be computed lazily and
+ * then stored as transient attributes to avoid frequent re-computation.
+ */
+ public final Object getTransientAttribute(final Object key) {
+ return (this.transientAttributes == null) ? null : this.transientAttributes.get(key);
+ }
+ /**
+ * Sets the value of a transient attribute, and returns the previous value if defined.
+ *
+ * The intended use for transient attributes is to store values that are 1-to-1 with the SAMRecord,
+ * may be needed many times and are expensive to compute. These values can be computed lazily and
+ * then stored as transient attributes to avoid frequent re-computation.
+ */
+ public final Object setTransientAttribute(final Object key, final Object value) {
+ if (this.transientAttributes == null) this.transientAttributes = new HashMap<Object,Object>();
+ return this.transientAttributes.put(key, value);
+ }
+ /**
+ * Removes a transient attribute if it is stored, and returns the stored value. If there is not
+ * a stored value, will return null.
+ */
+ public final Object removeTransientAttribute(final Object key) {
+ if (this.transientAttributes != null) return this.transientAttributes.remove(key);
+ else return null;
diff --git a/src/java/htsjdk/samtools/SAMRecordCoordinateComparator.java b/src/java/htsjdk/samtools/SAMRecordCoordinateComparator.java
index 717609c..24ebb90 100644
--- a/src/java/htsjdk/samtools/SAMRecordCoordinateComparator.java
+++ b/src/java/htsjdk/samtools/SAMRecordCoordinateComparator.java
@@ -75,10 +75,16 @@ public class SAMRecordCoordinateComparator implements SAMRecordComparator {
* Less stringent compare method than the regular compare. If the two records
* are equal enough that their ordering in a sorted SAM file would be arbitrary,
* this method returns 0. If read is paired and unmapped, use the mate mapping to sort.
+ * Records being compared must have non-null SAMFileHeaders.
* @return negative if samRecord1 < samRecord2, 0 if equal, else positive
public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
+ if (null == samRecord1.getHeader() || null == samRecord2.getHeader()) {
+ throw new IllegalArgumentException("Records must have non-null SAMFileHeaders to be compared");
+ }
final int refIndex1 = samRecord1.getReferenceIndex();
final int refIndex2 = samRecord2.getReferenceIndex();
if (refIndex1 == -1) {
diff --git a/src/java/htsjdk/samtools/SAMRecordDuplicateComparator.java b/src/java/htsjdk/samtools/SAMRecordDuplicateComparator.java
index 99ae9a5..6de77da 100644
--- a/src/java/htsjdk/samtools/SAMRecordDuplicateComparator.java
+++ b/src/java/htsjdk/samtools/SAMRecordDuplicateComparator.java
@@ -35,10 +35,15 @@ import java.util.Map;
* There are three orderings provided by this comparator: compare, duplicateSetCompare, and fileOrderCompare.
* Specify the headers when constructing this comparator if you would like to consider the library as the major sort key.
+ * The records being compared must also have non-null SAMFileHeaders.
* @author nhomer
public class SAMRecordDuplicateComparator implements SAMRecordComparator {
+ /** An enum to provide type-safe keys for transient attributes the comparator puts on SAMRecords. */
+ private static enum Attr {
+ LibraryId, ReadCoordinate, MateCoordinate
+ }
private static final byte FF = 0, FR = 1, F = 2, RF = 3, RR = 4, R = 5;
@@ -67,6 +72,18 @@ public class SAMRecordDuplicateComparator implements SAMRecordComparator {
+ * Populates the set of transient attributes on SAMRecords if they are not already there.
+ */
+ private void populateTransientAttributes(final SAMRecord... recs) {
+ for (final SAMRecord rec : recs) {
+ if (rec.getTransientAttribute(Attr.LibraryId) != null) continue;
+ rec.setTransientAttribute(Attr.LibraryId, getLibraryId(rec));
+ rec.setTransientAttribute(Attr.ReadCoordinate, rec.getReadNegativeStrandFlag() ? rec.getUnclippedEnd() : rec.getUnclippedStart());
+ rec.setTransientAttribute(Attr.MateCoordinate, getMateCoordinate(rec));
+ }
+ }
+ /**
* Gets the library name from the header for the record. If the RG tag is not present on
* the record, or the library isn't denoted on the read group, a constant string is
* returned.
@@ -75,10 +92,13 @@ public class SAMRecordDuplicateComparator implements SAMRecordComparator {
final String readGroupId = (String) rec.getAttribute("RG");
if (readGroupId != null) {
- final SAMReadGroupRecord rg = rec.getHeader().getReadGroup(readGroupId);
- if (rg != null) {
- final String libraryName = rg.getLibrary();
- if (null != libraryName) return libraryName;
+ final SAMFileHeader samHeader = rec.getHeader();
+ if (null != samHeader) {
+ final SAMReadGroupRecord rg = samHeader.getReadGroup(readGroupId);
+ if (rg != null) {
+ final String libraryName = rg.getLibrary();
+ if (null != libraryName) return libraryName;
+ }
@@ -198,6 +218,7 @@ public class SAMRecordDuplicateComparator implements SAMRecordComparator {
* properly choose the first end for optical duplicate identification when both ends are mapped to the same position etc.
public int compare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
+ populateTransientAttributes(samRecord1, samRecord2);
int cmp;
// temporary variables for comparisons
@@ -234,15 +255,20 @@ public class SAMRecordDuplicateComparator implements SAMRecordComparator {
private int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2, final boolean collapseOrientation, final boolean considerNumberOfEndsMappedAndPairing) {
+ populateTransientAttributes(samRecord1, samRecord2);
int cmp;
+ if (null == samRecord1.getHeader() || null == samRecord2.getHeader()) {
+ throw new IllegalArgumentException("Records must have non-null SAMFileHeaders to be compared");
+ }
// temporary variables for comparisons
int samRecord1Value, samRecord2Value;
// library identifier
- samRecord1Value = getLibraryId(samRecord1);
- samRecord2Value = getLibraryId(samRecord2);
+ samRecord1Value = (Short) samRecord1.getTransientAttribute(Attr.LibraryId);
+ samRecord2Value = (Short) samRecord2.getTransientAttribute(Attr.LibraryId);
cmp = samRecord1Value - samRecord2Value;
// reference index
@@ -262,8 +288,8 @@ public class SAMRecordDuplicateComparator implements SAMRecordComparator {
// read coordinate
if (cmp == 0) {
- samRecord1Value = samRecord1.getReadNegativeStrandFlag() ? samRecord1.getUnclippedEnd() : samRecord1.getUnclippedStart();
- samRecord2Value = samRecord2.getReadNegativeStrandFlag() ? samRecord2.getUnclippedEnd() : samRecord2.getUnclippedStart();
+ samRecord1Value = (Integer) samRecord1.getTransientAttribute(Attr.ReadCoordinate);
+ samRecord2Value = (Integer) samRecord2.getTransientAttribute(Attr.ReadCoordinate);
cmp = samRecord1Value - samRecord2Value;
// orientation
@@ -287,8 +313,8 @@ public class SAMRecordDuplicateComparator implements SAMRecordComparator {
// mate's coordinate
if (cmp == 0) {
- samRecord1Value = getMateCoordinate(samRecord1);
- samRecord2Value = getMateCoordinate(samRecord2);
+ samRecord1Value = (Integer) samRecord1.getTransientAttribute(Attr.MateCoordinate);
+ samRecord2Value = (Integer) samRecord2.getTransientAttribute(Attr.MateCoordinate);;
cmp = samRecord1Value - samRecord2Value;
diff --git a/src/java/htsjdk/samtools/SAMRecordQueryHashComparator.java b/src/java/htsjdk/samtools/SAMRecordQueryHashComparator.java
new file mode 100644
index 0000000..fc250e9
--- /dev/null
+++ b/src/java/htsjdk/samtools/SAMRecordQueryHashComparator.java
@@ -0,0 +1,68 @@
+ * The MIT License
+ *
+ * Copyright (c) 2015 Tim Fennell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ */
+package htsjdk.samtools;
+import htsjdk.samtools.util.Murmur3;
+ * SAMRecord comparator that provides an ordering based on a hash of the queryname. Has
+ * the useful property that reads with the same name will be grouped together, but that
+ * reads appear in an otherwise random order. Useful for when the read names in a BAM
+ * are correlated to something else (e.g. position, read group), making a straight
+ * queryname sort undesirable.
+ *
+ * @author Tim Fennell
+ */
+public class SAMRecordQueryHashComparator extends SAMRecordQueryNameComparator {
+ private final Murmur3 hasher = new Murmur3(42);
+ /**
+ * Compares two records based on an integer hash of their read name's. If the hash
+ * values are equal, falls back to the behaviour of SAMRecordQueryNameComparator
+ * to break the tie.
+ */
+ @Override
+ public int compare(final SAMRecord lhs, final SAMRecord rhs) {
+ final int retval = compareHashes(lhs, rhs);
+ if (retval == 0) return super.compare(lhs, rhs);
+ else return retval;
+ }
+ /**
+ * Compares two records based on an integer hash of their read names. If the hash
+ * values are equal, falls back to the behaviour of SAMRecordQueryNameComparator
+ * to break the tie.
+ */
+ @Override
+ public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) {
+ final int retval = compareHashes(lhs, rhs);
+ if (retval == 0) return super.fileOrderCompare(lhs, rhs);
+ else return retval;
+ }
+ /** Compares the hash values for two records. */
+ private int compareHashes(final SAMRecord lhs, final SAMRecord rhs) {
+ return new Integer(this.hasher.hashUnencodedChars(lhs.getReadName())).compareTo(this.hasher.hashUnencodedChars(rhs.getReadName()));
+ }
diff --git a/src/java/htsjdk/samtools/SAMTag.java b/src/java/htsjdk/samtools/SAMTag.java
index 7dac5a2..fa25728 100644
--- a/src/java/htsjdk/samtools/SAMTag.java
+++ b/src/java/htsjdk/samtools/SAMTag.java
@@ -41,6 +41,7 @@ public enum SAMTag {
+ FT,
GC, // for backwards compatibility
GS, // for backwards compatibility
@@ -60,6 +61,8 @@ public enum SAMTag {
+ OF,
+ OR,
diff --git a/src/java/htsjdk/samtools/SAMUtils.java b/src/java/htsjdk/samtools/SAMUtils.java
index b751ef0..14e2246 100644
--- a/src/java/htsjdk/samtools/SAMUtils.java
+++ b/src/java/htsjdk/samtools/SAMUtils.java
@@ -23,6 +23,8 @@
package htsjdk.samtools;
+import htsjdk.samtools.util.BinaryCodec;
+import htsjdk.samtools.util.CigarUtil;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.CoordMath;
import htsjdk.samtools.util.RuntimeEOFException;
@@ -546,6 +548,14 @@ public final class SAMUtils {
+ /**
+ * Strip mapping information from a SAMRecord.
+ *
+ * WARNING: by clearing the secondary and supplementary flags,
+ * this may have the affect of producing multiple distinct records with the
+ * same read name and flags, which may lead to invalid SAM/BAM output.
+ * Callers of this method should make sure to deal with this issue.
+ */
public static void makeReadUnmapped(final SAMRecord rec) {
if (rec.getReadNegativeStrandFlag()) {
@@ -558,10 +568,33 @@ public final class SAMUtils {
+ rec.setSupplementaryAlignmentFlag(false);
+ /**
+ * Strip mapping information from a SAMRecord, but preserve it in the 'O' tags if it isn't already set.
+ */
+ public static void makeReadUnmappedWithOriginalTags(final SAMRecord rec) {
+ if (!hasOriginalMappingInformation(rec)) {
+ rec.setAttribute(SAMTag.OP.name(), rec.getAlignmentStart());
+ rec.setAttribute(SAMTag.OC.name(), rec.getCigarString());
+ rec.setAttribute(SAMTag.OF.name(), rec.getFlags());
+ rec.setAttribute(SAMTag.OR.name(), rec.getReferenceName());
+ }
+ makeReadUnmapped(rec);
+ }
+ /**
+ * See if any tags pertaining to original mapping information have been set.
+ */
+ public static boolean hasOriginalMappingInformation(final SAMRecord rec) {
+ return rec.getAttribute(SAMTag.OP.name()) != null
+ || rec.getAttribute(SAMTag.OC.name()) != null
+ || rec.getAttribute(SAMTag.OF.name()) != null
+ || rec.getAttribute(SAMTag.OR.name()) != null;
+ }
* Determines if a cigar has any element that both consumes read bases and consumes reference bases
@@ -579,9 +612,15 @@ public final class SAMUtils {
* Tests if the provided record is mapped entirely beyond the end of the reference (i.e., the alignment start is greater than the
* length of the sequence to which the record is mapped).
+ * @param record must not have a null SamFileHeader
public static boolean recordMapsEntirelyBeyondEndOfReference(final SAMRecord record) {
- return record.getHeader().getSequence(record.getReferenceIndex()).getSequenceLength() < record.getAlignmentStart();
+ if (record.getHeader() == null) {
+ throw new SAMException("A non-null SAMHeader is required to resolve the mapping position: " + record.getReadName());
+ }
+ else {
+ return record.getHeader().getSequence(record.getReferenceIndex()).getSequenceLength() < record.getAlignmentStart();
+ }
@@ -865,14 +904,22 @@ public final class SAMUtils {
// Don't know line number, and don't want to force read name to be decoded.
List<SAMValidationError> ret = cigar.isValid(rec.getReadName(), recordNumber);
if (referenceIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- final SAMSequenceRecord sequence = rec.getHeader().getSequence(referenceIndex);
- final int referenceSequenceLength = sequence.getSequenceLength();
- for (final AlignmentBlock alignmentBlock : alignmentBlocks) {
- if (alignmentBlock.getReferenceStart() + alignmentBlock.getLength() - 1 > referenceSequenceLength) {
- if (ret == null) ret = new ArrayList<SAMValidationError>();
- ret.add(new SAMValidationError(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE,
- cigarTypeName + " M operator maps off end of reference", rec.getReadName(), recordNumber));
- break;
+ SAMFileHeader samHeader = rec.getHeader();
+ if (null == samHeader) {
+ if (ret == null) ret = new ArrayList<SAMValidationError>();
+ ret.add(new SAMValidationError(SAMValidationError.Type.MISSING_HEADER,
+ cigarTypeName + " A non-null SAMHeader is required to validate cigar elements for: ", rec.getReadName(), recordNumber));
+ }
+ else {
+ final SAMSequenceRecord sequence = samHeader.getSequence(referenceIndex);
+ final int referenceSequenceLength = sequence.getSequenceLength();
+ for (final AlignmentBlock alignmentBlock : alignmentBlocks) {
+ if (alignmentBlock.getReferenceStart() + alignmentBlock.getLength() - 1 > referenceSequenceLength) {
+ if (ret == null) ret = new ArrayList<SAMValidationError>();
+ ret.add(new SAMValidationError(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE,
+ cigarTypeName + " M operator maps off end of reference", rec.getReadName(), recordNumber));
+ break;
+ }
@@ -939,4 +986,113 @@ public final class SAMUtils {
else name = name + ":" + record.getReadName();
return name;
+ /**
+ * Returns the number of bases that need to be clipped due to overlapping pairs. If the record is not paired,
+ * or the given record's start position is greater than its mate's start position, zero is automatically returned.
+ * NB: This method assumes that the record's mate is not contained within the given record's alignment.
+ *
+ * @param rec
+ * @return the number of bases at the end of the read that need to be clipped such that there would be no overlapping bases with its mate.
+ * Read bases include only those from insertion, match, or mismatch Cigar operators.
+ */
+ public static int getNumOverlappingAlignedBasesToClip(final SAMRecord rec) {
+ // NB: ignores how to handle supplemental records when present for both ends by just using the mate information in the record.
+ if (!rec.getReadPairedFlag() || rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag()) return 0;
+ // Only clip records that are left-most in genomic order and overlapping.
+ if (rec.getMateAlignmentStart() < rec.getAlignmentStart()) return 0; // right-most, so ignore.
+ // Find the number of read bases after the given mate's alignment start.
+ int numBasesToClip = 0;
+ final int refStartPos = rec.getMateAlignmentStart(); // relative reference position after which we should start clipping
+ final Cigar cigar = rec.getCigar();
+ int refPos = rec.getAlignmentStart();
+ for (final CigarElement el : cigar.getCigarElements()) {
+ final CigarOperator operator = el.getOperator();
+ final int refBasesLength = operator.consumesReferenceBases() ? el.getLength() : 0;
+ if (refStartPos <= refPos + refBasesLength - 1) { // add to clipped bases
+ if (operator == CigarOperator.MATCH_OR_MISMATCH) { // M
+ if (refStartPos < refPos) numBasesToClip += refBasesLength; // use all of the bases
+ else numBasesToClip += (refPos + refBasesLength) - refStartPos; // since the mate's alignment start can be in the middle of a cigar element
+ }
+ else if (operator == CigarOperator.SOFT_CLIP || operator == CigarOperator.HARD_CLIP || operator == CigarOperator.PADDING || operator == CigarOperator.SKIPPED_REGION) {
+ // ignore
+ }
+ else { // ID
+ numBasesToClip += operator.consumesReadBases() ? el.getLength() : 0; // clip all the bases in the read from this operator
+ }
+ }
+ refPos += refBasesLength;
+ }
+ if (numBasesToClip < 0) return 0; // left-most but not overlapping
+ return numBasesToClip;
+ }
+ /**
+ * Returns a (possibly new) record that has been clipped if isa mapped paired and has overlapping bases with its mate.
+ * See {@link #getNumOverlappingAlignedBasesToClip(SAMRecord)} for how the number of overlapping bases is computed.
+ * NB: this does not properly consider a cigar like: 100M20S10H.
+ * NB: This method assumes that the record's mate is not contained within the given record's alignment.
+ *
+ * @param record the record from which to clip bases.
+ * @param noSideEffects if true a modified clone of the original record is returned, otherwise we modify the record directly.
+ * @return
+ */
+ public static SAMRecord clipOverlappingAlignedBases(final SAMRecord record, final boolean noSideEffects) {
+ return clipOverlappingAlignedBases(record, getNumOverlappingAlignedBasesToClip(record), noSideEffects);
+ }
+ /**
+ * Returns a (possibly new) SAMRecord with the given number of bases soft-clipped at the end of the read if is a mapped
+ * paired and has overlapping bases with its mate.
+ * NB: this does not properly consider a cigar like: 100M20S10H.
+ * NB: This method assumes that the record's mate is not contained within the given record's alignment.
+ *
+ * @param record the record from which to clip bases.
+ * @param numOverlappingBasesToClip the number of bases to clip at the end of the read.
+ * @param noSideEffects if true a modified clone of the original record is returned, otherwise we modify the record directly.
+ * @return
+ */
+ public static SAMRecord clipOverlappingAlignedBases(final SAMRecord record, final int numOverlappingBasesToClip, final boolean noSideEffects) {
+ // NB: ignores how to handle supplemental records when present for both ends by just using the mate information in the record.
+ if (numOverlappingBasesToClip <= 0 || record.getReadUnmappedFlag() || record.getMateUnmappedFlag()) return record;
+ try {
+ final SAMRecord rec = noSideEffects ? ((SAMRecord)record.clone()) : record;
+ // watch out for when the second read overlaps all of the first read
+ if (rec.getMateAlignmentStart() <= rec.getAlignmentStart()) { // make it unmapped
+ rec.setReadUnmappedFlag(true);
+ return rec;
+ }
+ // 1-based index of first base in read to clip.
+ int clipFrom = rec.getReadLength() - numOverlappingBasesToClip + 1;
+ // we have to check if the last cigar element is soft-clipping, so we can subtract that from clipFrom
+ final CigarElement cigarElement = rec.getCigar().getCigarElement(rec.getCigarLength()-1);
+ if (CigarOperator.SOFT_CLIP == cigarElement.getOperator()) clipFrom -= cigarElement.getLength();
+ // FIXME: does not properly consider a cigar like: 100M20S10H
+ // clip it, clip it good
+ rec.setCigar(new Cigar(CigarUtil.softClipEndOfRead(clipFrom, rec.getCigar().getCigarElements())));
+ return rec;
+ } catch (final CloneNotSupportedException e) {
+ throw new SAMException(e.getMessage(), e);
+ }
+ }
+ /**
+ * Checks if a long attribute value is within the allowed range of a 32-bit unsigned integer.
+ *
+ * @param value a long value to check
+ * @return true if value is >= 0 and <= {@link BinaryCodec#MAX_UINT}, and false otherwise
+ */
+ public static boolean isValidUnsignedIntegerAttribute(long value) {
+ return value >= 0 && value <= BinaryCodec.MAX_UINT;
+ }
diff --git a/src/java/htsjdk/samtools/SRAFileReader.java b/src/java/htsjdk/samtools/SRAFileReader.java
new file mode 100644
index 0000000..14d7df8
--- /dev/null
+++ b/src/java/htsjdk/samtools/SRAFileReader.java
@@ -0,0 +1,306 @@
+* National Center for Biotechnology Information
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+* Please cite the author in any work or product based on this material.
+* ===========================================================================
+ * Created by andrii.nikitiuk on 8/11/15.
+ */
+package htsjdk.samtools;
+import htsjdk.samtools.sra.ReferenceCache;
+import htsjdk.samtools.sra.SRAAccession;
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.samtools.SamReader.Type;
+import htsjdk.samtools.util.Log;
+import ngs.ErrorMsg;
+import ngs.ReadCollection;
+import ngs.ReadGroupIterator;
+import ngs.ReferenceIterator;
+import ngs.Reference;
+import java.util.ArrayList;
+import java.util.List;
+public class SRAFileReader extends SamReader.ReaderImplementation implements SamReader.Indexing {
+ private static final Log log = Log.getInstance(SRAFileReader.class);
+ private SRAAccession acc;
+ private SAMFileHeader virtualHeader;
+ private ReadCollection run;
+ private ValidationStringency validationStringency;
+ private SRAIterator.RecordRangeInfo recordRangeInfo;
+ private SRAIndex index;
+ private ReferenceCache cachedReferences;
+ public SRAFileReader(final SRAAccession acc) {
+ this.acc = acc;
+ if (!acc.isValid()) {
+ throw new IllegalArgumentException("Invalid SRA accession was passed to SRA reader: " + acc);
+ }
+ try {
+ run = gov.nih.nlm.ncbi.ngs.NGS.openReadCollection(acc.toString());
+ virtualHeader = loadSamHeader();
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ cachedReferences = new ReferenceCache(run, virtualHeader);
+ recordRangeInfo = SRAIterator.getRecordsRangeInfo(run);
+ index = new SRAIndex(virtualHeader, recordRangeInfo);
+ }
+ @Override
+ public Type type() {
+ return Type.SRA_TYPE;
+ }
+ @Override
+ public boolean hasIndex() {
+ return true;
+ }
+ @Override
+ public BAMIndex getIndex() {
+ return index;
+ }
+ @Override
+ public SAMFileHeader getFileHeader() {
+ return virtualHeader;
+ }
+ @Override
+ public CloseableIterator<SAMRecord> getIterator() {
+ return getIterator(getFilePointerSpanningReads());
+ }
+ @Override
+ public CloseableIterator<SAMRecord> getIterator(SAMFileSpan chunks) {
+ if (run == null) {
+ throw new RuntimeException("Cannot create iterator - SRA run is uninitialized");
+ }
+ if (virtualHeader == null) {
+ throw new RuntimeException("Cannot create iterator - SAM file header is uninitialized");
+ }
+ List<Chunk> chunkList = ((BAMFileSpan) chunks).getChunks();
+ final SRAIterator newIterator = new SRAIterator(acc, run, virtualHeader, cachedReferences, recordRangeInfo, chunkList);
+ if (validationStringency != null) {
+ newIterator.setValidationStringency(validationStringency);
+ }
+ return newIterator;
+ }
+ @Override
+ public SAMFileSpan getFilePointerSpanningReads() {
+ if (recordRangeInfo.getTotalRecordRangeLength() <= 0) {
+ throw new RuntimeException("Cannot create file span - SRA file is empty");
+ }
+ return new BAMFileSpan(new Chunk(0, recordRangeInfo.getTotalRecordRangeLength()));
+ }
+ @Override
+ public CloseableIterator<SAMRecord> query(QueryInterval[] intervals, boolean contained) {
+ BAMFileSpan span = new BAMFileSpan();
+ BrowseableBAMIndex index = getBrowseableIndex();
+ for (QueryInterval interval : intervals) {
+ BAMFileSpan intervalSpan;
+ if (!contained) {
+ intervalSpan = index.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end);
+ } else {
+ intervalSpan = getSpanContained(interval.referenceIndex, interval.start, interval.end);
+ }
+ span.add(intervalSpan);
+ }
+ return getIterator(span);
+ }
+ @Override
+ public CloseableIterator<SAMRecord> queryAlignmentStart(String sequence, int start) {
+ int sequenceIndex = virtualHeader.getSequenceIndex(sequence);
+ if (sequenceIndex == -1) {
+ throw new IllegalArgumentException("Unknown sequence '" + sequence + "' was passed to SRAFileReader");
+ }
+ return getIterator(getSpanContained(sequenceIndex, start, -1));
+ }
+ @Override
+ public CloseableIterator<SAMRecord> queryUnmapped() {
+ if (recordRangeInfo.getTotalRecordRangeLength() <= 0) {
+ throw new RuntimeException("Cannot create file span - SRA file is empty");
+ }
+ SAMFileSpan span = new BAMFileSpan(new Chunk(recordRangeInfo.getTotalReferencesLength(), recordRangeInfo.getTotalRecordRangeLength()));
+ return getIterator(span);
+ }
+ @Override
+ public void close() { }
+ @Override
+ public ValidationStringency getValidationStringency() {
+ return validationStringency;
+ }
+ /** INDEXING */
+ /**
+ * Returns true if the supported index is browseable, meaning the bins in it can be traversed
+ * and chunk data inspected and retrieved.
+ *
+ * @return True if the index supports the BrowseableBAMIndex interface. False otherwise.
+ */
+ @Override
+ public boolean hasBrowseableIndex() {
+ return true;
+ }
+ /**
+ * Gets an index tagged with the BrowseableBAMIndex interface. Throws an exception if no such
+ * index is available.
+ *
+ * @return An index with a browseable interface, if possible.
+ * @throws SAMException if no such index is available.
+ */
+ @Override
+ public BrowseableBAMIndex getBrowseableIndex() {
+ return index;
+ }
+ /**
+ * Iterate through the given chunks in the file.
+ *
+ * @param chunks List of chunks for which to retrieve data.
+ * @return An iterator over the given chunks.
+ */
+ @Override
+ public SAMRecordIterator iterator(final SAMFileSpan chunks) {
+ CloseableIterator<SAMRecord> it = getIterator(chunks);
+ if (it == null) {
+ return null;
+ }
+ return (SAMRecordIterator) it;
+ }
+ /** ReaderImplementation */
+ @Override
+ void enableFileSource(final SamReader reader, final boolean enabled) {
+ log.info("enableFileSource is not supported");
+ }
+ @Override
+ void enableIndexCaching(final boolean enabled) {
+ log.info("enableIndexCaching is not supported");
+ }
+ @Override
+ void enableIndexMemoryMapping(final boolean enabled) {
+ log.info("enableIndexMemoryMapping is not supported");
+ }
+ @Override
+ void enableCrcChecking(final boolean enabled) {
+ log.info("enableCrcChecking is not supported");
+ }
+ @Override
+ void setSAMRecordFactory(final SAMRecordFactory factory) {
+ log.info("setSAMRecordFactory is not supported");
+ }
+ @Override
+ void setValidationStringency(final ValidationStringency validationStringency) {
+ this.validationStringency = validationStringency;
+ }
+ protected SRAIterator.RecordRangeInfo getRecordsRangeInfo() {
+ return recordRangeInfo;
+ }
+ private SAMFileHeader loadSamHeader() throws ErrorMsg {
+ if (run == null) {
+ throw new RuntimeException("Cannot load SAMFileHeader - SRA run is uninitialized");
+ }
+ String runName = run.getName();
+ SAMFileHeader header = new SAMFileHeader();
+ header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
+ ReadGroupIterator itRg = run.getReadGroups();
+ while (itRg.nextReadGroup()) {
+ String rgName = itRg.getName();
+ if (rgName.isEmpty())
+ rgName = runName;
+ SAMReadGroupRecord rg = new SAMReadGroupRecord(rgName);
+ rg.setSample(runName);
+ header.addReadGroup(rg);
+ }
+ ReferenceIterator itRef = run.getReferences();
+ while (itRef.nextReference()) {
+ header.addSequence(new SAMSequenceRecord(itRef.getCanonicalName(), (int) itRef.getLength()));
+ }
+ return header;
+ }
+ private BAMFileSpan getSpanContained(int sequenceIndex, long start, long end) {
+ if (recordRangeInfo.getTotalRecordRangeLength() <= 0) {
+ throw new RuntimeException("Cannot create file span - SRA file is empty");
+ }
+ long sequenceOffset = recordRangeInfo.getReferenceOffsets().get(sequenceIndex);
+ long sequenceLength = recordRangeInfo.getReferenceLengthsAligned().get(sequenceIndex);
+ if (end == -1) {
+ end = sequenceLength;
+ }
+ if (start > sequenceLength) {
+ throw new IllegalArgumentException("Sequence start position is larger than its length");
+ }
+ if (end > sequenceLength) {
+ throw new IllegalArgumentException("Sequence end position is larger than its length");
+ }
+ return new BAMFileSpan(new Chunk(sequenceOffset + start, sequenceOffset + end));
+ }
diff --git a/src/java/htsjdk/samtools/SRAIndex.java b/src/java/htsjdk/samtools/SRAIndex.java
new file mode 100644
index 0000000..b74ee63
--- /dev/null
+++ b/src/java/htsjdk/samtools/SRAIndex.java
@@ -0,0 +1,257 @@
+* National Center for Biotechnology Information
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+* Please cite the author in any work or product based on this material.
+* ===========================================================================
+package htsjdk.samtools;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+ * Emulates BAM index so that we can request chunks of records from SRAFileReader
+ *
+ * Here is how it works:
+ * SRA allows reading of alignments by Reference position fast, so we divide our "file" range for alignments as
+ * a length of all references. Reading unaligned reads is then fast if we use read positions for lookup and (internally)
+ * filter out aligned fragments.
+ *
+ * Total SRA "file" range is calculated as sum of all reference lengths plus number of reads (both aligned and unaligned)
+ * in SRA archive.
+ *
+ * Now, we can use Chunks to lookup for aligned and unaligned fragments.
+ *
+ * We emulate BAM index bins by mapping SRA reference positions to bin numbers.
+ * And then we map from bin number to list of chunks, which represent SRA "file" positions (which are simply reference
+ * positions).
+ *
+ * We only emulate last level of BAM index bins (and they refer to a portion of reference SRA_BIN_SIZE bases long).
+ * For all other bins RuntimeException will be returned (but since nobody else creates bins, except SRAIndex class
+ * that is fine).
+ *
+ * But since the last level of bins was not meant to refer to fragments that only partially overlap bin reference
+ * positions, we also return chunk that goes 5000 bases left before beginning of the bin to assure fragments that
+ * start before the bin positions but still overlap with it can be retrieved by SRA reader.
+ * Later we will add support to NGS API to get a maximum number of bases that we need to go left to retrieve such fragments.
+ *
+ * Created by andrii.nikitiuk on 9/4/15.
+ */
+public class SRAIndex implements BrowseableBAMIndex {
+ /**
+ * Number of reference bases bins in last level can represent
+ */
+ public static final int SRA_BIN_SIZE = 16 * 1024;
+ /**
+ * Chunks of that size will be created when using SRA index
+ */
+ public static final int SRA_CHUNK_SIZE = 50000;
+ /**
+ * First bin number in last level
+ */
+ private static final int SRA_BIN_INDEX_OFFSET = GenomicIndexUtil.LEVEL_STARTS[GenomicIndexUtil.LEVEL_STARTS.length - 1];
+ /**
+ * How many bases should we go left on the reference to find all fragments that start before requested interval
+ * but overlap with it
+ */
+ private static final int MAX_FRAGMENT_OVERLAP = 5000;
+ private SAMFileHeader header;
+ private SRAIterator.RecordRangeInfo recordRangeInfo;
+ /**
+ * @param header sam header
+ * @param recordRangeInfo info about record ranges withing SRA archive
+ */
+ public SRAIndex(SAMFileHeader header, SRAIterator.RecordRangeInfo recordRangeInfo) {
+ this.header = header;
+ this.recordRangeInfo = recordRangeInfo;
+ }
+ /**
+ * Gets the size (number of bins in) a given level of a BAM index.
+ * @param levelNumber Level for which to inspect the size.
+ * @return Size of the given level.
+ */
+ @Override
+ public int getLevelSize(int levelNumber) {
+ if (levelNumber == GenomicIndexUtil.LEVEL_STARTS.length - 1)
+ return GenomicIndexUtil.MAX_BINS - GenomicIndexUtil.LEVEL_STARTS[levelNumber]-1;
+ else
+ return GenomicIndexUtil.LEVEL_STARTS[levelNumber+1] - GenomicIndexUtil.LEVEL_STARTS[levelNumber];
+ }
+ /**
+ * SRA only operates on bins from last level
+ * @param bin The bin for which to determine the level.
+ * @return bin level
+ */
+ @Override
+ public int getLevelForBin(Bin bin) {
+ if (bin.getBinNumber() < SRA_BIN_INDEX_OFFSET) {
+ throw new RuntimeException("SRA only supports bins from the last level");
+ }
+ return GenomicIndexUtil.LEVEL_STARTS.length - 1;
+ }
+ /**
+ * Gets the first locus that this bin can index into.
+ * @param bin The bin to test.
+ * @return first position that associated with given bin number
+ */
+ @Override
+ public int getFirstLocusInBin(Bin bin) {
+ if (bin.getBinNumber() < SRA_BIN_INDEX_OFFSET) {
+ throw new RuntimeException("SRA only supports bins from the last level");
+ }
+ return (bin.getBinNumber() - SRA_BIN_INDEX_OFFSET) * SRA_BIN_SIZE + 1;
+ }
+ /**
+ * Gets the last locus that this bin can index into.
+ * @param bin The bin to test.
+ * @return last position that associated with given bin number
+ */
+ @Override
+ public int getLastLocusInBin(Bin bin) {
+ if (bin.getBinNumber() < SRA_BIN_INDEX_OFFSET) {
+ throw new RuntimeException("SRA only supports bins from the last level");
+ }
+ return (bin.getBinNumber() - SRA_BIN_INDEX_OFFSET + 1) * SRA_BIN_SIZE;
+ }
+ /**
+ * Provides a list of bins that contain bases at requested positions
+ * @param referenceIndex sequence of desired SAMRecords
+ * @param startPos 1-based start of the desired interval, inclusive
+ * @param endPos 1-based end of the desired interval, inclusive
+ * @return a list of bins that contain relevant data
+ */
+ @Override
+ public BinList getBinsOverlapping(int referenceIndex, int startPos, int endPos) {
+ long refLength = recordRangeInfo.getReferenceLengthsAligned().get(referenceIndex);
+ // convert to chunk address space within reference
+ long refStartPos = startPos - 1;
+ long refEndPos = endPos;
+ if (refEndPos >= refLength) {
+ throw new RuntimeException("refEndPos is larger than reference length");
+ }
+ int firstBinNumber = (int)refStartPos / SRA_BIN_SIZE;
+ int lastBinNumber = (int)(refEndPos - 1) / SRA_BIN_SIZE;
+ int numberOfBins = ((int)refLength / SRA_BIN_SIZE) + 1;
+ BitSet binBitSet = new BitSet();
+ binBitSet.set(0, SRA_BIN_INDEX_OFFSET, false);
+ if (firstBinNumber > 0) {
+ binBitSet.set(SRA_BIN_INDEX_OFFSET, SRA_BIN_INDEX_OFFSET + firstBinNumber, false);
+ }
+ binBitSet.set(SRA_BIN_INDEX_OFFSET + firstBinNumber, SRA_BIN_INDEX_OFFSET + lastBinNumber + 1, true);
+ if (lastBinNumber + 1 < numberOfBins) {
+ binBitSet.set(SRA_BIN_INDEX_OFFSET + lastBinNumber + 1, SRA_BIN_INDEX_OFFSET + numberOfBins, false);
+ }
+ return new BinList(referenceIndex, binBitSet);
+ }
+ @Override
+ public BAMFileSpan getSpanOverlapping(Bin bin) {
+ return new BAMFileSpan(getBinChunks(bin));
+ }
+ @Override
+ public BAMFileSpan getSpanOverlapping(int referenceIndex, int startPos, int endPos) {
+ BinList binList = getBinsOverlapping(referenceIndex, startPos, endPos);
+ BAMFileSpan result = new BAMFileSpan();
+ Set<Chunk> savedChunks = new HashSet<Chunk>();
+ for (Bin bin : binList) {
+ List<Chunk> chunks = getSpanOverlapping(bin).getChunks();
+ for (Chunk chunk : chunks) {
+ if (!savedChunks.contains(chunk)) {
+ savedChunks.add(chunk);
+ result.add(chunk);
+ }
+ }
+ }
+ return result;
+ }
+ /**
+ * @return a position where aligned fragments end
+ */
+ @Override
+ public long getStartOfLastLinearBin() {
+ int numberOfReferences = recordRangeInfo.getReferenceLengthsAligned().size();
+ long refOffset = recordRangeInfo.getReferenceOffsets().get(numberOfReferences - 1);
+ long lastChunkNumber = recordRangeInfo.getReferenceLengthsAligned().get(numberOfReferences - 1) / SRA_CHUNK_SIZE;
+ return lastChunkNumber * SRA_CHUNK_SIZE + refOffset;
+ }
+ @Override
+ public BAMIndexMetaData getMetaData(int reference) {
+ throw new UnsupportedOperationException("Getting of BAM index metadata for SRA is not implemented");
+ }
+ @Override
+ public void close() { }
+ /**
+ * @param bin Requested bin
+ * @return chunks that represent all bases of requested bin
+ */
+ private List<Chunk> getBinChunks(Bin bin) {
+ if (bin.containsChunks()) {
+ return bin.getChunkList();
+ }
+ if (bin.getBinNumber() < SRA_BIN_INDEX_OFFSET) {
+ throw new RuntimeException("SRA only supports bins from the last level");
+ }
+ int binNumber = bin.getBinNumber() - SRA_BIN_INDEX_OFFSET;
+ long refOffset = recordRangeInfo.getReferenceOffsets().get(bin.getReferenceSequence());
+ // move requested position MAX_FRAGMENT_OVERLAP bases behind, so that we take all the reads that overlap requested position
+ int firstChunkCorrection = binNumber == 0 ? 0 : -MAX_FRAGMENT_OVERLAP;
+ long binGlobalOffset = binNumber * SRA_BIN_SIZE + refOffset;
+ long firstChunkNumber = (binGlobalOffset + firstChunkCorrection) / SRA_CHUNK_SIZE;
+ long lastChunkNumber = (binGlobalOffset + SRA_BIN_SIZE - 1) / SRA_CHUNK_SIZE;
+ List<Chunk> chunks = new ArrayList<Chunk>();
+ for (long chunkNumber = firstChunkNumber; chunkNumber <= lastChunkNumber; chunkNumber++) {
+ chunks.add(new Chunk(chunkNumber * SRA_CHUNK_SIZE, (chunkNumber + 1) * SRA_CHUNK_SIZE));
+ }
+ return chunks;
+ }
diff --git a/src/java/htsjdk/samtools/SRAIterator.java b/src/java/htsjdk/samtools/SRAIterator.java
new file mode 100644
index 0000000..1347e1c
--- /dev/null
+++ b/src/java/htsjdk/samtools/SRAIterator.java
@@ -0,0 +1,248 @@
+* National Center for Biotechnology Information
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+* Please cite the author in any work or product based on this material.
+* ===========================================================================
+ * Created by andrii.nikitiuk on 8/11/15.
+ */
+package htsjdk.samtools;
+import htsjdk.samtools.SAMFileHeader.SortOrder;
+import htsjdk.samtools.sra.ReferenceCache;
+import htsjdk.samtools.sra.SRAAccession;
+import htsjdk.samtools.sra.SRAAlignmentIterator;
+import htsjdk.samtools.sra.SRAUnalignmentIterator;
+import htsjdk.samtools.sra.SRAUtils;
+import ngs.ErrorMsg;
+import ngs.ReadCollection;
+import ngs.Reference;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+ * SRA iterator which returns SAMRecords for requested list of chunks
+ */
+public class SRAIterator implements SAMRecordIterator {
+ private ValidationStringency validationStringency;
+ private SRAAccession accession;
+ private ReadCollection run;
+ private SAMFileHeader header;
+ private ReferenceCache cachedReferences;
+ private RecordRangeInfo recordRangeInfo;
+ private Iterator<Chunk> chunksIterator;
+ private Chunk currentChunk;
+ private SRAAlignmentIterator alignmentIterator;
+ private SRAUnalignmentIterator unalignmentIterator;
+ /**
+ * Describes record ranges info needed for emulating BAM index
+ */
+ public static class RecordRangeInfo {
+ private List<Long> referenceOffsets;
+ private List<Long> referenceLengthsAligned;
+ private long totalReferencesLength;
+ private long numberOfReads; // is used for unaligned read space
+ private long totalRecordRangeLength;
+ /**
+ * @param referenceLengthsAligned a list with lengths of each reference
+ * @param numberOfReads total number of reads within SRA archive
+ */
+ public RecordRangeInfo(List<Long> referenceLengthsAligned, long numberOfReads) {
+ this.numberOfReads = numberOfReads;
+ this.referenceLengthsAligned = referenceLengthsAligned;
+ referenceOffsets = new ArrayList<Long>();
+ totalReferencesLength = 0;
+ for (Long refLen : referenceLengthsAligned) {
+ referenceOffsets.add(totalReferencesLength);
+ totalReferencesLength += refLen;
+ }
+ totalRecordRangeLength = totalReferencesLength + this.numberOfReads;
+ }
+ public long getNumberOfReads() {
+ return numberOfReads;
+ }
+ public long getTotalReferencesLength() {
+ return totalReferencesLength;
+ }
+ public long getTotalRecordRangeLength() {
+ return totalRecordRangeLength;
+ }
+ public final List<Long> getReferenceOffsets() {
+ return Collections.unmodifiableList(referenceOffsets);
+ }
+ public final List<Long> getReferenceLengthsAligned() {
+ return Collections.unmodifiableList(referenceLengthsAligned);
+ }
+ }
+ /**
+ * Loads record ranges needed for emulating BAM index
+ * @param run read collection
+ * @return record ranges
+ */
+ public static RecordRangeInfo getRecordsRangeInfo(ReadCollection run) {
+ try {
+ return new RecordRangeInfo(SRAUtils.getReferencesLengthsAligned(run), SRAUtils.getNumberOfReads(run));
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ /**
+ * @param run opened read collection
+ * @param header sam header
+ * @param cachedReferences list of cached references shared among all iterators from a single SRAFileReader
+ * @param recordRangeInfo info about record ranges withing SRA archive
+ * @param chunks used to determine which records the iterator should return
+ */
+ public SRAIterator(SRAAccession accession, final ReadCollection run, final SAMFileHeader header, ReferenceCache cachedReferences,
+ final RecordRangeInfo recordRangeInfo, final List<Chunk> chunks) {
+ this.accession = accession;
+ this.run = run;
+ this.header = header;
+ this.cachedReferences = cachedReferences;
+ this.recordRangeInfo = recordRangeInfo;
+ chunksIterator = chunks.iterator();
+ if (chunksIterator.hasNext()) {
+ currentChunk = chunksIterator.next();
+ }
+ hasNext();
+ }
+ /**
+ * NGS iterators implement a single method "nextObject" which return true if the operation was successful or
+ * false if there are no more objects available.
+ * That means that there is no way to check "hasNext" without actually moving the iterator forward.
+ * Because of that all the logic of moving iterator forward is actually happens in "hasNext".
+ *
+ * Here is explanation of how it works:
+ * Iterator holds a list of chunks of requested records. Here we have chunksIterator that walks though that list.
+ * We walk though that list using chunksIterator. If current chunk can represent aligned fragments then we create
+ * SRAAlignmentIterator iterator, pass the chunk into it and ask if it can find any record. If record was found,
+ * we say that we have next; otherwise we check if the chunk can represent unaligned fragments and then create
+ * SRAUnalignmentIterator if so and do the same steps as with alignemnt iterator.
+ *
+ * If record was not found in both SRAAlignmentIterator and SRAUnalignmentIterator (it is possible that reference
+ * range has no alignments or that reads range has all aligned fragment), we try the next chunk.
+ *
+ * When there are no more chunks and both iterators have no more records we return false.
+ *
+ * @return true if there are more records available
+ */
+ @Override
+ public boolean hasNext() {
+ while (currentChunk != null) {
+ if (alignmentIterator == null) {
+ if (currentChunk.getChunkStart() < recordRangeInfo.getTotalReferencesLength()) {
+ alignmentIterator = new SRAAlignmentIterator(accession, run, header, cachedReferences, recordRangeInfo, currentChunk);
+ if (validationStringency != null) {
+ alignmentIterator.setValidationStringency(validationStringency);
+ }
+ }
+ }
+ if (alignmentIterator != null && alignmentIterator.hasNext()) {
+ return true;
+ }
+ if (unalignmentIterator == null) {
+ if (currentChunk.getChunkEnd() > recordRangeInfo.getTotalReferencesLength()) {
+ unalignmentIterator = new SRAUnalignmentIterator(accession, run, header, recordRangeInfo, currentChunk);
+ if (validationStringency != null) {
+ unalignmentIterator.setValidationStringency(validationStringency);
+ }
+ }
+ }
+ if (unalignmentIterator != null && unalignmentIterator.hasNext()) {
+ return true;
+ }
+ alignmentIterator = null;
+ unalignmentIterator = null;
+ if (chunksIterator.hasNext()) {
+ currentChunk = chunksIterator.next();
+ } else {
+ currentChunk = null;
+ }
+ }
+ return false;
+ }
+ /**
+ * Call hasNext to make sure that one of inner iterators points to the next record, the retrieve the record from
+ * one of them.
+ * @return lazy SRA record
+ */
+ @Override
+ public SAMRecord next() {
+ if (!hasNext()) {
+ throw new NoSuchElementException("No more records are available in SRAIterator");
+ }
+ if (alignmentIterator != null && alignmentIterator.hasNext()) {
+ return alignmentIterator.next();
+ }
+ return unalignmentIterator.next();
+ }
+ @Override
+ public void remove() { throw new UnsupportedOperationException("Removal of records not implemented."); }
+ @Override
+ public void close() { }
+ @Override
+ public SAMRecordIterator assertSorted(final SortOrder sortOrder) { throw new UnsupportedOperationException("assertSorted is not implemented."); }
+ public void setValidationStringency(ValidationStringency validationStringency) {
+ this.validationStringency = validationStringency;
+ if (alignmentIterator != null) {
+ alignmentIterator.setValidationStringency(validationStringency);
+ }
+ if (unalignmentIterator != null) {
+ unalignmentIterator.setValidationStringency(validationStringency);
+ }
+ }
diff --git a/src/java/htsjdk/samtools/SamFileValidator.java b/src/java/htsjdk/samtools/SamFileValidator.java
index 5e138d3..42d2580 100644
--- a/src/java/htsjdk/samtools/SamFileValidator.java
+++ b/src/java/htsjdk/samtools/SamFileValidator.java
@@ -274,11 +274,22 @@ public class SamFileValidator {
validateMateFields(record, recordNumber);
- validateSortOrder(record, recordNumber);
+ final boolean hasValidSortOrder = validateSortOrder(record, recordNumber);
validateReadGroup(record, header);
final boolean cigarIsValid = validateCigar(record, recordNumber);
if (cigarIsValid) {
- validateNmTag(record, recordNumber);
+ try {
+ validateNmTag(record, recordNumber);
+ }
+ catch (SAMException e) {
+ if (hasValidSortOrder) {
+ // If a CRAM file has an invalid sort order, the ReferenceFileWalker will throw a
+ // SAMException due to an out of order request when retrieving reference bases during NM
+ // tag validation; rethrow the exception only if the sort order is valid, otherwise
+ // swallow the exception and carry on validating
+ throw e;
+ }
+ }
validateSecondaryBaseCalls(record, recordNumber);
validateTags(record, recordNumber);
@@ -397,9 +408,10 @@ public class SamFileValidator {
- private void validateSortOrder(final SAMRecord record, final long recordNumber) {
+ private boolean validateSortOrder(final SAMRecord record, final long recordNumber) {
final SAMRecord prev = orderChecker.getPreviousRecord();
- if (!orderChecker.isSorted(record)) {
+ boolean isValidSortOrder = orderChecker.isSorted(record);
+ if (!isValidSortOrder) {
addError(new SAMValidationError(
@@ -411,6 +423,7 @@ public class SamFileValidator {
+ return isValidSortOrder;
private void init(final ReferenceSequenceFile reference, final SAMFileHeader header) {
diff --git a/src/java/htsjdk/samtools/SamFiles.java b/src/java/htsjdk/samtools/SamFiles.java
index 2160a5e..0112855 100644
--- a/src/java/htsjdk/samtools/SamFiles.java
+++ b/src/java/htsjdk/samtools/SamFiles.java
@@ -1,13 +1,17 @@
package htsjdk.samtools;
+import htsjdk.samtools.cram.CRAIIndex;
+import htsjdk.samtools.cram.build.CramIO;
import java.io.File;
* @author mccowan
public class SamFiles {
- * Finds the index file associated with the provided SAM file. The index file must exist and be reachable to be found.
+ * Finds the index file associated with the provided SAM file. The index file must exist and be reachable to be found.
* @return The index for the provided SAM, or null if one was not found.
@@ -21,14 +25,27 @@ public class SamFiles {
if (indexFile.isFile()) {
return indexFile;
+ } else if (fileName.endsWith(CramIO.CRAM_FILE_EXTENSION)) {
+ final String crai = fileName.substring(0, fileName.length() - CramIO.CRAM_FILE_EXTENSION.length()) + CRAIIndex.CRAI_INDEX_SUFFIX;
+ indexFile = new File(samFile.getParent(), crai);
+ if (indexFile.isFile()) {
+ return indexFile;
+ }
+ indexFile = new File(samFile.getParent(), samFile.getName() + CRAIIndex.CRAI_INDEX_SUFFIX);
+ if (indexFile.isFile()) {
+ return indexFile;
+ }
// If foo.bai doesn't exist look for foo.bam.bai
indexFile = new File(samFile.getParent(), samFile.getName() + BAMIndex.BAMIndexSuffix);
if (indexFile.isFile()) {
return indexFile;
- } else {
- return null;
+ return null;
diff --git a/src/java/htsjdk/samtools/SamIndexes.java b/src/java/htsjdk/samtools/SamIndexes.java
new file mode 100644
index 0000000..a888811
--- /dev/null
+++ b/src/java/htsjdk/samtools/SamIndexes.java
@@ -0,0 +1,94 @@
+package htsjdk.samtools;
+import htsjdk.samtools.cram.CRAIIndex;
+import htsjdk.samtools.seekablestream.SeekableBufferedStream;
+import htsjdk.samtools.seekablestream.SeekableStream;
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+ * A helper class to read BAI and CRAI indexes. Main goal is to provide BAI stream as a sort of common API for all index types.
+ * <p/>
+ * Created by vadim on 14/08/2015.
+ */
+public enum SamIndexes {
+ BAI(BAMIndex.BAMIndexSuffix, "BAI\1".getBytes()),
+ // CRAI is gzipped text, so it's magic is same as {@link java.util.zip.GZIPInputStream.GZIP_MAGIC}
+ CRAI(CRAIIndex.CRAI_INDEX_SUFFIX, new byte[]{(byte) 0x1f, (byte) 0x8b});
+ public final String fileNameSuffix;
+ public final byte[] magic;
+ SamIndexes(final String fileNameSuffix, final byte[] magic) {
+ this.fileNameSuffix = fileNameSuffix;
+ this.magic = magic;
+ }
+ public static InputStream openIndexFileAsBaiOrNull(final File file, final SAMSequenceDictionary dictionary) throws IOException {
+ return openIndexUrlAsBaiOrNull(file.toURI().toURL(), dictionary);
+ }
+ public static InputStream openIndexUrlAsBaiOrNull(final URL url, final SAMSequenceDictionary dictionary) throws IOException {
+ if (url.getFile().toLowerCase().endsWith(BAI.fileNameSuffix.toLowerCase())) {
+ return url.openStream();
+ }
+ if (url.getFile().toLowerCase().endsWith(CRAI.fileNameSuffix.toLowerCase())) {
+ return CRAIIndex.openCraiFileAsBaiStream(url.openStream(), dictionary);
+ }
+ return null;
+ }
+ public static InputStream asBaiStreamOrNull(final InputStream inputStream, final SAMSequenceDictionary dictionary) throws IOException {
+ final BufferedInputStream bis = new BufferedInputStream(inputStream);
+ bis.mark(BAI.magic.length);
+ if (doesStreamStartWith(bis, BAI.magic)) {
+ bis.reset();
+ return bis;
+ } else {
+ bis.reset();
+ }
+ bis.mark(CRAI.magic.length);
+ if (doesStreamStartWith(bis, CRAI.magic)) {
+ bis.reset();
+ return CRAIIndex.openCraiFileAsBaiStream(bis, dictionary);
+ } else {
+ bis.reset();
+ }
+ return null;
+ }
+ public static SeekableStream asBaiSeekableStreamOrNull(final SeekableStream inputStream, final SAMSequenceDictionary dictionary) throws IOException {
+ final SeekableBufferedStream bis = new SeekableBufferedStream(inputStream);
+ bis.seek(0);
+ if (doesStreamStartWith(bis, BAI.magic)) {
+ bis.seek(0);
+ return bis;
+ }
+ bis.seek(0);
+ if (doesStreamStartWith(bis, CRAI.magic)) {
+ bis.seek(0);
+ return CRAIIndex.openCraiFileAsBaiStream(bis, dictionary);
+ } else {
+ bis.reset();
+ }
+ return null;
+ }
+ private static boolean doesStreamStartWith(final InputStream is, final byte[] bytes) throws IOException {
+ for (final byte b : bytes) {
+ if (is.read() != (0xFF & b)) {
+ return false;
+ }
+ }
+ return true;
+ }
diff --git a/src/java/htsjdk/samtools/SamInputResource.java b/src/java/htsjdk/samtools/SamInputResource.java
index 03b1ee3..2692c6e 100644
--- a/src/java/htsjdk/samtools/SamInputResource.java
+++ b/src/java/htsjdk/samtools/SamInputResource.java
@@ -1,10 +1,9 @@
package htsjdk.samtools;
-import htsjdk.samtools.seekablestream.SeekableFTPStream;
import htsjdk.samtools.seekablestream.SeekableFileStream;
-import htsjdk.samtools.seekablestream.SeekableHTTPStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.seekablestream.SeekableStreamFactory;
+import htsjdk.samtools.sra.SRAAccession;
import htsjdk.samtools.util.Lazy;
import htsjdk.samtools.util.RuntimeIOException;
@@ -69,6 +68,8 @@ public class SamInputResource {
/** Creates a {@link SamInputResource} reading from the provided resource, with no index. */
public static SamInputResource of(final SeekableStream seekableStream) { return new SamInputResource(new SeekableStreamInputResource(seekableStream)); }
+ public static SamInputResource of(final SRAAccession acc) { return new SamInputResource(new SRAInputResource(acc)); }
/** Creates a {@link SamInputResource} from a string specifying *either* a url or a file path */
public static SamInputResource of(final String string) {
try {
@@ -115,7 +116,7 @@ abstract class InputResource {
protected InputResource(final Type type) {this.type = type;}
enum Type {
private final Type type;
@@ -136,6 +137,9 @@ abstract class InputResource {
/** All resource types support {@link java.io.InputStream} generation. */
abstract InputStream asUnbufferedInputStream();
+ /** SRA archive resource */
+ abstract SRAAccession asSRAAccession();
public String toString() {
final String childToString;
@@ -152,6 +156,9 @@ abstract class InputResource {
case URL:
childToString = asUrl().toString();
+ childToString = asSRAAccession().toString();
+ break;
throw new IllegalStateException();
@@ -198,6 +205,11 @@ class FileInputResource extends InputResource {
public InputStream asUnbufferedInputStream() {
return asUnbufferedSeekableStream();
+ @Override
+ public SRAAccession asSRAAccession() {
+ return null;
+ }
class UrlInputResource extends InputResource {
@@ -235,6 +247,11 @@ class UrlInputResource extends InputResource {
public InputStream asUnbufferedInputStream() {
return asUnbufferedSeekableStream();
+ @Override
+ public SRAAccession asSRAAccession() {
+ return null;
+ }
class SeekableStreamInputResource extends InputResource {
@@ -265,6 +282,11 @@ class SeekableStreamInputResource extends InputResource {
InputStream asUnbufferedInputStream() {
return asUnbufferedSeekableStream();
+ @Override
+ public SRAAccession asSRAAccession() {
+ return null;
+ }
class InputStreamInputResource extends InputResource {
@@ -295,4 +317,44 @@ class InputStreamInputResource extends InputResource {
InputStream asUnbufferedInputStream() {
return inputStreamResource;
+ @Override
+ public SRAAccession asSRAAccession() {
+ return null;
+ }
+class SRAInputResource extends InputResource {
+ final SRAAccession accession;
+ SRAInputResource(final SRAAccession accession) {
+ super(Type.SRA_ACCESSION);
+ this.accession = accession;
+ }
+ @Override
+ File asFile() {
+ return null;
+ }
+ @Override
+ URL asUrl() {
+ return null;
+ }
+ @Override
+ SeekableStream asUnbufferedSeekableStream() {
+ return null;
+ }
+ @Override
+ InputStream asUnbufferedInputStream() {
+ return null;
+ }
+ @Override
+ public SRAAccession asSRAAccession() {
+ return accession;
+ }
\ No newline at end of file
diff --git a/src/java/htsjdk/samtools/SamPairUtil.java b/src/java/htsjdk/samtools/SamPairUtil.java
index 5c3ea99..5daf6e6 100644
--- a/src/java/htsjdk/samtools/SamPairUtil.java
+++ b/src/java/htsjdk/samtools/SamPairUtil.java
@@ -25,9 +25,7 @@
package htsjdk.samtools;
import htsjdk.samtools.util.PeekableIterator;
-import htsjdk.samtools.util.ProgressLogger;
-import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@@ -52,9 +50,9 @@ public class SamPairUtil {
FR, // ( 5' --F--> <--R-- 5' ) - aka. innie
RF, // ( <--R-- 5' 5' --F--> ) - aka. outie
- TANDEM; // ( 5' --F--> 5' --F--> or ( <--R-- 5' <--R-- 5' )
+ TANDEM // ( 5' --F--> 5' --F--> or ( <--R-- 5' <--R-- 5' )
- };
+ }
@@ -64,7 +62,7 @@ public class SamPairUtil {
* @throws IllegalArgumentException If the record is not a paired read, or
* one or both reads are unmapped.
- public static PairOrientation getPairOrientation(SAMRecord r)
+ public static PairOrientation getPairOrientation(final SAMRecord r)
final boolean readIsOnReverseStrand = r.getReadNegativeStrandFlag();
@@ -185,8 +183,8 @@ public class SamPairUtil {
* Write the mate info for two SAMRecords
- * @param rec1 the first SAM record
- * @param rec2 the second SAM record
+ * @param rec1 the first SAM record. Must have a non-null SAMFileHeader.
+ * @param rec2 the second SAM record. Must have a non-null SAMFileHeader.
* @param setMateCigar true if we are to update/create the Mate CIGAR (MC) optional tag, false if we are to clear any mate cigar tag that is present.
public static void setMateInfo(final SAMRecord rec1, final SAMRecord rec2, final boolean setMateCigar) {
@@ -361,6 +359,7 @@ public class SamPairUtil {
private final Queue<SAMRecord> records = new LinkedList<SAMRecord>();
private final boolean setMateCigar;
+ private final boolean ignoreMissingMates;
private long numMateCigarsAdded = 0;
@@ -376,8 +375,18 @@ public class SamPairUtil {
* @param setMateCigar true if we are to update/create the Mate CIGAR (MC) optional tag, false if we are to clear any mate cigar tag that is present.
public SetMateInfoIterator(final Iterator<SAMRecord> iterator, final boolean setMateCigar) {
+ this(iterator, setMateCigar, false);
+ }
+ /**
+ * @param iterator the iterator to wrap
+ * @param setMateCigar true if we are to update/create the Mate CIGAR (MC) optional tag, false if we are to clear any mate cigar tag that is present.
+ * @param ignoreMissingMates set this to true if we are to ignore missing mates, otherwise an exception will be thrown when a missing mate is encountered
+ */
+ public SetMateInfoIterator(final Iterator<SAMRecord> iterator, final boolean setMateCigar, final boolean ignoreMissingMates) {
this.setMateCigar = setMateCigar;
+ this.ignoreMissingMates = ignoreMissingMates;
@@ -414,8 +423,7 @@ public class SamPairUtil {
throw new SAMException("Found two records that are paired, not supplementary, and first of the pair");
firstPrimaryRecord = record;
- }
- else if (record.getSecondOfPairFlag()) {
+ } else if (record.getSecondOfPairFlag()) {
if (null != secondPrimaryRecord) {
throw new SAMException("Found two records that are paired, not supplementary, and second of the pair");
@@ -426,27 +434,34 @@ public class SamPairUtil {
+ // TODO: should we check that we do not have a mix of paired and fragment reads?
// we must find both records to update the mate info
if (null != firstPrimaryRecord && null != secondPrimaryRecord) {
// Update mate info
SamPairUtil.setMateInfo(firstPrimaryRecord, secondPrimaryRecord, this.setMateCigar);
if (this.setMateCigar) this.numMateCigarsAdded += 2;
- }
- // Set mate information on supplemental records
- if (containsSupplementalRecord) {
- for (final SAMRecord record : records) {
- if (record.getReadPairedFlag() && record.getSupplementaryAlignmentFlag()) {
- if (record.getFirstOfPairFlag()) {
- SamPairUtil.setMateInformationOnSupplementalAlignment(record, secondPrimaryRecord, this.setMateCigar);
- }
- else {
- SamPairUtil.setMateInformationOnSupplementalAlignment(record, firstPrimaryRecord, this.setMateCigar);
+ // Set mate information on supplemental records
+ if (containsSupplementalRecord) {
+ for (final SAMRecord record : records) {
+ if (record.getReadPairedFlag() && record.getSupplementaryAlignmentFlag()) {
+ if (record.getFirstOfPairFlag()) {
+ SamPairUtil.setMateInformationOnSupplementalAlignment(record, secondPrimaryRecord, this.setMateCigar);
+ } else {
+ SamPairUtil.setMateInformationOnSupplementalAlignment(record, firstPrimaryRecord, this.setMateCigar);
+ }
+ this.numMateCigarsAdded++;
- this.numMateCigarsAdded++;
+ } else if (!this.ignoreMissingMates) {
+ if (null != firstPrimaryRecord && firstPrimaryRecord.getReadPairedFlag()) {
+ throw new SAMException("Missing second read of pair: " + firstPrimaryRecord.getReadName());
+ } else if (null != secondPrimaryRecord && secondPrimaryRecord.getReadPairedFlag()) {
+ throw new SAMException("Missing first read of pair: " + secondPrimaryRecord.getReadName());
+ }
diff --git a/src/java/htsjdk/samtools/SamReader.java b/src/java/htsjdk/samtools/SamReader.java
index 9ded4a3..9493593 100644
--- a/src/java/htsjdk/samtools/SamReader.java
+++ b/src/java/htsjdk/samtools/SamReader.java
@@ -57,6 +57,7 @@ public interface SamReader extends Iterable<SAMRecord>, Closeable {
+ public static Type SRA_TYPE = new TypeImpl("SRA", "sra", null);
public static Type CRAM_TYPE = new TypeImpl("CRAM", "cram", "crai");
public static Type BAM_TYPE = new TypeImpl("BAM", "bam", "bai");
public static Type SAM_TYPE = new TypeImpl("SAM", "sam", null);
diff --git a/src/java/htsjdk/samtools/SamReaderFactory.java b/src/java/htsjdk/samtools/SamReaderFactory.java
index 4ab92af..5403379 100644
--- a/src/java/htsjdk/samtools/SamReaderFactory.java
+++ b/src/java/htsjdk/samtools/SamReaderFactory.java
@@ -9,6 +9,7 @@ import java.util.zip.GZIPInputStream;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.seekablestream.SeekableStream;
+import htsjdk.samtools.sra.SRAAccession;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.BlockCompressedStreamConstants;
import htsjdk.samtools.util.CloserUtil;
@@ -241,6 +242,8 @@ public abstract class SamReaderFactory {
} else {
throw new SAMFormatException("Unrecognized file format: " + data.asUnbufferedSeekableStream());
+ } else if (type == InputResource.Type.SRA_ACCESSION) {
+ primitiveSamReader = new SRAFileReader(data.asSRAAccession());
} else {
InputStream bufferedStream =
@@ -262,19 +265,18 @@ public abstract class SamReaderFactory {
} else if (SamStreams.isGzippedSAMFile(bufferedStream)) {
primitiveSamReader = new SAMTextReader(new GZIPInputStream(bufferedStream), validationStringency, this.samRecordFactory);
} else if (SamStreams.isCRAMFile(bufferedStream)) {
+ if (referenceSource == null && Defaults.REFERENCE_FASTA != null) referenceSource = new ReferenceSource(Defaults.REFERENCE_FASTA);
if (sourceFile == null || !sourceFile.isFile()) {
- sourceFile = null;
+ primitiveSamReader = new CRAMFileReader(bufferedStream, indexFile, referenceSource, validationStringency);
} else {
- bufferedStream = null;
+ primitiveSamReader = new CRAMFileReader(sourceFile, indexFile, referenceSource, validationStringency);
- // Always attempt to pass in the index. If it is null, that's fine. If the reference isn't supplied, use the default.
- if (referenceSource != null) {
- primitiveSamReader = new CRAMFileReader(sourceFile, indexMaybe == null ? null : indexMaybe.asFile(), referenceSource);
- } else {
- primitiveSamReader = new CRAMFileReader(sourceFile, indexMaybe == null ? null : indexMaybe.asFile(), new ReferenceSource(Defaults.REFERENCE_FASTA));
+ } else if (sourceFile != null && SRAAccession.isValid(sourceFile.getPath())) {
+ if (bufferedStream != null) {
+ bufferedStream.close();
+ primitiveSamReader = new SRAFileReader(new SRAAccession(sourceFile.getPath()));
} else {
if (indexDefined) {
@@ -325,6 +327,11 @@ public abstract class SamReaderFactory {
void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) {
underlyingReader.enableFileSource(reader, true);
+ @Override
+ void applyTo(final SRAFileReader underlyingReader, final SamReader reader) {
+ underlyingReader.enableFileSource(reader, true);
+ }
@@ -349,6 +356,11 @@ public abstract class SamReaderFactory {
void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) {
+ @Override
+ void applyTo(final SRAFileReader underlyingReader, final SamReader reader) {
+ underlyingReader.enableIndexCaching(true);
+ }
@@ -373,6 +385,11 @@ public abstract class SamReaderFactory {
void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) {
+ @Override
+ void applyTo(final SRAFileReader underlyingReader, final SamReader reader) {
+ underlyingReader.enableIndexMemoryMapping(false);
+ }
@@ -394,6 +411,11 @@ public abstract class SamReaderFactory {
void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) {
logDebugIgnoringOption(reader, this);
+ @Override
+ void applyTo(final SRAFileReader underlyingReader, final SamReader reader) {
+ logDebugIgnoringOption(reader, this);
+ }
@@ -416,6 +438,11 @@ public abstract class SamReaderFactory {
logDebugIgnoringOption(reader, this);
+ @Override
+ void applyTo(final SRAFileReader underlyingReader, final SamReader reader) {
+ logDebugIgnoringOption(reader, this);
+ }
public static EnumSet<Option> DEFAULTS = EnumSet.noneOf(Option.class);
@@ -429,6 +456,8 @@ public abstract class SamReaderFactory {
applyTo((SAMTextReader) underlyingReader, reader);
} else if (underlyingReader instanceof CRAMFileReader) {
applyTo((CRAMFileReader) underlyingReader, reader);
+ } else if (underlyingReader instanceof SRAFileReader) {
+ applyTo((SRAFileReader) underlyingReader, reader);
} else {
throw new IllegalArgumentException(String.format("Unrecognized reader type: %s.", underlyingReader.getClass()));
@@ -446,5 +475,7 @@ public abstract class SamReaderFactory {
abstract void applyTo(final SAMTextReader underlyingReader, final SamReader reader);
abstract void applyTo(final CRAMFileReader underlyingReader, final SamReader reader);
+ abstract void applyTo(final SRAFileReader underlyingReader, final SamReader reader);
diff --git a/src/java/htsjdk/samtools/SamStreams.java b/src/java/htsjdk/samtools/SamStreams.java
index cea099d..173eb7c 100644
--- a/src/java/htsjdk/samtools/SamStreams.java
+++ b/src/java/htsjdk/samtools/SamStreams.java
@@ -39,6 +39,7 @@ public class SamStreams {
return Arrays.equals(buffer, CramHeader.MAGIC);
* @param stream stream.markSupported() must be true
* @return true if this looks like a BAM file.
diff --git a/src/java/htsjdk/samtools/TextTagCodec.java b/src/java/htsjdk/samtools/TextTagCodec.java
index 109555d..ceec61a 100644
--- a/src/java/htsjdk/samtools/TextTagCodec.java
+++ b/src/java/htsjdk/samtools/TextTagCodec.java
@@ -23,6 +23,7 @@
package htsjdk.samtools;
+import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.DateParser;
import htsjdk.samtools.util.Iso8601Date;
import htsjdk.samtools.util.StringUtil;
@@ -72,8 +73,9 @@ public class TextTagCodec {
value = getArrayType(value, false) + "," + encodeArrayValue(value);
} else if (tagType == 'i') {
final long longVal = ((Number) value).longValue();
- if (longVal > Integer.MAX_VALUE || longVal < Integer.MIN_VALUE) {
- throw new SAMFormatException("Value for tag " + tagName + " cannot be stored in an Integer: " + longVal);
+ // as the spec says: [-2^31, 2^32)
+ if (longVal < Integer.MIN_VALUE || longVal > BinaryCodec.MAX_UINT) {
+ throw new IllegalArgumentException("Value for tag " + tagName + " cannot be stored in either a signed or unsigned 32-bit integer: " + longVal);
@@ -182,11 +184,22 @@ public class TextTagCodec {
return stringVal.charAt(0);
} else if (type.equals("i")) {
+ final long lValue;
try {
- return new Integer(stringVal);
+ lValue = Long.valueOf(stringVal);
} catch (NumberFormatException e) {
throw new SAMFormatException("Tag of type i should have signed decimal value");
+ if (lValue >= Integer.MIN_VALUE && lValue <= Integer.MAX_VALUE) {
+ return (int) lValue;
+ }
+ else if (SAMUtils.isValidUnsignedIntegerAttribute(lValue)) {
+ return lValue;
+ }
+ else {
+ throw new SAMFormatException("Integer is out of range for both a 32-bit signed and unsigned integer: " + stringVal);
+ }
} else if (type.equals("f")) {
try {
return new Float(stringVal);
diff --git a/src/java/htsjdk/samtools/cram/CRAIEntry.java b/src/java/htsjdk/samtools/cram/CRAIEntry.java
new file mode 100644
index 0000000..0c7da6e
--- /dev/null
+++ b/src/java/htsjdk/samtools/cram/CRAIEntry.java
@@ -0,0 +1,148 @@
+package htsjdk.samtools.cram;
+import htsjdk.samtools.cram.structure.Container;
+import htsjdk.samtools.cram.structure.Slice;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+ * A class representing CRAI index entry: file and alignment offsets for each slice.
+ * Created by vadim on 10/08/2015.
+ */
+public class CRAIEntry implements Comparable<CRAIEntry>, Cloneable {
+ public int sequenceId;
+ public int alignmentStart;
+ public int alignmentSpan;
+ public long containerStartOffset;
+ public int sliceOffset;
+ public int sliceSize;
+ public int sliceIndex;
+ public CRAIEntry() {
+ }
+ public static List<CRAIEntry> fromContainer(final Container container) {
+ final List<CRAIEntry> entries = new ArrayList<CRAIEntry>(container.slices.length);
+ for (int i = 0; i < container.slices.length; i++) {
+ final Slice s = container.slices[i];
+ final CRAIEntry e = new CRAIEntry();
+ e.sequenceId = s.sequenceId;
+ e.alignmentStart = s.alignmentStart;
+ e.alignmentSpan = s.alignmentSpan;
+ e.containerStartOffset = s.containerOffset;
+ e.sliceOffset = container.landmarks[i];
+ e.sliceSize = s.size;
+ e.sliceIndex = i;
+ entries.add(e);
+ }
+ return entries;
+ }
+ public static CRAIEntry fromCraiLine(final String line) {
+ return new CRAIEntry(line);
+ }
+ public CRAIEntry(final String line) throws CRAIIndex.CRAIIndexException {
+ final String[] chunks = line.split("\t");
+ if (chunks.length != 6) {
+ throw new CRAIIndex.CRAIIndexException("Expecting 6 columns but got " + chunks.length);
+ }
+ try {
+ sequenceId = Integer.parseInt(chunks[0]);
+ alignmentStart = Integer.parseInt(chunks[1]);
+ alignmentSpan = Integer.parseInt(chunks[2]);
+ containerStartOffset = Long.parseLong(chunks[3]);
+ sliceOffset = Integer.parseInt(chunks[4]);
+ sliceSize = Integer.parseInt(chunks[5]);
+ } catch (final NumberFormatException e) {
+ throw new CRAIIndex.CRAIIndexException(e);
+ }
+ }
+ @Override
+ public String toString() {
+ return String.format("%d\t%d\t%d\t%d\t%d\t%d", sequenceId, alignmentStart, alignmentSpan,
+ containerStartOffset, sliceOffset, sliceSize);
+ }
+ @Override
+ public int compareTo(final CRAIEntry o) {
+ if (o == null) {
+ return 1;
+ }
+ if (sequenceId != o.sequenceId) {
+ return sequenceId - o.sequenceId;
+ }
+ if (alignmentStart != o.alignmentStart) {
+ return alignmentStart - o.alignmentStart;
+ }
+ return (int) (containerStartOffset - o.containerStartOffset);
+ }
+ @Override
+ public CRAIEntry clone() throws CloneNotSupportedException {
+ super.clone();
+ final CRAIEntry entry = new CRAIEntry();
+ entry.sequenceId = sequenceId;
+ entry.alignmentStart = alignmentStart;
+ entry.alignmentSpan = alignmentSpan;
+ entry.containerStartOffset = containerStartOffset;
+ entry.sliceOffset = sliceOffset;
+ entry.sliceSize = sliceSize;
+ return entry;
+ }
+ public static Comparator<CRAIEntry> byEnd = new Comparator<CRAIEntry>() {
+ @Override
+ public int compare(final CRAIEntry o1, final CRAIEntry o2) {
+ if (o1.sequenceId != o2.sequenceId) {
+ return o2.sequenceId - o1.sequenceId;
+ }
+ if (o1.alignmentStart + o1.alignmentSpan != o2.alignmentStart + o2.alignmentSpan) {
+ return o1.alignmentStart + o1.alignmentSpan - o2.alignmentStart - o2.alignmentSpan;
+ }
+ return (int) (o1.containerStartOffset - o2.containerStartOffset);
+ }
+ };
+ public static final Comparator<CRAIEntry> byStart = new Comparator<CRAIEntry>() {
+ @Override
+ public int compare(final CRAIEntry o1, final CRAIEntry o2) {
+ if (o1.sequenceId != o2.sequenceId) {
+ return o2.sequenceId - o1.sequenceId;
+ }
+ if (o1.alignmentStart != o2.alignmentStart) {
+ return o1.alignmentStart - o2.alignmentStart;
+ }
+ return (int) (o1.containerStartOffset - o2.containerStartOffset);
+ }
+ };
+ public static boolean intersect(final CRAIEntry e0, final CRAIEntry e1) {
+ if (e0.sequenceId != e1.sequenceId) {
+ return false;
+ }
+ if (e0.sequenceId < 0) {
+ return false;
+ }
+ final int a0 = e0.alignmentStart;
+ final int a1 = e1.alignmentStart;
+ final int b0 = a0 + e0.alignmentSpan;
+ final int b1 = a1 + e1.alignmentSpan;
+ return Math.abs(a0 + b0 - a1 - b1) < (e0.alignmentSpan + e1.alignmentSpan);
+ }
diff --git a/src/java/htsjdk/samtools/cram/CRAIIndex.java b/src/java/htsjdk/samtools/cram/CRAIIndex.java
new file mode 100644
index 0000000..0a3f567
--- /dev/null
+++ b/src/java/htsjdk/samtools/cram/CRAIIndex.java
@@ -0,0 +1,164 @@
+package htsjdk.samtools.cram;
+import htsjdk.samtools.CRAMIndexer;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.cram.structure.Slice;
+import htsjdk.samtools.seekablestream.SeekableMemoryStream;
+import htsjdk.samtools.seekablestream.SeekableStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Scanner;
+import java.util.zip.GZIPInputStream;
+ * A collection of static methods to read, write and convert CRAI index.
+ */
+public class CRAIIndex {
+ public static final String CRAI_INDEX_SUFFIX = ".crai";
+ public static void writeIndex(final OutputStream os, final List<CRAIEntry> index) throws IOException {
+ for (final CRAIEntry e : index) {
+ os.write(e.toString().getBytes());
+ os.write('\n');
+ }
+ }
+ public static List<CRAIEntry> readIndex(final InputStream is) throws CRAIIndexException {
+ final List<CRAIEntry> list = new LinkedList<CRAIEntry>();
+ final Scanner scanner = new Scanner(is);
+ try {
+ while (scanner.hasNextLine()) {
+ final String line = scanner.nextLine();
+ final CRAIEntry entry = CRAIEntry.fromCraiLine(line);
+ list.add(entry);
+ }
+ } finally {
+ scanner.close();
+ }
+ return list;
+ }
+ public static List<CRAIEntry> find(final List<CRAIEntry> list, final int seqId, final int start, final int span) {
+ final boolean whole = start < 1 || span < 1;
+ final CRAIEntry query = new CRAIEntry();
+ query.sequenceId = seqId;
+ query.alignmentStart = start < 1 ? 1 : start;
+ query.alignmentSpan = span < 1 ? Integer.MAX_VALUE : span;
+ query.containerStartOffset = Long.MAX_VALUE;
+ query.sliceOffset = Integer.MAX_VALUE;
+ query.sliceSize = Integer.MAX_VALUE;
+ final List<CRAIEntry> l = new ArrayList<CRAIEntry>();
+ for (final CRAIEntry e : list) {
+ if (e.sequenceId != seqId) {
+ continue;
+ }
+ if (whole || CRAIEntry.intersect(e, query)) {
+ l.add(e);
+ }
+ }
+ Collections.sort(l, CRAIEntry.byStart);
+ return l;
+ }
+ public static CRAIEntry getLeftmost(final List<CRAIEntry> list) {
+ if (list == null || list.isEmpty()) {
+ return null;
+ }
+ CRAIEntry left = list.get(0);
+ for (final CRAIEntry e : list) {
+ if (e.alignmentStart < left.alignmentStart) {
+ left = e;
+ }
+ }
+ return left;
+ }
+ /**
+ * Find index of the last aligned entry in the list. Assumes the index is sorted by coordinate and unmapped entries (with sequence id = -1) follow the mapped entries.
+ *
+ * @param list a list of CRAI entries
+ * @return integer index of the last entry with sequence id not equal to -1
+ */
+ public static int findLastAlignedEntry(final List<CRAIEntry> list) {
+ if (list.isEmpty()) {
+ return -1;
+ }
+ int low = 0;
+ int high = list.size() - 1;
+ while (low <= high) {
+ final int mid = (low + high) >>> 1;
+ final CRAIEntry midVal = list.get(mid);
+ if (midVal.sequenceId >= 0) {
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ }
+ }
+ if (low >= list.size()) {
+ return list.size() - 1;
+ }
+ for (; low >= 0 && list.get(low).sequenceId == -1; low--) {
+ }
+ return low;
+ }
+ public static SeekableStream openCraiFileAsBaiStream(final File cramIndexFile, final SAMSequenceDictionary dictionary) throws IOException {
+ return openCraiFileAsBaiStream(new FileInputStream(cramIndexFile), dictionary);
+ }
+ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStream, final SAMSequenceDictionary dictionary) throws IOException, CRAIIndexException {
+ final List<CRAIEntry> full = CRAIIndex.readIndex(new GZIPInputStream(indexStream));
+ Collections.sort(full);
+ final SAMFileHeader header = new SAMFileHeader();
+ header.setSequenceDictionary(dictionary);
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ final CRAMIndexer indexer = new CRAMIndexer(baos, header);
+ for (final CRAIEntry entry : full) {
+ final Slice slice = new Slice();
+ slice.containerOffset = entry.containerStartOffset;
+ slice.alignmentStart = entry.alignmentStart;
+ slice.alignmentSpan = entry.alignmentSpan;
+ slice.sequenceId = entry.sequenceId;
+ slice.nofRecords = entry.sliceSize;
+ slice.index = entry.sliceIndex;
+ slice.offset = entry.sliceOffset;
+ indexer.processAlignment(slice);
+ }
+ indexer.finish();
+ return new SeekableMemoryStream(baos.toByteArray(), null);
+ }
+ public static class CRAIIndexException extends RuntimeException {
+ public CRAIIndexException(final String s) {
+ super(s);
+ }
+ public CRAIIndexException(final NumberFormatException e) {
+ super(e);
+ }
+ }
diff --git a/src/java/htsjdk/samtools/cram/CRAMException.java b/src/java/htsjdk/samtools/cram/CRAMException.java
new file mode 100644
index 0000000..7d5fb72
--- /dev/null
+++ b/src/java/htsjdk/samtools/cram/CRAMException.java
@@ -0,0 +1,22 @@
+package htsjdk.samtools.cram;
+import htsjdk.samtools.SAMException;
+ * Created by edwardk on 8/13/15.
+ */
+public class CRAMException extends SAMException {
+ public CRAMException() {}
+ public CRAMException(final String s) {
+ super(s);
+ }
+ public CRAMException(final String s, final Throwable throwable) {
+ super(s, throwable);
+ }
+ public CRAMException(final Throwable throwable) {
+ super(throwable);
+ }
\ No newline at end of file
diff --git a/src/java/htsjdk/samtools/cram/build/ContainerParser.java b/src/java/htsjdk/samtools/cram/build/ContainerParser.java
index 5cef35e..002502e 100644
--- a/src/java/htsjdk/samtools/cram/build/ContainerParser.java
+++ b/src/java/htsjdk/samtools/cram/build/ContainerParser.java
@@ -20,6 +20,7 @@ package htsjdk.samtools.cram.build;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.cram.encoding.reader.CramRecordReader;
import htsjdk.samtools.cram.encoding.reader.DataReaderFactory;
import htsjdk.samtools.cram.encoding.reader.DataReaderFactory.DataReaderWithStats;
@@ -50,14 +51,14 @@ public class ContainerParser {
public List<CramCompressionRecord> getRecords(final Container container,
- ArrayList<CramCompressionRecord> records) throws IllegalArgumentException,
+ ArrayList<CramCompressionRecord> records, ValidationStringency validationStringency) throws IllegalArgumentException,
IllegalAccessException {
final long time1 = System.nanoTime();
if (records == null)
records = new ArrayList<CramCompressionRecord>(container.nofRecords);
for (final Slice slice : container.slices)
- records.addAll(getRecords(slice, container.header));
+ records.addAll(getRecords(slice, container.header, validationStringency));
final long time2 = System.nanoTime();
@@ -73,7 +74,7 @@ public class ContainerParser {
ArrayList<CramCompressionRecord> getRecords(ArrayList<CramCompressionRecord> records,
- final Slice slice, final CompressionHeader header) throws IllegalArgumentException,
+ final Slice slice, final CompressionHeader header, ValidationStringency validationStringency) throws IllegalArgumentException,
IllegalAccessException {
switch (slice.sequenceId) {
@@ -97,7 +98,7 @@ public class ContainerParser {
long time;
- final CramRecordReader reader = new CramRecordReader();
+ final CramRecordReader reader = new CramRecordReader(validationStringency);
dataReaderFactory.buildReader(reader, new DefaultBitInputStream(
new ByteArrayInputStream(slice.coreBlock.getRawContent())),
inputMap, header, slice.sequenceId);
@@ -150,8 +151,8 @@ public class ContainerParser {
return records;
- List<CramCompressionRecord> getRecords(final Slice slice, final CompressionHeader header)
+ List<CramCompressionRecord> getRecords(final Slice slice, final CompressionHeader header, ValidationStringency validationStringency)
throws IllegalArgumentException, IllegalAccessException {
- return getRecords(null, slice, header);
+ return getRecords(null, slice, header, validationStringency);
diff --git a/src/java/htsjdk/samtools/cram/build/CramIO.java b/src/java/htsjdk/samtools/cram/build/CramIO.java
index 6667ce5..4a08016 100644
--- a/src/java/htsjdk/samtools/cram/build/CramIO.java
+++ b/src/java/htsjdk/samtools/cram/build/CramIO.java
@@ -52,6 +52,7 @@ import java.util.Arrays;
* A collection of methods to open and close CRAM files.
public class CramIO {
+ public static final String CRAM_FILE_EXTENSION = ".cram";
* The 'zero-B' EOF marker as per CRAM specs v2.1. This is basically a serialized empty CRAM container with sequence id set to some
* number to spell out 'EOF' in hex.
@@ -101,13 +102,14 @@ public class CramIO {
private static boolean streamEndsWith(final SeekableStream seekableStream, final byte[] marker) throws IOException {
- final byte[] tail = new byte[ZERO_B_EOF_MARKER.length];
+ final byte[] tail = new byte[marker.length];
seekableStream.seek(seekableStream.length() - marker.length);
InputStreamUtils.readFully(seekableStream, tail, 0, tail.length);
+ if (Arrays.equals(tail, marker)) return true ;
// relaxing the ITF8 hanging bits:
- tail[8] |= 0xf0;
+ tail[8] = marker[8];
return Arrays.equals(tail, marker);
@@ -122,8 +124,8 @@ public class CramIO {
private static boolean checkEOF(final Version version, final SeekableStream seekableStream) throws IOException {
- if (version.compatibleWith(CramVersions.CRAM_v3)) return streamEndsWith(seekableStream, ZERO_B_EOF_MARKER);
- if (version.compatibleWith(CramVersions.CRAM_v2_1)) return streamEndsWith(seekableStream, ZERO_F_EOF_MARKER);
+ if (version.compatibleWith(CramVersions.CRAM_v3)) return streamEndsWith(seekableStream, ZERO_F_EOF_MARKER);
+ if (version.compatibleWith(CramVersions.CRAM_v2_1)) return streamEndsWith(seekableStream, ZERO_B_EOF_MARKER);
return false;
diff --git a/src/java/htsjdk/samtools/cram/build/Sam2CramRecordFactory.java b/src/java/htsjdk/samtools/cram/build/Sam2CramRecordFactory.java
index 8a0b93d..f840a5f 100644
--- a/src/java/htsjdk/samtools/cram/build/Sam2CramRecordFactory.java
+++ b/src/java/htsjdk/samtools/cram/build/Sam2CramRecordFactory.java
@@ -62,6 +62,8 @@ public class Sam2CramRecordFactory {
private final Version version;
private byte[] refSNPs;
+ final private SAMFileHeader header;
private static final Log log = Log.getInstance(Sam2CramRecordFactory.class);
private final Map<String, Integer> readGroupMap = new HashMap<String, Integer>();
@@ -88,6 +90,7 @@ public class Sam2CramRecordFactory {
public Sam2CramRecordFactory(final byte[] refBases, final SAMFileHeader samFileHeader, final Version version) {
this.refBases = refBases;
this.version = version;
+ this.header = samFileHeader;
final List<SAMReadGroupRecord> readGroups = samFileHeader.getReadGroups();
for (int i = 0; i < readGroups.size(); i++) {
@@ -96,7 +99,17 @@ public class Sam2CramRecordFactory {
+ /**
+ * Create a CramCompressionRecord.
+ *
+ * @param record If the input record does not have an associated SAMFileHeader, it will be updated
+ * with the header used for the factory in order to allow reference indices to be resolved.
+ * @return CramCompressionRecord
+ */
public CramCompressionRecord createCramRecord(final SAMRecord record) {
+ if (null == record.getHeader()) {
+ record.setHeader(header);
+ }
final CramCompressionRecord cramRecord = new CramCompressionRecord();
if (record.getReadPairedFlag()) {
cramRecord.mateAlignmentStart = record.getMateAlignmentStart();
diff --git a/src/java/htsjdk/samtools/cram/encoding/reader/CramRecordReader.java b/src/java/htsjdk/samtools/cram/encoding/reader/CramRecordReader.java
index 01b8df2..7cbd98d 100644
--- a/src/java/htsjdk/samtools/cram/encoding/reader/CramRecordReader.java
+++ b/src/java/htsjdk/samtools/cram/encoding/reader/CramRecordReader.java
@@ -17,7 +17,9 @@
package htsjdk.samtools.cram.encoding.reader;
+import htsjdk.samtools.SAMFormatException;
import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.cram.encoding.readfeatures.BaseQualityScore;
import htsjdk.samtools.cram.encoding.readfeatures.Bases;
import htsjdk.samtools.cram.encoding.readfeatures.Deletion;
@@ -38,6 +40,11 @@ import java.util.LinkedList;
public class CramRecordReader extends AbstractReader {
private CramCompressionRecord prevRecord;
+ private ValidationStringency validationStringency;
+ public CramRecordReader(ValidationStringency validationStringency) {
+ this.validationStringency = validationStringency;
+ }
public void read(final CramCompressionRecord cramRecord) {
@@ -87,7 +94,7 @@ public class CramRecordReader extends AbstractReader {
for (int i = 0; i < ids.length; i++) {
final int id = ReadTag.name3BytesToInt(ids[i]);
final DataReader<byte[]> dataReader = tagValueCodecs.get(id);
- final ReadTag tag = new ReadTag(id, dataReader.readData());
+ final ReadTag tag = new ReadTag(id, dataReader.readData(), validationStringency);
cramRecord.tags[i] = tag;
@@ -186,10 +193,19 @@ public class CramRecordReader extends AbstractReader {
prevRecord = cramRecord;
- } catch (final Exception e) {
- if (prevRecord != null)
+ }
+ catch (final SAMFormatException e) {
+ if (prevRecord != null) {
+ System.err.printf("Failed at record %d. Here is the previously read record: %s\n", recordCounter,
+ prevRecord.toString());
+ }
+ throw e;
+ }
+ catch (final Exception e) {
+ if (prevRecord != null) {
System.err.printf("Failed at record %d. Here is the previously read record: %s\n", recordCounter,
+ }
throw new RuntimeException(e);
diff --git a/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java b/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java
index 4115a75..cf9748c 100644
--- a/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java
+++ b/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java
@@ -32,6 +32,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.lang.ref.WeakReference;
import java.net.URL;
+import java.nio.file.Path;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
@@ -50,8 +51,12 @@ public class ReferenceSource {
public ReferenceSource(final File file) {
- if (file != null)
- rsFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(file);
+ this(file == null ? null : file.toPath());
+ }
+ public ReferenceSource(final Path path) {
+ if (path != null)
+ rsFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(path);
public ReferenceSource(final ReferenceSequenceFile rsFile) {
diff --git a/src/java/htsjdk/samtools/cram/structure/CramCompressionRecord.java b/src/java/htsjdk/samtools/cram/structure/CramCompressionRecord.java
index 0ba0dc4..b0b95d3 100644
--- a/src/java/htsjdk/samtools/cram/structure/CramCompressionRecord.java
+++ b/src/java/htsjdk/samtools/cram/structure/CramCompressionRecord.java
@@ -17,6 +17,7 @@
package htsjdk.samtools.cram.structure;
+import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.cram.common.MutableInt;
import htsjdk.samtools.cram.encoding.readfeatures.Deletion;
import htsjdk.samtools.cram.encoding.readfeatures.InsertBase;
@@ -157,7 +158,10 @@ public class CramCompressionRecord {
void calculateAlignmentBoundaries() {
- if (readFeatures == null || readFeatures.isEmpty()) {
+ if (isSegmentUnmapped()) {
+ alignmentSpan = 0;
+ alignmentEnd = SAMRecord.NO_ALIGNMENT_START;
+ } else if (readFeatures == null || readFeatures.isEmpty()) {
alignmentSpan = readLength;
alignmentEnd = alignmentStart + alignmentSpan - 1;
} else {
diff --git a/src/java/htsjdk/samtools/cram/structure/ReadTag.java b/src/java/htsjdk/samtools/cram/structure/ReadTag.java
index dc5967a..791bf2c 100644
--- a/src/java/htsjdk/samtools/cram/structure/ReadTag.java
+++ b/src/java/htsjdk/samtools/cram/structure/ReadTag.java
@@ -19,9 +19,13 @@ package htsjdk.samtools.cram.structure;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMFormatException;
+import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecord.SAMTagAndValue;
import htsjdk.samtools.SAMTagUtil;
+import htsjdk.samtools.SAMUtils;
+import htsjdk.samtools.SAMValidationError;
import htsjdk.samtools.TagValueAndUnsignedArrayFlag;
+import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.util.StringUtil;
import java.nio.ByteBuffer;
@@ -50,10 +54,10 @@ public class ReadTag implements Comparable<ReadTag> {
private short code;
private byte index;
- public ReadTag(final int id, final byte[] dataAsByteArray) {
+ public ReadTag(final int id, final byte[] dataAsByteArray, ValidationStringency validationStringency) {
this.type = (char) (0xFF & id);
key = new String(new char[]{(char) ((id >> 16) & 0xFF), (char) ((id >> 8) & 0xFF)});
- value = restoreValueFromByteArray(type, dataAsByteArray);
+ value = restoreValueFromByteArray(type, dataAsByteArray, validationStringency);
keyType3Bytes = this.key + this.type;
keyType3BytesAsInt = id;
@@ -179,10 +183,10 @@ public class ReadTag implements Comparable<ReadTag> {
return writeSingleValue((byte) type, value, false);
- private static Object restoreValueFromByteArray(final char type, final byte[] array) {
+ private static Object restoreValueFromByteArray(final char type, final byte[] array, ValidationStringency validationStringency) {
final ByteBuffer buffer = ByteBuffer.wrap(array);
- return readSingleValue((byte) type, buffer);
+ return readSingleValue((byte) type, buffer, validationStringency);
// copied from net.sf.samtools.BinaryTagCodec 1.62:
@@ -216,8 +220,7 @@ public class ReadTag implements Comparable<ReadTag> {
// copied from net.sf.samtools.BinaryTagCodec:
static private char getIntegerType(final long val) {
if (val > MAX_UINT) {
- throw new IllegalArgumentException(
- "Integer attribute value too large to be encoded in BAM");
+ throw new IllegalArgumentException("Integer attribute value too large: "+val);
if (val > MAX_INT) {
return 'I';
@@ -288,7 +291,7 @@ public class ReadTag implements Comparable<ReadTag> {
buffer.position(buffer.position() - 4);
case 'i':
- buffer.putInt((Integer) value);
+ buffer.putInt(((Number) value).intValue());
case 's':
buffer.putShort(((Number) value).shortValue());
@@ -365,7 +368,7 @@ public class ReadTag implements Comparable<ReadTag> {
public static Object readSingleValue(final byte tagType,
- final ByteBuffer byteBuffer) {
+ final ByteBuffer byteBuffer, ValidationStringency validationStringency) {
switch (tagType) {
case 'Z':
return readNullTerminatedString(byteBuffer);
@@ -374,10 +377,15 @@ public class ReadTag implements Comparable<ReadTag> {
case 'I':
final long val = byteBuffer.getInt() & 0xffffffffL;
if (val <= Integer.MAX_VALUE) {
- return (int) val;
+ return (int)val;
+ }
+ // If it won't fit into a signed integer, but is within range for an unsigned 32-bit integer,
+ // return it directly as a long
+ if (! SAMUtils.isValidUnsignedIntegerAttribute(val)) {
+ SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.TAG_VALUE_TOO_LARGE,
+ "Unsigned integer is out of range for a 32-bit unsigned value: " + val, null), validationStringency);
- throw new RuntimeException(
- "Tag value is too large to store as signed integer.");
+ return val;
case 'i':
return byteBuffer.getInt();
case 's':
diff --git a/src/java/htsjdk/samtools/filter/FilteringIterator.java b/src/java/htsjdk/samtools/filter/FilteringIterator.java
index df7dc35..00e489f 100644
--- a/src/java/htsjdk/samtools/filter/FilteringIterator.java
+++ b/src/java/htsjdk/samtools/filter/FilteringIterator.java
@@ -142,7 +142,7 @@ public class FilteringIterator implements CloseableIterator<SAMRecord> {
} else if (filterReadPairs && record.getReadPairedFlag() &&
record.getSecondOfPairFlag()) {
- // assume that we did a filterOut(first, second) and it passed the filter
+ // assume that we did a pass(first, second) and it passed the filter
return record;
} else if (!filter.filterOut(record)) {
return record;
diff --git a/src/java/htsjdk/samtools/filter/IntervalFilter.java b/src/java/htsjdk/samtools/filter/IntervalFilter.java
index e8647f9..ee3de6d 100644
--- a/src/java/htsjdk/samtools/filter/IntervalFilter.java
+++ b/src/java/htsjdk/samtools/filter/IntervalFilter.java
@@ -33,7 +33,7 @@ import java.util.List;
* Filter SAMRecords so that only those that overlap the given list of intervals.
- * It is required that the SAMRecords are passed in coordinate order
+ * It is required that the SAMRecords are passed in coordinate order, and have non-null SAMFileHeaders.
* $Id$
diff --git a/src/java/htsjdk/samtools/filter/OverclippedReadFilter.java b/src/java/htsjdk/samtools/filter/OverclippedReadFilter.java
new file mode 100644
index 0000000..2e8f43f
--- /dev/null
+++ b/src/java/htsjdk/samtools/filter/OverclippedReadFilter.java
@@ -0,0 +1,76 @@
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ */
+package htsjdk.samtools.filter;
+import htsjdk.samtools.CigarElement;
+import htsjdk.samtools.CigarOperator;
+import htsjdk.samtools.SAMException;
+import htsjdk.samtools.SAMRecord;
+ * Filters out reads with very few unclipped bases, likely due to the read coming
+ * from a foreign organism, e.g. bacterial contamination.
+ *
+ * Based on GATK's OverclippedReadFilter.
+ */
+public class OverclippedReadFilter implements SamRecordFilter {
+ // if the number of unclipped bases is below this threshold, the read is considered overclipped
+ private final int unclippedBasesThreshold;
+ // if set to true, then reads with at least one clipped end will be filtered; if false, we require both ends to be clipped
+ private final boolean filterSingleEndClips;
+ public OverclippedReadFilter(final int unclippedBasesThreshold, final boolean filterSingleEndClips) {
+ if (unclippedBasesThreshold < 0) throw new SAMException("unclippedBasesThreshold must be non-negative");
+ this.unclippedBasesThreshold = unclippedBasesThreshold;
+ this.filterSingleEndClips = filterSingleEndClips;
+ }
+ @Override
+ public boolean filterOut(final SAMRecord record) {
+ int alignedLength = 0;
+ int softClipBlocks = 0;
+ int minSoftClipBlocks = filterSingleEndClips ? 1 : 2;
+ CigarOperator lastOperator = null;
+ for ( final CigarElement element : record.getCigar().getCigarElements() ) {
+ if ( element.getOperator() == CigarOperator.S ) {
+ //Treat consecutive S blocks as a single one
+ if(lastOperator != CigarOperator.S){
+ softClipBlocks += 1;
+ }
+ } else if ( element.getOperator().consumesReadBases() ) { // M, I, X, and EQ (S was already accounted for above)
+ alignedLength += element.getLength();
+ }
+ lastOperator = element.getOperator();
+ }
+ return(alignedLength < unclippedBasesThreshold && softClipBlocks >= minSoftClipBlocks);
+ }
+ @Override
+ public boolean filterOut(final SAMRecord first, final SAMRecord second) {
+ return filterOut(first) || filterOut(second);
+ }
diff --git a/src/java/htsjdk/samtools/metrics/MetricsFile.java b/src/java/htsjdk/samtools/metrics/MetricsFile.java
index 954aea1..f3f2216 100644
--- a/src/java/htsjdk/samtools/metrics/MetricsFile.java
+++ b/src/java/htsjdk/samtools/metrics/MetricsFile.java
@@ -28,6 +28,7 @@ import htsjdk.samtools.SAMException;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.FormatUtil;
import htsjdk.samtools.util.Histogram;
+import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.StringUtil;
import java.io.*;
@@ -535,14 +536,12 @@ public class MetricsFile<BEAN extends MetricBase, HKEY extends Comparable> imple
* @param file to be read.
* @return list of beans from the file.
- public static List<? extends MetricBase> readBeans(final File file) {
- try {
- final MetricsFile<MetricBase, Comparable<?>> metricsFile = new MetricsFile<MetricBase, Comparable<?>>();
- metricsFile.read(new FileReader(file));
- return metricsFile.getMetrics();
- } catch (FileNotFoundException e) {
- throw new SAMException(e.getMessage(), e);
- }
+ public static <T extends MetricBase> List<T> readBeans(final File file) {
+ final MetricsFile<T, Comparable<?>> metricsFile = new MetricsFile<T, Comparable<?>>();
+ final Reader in = IOUtil.openFileForBufferedReading(file);
+ metricsFile.read(in);
+ CloserUtil.close(in);
+ return metricsFile.getMetrics();
@@ -573,4 +572,21 @@ public class MetricsFile<BEAN extends MetricBase, HKEY extends Comparable> imple
+ /**
+ * Compare the metrics and histograms in two files, ignoring headers.
+ */
+ public static boolean areMetricsAndHistogramsEqual(final File file1, final File file2) {
+ try {
+ final MetricsFile<MetricBase, Comparable<?>> mf1 = new MetricsFile<MetricBase, Comparable<?>>();
+ final MetricsFile<MetricBase, Comparable<?>> mf2 = new MetricsFile<MetricBase, Comparable<?>>();
+ mf1.read(new FileReader(file1));
+ mf2.read(new FileReader(file2));
+ return mf1.areMetricsEqual(mf2) && mf1.areHistogramsEqual(mf2);
+ } catch (FileNotFoundException e) {
+ throw new SAMException(e.getMessage(), e);
+ }
+ }
diff --git a/src/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java b/src/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java
index 0f09ae6..e0c7dca 100644
--- a/src/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java
+++ b/src/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java
@@ -32,14 +32,15 @@ import htsjdk.samtools.util.BufferedLineReader;
import htsjdk.samtools.util.IOUtil;
import java.io.File;
-import java.io.FileInputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
* Provide core sequence dictionary functionality required by all fasta file readers.
* @author Matt Hanna
abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile {
- protected final File file;
+ private final Path path;
protected SAMSequenceDictionary sequenceDictionary;
@@ -47,15 +48,23 @@ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile {
* @param file Fasta file to read. Also acts as a prefix for supporting files.
AbstractFastaSequenceFile(final File file) {
- this.file = file;
- final File dictionary = findSequenceDictionary(file);
+ this(file == null ? null : file.toPath());
+ }
+ /**
+ * Finds and loads the sequence file dictionary.
+ * @param path Fasta file to read. Also acts as a prefix for supporting files.
+ */
+ AbstractFastaSequenceFile(final Path path) {
+ this.path = path;
+ final Path dictionary = findSequenceDictionary(path);
if (dictionary != null) {
try {
final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
- final BufferedLineReader reader = new BufferedLineReader(new FileInputStream(dictionary));
+ final BufferedLineReader reader = new BufferedLineReader(Files.newInputStream(dictionary));
final SAMFileHeader header = codec.decode(reader,
if (header.getSequenceDictionary() != null && header.getSequenceDictionary().size() > 0) {
@@ -70,33 +79,51 @@ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile {
protected static File findSequenceDictionary(final File file) {
+ if (file == null) {
+ return null;
+ }
+ Path dictionary = findSequenceDictionary(file.toPath());
+ if (dictionary == null) {
+ return null;
+ }
+ return dictionary.toFile();
+ }
+ protected static Path findSequenceDictionary(final Path path) {
+ if (path == null) {
+ return null;
+ }
// Try and locate the dictionary
- String dictionaryName = file.getAbsolutePath();
- String dictionaryNameExt = file.getAbsolutePath();
+ Path dictionary = path.toAbsolutePath();
+ Path dictionaryExt = path.toAbsolutePath();
boolean fileTypeSupported = false;
for (final String extension : ReferenceSequenceFileFactory.FASTA_EXTENSIONS) {
- if (dictionaryName.endsWith(extension)) {
- dictionaryNameExt = new String(dictionaryName);
- dictionaryNameExt += IOUtil.DICT_FILE_EXTENSION;
- dictionaryName = dictionaryName.substring(0, dictionaryName.lastIndexOf(extension));
- dictionaryName += IOUtil.DICT_FILE_EXTENSION;
- fileTypeSupported = true;
- break;
+ String filename = dictionary.getFileName().toString();
+ if (filename.endsWith(extension)) {
+ dictionaryExt = dictionary.resolveSibling(filename + IOUtil
+ String filenameNoExt = filename.substring(0, filename.lastIndexOf(extension));
+ dictionary = dictionary.resolveSibling(filenameNoExt+ IOUtil.DICT_FILE_EXTENSION);
+ fileTypeSupported = true;
+ break;
if (!fileTypeSupported)
- throw new IllegalArgumentException("File is not a supported reference file type: " + file.getAbsolutePath());
+ throw new IllegalArgumentException("File is not a supported reference file type: " + path.toAbsolutePath());
- final File dictionary = new File(dictionaryName);
- if (dictionary.exists())
+ if (Files.exists(dictionary))
return dictionary;
// try without removing the file extension
- final File dictionaryExt = new File(dictionaryNameExt);
- if (dictionaryExt.exists())
+ if (Files.exists(dictionaryExt))
return dictionaryExt;
else return null;
+ /** Returns the path to the reference file. */
+ protected Path getPath() {
+ return path;
+ }
* Returns the list of sequence records associated with the reference sequence if found
* otherwise null.
@@ -106,8 +133,13 @@ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile {
/** Returns the full path to the reference file. */
+ protected String getAbsolutePath() {
+ return path.toAbsolutePath().toString();
+ }
+ /** Returns the full path to the reference file. */
public String toString() {
- return this.file.getAbsolutePath();
+ return getAbsolutePath();
/** default implementation -- override if index is supported */
@@ -120,7 +152,7 @@ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile {
/** default implementation -- override if index is supported */
public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ) {
- throw new UnsupportedOperationException("Index does not appear to exist for" + file.getAbsolutePath() + ". samtools faidx can be used to create an index");
+ throw new UnsupportedOperationException("Index does not appear to exist for " + getAbsolutePath() + ". samtools faidx can be used to create an index");
diff --git a/src/java/htsjdk/samtools/reference/FastaSequenceFile.java b/src/java/htsjdk/samtools/reference/FastaSequenceFile.java
index f674521..72c0583 100644
--- a/src/java/htsjdk/samtools/reference/FastaSequenceFile.java
+++ b/src/java/htsjdk/samtools/reference/FastaSequenceFile.java
@@ -32,6 +32,7 @@ import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.StringUtil;
import java.io.File;
+import java.nio.file.Path;
* Implementation of ReferenceSequenceFile for reading from FASTA files.
@@ -48,9 +49,14 @@ public class FastaSequenceFile extends AbstractFastaSequenceFile {
/** Constructs a FastaSequenceFile that reads from the specified file. */
public FastaSequenceFile(final File file, final boolean truncateNamesAtWhitespace) {
- super(file);
+ this(file == null ? null : file.toPath(), truncateNamesAtWhitespace);
+ }
+ /** Constructs a FastaSequenceFile that reads from the specified file. */
+ public FastaSequenceFile(final Path path, final boolean truncateNamesAtWhitespace) {
+ super(path);
this.truncateNamesAtWhitespace = truncateNamesAtWhitespace;
- this.in = new FastLineReader(IOUtil.openFileForReading(file));
+ this.in = new FastLineReader(IOUtil.openFileForReading(path));
@@ -80,7 +86,7 @@ public class FastaSequenceFile extends AbstractFastaSequenceFile {
public void reset() {
this.sequenceIndex = -1;
- this.in = new FastLineReader(IOUtil.openFileForReading(file));
+ this.in = new FastLineReader(IOUtil.openFileForReading(getPath()));
@@ -91,7 +97,7 @@ public class FastaSequenceFile extends AbstractFastaSequenceFile {
final byte b = in.getByte();
if (b != '>') {
- throw new SAMException("Format exception reading FASTA " + file + ". Expected > but saw chr(" +
+ throw new SAMException("Format exception reading FASTA " + getAbsolutePath() + ". Expected > but saw chr(" +
b + ") at start of sequence with index " + this.sequenceIndex);
final byte[] nameBuffer = new byte[4096];
@@ -102,11 +108,11 @@ public class FastaSequenceFile extends AbstractFastaSequenceFile {
nameLength += in.readToEndOfOutputBufferOrEoln(nameBuffer, nameLength);
if (nameLength == nameBuffer.length && !in.atEoln()) {
- throw new SAMException("Sequence name too long in FASTA " + file);
+ throw new SAMException("Sequence name too long in FASTA " + getAbsolutePath());
} while (!in.atEoln());
if (nameLength == 0) {
- throw new SAMException("Missing sequence name in FASTA " + file);
+ throw new SAMException("Missing sequence name in FASTA " + getAbsolutePath());
String name = StringUtil.bytesToString(nameBuffer, 0, nameLength).trim();
if (truncateNamesAtWhitespace) {
diff --git a/src/java/htsjdk/samtools/reference/FastaSequenceIndex.java b/src/java/htsjdk/samtools/reference/FastaSequenceIndex.java
index d75f65e..e314fcc 100644
--- a/src/java/htsjdk/samtools/reference/FastaSequenceIndex.java
+++ b/src/java/htsjdk/samtools/reference/FastaSequenceIndex.java
@@ -30,6 +30,8 @@ import htsjdk.samtools.util.IOUtil;
import java.io.File;
import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.file.Path;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
@@ -51,6 +53,15 @@ public class FastaSequenceIndex implements Iterable<FastaSequenceIndexEntry> {
* @throws FileNotFoundException if the index file cannot be found.
public FastaSequenceIndex( File indexFile ) {
+ this(indexFile == null ? null : indexFile.toPath());
+ }
+ /**
+ * Build a sequence index from the specified file.
+ * @param indexFile File to open.
+ * @throws FileNotFoundException if the index file cannot be found.
+ */
+ public FastaSequenceIndex( Path indexFile ) {
@@ -111,12 +122,11 @@ public class FastaSequenceIndex implements Iterable<FastaSequenceIndexEntry> {
* Parse the contents of an index file, caching the results internally.
* @param indexFile File to parse.
- * @throws FileNotFoundException Thrown if file could not be opened.
+ * @throws IOException Thrown if file could not be opened.
- private void parseIndexFile(File indexFile) {
+ private void parseIndexFile(Path indexFile) {
try {
Scanner scanner = new Scanner(indexFile);
int sequenceIndex = 0;
while( scanner.hasNext() ) {
// Tokenize and validate the index line.
@@ -142,8 +152,9 @@ public class FastaSequenceIndex implements Iterable<FastaSequenceIndexEntry> {
add(new FastaSequenceIndexEntry(contig,location,size,basesPerLine,bytesPerLine, sequenceIndex++) );
- } catch (FileNotFoundException e) {
- throw new SAMException("Fasta index file should be found but is not: " + indexFile, e);
+ } catch (IOException e) {
+ throw new SAMException("Fasta index file could not be opened: " + indexFile, e);
diff --git a/src/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java b/src/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java
index bb15000..b341d6f 100644
--- a/src/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java
+++ b/src/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java
@@ -37,6 +37,9 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
+import java.nio.channels.SeekableByteChannel;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.Iterator;
@@ -47,7 +50,7 @@ public class IndexedFastaSequenceFile extends AbstractFastaSequenceFile implemen
* The interface facilitating direct access to the fasta.
- private final FileChannel channel;
+ private final SeekableByteChannel channel;
* A representation of the sequence index, stored alongside the fasta in a .fasta.fai file.
@@ -66,33 +69,48 @@ public class IndexedFastaSequenceFile extends AbstractFastaSequenceFile implemen
* @throws FileNotFoundException If the fasta or any of its supporting files cannot be found.
public IndexedFastaSequenceFile(final File file, final FastaSequenceIndex index) {
- super(file);
- if (index == null) throw new IllegalArgumentException("Null index for fasta " + file);
+ this(file == null ? null : file.toPath(), index);
+ }
+ /**
+ * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
+ * @param file The file to open.
+ * @throws FileNotFoundException If the fasta or any of its supporting files cannot be found.
+ */
+ public IndexedFastaSequenceFile(final File file) throws FileNotFoundException {
+ this(file, new FastaSequenceIndex((findRequiredFastaIndexFile(file))));
+ }
+ /**
+ * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
+ * @param path The file to open.
+ * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk.
+ */
+ public IndexedFastaSequenceFile(final Path path, final FastaSequenceIndex index) {
+ super(path);
+ if (index == null) throw new IllegalArgumentException("Null index for fasta " + path);
this.index = index;
- IOUtil.assertFileIsReadable(file);
- final FileInputStream in;
+ IOUtil.assertFileIsReadable(path);
try {
- in = new FileInputStream(file);
- } catch (FileNotFoundException e) {
- throw new SAMException("Fasta file should be readable but is not: " + file, e);
+ this.channel = Files.newByteChannel(path);
+ } catch (IOException e) {
+ throw new SAMException("Fasta file should be readable but is not: " + path, e);
- channel = in.getChannel();
if(getSequenceDictionary() != null)
- sanityCheckDictionaryAgainstIndex(file.getAbsolutePath(),sequenceDictionary,index);
+ sanityCheckDictionaryAgainstIndex(path.toAbsolutePath().toString(),sequenceDictionary,index);
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
- * @param file The file to open.
+ * @param path The file to open.
* @throws FileNotFoundException If the fasta or any of its supporting files cannot be found.
- public IndexedFastaSequenceFile(final File file) throws FileNotFoundException {
- this(file, new FastaSequenceIndex((findRequiredFastaIndexFile(file))));
+ public IndexedFastaSequenceFile(final Path path) throws FileNotFoundException {
+ this(path, new FastaSequenceIndex((findRequiredFastaIndexFile(path))));
public boolean isIndexed() {return true;}
private static File findFastaIndex(File fastaFile) {
@@ -116,6 +134,27 @@ public class IndexedFastaSequenceFile extends AbstractFastaSequenceFile implemen
findFastaIndex(fastaFile) != null);
+ private static Path findFastaIndex(Path fastaFile) {
+ Path indexFile = getFastaIndexFileName(fastaFile);
+ if (!Files.exists(indexFile)) return null;
+ return indexFile;
+ }
+ private static Path getFastaIndexFileName(Path fastaFile) {
+ return fastaFile.resolveSibling(fastaFile.getFileName() + ".fai");
+ }
+ private static Path findRequiredFastaIndexFile(Path fastaFile) throws FileNotFoundException {
+ Path ret = findFastaIndex(fastaFile);
+ if (ret == null) throw new FileNotFoundException(getFastaIndexFileName(fastaFile) + " not found.");
+ return ret;
+ }
+ public static boolean canCreateIndexedFastaReader(final Path fastaFile) {
+ return (Files.exists(fastaFile) &&
+ findFastaIndex(fastaFile) != null);
+ }
* Do some basic checking to make sure the dictionary and the index match.
* @param fastaFile Used for error reporting only.
@@ -202,10 +241,10 @@ public class IndexedFastaSequenceFile extends AbstractFastaSequenceFile implemen
startOffset += Math.max((int)(startOffset%bytesPerLine - basesPerLine + 1),0);
try {
- startOffset += channel.read(channelBuffer,indexEntry.getLocation()+startOffset);
+ startOffset += readFromPosition(channel, channelBuffer, indexEntry.getLocation()+startOffset);
catch(IOException ex) {
- throw new SAMException("Unable to load " + contig + "(" + start + ", " + stop + ") from " + file);
+ throw new SAMException("Unable to load " + contig + "(" + start + ", " + stop + ") from " + getAbsolutePath(), ex);
// Reset the buffer for outbound transfers.
@@ -235,6 +274,29 @@ public class IndexedFastaSequenceFile extends AbstractFastaSequenceFile implemen
+ * Reads a sequence of bytes from this channel into the given buffer,
+ * starting at the given file position.
+ * @param channel the channel to read from
+ * @param buffer the buffer into which bytes are to be transferred
+ * @param position the position to start reading at
+ * @return the number of bytes read
+ * @throws IOException if an I/O error occurs while reading
+ */
+ private static int readFromPosition(final SeekableByteChannel channel, final ByteBuffer buffer, long position) throws IOException {
+ if (channel instanceof FileChannel) { // special case to take advantage of native code path
+ return ((FileChannel) channel).read(buffer,position);
+ } else {
+ long oldPos = channel.position();
+ try {
+ channel.position(position);
+ return channel.read(buffer);
+ } finally {
+ channel.position(oldPos);
+ }
+ }
+ }
+ /**
* Gets the next sequence if available, or null if not present.
* @return next sequence if available, or null if not present.
@@ -256,7 +318,7 @@ public class IndexedFastaSequenceFile extends AbstractFastaSequenceFile implemen
* @return String representation of the file.
public String toString() {
- return this.file.getAbsolutePath();
+ return getAbsolutePath();
diff --git a/src/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java b/src/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java
index 6441140..5978072 100644
--- a/src/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java
+++ b/src/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java
@@ -26,6 +26,7 @@ package htsjdk.samtools.reference;
import java.io.File;
import java.io.FileNotFoundException;
+import java.nio.file.Path;
import java.util.HashSet;
import java.util.Set;
@@ -78,24 +79,58 @@ public class ReferenceSequenceFileFactory {
* @param preferIndexed if true attempt to return an indexed reader that supports non-linear traversal, else return the non-indexed reader
public static ReferenceSequenceFile getReferenceSequenceFile(final File file, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) {
- final String name = file.getName();
+ return getReferenceSequenceFile(file.toPath(), truncateNamesAtWhitespace, preferIndexed);
+ }
+ /**
+ * Attempts to determine the type of the reference file and return an instance
+ * of ReferenceSequenceFile that is appropriate to read it. Sequence names
+ * will be truncated at first whitespace, if any.
+ *
+ * @param path the reference sequence file on disk
+ */
+ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path) {
+ return getReferenceSequenceFile(path, true);
+ }
+ /**
+ * Attempts to determine the type of the reference file and return an instance
+ * of ReferenceSequenceFile that is appropriate to read it.
+ *
+ * @param path the reference sequence file on disk
+ * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name
+ */
+ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, final boolean truncateNamesAtWhitespace) {
+ return getReferenceSequenceFile(path, truncateNamesAtWhitespace, true);
+ }
+ /**
+ * Attempts to determine the type of the reference file and return an instance
+ * of ReferenceSequenceFile that is appropriate to read it.
+ *
+ * @param path the reference sequence file path
+ * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name
+ * @param preferIndexed if true attempt to return an indexed reader that supports non-linear traversal, else return the non-indexed reader
+ */
+ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) {
+ final String name = path.getFileName().toString();
for (final String ext : FASTA_EXTENSIONS) {
if (name.endsWith(ext)) {
// Using faidx requires truncateNamesAtWhitespace
- if (truncateNamesAtWhitespace && preferIndexed && IndexedFastaSequenceFile.canCreateIndexedFastaReader(file)) {
+ if (truncateNamesAtWhitespace && preferIndexed && IndexedFastaSequenceFile.canCreateIndexedFastaReader(path)) {
try {
- return new IndexedFastaSequenceFile(file);
+ return new IndexedFastaSequenceFile(path);
catch (final FileNotFoundException e) {
throw new IllegalStateException("Should never happen, because existence of files has been checked.", e);
else {
- return new FastaSequenceFile(file, truncateNamesAtWhitespace);
+ return new FastaSequenceFile(path, truncateNamesAtWhitespace);
- throw new IllegalArgumentException("File is not a supported reference file type: " + file.getAbsolutePath());
+ throw new IllegalArgumentException("File is not a supported reference file type: " + path.toAbsolutePath());
diff --git a/src/java/htsjdk/samtools/seekablestream/SeekableMemoryStream.java b/src/java/htsjdk/samtools/seekablestream/SeekableMemoryStream.java
new file mode 100644
index 0000000..a6efc91
--- /dev/null
+++ b/src/java/htsjdk/samtools/seekablestream/SeekableMemoryStream.java
@@ -0,0 +1,64 @@
+package htsjdk.samtools.seekablestream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+public class SeekableMemoryStream extends SeekableStream {
+ private final ByteBuffer buf;
+ private final String source;
+ public SeekableMemoryStream(final byte[] data, final String source) {
+ this.buf = ByteBuffer.wrap(data);
+ this.source = source;
+ }
+ @Override
+ public void close() throws IOException {
+ buf.clear();
+ }
+ @Override
+ public boolean eof() throws IOException {
+ return buf.position() == buf.limit();
+ }
+ @Override
+ public String getSource() {
+ return source;
+ }
+ @Override
+ public long length() {
+ return buf.array().length - buf.arrayOffset();
+ }
+ @Override
+ public int read(final byte[] buffer, final int offset, final int length) throws IOException {
+ int availableLength = Math.min(length, buf.remaining());
+ if (availableLength < 1) {
+ return -1;
+ }
+ buf.get(buffer, offset, availableLength);
+ return availableLength;
+ }
+ @Override
+ public void seek(final long position) throws IOException {
+ buf.position((int) position);
+ }
+ @Override
+ public int read() throws IOException {
+ if (buf.position() < buf.limit()) {
+ return buf.get();
+ } else {
+ return -1;
+ }
+ }
+ @Override
+ public long position() throws IOException {
+ return buf.position();
+ }
diff --git a/src/java/htsjdk/samtools/sra/ReferenceCache.java b/src/java/htsjdk/samtools/sra/ReferenceCache.java
new file mode 100644
index 0000000..de6e27b
--- /dev/null
+++ b/src/java/htsjdk/samtools/sra/ReferenceCache.java
@@ -0,0 +1,79 @@
+package htsjdk.samtools.sra;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import ngs.ErrorMsg;
+import ngs.ReadCollection;
+import ngs.Reference;
+import java.util.ArrayList;
+import java.util.List;
+ * That is a thread-safe wrapper for a list of cache Reference objects.
+ * Those objects can be used from different threads without issues, however to load and save a Reference object, we
+ * need to acquire a lock.
+ *
+ * Created by andrii.nikitiuk on 10/28/15.
+ */
+public class ReferenceCache {
+ private ReadCollection run;
+ private SAMFileHeader virtualHeader;
+ private final List<Reference> cachedReferences;
+ public ReferenceCache(ReadCollection run, SAMFileHeader virtualHeader) {
+ this.run = run;
+ this.virtualHeader = virtualHeader;
+ cachedReferences = initializeReferenceCache();
+ }
+ /**
+ * This method returns Reference objects by reference indexes in SAM header
+ * Those obejcts can be used from different threads
+ *
+ * This method maintains thread safety, so that if Reference object is set already, it can be easily returned
+ * without locks. However, if Reference object is null, we need to acquire a lock, load the object and save it in
+ * array.
+ *
+ * @param referenceIndex reference index in
+ * @return a Reference object
+ */
+ public Reference get(int referenceIndex) {
+ Reference reference = cachedReferences.get(referenceIndex);
+ if (reference != null) {
+ return reference;
+ }
+ // maintain thread safety
+ synchronized (this) {
+ reference = cachedReferences.get(referenceIndex);
+ if (reference == null) {
+ try {
+ reference = run.getReference(virtualHeader.getSequence(referenceIndex).getSequenceName());
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ cachedReferences.set(referenceIndex, reference);
+ }
+ }
+ return reference;
+ }
+ private List<Reference> initializeReferenceCache() {
+ if (virtualHeader == null) {
+ throw new RuntimeException("Cannot cache references - header is uninitialized");
+ }
+ SAMSequenceDictionary sequenceDictionary = virtualHeader.getSequenceDictionary();
+ List<Reference> references = new ArrayList<Reference>(sequenceDictionary.size());
+ for (SAMSequenceRecord sequence : sequenceDictionary.getSequences()) {
+ references.add(null);
+ }
+ return references;
+ }
diff --git a/src/java/htsjdk/samtools/sra/SRAAccession.java b/src/java/htsjdk/samtools/sra/SRAAccession.java
new file mode 100644
index 0000000..6f39eca
--- /dev/null
+++ b/src/java/htsjdk/samtools/sra/SRAAccession.java
@@ -0,0 +1,108 @@
+* National Center for Biotechnology Information
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+* Please cite the author in any work or product based on this material.
+* ===========================================================================
+package htsjdk.samtools.sra;
+import htsjdk.samtools.util.Log;
+import gov.nih.nlm.ncbi.ngs.NGS;
+import java.io.Serializable;
+ * Describes a single SRA accession
+ * Also provides app string functionality and allows to check if working SRA is supported on the running platform
+ */
+public class SRAAccession implements Serializable {
+ private static final Log log = Log.getInstance(SRAAccession.class);
+ private static Boolean isSupportedCached = null;
+ private static String appVersionString = null;
+ private final static String defaultAppVersionString = "[unknown software]";
+ private final static String htsJdkVersionString = "HTSJDK-NGS";
+ private String acc;
+ /**
+ * Sets an app version string which will let SRA know which software uses it.
+ * @param appVersionString a string that describes running application
+ */
+ public static void setAppVersionString(String appVersionString) {
+ SRAAccession.appVersionString = appVersionString;
+ }
+ /**
+ * Returns true if SRA is supported on the running platform
+ * @return true if SRA engine was successfully loaded and operational, false otherwise
+ */
+ public static boolean isSupported() {
+ if (isSupportedCached == null) {
+ log.debug("Checking if SRA module is supported in that environment");
+ isSupportedCached = NGS.isSupported();
+ if (!isSupportedCached) {
+ log.info("SRA is not supported. Will not be able to read from SRA");
+ } else {
+ NGS.setAppVersionString(getFullVersionString());
+ }
+ }
+ return isSupportedCached;
+ }
+ /**
+ * @param acc accession
+ * @return true if a string is a valid SRA accession
+ */
+ public static boolean isValid(String acc) {
+ if (!isSupported()) {
+ return false;
+ }
+ return NGS.isValid(acc);
+ }
+ /**
+ * @param acc accession
+ */
+ public SRAAccession(String acc) {
+ this.acc = acc;
+ }
+ public String toString() {
+ return acc;
+ }
+ /**
+ * @return true if contained string is an SRA accession
+ */
+ public boolean isValid() {
+ return SRAAccession.isValid(acc);
+ }
+ private static String getFullVersionString() {
+ String versionString = appVersionString == null ? defaultAppVersionString : appVersionString;
+ versionString += " through " + htsJdkVersionString;
+ return versionString;
+ }
diff --git a/src/java/htsjdk/samtools/sra/SRAAlignmentIterator.java b/src/java/htsjdk/samtools/sra/SRAAlignmentIterator.java
new file mode 100644
index 0000000..2ebade1
--- /dev/null
+++ b/src/java/htsjdk/samtools/sra/SRAAlignmentIterator.java
@@ -0,0 +1,194 @@
+* National Center for Biotechnology Information
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+* Please cite the author in any work or product based on this material.
+* ===========================================================================
+package htsjdk.samtools.sra;
+import htsjdk.samtools.Chunk;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SRAIterator;
+import htsjdk.samtools.ValidationStringency;
+import ngs.Alignment;
+import ngs.AlignmentIterator;
+import ngs.ErrorMsg;
+import ngs.ReadCollection;
+import ngs.Reference;
+import ngs.ReferenceIterator;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+ * Iterator for aligned reads.
+ * Is used from SRAIterator.
+ * Created by andrii.nikitiuk on 9/3/15.
+ */
+public class SRAAlignmentIterator implements Iterator<SAMRecord> {
+ private ValidationStringency validationStringency;
+ private SRAAccession accession;
+ private ReadCollection run;
+ private SAMFileHeader header;
+ private ReferenceCache cachedReferences;
+ private List<Long> referencesLengths;
+ private Iterator<Chunk> referencesChunksIterator;
+ private int currentReference = -1;
+ private boolean hasMoreReferences = true;
+ private AlignmentIterator alignedIterator;
+ private Boolean hasMoreAlignments = false;
+ private SRALazyRecord lastRecord;
+ /**
+ * @param run opened read collection
+ * @param header sam header
+ * @param cachedReferences list of cached references shared among all iterators from a single SRAFileReader
+ * @param recordRangeInfo info about record ranges withing SRA archive
+ * @param chunk used to determine which alignments the iterator should return
+ */
+ public SRAAlignmentIterator(SRAAccession accession, final ReadCollection run, final SAMFileHeader header, ReferenceCache cachedReferences,
+ final SRAIterator.RecordRangeInfo recordRangeInfo, final Chunk chunk) {
+ this.accession = accession;
+ this.run = run;
+ this.header = header;
+ this.cachedReferences = cachedReferences;
+ this.referencesLengths = recordRangeInfo.getReferenceLengthsAligned();
+ referencesChunksIterator = getReferenceChunks(chunk).iterator();
+ try {
+ nextReference();
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ @Override
+ public boolean hasNext() {
+ // check aligned
+ if (hasMoreAlignments == null) {
+ try {
+ lastRecord.detachFromIterator();
+ hasMoreAlignments = alignedIterator.nextAlignment();
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ while (!hasMoreAlignments && hasMoreReferences) {
+ nextReference();
+ }
+ return hasMoreAlignments;
+ }
+ @Override
+ public SAMRecord next() {
+ if (!hasNext()) {
+ throw new NoSuchElementException("No more alignments are available");
+ }
+ return nextAlignment();
+ }
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException("Removal of records not implemented.");
+ }
+ public void setValidationStringency(ValidationStringency validationStringency) {
+ this.validationStringency = validationStringency;
+ }
+ private SAMRecord nextAlignment() {
+ try {
+ lastRecord = new SRALazyRecord(header, accession, run, alignedIterator, alignedIterator.getReadId(), alignedIterator.getAlignmentId());
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ if (validationStringency != null) {
+ lastRecord.setValidationStringency(validationStringency);
+ }
+ hasMoreAlignments = null;
+ return lastRecord;
+ }
+ private void nextReference() {
+ if (!hasMoreReferences) {
+ throw new NoSuchElementException("Cannot get next reference - already at last one");
+ }
+ try {
+ hasMoreReferences = referencesChunksIterator.hasNext();
+ if (!hasMoreReferences) {
+ hasMoreAlignments = false;
+ return;
+ }
+ currentReference++;
+ Chunk refChunk = referencesChunksIterator.next();
+ if (refChunk == null) {
+ hasMoreAlignments = false;
+ return;
+ }
+ Reference reference = cachedReferences.get(currentReference);
+ alignedIterator = reference.getFilteredAlignmentSlice(
+ refChunk.getChunkStart(), refChunk.getChunkEnd() - refChunk.getChunkStart(),
+ Alignment.all, Alignment.startWithinSlice | Alignment.passDuplicates | Alignment.passFailed, 0);
+ hasMoreAlignments = alignedIterator.nextAlignment();
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ private List<Chunk> getReferenceChunks(final Chunk chunk) {
+ List<Chunk> referencesChunks = new ArrayList<Chunk>();
+ long refOffset = 0;
+ for (Long refLen : referencesLengths) {
+ if (chunk.getChunkStart() - refOffset >= refLen || chunk.getChunkEnd() - refOffset <= 0) {
+ referencesChunks.add(null);
+ } else {
+ long refChunkStart = Math.max(chunk.getChunkStart() - refOffset, 0);
+ long refChunkEnd = Math.min(chunk.getChunkEnd() - refOffset, refLen);
+ referencesChunks.add(new Chunk(refChunkStart, refChunkEnd));
+ }
+ refOffset += refLen;
+ }
+ return referencesChunks;
+ }
diff --git a/src/java/htsjdk/samtools/sra/SRAIndexedSequenceFile.java b/src/java/htsjdk/samtools/sra/SRAIndexedSequenceFile.java
new file mode 100644
index 0000000..567bce0
--- /dev/null
+++ b/src/java/htsjdk/samtools/sra/SRAIndexedSequenceFile.java
@@ -0,0 +1,121 @@
+package htsjdk.samtools.sra;
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.reference.ReferenceSequenceFile;
+import htsjdk.samtools.sra.SRAAccession;
+import ngs.ErrorMsg;
+import ngs.ReadCollection;
+import ngs.Reference;
+import ngs.ReferenceIterator;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+ * Allows reading Reference data from SRA
+ */
+public class SRAIndexedSequenceFile implements ReferenceSequenceFile {
+ private SRAAccession acc;
+ private ReadCollection run;
+ private ThreadLocal<HashMap<Integer, Reference>> cachedReferences = new ThreadLocal<HashMap<Integer, Reference>>();
+ private Iterator<SAMSequenceRecord> sequenceRecordIterator;
+ protected SAMSequenceDictionary sequenceDictionary;
+ /**
+ * @param acc accession
+ */
+ public SRAIndexedSequenceFile(SRAAccession acc) {
+ this.acc = acc;
+ if (!acc.isValid()) {
+ throw new RuntimeException("Passed an invalid SRA accession into SRA reader: " + acc);
+ }
+ try {
+ run = gov.nih.nlm.ncbi.ngs.NGS.openReadCollection(acc.toString());
+ sequenceDictionary = loadSequenceDictionary();
+ } catch (final ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ reset();
+ }
+ @Override
+ public SAMSequenceDictionary getSequenceDictionary() {
+ return sequenceDictionary;
+ }
+ @Override
+ public ReferenceSequence nextSequence() {
+ SAMSequenceRecord sequence = sequenceRecordIterator.next();
+ return getSubsequenceAt(sequence.getSequenceName(), 1L, sequence.getSequenceLength());
+ }
+ @Override
+ public void reset() {
+ sequenceRecordIterator = sequenceDictionary.getSequences().iterator();
+ }
+ @Override
+ public boolean isIndexed() {
+ return true;
+ }
+ @Override
+ public ReferenceSequence getSequence(String contig) {
+ return getSubsequenceAt(contig, 1L, sequenceDictionary.getSequence(contig).getSequenceLength());
+ }
+ @Override
+ public ReferenceSequence getSubsequenceAt(String contig, long start, long stop) {
+ SAMSequenceRecord sequence = sequenceDictionary.getSequence(contig);
+ int referenceIndex = sequence.getSequenceIndex();
+ byte[] bases;
+ try {
+ HashMap<Integer, Reference> localRefs = cachedReferences.get();
+ if (localRefs == null) {
+ localRefs = new HashMap<Integer, Reference>();
+ cachedReferences.set(localRefs);
+ }
+ Reference reference = localRefs.get(referenceIndex);
+ if (reference == null) {
+ reference = run.getReference(contig);
+ localRefs.put(referenceIndex, reference);
+ }
+ bases = reference.getReferenceBases(start - 1, stop - (start - 1)).getBytes();
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ return new ReferenceSequence(contig, referenceIndex, bases);
+ }
+ @Override
+ public void close() throws IOException {
+ }
+ protected SAMSequenceDictionary loadSequenceDictionary() throws ErrorMsg {
+ SAMSequenceDictionary dict = new SAMSequenceDictionary();
+ ReferenceIterator itRef = run.getReferences();
+ while (itRef.nextReference()) {
+ dict.addSequence(new SAMSequenceRecord(itRef.getCanonicalName(), (int) itRef.getLength()));
+ }
+ return dict;
+ }
\ No newline at end of file
diff --git a/src/java/htsjdk/samtools/sra/SRALazyRecord.java b/src/java/htsjdk/samtools/sra/SRALazyRecord.java
new file mode 100644
index 0000000..4391857
--- /dev/null
+++ b/src/java/htsjdk/samtools/sra/SRALazyRecord.java
@@ -0,0 +1,1056 @@
+* National Center for Biotechnology Information
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+* Please cite the author in any work or product based on this material.
+* ===========================================================================
+package htsjdk.samtools.sra;
+import gov.nih.nlm.ncbi.ngs.NGS;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMTagUtil;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.Cigar;
+import htsjdk.samtools.SAMBinaryTagAndValue;
+import htsjdk.samtools.SAMUtils;
+import htsjdk.samtools.SAMValidationError;
+import htsjdk.samtools.util.Log;
+import ngs.ReadCollection;
+import ngs.AlignmentIterator;
+import ngs.Alignment;
+import ngs.ReadIterator;
+import ngs.Read;
+import ngs.Fragment;
+import ngs.ErrorMsg;
+import java.io.IOException;
+import java.util.EnumSet;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.List;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+ * Extends SAMRecord so that any of the fields will be loaded only when needed.
+ * Since SRA is a column oriented database, it is very inefficient to load all the fields at once.
+ * However, loading only set of actually needed fields will be even faster than in row oriented databases.
+ *
+ * Because of that we are providing lazy loading of fields, flags and attributes.
+ *
+ * Created by andrii.nikitiuk on 8/25/15.
+ */
+public class SRALazyRecord extends SAMRecord {
+ private static final Log log = Log.getInstance(SRALazyRecord.class);
+ private SRAAccession accession;
+ private boolean isAligned;
+ private transient ReadCollection run;
+ private transient Alignment alignmentIterator;
+ private transient Read unalignmentIterator;
+ private String sraReadId;
+ private String sraAlignmentId;
+ private int unalignedReadFragmentIndex = -1;
+ private Set<LazyField> initializedFields = EnumSet.noneOf(LazyField.class);
+ private Set<LazyFlag> initializedFlags = EnumSet.noneOf(LazyFlag.class);
+ private Set<LazyAttribute> initializedAttributes = EnumSet.noneOf(LazyAttribute.class);
+ private enum LazyField {
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getAlignmentStart();
+ }
+ },
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getMappingQuality();
+ }
+ },
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getReferenceName();
+ }
+ },
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getCigarString();
+ }
+ },
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getReadBases();
+ }
+ },
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getBaseQualities();
+ }
+ },
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getMateAlignmentStart();
+ }
+ },
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getMateReferenceName();
+ }
+ },
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getInferredInsertSize();
+ }
+ };
+ public abstract void loadValue(SRALazyRecord self);
+ }
+ private enum LazyFlag {
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getReadNegativeStrandFlag();
+ }
+ },
+ READ_PAIRED(true) {
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getReadPairedFlag();
+ }
+ },
+ PROPER_PAIR(false) {
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getProperPairFlag();
+ }
+ },
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getNotPrimaryAlignmentFlag();
+ }
+ },
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getMateNegativeStrandFlag();
+ }
+ },
+ MATE_UNMAPPED(false) {
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getMateUnmappedFlag();
+ }
+ },
+ FIRST_OF_PAIR(false) {
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getFirstOfPairFlag();
+ }
+ },
+ SECOND_OF_PAIR(false) {
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getSecondOfPairFlag();
+ }
+ };
+ private final boolean canCallOnNotPaired;
+ LazyFlag(final boolean canCallOnNotPaired) {
+ this.canCallOnNotPaired = canCallOnNotPaired;
+ }
+ public boolean canCallOnNotPaired() { return canCallOnNotPaired; }
+ public abstract boolean getFlag(SRALazyRecord self);
+ }
+ private enum LazyAttribute {
+ RG {
+ @Override
+ public String getAttribute(SRALazyRecord self) {
+ return self.getAttributeGroupNameImpl();
+ }
+ };
+ public abstract String getAttribute(SRALazyRecord self);
+ }
+ private static Map<Short, LazyAttribute> lazyAttributeTags;
+ static
+ {
+ lazyAttributeTags = new HashMap<Short, LazyAttribute>();
+ lazyAttributeTags.put(SAMTagUtil.getSingleton().RG, LazyAttribute.RG);
+ }
+ public SRALazyRecord(final SAMFileHeader header, SRAAccession accession, ReadCollection run, AlignmentIterator alignmentIterator, String readId, String alignmentId) {
+ this(header, accession, readId, alignmentId);
+ this.run = run;
+ this.alignmentIterator = alignmentIterator;
+ }
+ public SRALazyRecord(final SAMFileHeader header, SRAAccession accession, ReadCollection run, ReadIterator unalignmentIterator, String readId, int unalignedReadFragmentIndex) {
+ this(header, accession, readId, unalignedReadFragmentIndex);
+ this.run = run;
+ this.unalignmentIterator = unalignmentIterator;
+ }
+ protected SRALazyRecord(final SAMFileHeader header, SRAAccession accession, String readId, String alignmentId) {
+ this(header, accession, readId, true);
+ this.sraAlignmentId = alignmentId;
+ }
+ protected SRALazyRecord(final SAMFileHeader header, SRAAccession accession, String readId, int unalignedReadFragmentIndex) {
+ this(header, accession, readId, false);
+ this.unalignedReadFragmentIndex = unalignedReadFragmentIndex;
+ }
+ private SRALazyRecord(final SAMFileHeader header, SRAAccession accession, String readId, boolean isAligned) {
+ super(header);
+ this.accession = accession;
+ this.isAligned = isAligned;
+ this.sraReadId = readId;
+ setReadName(readId);
+ setReadUnmappedFlag(!isAligned);
+ }
+ /**
+ * Is being called when original NGS iterator is being moved to the next object.
+ * Later, if any of uninitialized fields is requested, either Read object or Alignment has to be retrieved from
+ * ReadCollection
+ */
+ public void detachFromIterator() {
+ alignmentIterator = null;
+ unalignmentIterator = null;
+ }
+ // ===== fields =====
+ @Override
+ public int getAlignmentStart() {
+ if (!initializedFields.contains(LazyField.ALIGNMENT_START)) {
+ setAlignmentStart(getAlignmentStartImpl());
+ }
+ return super.getAlignmentStart();
+ }
+ @Override
+ public void setAlignmentStart(final int value) {
+ if (!initializedFields.contains(LazyField.ALIGNMENT_START)) {
+ initializedFields.add(LazyField.ALIGNMENT_START);
+ }
+ super.setAlignmentStart(value);
+ }
+ @Override
+ public int getMappingQuality() {
+ if (!initializedFields.contains(LazyField.MAPPING_QUALITY)) {
+ setMappingQuality(getMappingQualityImpl());
+ }
+ return super.getMappingQuality();
+ }
+ @Override
+ public void setMappingQuality(final int value) {
+ if (!initializedFields.contains(LazyField.MAPPING_QUALITY)) {
+ initializedFields.add(LazyField.MAPPING_QUALITY);
+ }
+ super.setMappingQuality(value);
+ }
+ @Override
+ public String getReferenceName() {
+ if (!initializedFields.contains(LazyField.REFERENCE_NAME)) {
+ setReferenceName(getReferenceNameImpl());
+ }
+ return super.getReferenceName();
+ }
+ @Override
+ public void setReferenceName(final String value) {
+ if (!initializedFields.contains(LazyField.REFERENCE_NAME)) {
+ initializedFields.add(LazyField.REFERENCE_NAME);
+ }
+ super.setReferenceName(value);
+ }
+ @Override
+ public Integer getReferenceIndex() {
+ if (!initializedFields.contains(LazyField.REFERENCE_NAME)) {
+ setReferenceName(getReferenceNameImpl());
+ }
+ return super.getReferenceIndex();
+ }
+ @Override
+ public void setReferenceIndex(final int value) {
+ if (!initializedFields.contains(LazyField.REFERENCE_NAME)) {
+ initializedFields.add(LazyField.REFERENCE_NAME);
+ }
+ super.setReferenceIndex(value);
+ }
+ @Override
+ public String getCigarString() {
+ if (!initializedFields.contains(LazyField.CIGAR_STRING)) {
+ setCigarString(getCigarStringImpl());
+ }
+ return super.getCigarString();
+ }
+ @Override
+ public void setCigarString(final String value) {
+ if (!initializedFields.contains(LazyField.CIGAR_STRING)) {
+ initializedFields.add(LazyField.CIGAR_STRING);
+ }
+ super.setCigarString(value);
+ }
+ @Override
+ public Cigar getCigar() {
+ if (!initializedFields.contains(LazyField.CIGAR_STRING)) {
+ setCigarString(getCigarStringImpl());
+ }
+ return super.getCigar();
+ }
+ @Override
+ public void setCigar(final Cigar value) {
+ if (!initializedFields.contains(LazyField.CIGAR_STRING)) {
+ initializedFields.add(LazyField.CIGAR_STRING);
+ }
+ super.setCigar(value);
+ }
+ @Override
+ public byte[] getReadBases() {
+ if (!initializedFields.contains(LazyField.BASES)) {
+ setReadBases(getReadBasesImpl());
+ }
+ return super.getReadBases();
+ }
+ @Override
+ public void setReadBases(final byte[] value) {
+ if (!initializedFields.contains(LazyField.BASES)) {
+ initializedFields.add(LazyField.BASES);
+ }
+ super.setReadBases(value);
+ }
+ @Override
+ public byte[] getBaseQualities() {
+ if (!initializedFields.contains(LazyField.QUALS)) {
+ setBaseQualities(getBaseQualitiesImpl());
+ }
+ return super.getBaseQualities();
+ }
+ @Override
+ public void setBaseQualities(final byte[] value) {
+ if (!initializedFields.contains(LazyField.QUALS)) {
+ initializedFields.add(LazyField.QUALS);
+ }
+ super.setBaseQualities(value);
+ }
+ @Override
+ public int getMateAlignmentStart() {
+ if (!initializedFields.contains(LazyField.MATE_ALIGNMENT_START)) {
+ setMateAlignmentStart(getMateAlignmentStartImpl());
+ }
+ return super.getMateAlignmentStart();
+ }
+ @Override
+ public void setMateAlignmentStart(final int value) {
+ if (!initializedFields.contains(LazyField.MATE_ALIGNMENT_START)) {
+ initializedFields.add(LazyField.MATE_ALIGNMENT_START);
+ }
+ super.setMateAlignmentStart(value);
+ }
+ @Override
+ public String getMateReferenceName() {
+ if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) {
+ setMateReferenceName(getMateReferenceNameImpl());
+ }
+ return super.getMateReferenceName();
+ }
+ @Override
+ public void setMateReferenceName(final String value) {
+ if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) {
+ initializedFields.add(LazyField.MATE_REFERENCE_NAME);
+ }
+ super.setMateReferenceName(value);
+ }
+ @Override
+ public Integer getMateReferenceIndex() {
+ if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) {
+ setMateReferenceName(getMateReferenceNameImpl());
+ }
+ return super.getMateReferenceIndex();
+ }
+ @Override
+ public void setMateReferenceIndex(final int value) {
+ if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) {
+ initializedFields.add(LazyField.MATE_REFERENCE_NAME);
+ }
+ super.setMateReferenceIndex(value);
+ }
+ @Override
+ public int getInferredInsertSize() {
+ if (!initializedFields.contains(LazyField.INFERRED_INSERT_SIZE)) {
+ setInferredInsertSize(getInferredInsertSizeImpl());
+ }
+ return super.getInferredInsertSize();
+ }
+ @Override
+ public void setInferredInsertSize(final int value) {
+ if (!initializedFields.contains(LazyField.INFERRED_INSERT_SIZE)) {
+ initializedFields.add(LazyField.INFERRED_INSERT_SIZE);
+ }
+ super.setInferredInsertSize(value);
+ }
+ // ===== flags =====
+ @Override
+ public int getFlags() {
+ for (LazyFlag flag : LazyFlag.values()) {
+ if (initializedFlags.contains(flag)) {
+ continue;
+ }
+ if (flag.canCallOnNotPaired() || getReadPairedFlag()) {
+ flag.getFlag(this);
+ }
+ }
+ return super.getFlags();
+ }
+ @Override
+ public void setFlags(final int value) {
+ for (LazyFlag flag : LazyFlag.values()) {
+ if (!initializedFlags.contains(flag)) {
+ initializedFlags.add(flag);
+ }
+ }
+ super.setFlags(value);
+ }
+ @Override
+ public boolean getReadNegativeStrandFlag() {
+ if (!initializedFlags.contains(LazyFlag.READ_NEGATIVE_STRAND)) {
+ setReadNegativeStrandFlag(getReadNegativeStrandFlagImpl());
+ }
+ return super.getReadNegativeStrandFlag();
+ }
+ @Override
+ public void setReadNegativeStrandFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.READ_NEGATIVE_STRAND)) {
+ initializedFlags.add(LazyFlag.READ_NEGATIVE_STRAND);
+ }
+ super.setReadNegativeStrandFlag(flag);
+ }
+ @Override
+ public boolean getReadPairedFlag() {
+ if (!initializedFlags.contains(LazyFlag.READ_PAIRED)) {
+ setReadPairedFlag(getReadPairedFlagImpl());
+ }
+ return super.getReadPairedFlag();
+ }
+ @Override
+ public void setReadPairedFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.READ_PAIRED)) {
+ initializedFlags.add(LazyFlag.READ_PAIRED);
+ }
+ super.setReadPairedFlag(flag);
+ }
+ @Override
+ public boolean getProperPairFlag() {
+ if (!initializedFlags.contains(LazyFlag.PROPER_PAIR)) {
+ setProperPairFlag(getProperPairFlagImpl());
+ }
+ return super.getProperPairFlag();
+ }
+ @Override
+ public void setProperPairFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.PROPER_PAIR)) {
+ initializedFlags.add(LazyFlag.PROPER_PAIR);
+ }
+ super.setProperPairFlag(flag);
+ }
+ @Override
+ public boolean getNotPrimaryAlignmentFlag() {
+ if (!initializedFlags.contains(LazyFlag.NOT_PRIMARY_ALIGNMENT)) {
+ setNotPrimaryAlignmentFlag(getNotPrimaryAlignmentFlagImpl());
+ }
+ return super.getNotPrimaryAlignmentFlag();
+ }
+ @Override
+ public void setNotPrimaryAlignmentFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.NOT_PRIMARY_ALIGNMENT)) {
+ initializedFlags.add(LazyFlag.NOT_PRIMARY_ALIGNMENT);
+ }
+ super.setNotPrimaryAlignmentFlag(flag);
+ }
+ @Override
+ public boolean getMateNegativeStrandFlag() {
+ if (!initializedFlags.contains(LazyFlag.MATE_NEGATIVE_STRAND)) {
+ setMateNegativeStrandFlag(getMateNegativeStrandFlagImpl());
+ }
+ return super.getMateNegativeStrandFlag();
+ }
+ @Override
+ public void setMateNegativeStrandFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.MATE_NEGATIVE_STRAND)) {
+ initializedFlags.add(LazyFlag.MATE_NEGATIVE_STRAND);
+ }
+ super.setMateNegativeStrandFlag(flag);
+ }
+ @Override
+ public boolean getMateUnmappedFlag() {
+ if (!initializedFlags.contains(LazyFlag.MATE_UNMAPPED)) {
+ setMateUnmappedFlag(getMateUnmappedFlagImpl());
+ }
+ return super.getMateUnmappedFlag();
+ }
+ @Override
+ public void setMateUnmappedFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.MATE_UNMAPPED)) {
+ initializedFlags.add(LazyFlag.MATE_UNMAPPED);
+ }
+ super.setMateUnmappedFlag(flag);
+ }
+ @Override
+ public boolean getFirstOfPairFlag() {
+ if (!initializedFlags.contains(LazyFlag.FIRST_OF_PAIR)) {
+ setFirstOfPairFlag(getFirstOfPairFlagImpl());
+ }
+ return super.getFirstOfPairFlag();
+ }
+ @Override
+ public void setFirstOfPairFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.FIRST_OF_PAIR)) {
+ initializedFlags.add(LazyFlag.FIRST_OF_PAIR);
+ }
+ super.setFirstOfPairFlag(flag);
+ }
+ @Override
+ public boolean getSecondOfPairFlag() {
+ if (!initializedFlags.contains(LazyFlag.SECOND_OF_PAIR)) {
+ setSecondOfPairFlag(getSecondOfPairFlagImpl());
+ }
+ return super.getSecondOfPairFlag();
+ }
+ @Override
+ public void setSecondOfPairFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.SECOND_OF_PAIR)) {
+ initializedFlags.add(LazyFlag.SECOND_OF_PAIR);
+ }
+ super.setSecondOfPairFlag(flag);
+ }
+ // ===== attributes =====
+ @Override
+ public Object getAttribute(final short tag) {
+ LazyAttribute attr = lazyAttributeTags.get(tag);
+ if (attr != null) {
+ if (!initializedAttributes.contains(attr)) {
+ setAttribute(tag, attr.getAttribute(this));
+ }
+ }
+ return super.getAttribute(tag);
+ }
+ @Override
+ public void setAttribute(final short tag, final Object value) {
+ LazyAttribute attr = lazyAttributeTags.get(tag);
+ if (attr != null && !initializedAttributes.contains(attr)) {
+ initializedAttributes.add(attr);
+ }
+ super.setAttribute(tag, value);
+ }
+ @Override
+ protected void setAttribute(final short tag, final Object value, final boolean isUnsignedArray) {
+ LazyAttribute attr = lazyAttributeTags.get(tag);
+ if (attr != null && !initializedAttributes.contains(attr)) {
+ initializedAttributes.add(attr);
+ }
+ super.setAttribute(tag, value, isUnsignedArray);
+ }
+ @Override
+ public void clearAttributes() {
+ for (LazyAttribute lazyAttribute : LazyAttribute.values()) {
+ if (!initializedAttributes.contains(lazyAttribute)) {
+ initializedAttributes.add(lazyAttribute);
+ }
+ }
+ super.clearAttributes();
+ }
+ @Override
+ protected void setAttributes(final SAMBinaryTagAndValue attributes) {
+ for (LazyAttribute lazyAttribute : LazyAttribute.values()) {
+ if (!initializedAttributes.contains(lazyAttribute)) {
+ initializedAttributes.add(lazyAttribute);
+ }
+ }
+ super.setAttributes(attributes);
+ }
+ @Override
+ protected SAMBinaryTagAndValue getBinaryAttributes() {
+ for (Map.Entry<Short, LazyAttribute> info : lazyAttributeTags.entrySet()) {
+ if (!initializedAttributes.contains(info.getValue())) {
+ getAttribute(info.getKey());
+ }
+ }
+ return super.getBinaryAttributes();
+ }
+ public boolean isUnsignedArrayAttribute(final String tag) {
+ Short binaryTag = SAMTagUtil.getSingleton().makeBinaryTag(tag);
+ LazyAttribute attr = lazyAttributeTags.get(binaryTag);
+ if (attr != null && !initializedAttributes.contains(attr)) {
+ getAttribute(binaryTag);
+ }
+ return super.isUnsignedArrayAttribute(tag);
+ }
+ // ===== misc ====
+ /**
+ * For records equality, we should only compare read id, reference and position on the reference.
+ * Since read id is a constructor parameter, we only need to make sure that reference info is loaded.
+ * @param o other
+ * @return comparison result
+ */
+ @Override
+ public boolean equals(final Object o) {
+ if (o instanceof SRALazyRecord) {
+ SRALazyRecord otherRecord = (SRALazyRecord)o;
+ otherRecord.getReferenceIndex();
+ otherRecord.getAlignmentStart();
+ }
+ getReferenceIndex();
+ getAlignmentStart();
+ return super.equals(o);
+ }
+ /**
+ * The same approach as with 'equals' method. We only load reference and position.
+ */
+ @Override
+ public int hashCode() {
+ getReferenceIndex();
+ getAlignmentStart();
+ return super.hashCode();
+ }
+ /**
+ * Performs a deep copy of the SAMRecord and detaches a copy from NGS iterator
+ * @return new object
+ * @throws CloneNotSupportedException
+ */
+ @Override
+ public Object clone() throws CloneNotSupportedException {
+ SRALazyRecord newObject = (SRALazyRecord)super.clone();
+ newObject.initializedFields = EnumSet.copyOf(this.initializedFields);
+ newObject.initializedFlags = EnumSet.copyOf(this.initializedFlags);
+ newObject.initializedAttributes = EnumSet.copyOf(this.initializedAttributes);
+ newObject.detachFromIterator();
+ return newObject;
+ }
+ @Override
+ public String format() {
+ if (!initializedAttributes.contains(LazyAttribute.RG)) {
+ getAttribute("RG");
+ }
+ return super.format();
+ }
+ @Override
+ public List<SAMValidationError> isValid(final boolean firstOnly) {
+ loadFields();
+ getFlags();
+ getBinaryAttributes();
+ return super.isValid(firstOnly);
+ }
+ // =============================== Implementation ========================================
+ private ReadCollection getReadCollection() {
+ if (run != null) {
+ return run;
+ }
+ log.debug("Recovering SRA read collection. Accession: " + accession);
+ try {
+ return run = NGS.openReadCollection(accession.toString());
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ private Alignment getCurrentAlignment() throws ErrorMsg {
+ if (!isAligned) {
+ throw new RuntimeException("Should be called for aligned records only");
+ }
+ if (alignmentIterator == null) {
+ log.debug("Recovering SAM record after detaching from iterator. Alignment id: " + sraAlignmentId);
+ if (sraAlignmentId == null) {
+ throw new RuntimeException("Cannot recover SAM object after detaching from iterator: no alignment id");
+ }
+ alignmentIterator = getReadCollection().getAlignment(sraAlignmentId);
+ }
+ return alignmentIterator;
+ }
+ private Read getCurrentUnalignedRead() throws ErrorMsg {
+ if (isAligned) {
+ throw new RuntimeException("Should be called for unaligned records only");
+ }
+ if (unalignmentIterator == null) {
+ log.debug("Recovering SAM record after detaching from iterator. Read id: " + sraReadId + ", fragment index: " + unalignedReadFragmentIndex);
+ if (sraReadId == null) {
+ throw new RuntimeException("Cannot recover SAM object after detaching from iterator: no read id");
+ }
+ Read read = getReadCollection().getRead(sraReadId);
+ for (int i = 0; i < unalignedReadFragmentIndex + 1; i++) {
+ read.nextFragment();
+ }
+ unalignmentIterator = read;
+ }
+ return unalignmentIterator;
+ }
+ // ===== fields =====
+ private void loadFields() {
+ for (LazyField field : LazyField.values()) {
+ if (initializedFields.contains(field)) {
+ continue;
+ }
+ field.loadValue(this);
+ }
+ }
+ private int getAlignmentStartImpl() {
+ try {
+ if (isAligned) {
+ return (int) getCurrentAlignment().getAlignmentPosition() + 1;
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ private int getMappingQualityImpl() {
+ try {
+ if (isAligned) {
+ return getCurrentAlignment().getMappingQuality();
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ private String getReferenceNameImpl() {
+ try {
+ if (isAligned) {
+ return getCurrentAlignment().getReferenceSpec();
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ private String getCigarStringImpl() {
+ try {
+ if (isAligned) {
+ return getCurrentAlignment().getShortCigar(false);
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ private byte[] getReadBasesImpl() {
+ try {
+ if (isAligned) {
+ return getCurrentAlignment().getAlignedFragmentBases().getBytes();
+ } else {
+ return getCurrentUnalignedRead().getFragmentBases().getBytes();
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ private byte[] getBaseQualitiesImpl() {
+ try {
+ Fragment fragment;
+ if (isAligned) {
+ fragment = getCurrentAlignment();
+ } else {
+ fragment = getCurrentUnalignedRead();
+ }
+ // quals are being taken from PRIMARY_ALIGNMENT.SAM_QUALITY column which reverse automatically them if needed
+ return SAMUtils.fastqToPhred(fragment.getFragmentQualities());
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ private int getMateAlignmentStartImpl() {
+ try {
+ if (isAligned && getReadPairedFlag() && !getMateUnmappedFlag()) {
+ Alignment mate = getCurrentAlignment().getMateAlignment();
+ return (int) mate.getAlignmentPosition() + 1;
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ private String getMateReferenceNameImpl() {
+ try {
+ if (isAligned && getReadPairedFlag() && !getMateUnmappedFlag()) {
+ return getCurrentAlignment().getMateReferenceSpec();
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ private int getInferredInsertSizeImpl() {
+ try {
+ if (isAligned) {
+ return (int) getCurrentAlignment().getTemplateLength();
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ return 0;
+ }
+ // ===== flags =====
+ private boolean getReadNegativeStrandFlagImpl() {
+ try {
+ if (isAligned) {
+ return getCurrentAlignment().getIsReversedOrientation();
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ return false;
+ }
+ private boolean getReadPairedFlagImpl() {
+ try {
+ if (isAligned) {
+ return getCurrentAlignment().isPaired();
+ } else {
+ return getCurrentUnalignedRead().getNumFragments() > 1;
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ private boolean getProperPairFlagImpl() {
+ return isAligned && getReadPairedFlag() && !getMateUnmappedFlag();
+ }
+ private boolean getNotPrimaryAlignmentFlagImpl() {
+ try {
+ if (isAligned) {
+ return getCurrentAlignment().getAlignmentCategory() == Alignment.secondaryAlignment;
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ return false;
+ }
+ private boolean getMateNegativeStrandFlagImpl() {
+ try {
+ if (isAligned && getReadPairedFlag() && !getMateUnmappedFlag()) {
+ Alignment mate = getCurrentAlignment().getMateAlignment();
+ return mate.getIsReversedOrientation();
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ return false;
+ }
+ private boolean getMateUnmappedFlagImpl() {
+ try {
+ if (isAligned) {
+ return !getCurrentAlignment().hasMate();
+ } else {
+ Read unalignedRead = getCurrentUnalignedRead();
+ int numFragments = unalignedRead.getNumFragments();
+ int nextFragmentIdx = unalignedReadFragmentIndex + 1;
+ if (nextFragmentIdx == numFragments) {
+ nextFragmentIdx = 0;
+ }
+ return unalignedRead.fragmentIsAligned(nextFragmentIdx);
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ private boolean getFirstOfPairFlagImpl() {
+ if (!getReadPairedFlag()) {
+ return false;
+ }
+ try {
+ if (isAligned) {
+ String fragmentId = getCurrentAlignment().getFragmentId();
+ if (!fragmentId.contains(".FA")) {
+ throw new RuntimeException("Invalid fragment id: " + fragmentId);
+ }
+ return fragmentId.contains(".FA0.");
+ } else {
+ return unalignedReadFragmentIndex == 0;
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ private boolean getSecondOfPairFlagImpl() {
+ if (!getReadPairedFlag()) {
+ return false;
+ }
+ try {
+ if (isAligned) {
+ String fragmentId = getCurrentAlignment().getFragmentId();
+ if (!fragmentId.contains(".FA")) {
+ throw new RuntimeException("Invalid fragment id: " + fragmentId);
+ }
+ return !fragmentId.contains(".FA0.");
+ } else {
+ return unalignedReadFragmentIndex != 0;
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ // ===== attributes =====
+ private String getAttributeGroupNameImpl() {
+ try {
+ String readGroupName;
+ if (isAligned) {
+ readGroupName = getCurrentAlignment().getReadGroup();
+ } else {
+ readGroupName = getCurrentUnalignedRead().getReadGroup();
+ }
+ if (!readGroupName.isEmpty()) {
+ return readGroupName;
+ }
+ return getReadCollection().getName();
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
diff --git a/src/java/htsjdk/samtools/sra/SRAUnalignmentIterator.java b/src/java/htsjdk/samtools/sra/SRAUnalignmentIterator.java
new file mode 100644
index 0000000..f128a2b
--- /dev/null
+++ b/src/java/htsjdk/samtools/sra/SRAUnalignmentIterator.java
@@ -0,0 +1,181 @@
+* National Center for Biotechnology Information
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+* Please cite the author in any work or product based on this material.
+* ===========================================================================
+package htsjdk.samtools.sra;
+import htsjdk.samtools.Chunk;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SRAIterator;
+import htsjdk.samtools.ValidationStringency;
+import ngs.ErrorMsg;
+import ngs.Read;
+import ngs.ReadCollection;
+import ngs.ReadIterator;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+ * Iterator for unaligned reads.
+ * Is used from SRAIterator.
+ *
+ * Created by andrii.nikitiuk on 9/3/15.
+ */
+public class SRAUnalignmentIterator implements Iterator<SAMRecord> {
+ private ValidationStringency validationStringency;
+ private SRAAccession accession;
+ private ReadCollection run;
+ private SAMFileHeader header;
+ private SRAIterator.RecordRangeInfo recordRangeInfo;
+ private ReadIterator unalignedIterator;
+ private boolean hasMoreUnalignedReads = true;
+ private Boolean hasMoreUnalignedFragments = false;
+ private int lastUnalignedFragmentIndex;
+ private SRALazyRecord lastRecord;
+ /**
+ *
+ * @param run opened read collection
+ * @param header sam header
+ * @param recordRangeInfo info about record ranges withing SRA archive
+ * @param chunk used to determine which unaligned reads the iterator should return
+ */
+ public SRAUnalignmentIterator(SRAAccession accession, final ReadCollection run, final SAMFileHeader header, SRAIterator.RecordRangeInfo recordRangeInfo, Chunk chunk) {
+ this.accession = accession;
+ this.run = run;
+ this.header = header;
+ this.recordRangeInfo = recordRangeInfo;
+ long readStart = chunk.getChunkStart() - recordRangeInfo.getTotalReferencesLength();
+ if (readStart < 0) {
+ readStart = 0;
+ } else if (readStart >= recordRangeInfo.getNumberOfReads()) {
+ throw new RuntimeException("Invalid chunk provided: chunkStart position is after last read");
+ }
+ long readEnd = chunk.getChunkEnd() - recordRangeInfo.getTotalReferencesLength();
+ if (readEnd > recordRangeInfo.getNumberOfReads()) {
+ readEnd = recordRangeInfo.getNumberOfReads();
+ } else if (readEnd <= 0) {
+ throw new RuntimeException("Invalid chunk provided: chunkEnd position is before last read");
+ }
+ try {
+ unalignedIterator = run.getReadRange(readStart + 1, readEnd - readStart, Read.partiallyAligned | Read.unaligned);
+ nextUnalignedFragment();
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+ @Override
+ public boolean hasNext() {
+ // check unaligned
+ if (hasMoreUnalignedFragments == null) {
+ try {
+ lastRecord.detachFromIterator();
+ nextUnalignedFragment();
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ return hasMoreUnalignedFragments;
+ }
+ @Override
+ public SAMRecord next() {
+ if (!hasNext()) {
+ throw new NoSuchElementException("No more alignments are available");
+ }
+ return nextUnalignment();
+ }
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException("Removal of records not implemented.");
+ }
+ public void setValidationStringency(ValidationStringency validationStringency) {
+ this.validationStringency = validationStringency;
+ }
+ private SAMRecord nextUnalignment() {
+ try {
+ lastRecord = new SRALazyRecord(header, accession, run, unalignedIterator, unalignedIterator.getReadId(), lastUnalignedFragmentIndex);
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ if (validationStringency != null) {
+ lastRecord.setValidationStringency(validationStringency);
+ }
+ hasMoreUnalignedFragments = null;
+ return lastRecord;
+ }
+ private void nextUnalignedFragment() throws ErrorMsg {
+ while (hasMoreUnalignedFragments == null || hasMoreUnalignedFragments) {
+ hasMoreUnalignedFragments = unalignedIterator.nextFragment();
+ lastUnalignedFragmentIndex++;
+ if (hasMoreUnalignedFragments && !unalignedIterator.isAligned()) {
+ return;
+ }
+ }
+ if (!hasMoreUnalignedReads) {
+ throw new RuntimeException("Cannot get next unaligned read - already at last one");
+ }
+ while (true) {
+ hasMoreUnalignedReads = unalignedIterator.nextRead();
+ lastUnalignedFragmentIndex = -1;
+ if (!hasMoreUnalignedReads) {
+ break;
+ }
+ // search for unaligned fragment
+ do {
+ hasMoreUnalignedFragments = unalignedIterator.nextFragment();
+ lastUnalignedFragmentIndex++;
+ } while (hasMoreUnalignedFragments && unalignedIterator.isAligned());
+ // means that we found fragment
+ if (hasMoreUnalignedFragments) {
+ return;
+ }
+ }
+ }
diff --git a/src/java/htsjdk/samtools/sra/SRAUtils.java b/src/java/htsjdk/samtools/sra/SRAUtils.java
new file mode 100644
index 0000000..e72caa8
--- /dev/null
+++ b/src/java/htsjdk/samtools/sra/SRAUtils.java
@@ -0,0 +1,83 @@
+* National Center for Biotechnology Information
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+* Please cite the author in any work or product based on this material.
+* ===========================================================================
+package htsjdk.samtools.sra;
+import ngs.ErrorMsg;
+import ngs.Read;
+import ngs.ReadCollection;
+import ngs.ReferenceIterator;
+import java.util.ArrayList;
+import java.util.List;
+ * Provides some functionality which can be used by other classes
+ *
+ * Created by andrii.nikitiuk on 10/28/15.
+ */
+public class SRAUtils {
+ /**
+ * References are stored in SRA table in chunks of 5k bases per row, while last chunk of a reference is less or
+ * equal than 5k bases in size (even if the next reference follows).
+ * So, it will be optimal if we align reference sizes to 5k bases to read by reference rows.
+ */
+ public static final int REFERENCE_ALIGNMENT = 5000;
+ /**
+ * Is used to build RecordRangeInfo
+ * @param run open read collection
+ * @return total number of reads (both aligned and unaligned) in SRA archive
+ * @throws ErrorMsg
+ */
+ public static long getNumberOfReads(ReadCollection run) throws ErrorMsg {
+ return run.getReadCount(Read.all);
+ }
+ /**
+ * Loads reference lengths from a read collection.
+ * Aligns reference lengths by REFERENCE_ALIGNMENT bases for optimal loads of alignments
+ * (references are stored in REFERENCE_ALIGNMENT bases chunks in SRA table)
+ *
+ * Is used to build RecordRangeInfo
+ * @param run single opened read collection
+ * @return list with references lengths
+ * @throws ErrorMsg
+ */
+ public static List<Long> getReferencesLengthsAligned(ReadCollection run) throws ErrorMsg {
+ ReferenceIterator refIt = run.getReferences();
+ List<Long> lengths = new ArrayList<Long>();
+ while (refIt.nextReference()) {
+ long refLen = refIt.getLength();
+ // lets optimize references so they always align in 5000 bases positions
+ if (refLen % REFERENCE_ALIGNMENT != 0) {
+ }
+ lengths.add(refLen);
+ }
+ return lengths;
+ }
diff --git a/src/java/htsjdk/samtools/util/AbstractAsyncWriter.java b/src/java/htsjdk/samtools/util/AbstractAsyncWriter.java
index 5088890..bd2f654 100644
--- a/src/java/htsjdk/samtools/util/AbstractAsyncWriter.java
+++ b/src/java/htsjdk/samtools/util/AbstractAsyncWriter.java
@@ -64,7 +64,7 @@ public abstract class AbstractAsyncWriter<T> implements Closeable {
if (!this.isClosed.getAndSet(true)) {
try {
- this.writer.interrupt(); // signal to writer clean up
+ if (this.queue.isEmpty()) this.writer.interrupt(); // signal to writer clean up
} catch (final InterruptedException ie) {
throw new RuntimeException("Interrupted waiting on writer thread.", ie);
diff --git a/src/java/htsjdk/samtools/util/AbstractProgressLogger.java b/src/java/htsjdk/samtools/util/AbstractProgressLogger.java
index 9bc2dc7..5bd5e92 100644
--- a/src/java/htsjdk/samtools/util/AbstractProgressLogger.java
+++ b/src/java/htsjdk/samtools/util/AbstractProgressLogger.java
@@ -76,7 +76,7 @@ abstract public class AbstractProgressLogger implements ProgressLoggerInterface
public synchronized boolean record(final SAMRecord rec) {
- if (rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
+ if (SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(rec.getReferenceName())) {
return record(null, 0);
else {
diff --git a/src/java/htsjdk/samtools/util/BinaryCodec.java b/src/java/htsjdk/samtools/util/BinaryCodec.java
index aaf69bf..843c128 100644
--- a/src/java/htsjdk/samtools/util/BinaryCodec.java
+++ b/src/java/htsjdk/samtools/util/BinaryCodec.java
@@ -79,9 +79,9 @@ public class BinaryCodec implements Closeable {
private static final ByteOrder LITTLE_ENDIAN = ByteOrder.LITTLE_ENDIAN;
private static final byte NULL_BYTE[] = {0};
- private static final long MAX_UBYTE = (Byte.MAX_VALUE * 2) + 1;
- private static final long MAX_USHORT = (Short.MAX_VALUE * 2) + 1;
- private static final long MAX_UINT = ((long)Integer.MAX_VALUE * 2) + 1;
+ public static final long MAX_UBYTE = (Byte.MAX_VALUE * 2) + 1;
+ public static final long MAX_USHORT = (Short.MAX_VALUE * 2) + 1;
+ public static final long MAX_UINT = ((long)Integer.MAX_VALUE * 2) + 1;
// We never serialize more than this much at a time (except for Strings)
private static final int MAX_BYTE_BUFFER = 8;
@@ -101,10 +101,10 @@ public class BinaryCodec implements Closeable {
try {
this.isWriting = writing;
if (this.isWriting) {
- this.outputStream = new FileOutputStream(file);
+ this.outputStream = IOUtil.maybeBufferOutputStream(new FileOutputStream(file));
this.outputFileName = file.getName();
} else {
- this.inputStream = new FileInputStream(file);
+ this.inputStream = IOUtil.maybeBufferInputStream(new FileInputStream(file));
this.inputFileName = file.getName();
} catch (FileNotFoundException e) {
diff --git a/src/java/htsjdk/samtools/util/DiskBackedQueue.java b/src/java/htsjdk/samtools/util/DiskBackedQueue.java
index 0af6818..fd07f68 100644
--- a/src/java/htsjdk/samtools/util/DiskBackedQueue.java
+++ b/src/java/htsjdk/samtools/util/DiskBackedQueue.java
@@ -25,6 +25,7 @@
package htsjdk.samtools.util;
import htsjdk.samtools.Defaults;
+import htsjdk.samtools.SAMException;
import java.io.File;
import java.io.FileInputStream;
@@ -100,13 +101,13 @@ public class DiskBackedQueue<E> implements Queue<E> {
* Syntactic sugar around the ctor, to save some typing of type parameters
* @param codec For writing records to file and reading them back into RAM
- * @param maxRecordsInRAM how many records to accumulate in memory before spilling to disk
+ * @param maxRecordsInRam how many records to accumulate in memory before spilling to disk
* @param tmpDir Where to write files of records that will not fit in RAM
public static <T> DiskBackedQueue<T> newInstance(final SortingCollection.Codec<T> codec,
- final int maxRecordsInRAM,
+ final int maxRecordsInRam,
final List<File> tmpDir) {
- return new DiskBackedQueue<T>(codec, maxRecordsInRAM, tmpDir);
+ return new DiskBackedQueue<T>(codec, maxRecordsInRam, tmpDir);
public boolean canAdd() {
@@ -135,12 +136,14 @@ public class DiskBackedQueue<E> implements Queue<E> {
// NB: we add all the records before removing them, so we can never have spilled to disk unless all the space for ram records
// have been exhausted.
if (this.headRecord == null) { // this is the first record in the queue
+ if (0 < this.numRecordsOnDisk) throw new SAMException("Head record was null but we have records on disk. Bug!");
this.headRecord = record;
else if (this.ramRecords.size() == this.maxRecordsInRamQueue) {
else {
+ if (0 < this.numRecordsOnDisk) throw new SAMException("Trying to add records to RAM but there were records on disk. Bug!");
return true;
@@ -274,11 +277,14 @@ public class DiskBackedQueue<E> implements Queue<E> {
private void updateQueueHead() {
if (!this.ramRecords.isEmpty()) {
this.headRecord = this.ramRecords.poll();
+ if (0 < numRecordsOnDisk) this.canAdd = false;
else if (this.diskRecords != null) {
this.headRecord = this.readFileRecord(this.diskRecords);
+ this.canAdd = false;
else {
+ this.canAdd = true;
this.headRecord = null;
diff --git a/src/java/htsjdk/samtools/util/Histogram.java b/src/java/htsjdk/samtools/util/Histogram.java
index 4ebbdbd..f69408c 100644
--- a/src/java/htsjdk/samtools/util/Histogram.java
+++ b/src/java/htsjdk/samtools/util/Histogram.java
@@ -45,7 +45,6 @@ import static java.lang.Math.*;
public class Histogram<K extends Comparable> extends TreeMap<K, Bin> {
private String binLabel = "BIN";
private String valueLabel = "VALUE";
- private Double mean;
/** Constructs a new Histogram with default bin and value labels. */
public Histogram() { }
@@ -73,7 +72,6 @@ public class Histogram<K extends Comparable> extends TreeMap<K, Bin> {
this.binLabel = in.binLabel;
this.valueLabel = in.valueLabel;
- this.mean = in.mean;
/** Represents a bin in the Histogram. */
@@ -146,7 +144,6 @@ public class Histogram<K extends Comparable> extends TreeMap<K, Bin> {
bin.value += increment;
- mean = null;
public String getBinLabel() { return binLabel; }
@@ -164,12 +161,23 @@ public class Histogram<K extends Comparable> extends TreeMap<K, Bin> {
+ /**
+ * Assuming that the key type for the histogram is a Number type, returns the mean of
+ * all the items added to the histogram.
+ */
public double getMean() {
- if (mean == null) {
- mean = getSum() / getCount();
+ // Could use simply getSum() / getCount(), but that would require iterating over the
+ // values() set twice, which seems inefficient given how simply the computation is.
+ double product=0, totalCount=0;
+ for (final Bin bin : values()) {
+ final double idValue = bin.getIdValue();
+ final double count = bin.getValue();
+ product += idValue * count;
+ totalCount += count;
- return mean;
+ return product / totalCount;
diff --git a/src/java/htsjdk/samtools/util/IOUtil.java b/src/java/htsjdk/samtools/util/IOUtil.java
index 9e7427a..199c6d1 100644
--- a/src/java/htsjdk/samtools/util/IOUtil.java
+++ b/src/java/htsjdk/samtools/util/IOUtil.java
@@ -50,6 +50,8 @@ import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -320,16 +322,26 @@ public class IOUtil {
* @param file the file to check for readability
public static void assertFileIsReadable(final File file) {
- if (file == null) {
- throw new IllegalArgumentException("Cannot check readability of null file.");
- } else if (!file.exists()) {
- throw new SAMException("Cannot read non-existent file: " + file.getAbsolutePath());
+ assertFileIsReadable(file == null ? null : file.toPath());
+ }
+ /**
+ * Checks that a file is non-null, exists, is not a directory and is readable. If any
+ * condition is false then a runtime exception is thrown.
+ *
+ * @param path the file to check for readability
+ */
+ public static void assertFileIsReadable(final Path path) {
+ if (path == null) {
+ throw new IllegalArgumentException("Cannot check readability of null file.");
+ } else if (!Files.exists(path)) {
+ throw new SAMException("Cannot read non-existent file: " + path.toAbsolutePath());
- else if (file.isDirectory()) {
- throw new SAMException("Cannot read file because it is a directory: " + file.getAbsolutePath());
+ else if (Files.isDirectory(path)) {
+ throw new SAMException("Cannot read file because it is a directory: " + path.toAbsolutePath());
- else if (!file.canRead()) {
- throw new SAMException("File exists but is not readable: " + file.getAbsolutePath());
+ else if (!Files.isReadable(path)) {
+ throw new SAMException("File exists but is not readable: " + path.toAbsolutePath());
@@ -487,18 +499,28 @@ public class IOUtil {
* @return the input stream to read from
public static InputStream openFileForReading(final File file) {
+ return openFileForReading(file.toPath());
+ }
+ /**
+ * Opens a file for reading, decompressing it if necessary
+ *
+ * @param path The file to open
+ * @return the input stream to read from
+ */
+ public static InputStream openFileForReading(final Path path) {
try {
- if (file.getName().endsWith(".gz") ||
- file.getName().endsWith(".bfq")) {
- return openGzipFileForReading(file);
+ if (path.getFileName().toString().endsWith(".gz") ||
+ path.getFileName().toString().endsWith(".bfq")) {
+ return openGzipFileForReading(path);
else {
- return new FileInputStream(file);
+ return Files.newInputStream(path);
catch (IOException ioe) {
- throw new SAMException("Error opening file: " + file.getName(), ioe);
+ throw new SAMException("Error opening file: " + path, ioe);
@@ -510,12 +532,22 @@ public class IOUtil {
* @return the input stream to read from
public static InputStream openGzipFileForReading(final File file) {
+ return openGzipFileForReading(file.toPath());
+ }
+ /**
+ * Opens a GZIP-encoded file for reading, decompressing it if necessary
+ *
+ * @param path The file to open
+ * @return the input stream to read from
+ */
+ public static InputStream openGzipFileForReading(final Path path) {
try {
- return new GZIPInputStream(new FileInputStream(file));
+ return new GZIPInputStream(Files.newInputStream(path));
catch (IOException ioe) {
- throw new SAMException("Error opening file: " + file.getName(), ioe);
+ throw new SAMException("Error opening file: " + path, ioe);
diff --git a/src/java/htsjdk/samtools/util/Murmur3.java b/src/java/htsjdk/samtools/util/Murmur3.java
new file mode 100644
index 0000000..9372008
--- /dev/null
+++ b/src/java/htsjdk/samtools/util/Murmur3.java
@@ -0,0 +1,115 @@
+ * Copyright (C) 2011 The Guava Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain. The author hereby disclaims copyright to this source code.
+ *
+ * Source:
+ * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
+ * (Modified to adapt to Guava coding conventions and to use the HashFunction interface)
+ *
+ * Modified to remove stuff Clojure doesn't need, placed under clojure.lang namespace,
+ * all fns made static, added hashOrdered/Unordered
+ *
+ * Modified again by Tim Fennell to remove code not needed by HTSJDK, to make methods non-static (so that different uses can
+ * supply different seed values without colliding) and to comform to HTSJDK coding conventions where possible.
+ *
+ * @author Austin Appleby
+ * @author Dimitris Andreou
+ * @author Kurt Alfred Kluever
+ */
+package htsjdk.samtools.util;
+ * Provides an implementation of the Murmur3_32 hash algorithm that has desirable properties in terms of randomness
+ * and uniformity of the distribution of output values that make it a useful hashing algorithm for downsampling.
+ */
+public final class Murmur3 {
+ private final int seed ;
+ /** Constructs a Murmur3 hash with the given seed. */
+ public Murmur3(final int seed) {
+ this.seed = seed;
+ }
+ /** Hashes a character stream to an int using Murmur3. */
+ public int hashUnencodedChars(CharSequence input){
+ int h1 = this.seed;
+ // step through the CharSequence 2 chars at a time
+ final int length = input.length();
+ for(int i = 1; i < length; i += 2) {
+ int k1 = input.charAt(i - 1) | (input.charAt(i) << 16);
+ k1 = mixK1(k1);
+ h1 = mixH1(h1, k1);
+ }
+ // deal with any remaining characters
+ if((length & 1) == 1) {
+ int k1 = input.charAt(length - 1);
+ k1 = mixK1(k1);
+ h1 ^= k1;
+ }
+ return fmix(h1, 2 * length);
+ }
+ private int hashInt(int input){
+ if(input == 0) return 0;
+ int k1 = mixK1(input);
+ int h1 = mixH1(this.seed, k1);
+ return fmix(h1, 4);
+ }
+ private int hashLong(long input){
+ if(input == 0) return 0;
+ int low = (int) input;
+ int high = (int) (input >>> 32);
+ int k1 = mixK1(low);
+ int h1 = mixH1(this.seed, k1);
+ k1 = mixK1(high);
+ h1 = mixH1(h1, k1);
+ return fmix(h1, 8);
+ }
+ private static int mixK1(int k1){
+ final int c1 = 0xcc9e2d51;
+ final int c2 = 0x1b873593;
+ k1 *= c1;
+ k1 = Integer.rotateLeft(k1, 15);
+ k1 *= c2;
+ return k1;
+ }
+ private static int mixH1(int h1, int k1){
+ h1 ^= k1;
+ h1 = Integer.rotateLeft(h1, 13);
+ h1 = h1 * 5 + 0xe6546b64;
+ return h1;
+ }
+ // Finalization mix - force all bits of a hash block to avalanche
+ private static int fmix(int h1, int length){
+ h1 ^= length;
+ h1 ^= h1 >>> 16;
+ h1 *= 0x85ebca6b;
+ h1 ^= h1 >>> 13;
+ h1 *= 0xc2b2ae35;
+ h1 ^= h1 >>> 16;
+ return h1;
+ }
\ No newline at end of file
diff --git a/src/java/htsjdk/samtools/util/ProgressLogger.java b/src/java/htsjdk/samtools/util/ProgressLogger.java
index 8603dd4..6a293d6 100644
--- a/src/java/htsjdk/samtools/util/ProgressLogger.java
+++ b/src/java/htsjdk/samtools/util/ProgressLogger.java
@@ -47,6 +47,6 @@ public class ProgressLogger extends AbstractProgressLogger {
protected void log(final String... message) {
- log.info(message);
+ log.info((Object[])message);
diff --git a/src/java/htsjdk/samtools/util/SequenceUtil.java b/src/java/htsjdk/samtools/util/SequenceUtil.java
index 6594880..bd4bfdd 100644
--- a/src/java/htsjdk/samtools/util/SequenceUtil.java
+++ b/src/java/htsjdk/samtools/util/SequenceUtil.java
@@ -103,21 +103,43 @@ public class SequenceUtil {
- * Throws an exception only if both parameters are not null
+ * default signature that forces the lists to be the same size
* @param s1 a list of sequence headers
* @param s2 a second list of sequence headers
public static void assertSequenceListsEqual(final List<SAMSequenceRecord> s1, final List<SAMSequenceRecord> s2) {
+ assertSequenceListsEqual(s1, s2, false);
+ }
+ /**
+ * Throws an exception only if both (first) parameters are not null
+ * optionally check that one list is a (nonempty) prefix of the other.
+ *
+ * @param s1 a list of sequence headers
+ * @param s2 a second list of sequence headers
+ * @param checkPrefixOnly a flag specifying whether to only look at the first records in the lists. This will then check that the
+ * records of the smaller dictionary are equal to the records of the beginning of the larger dictionary, which can be useful since
+ * sometimes different pipelines choose to use only the first contigs of a standard reference.
+ */
+ public static void assertSequenceListsEqual(final List<SAMSequenceRecord> s1, final List<SAMSequenceRecord> s2, final boolean checkPrefixOnly) {
if (s1 != null && s2 != null) {
- if (s1.size() != s2.size()) {
- throw new SequenceListsDifferException(
- "Sequence dictionaries are not the same size (" + s1.size() + ", " + s2.size() +
- ")");
- }
+ final int sizeToTest;
- for (int i = 0; i < s1.size(); ++i) {
+ if (checkPrefixOnly) {
+ sizeToTest = Math.min(s1.size(), s2.size());
+ if (sizeToTest == 0) {
+ throw new SequenceListsDifferException("Neither of the dictionaries can be empty.");
+ }
+ } else {
+ sizeToTest = s1.size();
+ if (s1.size() != s2.size()) {
+ throw new SequenceListsDifferException(
+ "Sequence dictionaries are not the same size (" + s1.size() + ", " + s2.size() +
+ ")");
+ }
+ }
+ for (int i = 0; i < sizeToTest; ++i) {
if (!s1.get(i).isSameSequence(s2.get(i))) {
String s1Attrs = "";
for (final java.util.Map.Entry<String, String> entry : s1.get(i)
@@ -159,6 +181,9 @@ public class SequenceUtil {
* Returns true if both parameters are null or equal, otherwise returns false
+ *
+ * @param s1 a list of sequence headers
+ * @param s2 a second list of sequence headers
public static boolean areSequenceDictionariesEqual(final SAMSequenceDictionary s1, final SAMSequenceDictionary s2) {
if (s1 == null && s2 == null) return true;
@@ -174,10 +199,26 @@ public class SequenceUtil {
* Throws an exception if both parameters are non-null and unequal.
+ *
+ * @param s1 a list of sequence headers
+ * @param s2 a second list of sequence headers
public static void assertSequenceDictionariesEqual(final SAMSequenceDictionary s1, final SAMSequenceDictionary s2) {
+ assertSequenceDictionariesEqual(s1, s2, false);
+ }
+ /**
+ * Throws an exception if both (first) parameters are non-null and unequal (if checkPrefixOnly, checks prefix of lists only).
+ *
+ * @param s1 a list of sequence headers
+ * @param s2 a second list of sequence headers
+ * @param checkPrefixOnly a flag specifying whether to only look at the first records in the lists. This will then check that the
+ * records of the smaller dictionary are equal to the records of the beginning of the larger dictionary, which can be useful since
+ * sometimes different pipelines choose to use only the first contigs of a standard reference.
+ */
+ public static void assertSequenceDictionariesEqual(final SAMSequenceDictionary s1, final SAMSequenceDictionary s2, final boolean checkPrefixOnly) {
if (s1 == null || s2 == null) return;
- assertSequenceListsEqual(s1.getSequences(), s2.getSequences());
+ assertSequenceListsEqual(s1.getSequences(), s2.getSequences(), checkPrefixOnly);
diff --git a/src/java/htsjdk/samtools/util/StringUtil.java b/src/java/htsjdk/samtools/util/StringUtil.java
index 44a6aaf..e205bbf 100644
--- a/src/java/htsjdk/samtools/util/StringUtil.java
+++ b/src/java/htsjdk/samtools/util/StringUtil.java
@@ -84,7 +84,7 @@ public class StringUtil {
tokens[nTokens++] = aString;
return nTokens;
- while ((end > 0) && (nTokens < maxTokens))
+ while ((end >= 0) && (nTokens < maxTokens))
tokens[nTokens++] = aString.substring(start, end);
start = end + 1;
@@ -125,7 +125,7 @@ public class StringUtil {
tokens[nTokens++] = aString;
return nTokens;
- while ((end > 0) && (nTokens < maxTokens - 1))
+ while ((end >= 0) && (nTokens < maxTokens - 1))
tokens[nTokens++] = aString.substring(start, end);
start = end + 1;
diff --git a/src/java/htsjdk/variant/variantcontext/VariantContext.java b/src/java/htsjdk/variant/variantcontext/VariantContext.java
index 32db7b5..d2cc5af 100644
--- a/src/java/htsjdk/variant/variantcontext/VariantContext.java
+++ b/src/java/htsjdk/variant/variantcontext/VariantContext.java
@@ -1011,11 +1011,15 @@ public class VariantContext implements Feature, Serializable {
return getGenotypes().containsSample(sample);
+ /**
+ * @param ith the sample index
+ *
+ * @return the ith genotype in this context or null if there aren't that many genotypes
+ */
public Genotype getGenotype(int ith) {
- return genotypes.get(ith);
+ return genotypes.size() > ith ? genotypes.get(ith) : null;
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS)
diff --git a/src/java/htsjdk/variant/variantcontext/filter/CompoundFilter.java b/src/java/htsjdk/variant/variantcontext/filter/CompoundFilter.java
new file mode 100644
index 0000000..9a3724a
--- /dev/null
+++ b/src/java/htsjdk/variant/variantcontext/filter/CompoundFilter.java
@@ -0,0 +1,74 @@
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ */
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.variant.variantcontext.VariantContext;
+import java.util.ArrayList;
+ * A Predicate on VariantContexts that returns true when either all its sub-predicates are true, or none are false.
+ *
+ * @author Yossi Farjoun
+ */
+public class CompoundFilter extends ArrayList<VariantContextFilter> implements VariantContextFilter {
+ final boolean requireAll;
+ /**
+ * A constructor that will determine if this compound filter will require that *all* the included filters pass
+ * or *some* of them pass (depending on the requireAll parameter in the constructor).
+ *
+ * @param requireAll a boolean parameter determining whether this filter requires all its elements to pass (true) for
+ * it to pass, or only one (false). If there are no variantfilters it will return true.
+ */
+ public CompoundFilter(final boolean requireAll) {
+ super();
+ this.requireAll = requireAll;
+ }
+ /**
+ * @param variantContext the record to examine against the sub-filters
+ * @return true if variantContext either passes all the filters (when requireAll==true)
+ * or doesn't fail any of the filters (when requireAll==false)
+ */
+ @Override
+ public boolean test(final VariantContext variantContext) {
+ if (requireAll) {
+ for (final VariantContextFilter filter : this) {
+ if (!filter.test(variantContext)) return false;
+ }
+ return true;
+ } else {
+ for (final VariantContextFilter filter : this) {
+ if (filter.test(variantContext)) return true;
+ }
+ return isEmpty();
+ }
+ }
diff --git a/src/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java b/src/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java
new file mode 100644
index 0000000..c5b943f
--- /dev/null
+++ b/src/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java
@@ -0,0 +1,127 @@
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ */
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.samtools.util.CloserUtil;
+import htsjdk.variant.variantcontext.VariantContext;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+ * A filtering iterator for VariantContexts that takes a base iterator and a VariantContextFilter.
+ *
+ * The iterator returns all the variantcontexts for which the filter's function "test" returns true (and only those)
+ *
+ * @author Yossi Farjoun
+ */
+public class FilteringIterator implements CloseableIterator<VariantContext>, Iterable<VariantContext>{
+ private final Iterator<VariantContext> iterator;
+ private final VariantContextFilter filter;
+ private VariantContext next = null;
+ /**
+ * Constructor of an iterator based on the provided iterator and predicate. The resulting
+ * records will be all those VariantContexts from iterator for which filter.test( . ) is true
+ *
+ * @param iterator the backing iterator
+ * @param filter the filter
+ */
+ public FilteringIterator(final Iterator<VariantContext> iterator, final VariantContextFilter filter) {
+ this.iterator = iterator;
+ this.filter = filter;
+ next = getNextVC();
+ }
+ @Override
+ public void close() {
+ CloserUtil.close(iterator);
+ }
+ /**
+ * Returns true if the iteration has more elements.
+ *
+ * @return true if the iteration has more elements. Otherwise returns false.
+ */
+ @Override
+ public boolean hasNext() {
+ return next != null;
+ }
+ /**
+ * Returns the next element in the iteration.
+ *
+ * @return the next element in the iteration
+ * @throws NoSuchElementException if there are no more elements to return
+ *
+ */
+ @Override
+ public VariantContext next() throws NoSuchElementException {
+ if (next == null) {
+ throw new NoSuchElementException("Iterator has no more elements.");
+ }
+ final VariantContext result = next;
+ next = getNextVC();
+ return result;
+ }
+ /**
+ * Required method for Iterator API.
+ *
+ * @throws UnsupportedOperationException since it is unsupported here.
+ */
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException("Remove() not supported by FilteringIterator");
+ }
+ /**
+ * Gets the next record from the underlying iterator that passes the filter
+ *
+ * @return VariantContext the next filter-passing record
+ */
+ private VariantContext getNextVC() {
+ while (iterator.hasNext()) {
+ final VariantContext record = iterator.next();
+ if (filter.test(record)) {
+ return record;
+ }
+ }
+ return null;
+ }
+ /**
+ * function to satisfy the Iterable interface
+ *
+ * @return itself since the class inherits from Iterator
+ */
+ @Override
+ public Iterator<VariantContext> iterator() {
+ return this;
+ }
diff --git a/src/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilter.java b/src/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilter.java
new file mode 100644
index 0000000..862dcce
--- /dev/null
+++ b/src/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilter.java
@@ -0,0 +1,79 @@
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ */
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.variant.variantcontext.Genotype;
+import htsjdk.variant.variantcontext.VariantContext;
+ * A Predicate on VariantContexts that returns true at sites that are either unfiltered, or passing (as variants).
+ *
+ * @author Yossi Farjoun
+ */
+public class GenotypeQualityFilter implements VariantContextFilter {
+ final private String sample;
+ final private int gqThreshold;
+ /**
+ * Constructor for a filter that will keep VC for which the
+ * genotype quality (GQ) of sample passes a threshold. If sample is null, the first genotype in the
+ * variant context will be used.
+ *
+ * @param gqThreshold the smallest value of GQ that this filter will pass
+ * @param sample the name of the sample in the variant context whose genotype should be examined.
+ */
+ public GenotypeQualityFilter(final int gqThreshold, final String sample ) {
+ this.sample = sample;
+ this.gqThreshold = gqThreshold;
+ }
+ /**
+ * Constructor as above that doesn't take a sample, instead it will look at the first genotype of the variant context.
+ * @param gqThreshold the smallest value of GQ that this filter will pass
+ */
+ public GenotypeQualityFilter(final int gqThreshold) {
+ this( gqThreshold, null);
+ }
+ /**
+ * @return true if variantContext is to be kept, otherwise false
+ * Assumes that this.sample is a sample in the variantContext, if not null,
+ * otherwise looks for the first genotype (and assumes it exists).
+ * @param variantContext the record to examine for GQ
+ */
+ @Override
+ public boolean test(final VariantContext variantContext) {
+ final Genotype gt = (sample == null) ? variantContext.getGenotype(0) : variantContext.getGenotype(sample);
+ if (gt == null) {
+ throw new IllegalArgumentException((sample == null) ?
+ "Cannot find any genotypes in VariantContext: " + variantContext :
+ "Cannot find sample requested: " + sample);
+ }
+ return gt.getGQ() >= gqThreshold;
+ }
diff --git a/src/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilter.java b/src/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilter.java
new file mode 100644
index 0000000..0675b25
--- /dev/null
+++ b/src/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilter.java
@@ -0,0 +1,84 @@
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ */
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.variant.variantcontext.Genotype;
+import htsjdk.variant.variantcontext.VariantContext;
+ * A Predicate on VariantContexts that either returns true at heterozygous sites (invertible to false).
+ * if optional "sample" argument to constructor is given, the genotype of that sample will be examined,
+ * otherwise first genotype will be used.
+ *
+ * Missing sample, or no genotype will result in an exception being thrown.
+ *
+ * @author Yossi Farjoun
+ */
+public class HeterozygosityFilter implements VariantContextFilter {
+ final private String sample;
+ final private boolean keepHets;
+ /**
+ * Constructor for a filter that will keep (or remove, if keepHets is false) VC for which the
+ * genotype of sample is heterozygous. If sample is null, the first genotype in the
+ * variant context will be used.
+ *
+ * @param keepHets determine whether to keep the het sites (true) or filter them out (false)
+ * @param sample the name of the sample in the variant context whose genotype should be examined.
+ */
+ public HeterozygosityFilter(final boolean keepHets, final String sample) {
+ this.keepHets = keepHets;
+ this.sample = sample;
+ }
+ /**
+ * Constructor as above that doesn't take a sample, instead it will look at the first genotype of the variant context.
+ * @param keepHets if true, the heterozygous variant contexts will pass the filter, otherwise they will fail.
+ */
+ public HeterozygosityFilter(final boolean keepHets) {
+ this(keepHets, null);
+ }
+ /**
+ * @return true if variantContext is to be kept, otherwise false
+ * Assumes that this.sample is a sample in the variantContext, if not null,
+ * otherwise looks for the first genotype (and assumes it exists).
+ * @param variantContext the record to examine for heterozygosity
+ */
+ @Override
+ public boolean test(final VariantContext variantContext) {
+ final Genotype gt = (sample == null) ? variantContext.getGenotype(0) : variantContext.getGenotype(sample);
+ if (gt == null) {
+ throw new IllegalArgumentException((sample == null) ?
+ "Cannot find any genotypes in VariantContext: " + variantContext :
+ "Cannot find sample requested: " + sample);
+ }
+ //XOR operator to reverse behaviour if keepHets is true.
+ return gt.isHet() ^ !keepHets;
+ }
diff --git a/src/java/htsjdk/samtools/SAMTag.java b/src/java/htsjdk/variant/variantcontext/filter/PassingVariantFilter.java
similarity index 60%
copy from src/java/htsjdk/samtools/SAMTag.java
copy to src/java/htsjdk/variant/variantcontext/filter/PassingVariantFilter.java
index 7dac5a2..f24678f 100644
--- a/src/java/htsjdk/samtools/SAMTag.java
+++ b/src/java/htsjdk/variant/variantcontext/filter/PassingVariantFilter.java
@@ -1,7 +1,7 @@
* The MIT License
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,59 +21,24 @@
-package htsjdk.samtools;
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.variant.variantcontext.VariantContext;
- * The standard tags for a SAM record that are defined in the SAM spec.
+ * A Predicate on VariantContexts that returns true at sites that are either unfiltered, or passing (as variants).
+ *
+ * @author Yossi Farjoun
-public enum SAMTag {
- AM,
- AS,
- BC,
- BQ,
- CC,
- CM,
- CO,
- CP,
- CQ,
- CS,
- CT,
- E2,
- FI,
- FS,
- FZ,
- GC, // for backwards compatibility
- GS, // for backwards compatibility
- GQ, // for backwards compatibility
- LB,
- H0,
- H1,
- H2,
- HI,
- IH,
- MC,
- MF, // for backwards compatibility
- MD,
- MQ,
- NH,
- NM,
- OQ,
- OP,
- OC,
- PG,
- PQ,
- PT,
- PU,
- QT,
- Q2,
- R2,
- RG,
- RT,
- S2, // for backwards compatibility
- SA,
- SM,
- SQ, // for backwards compatibility
- TC,
- U2,
- UQ
+public class PassingVariantFilter implements VariantContextFilter {
+ /**
+ * @return true if variantContext is a SNP
+ * @param variantContext the record to examine for being a SNP
+ */
+ @Override
+ public boolean test(final VariantContext variantContext) {
+ return variantContext.isNotFiltered();
+ }
diff --git a/src/java/htsjdk/samtools/SAMTag.java b/src/java/htsjdk/variant/variantcontext/filter/SnpFilter.java
similarity index 60%
copy from src/java/htsjdk/samtools/SAMTag.java
copy to src/java/htsjdk/variant/variantcontext/filter/SnpFilter.java
index 7dac5a2..4d8b17b 100644
--- a/src/java/htsjdk/samtools/SAMTag.java
+++ b/src/java/htsjdk/variant/variantcontext/filter/SnpFilter.java
@@ -1,7 +1,7 @@
* The MIT License
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,59 +21,24 @@
-package htsjdk.samtools;
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.variant.variantcontext.VariantContext;
- * The standard tags for a SAM record that are defined in the SAM spec.
+ * A Predicate on VariantContexts that returns true at sites that are SNPs
+ *
+ * @author Yossi Farjoun
-public enum SAMTag {
- AM,
- AS,
- BC,
- BQ,
- CC,
- CM,
- CO,
- CP,
- CQ,
- CS,
- CT,
- E2,
- FI,
- FS,
- FZ,
- GC, // for backwards compatibility
- GS, // for backwards compatibility
- GQ, // for backwards compatibility
- LB,
- H0,
- H1,
- H2,
- HI,
- IH,
- MC,
- MF, // for backwards compatibility
- MD,
- MQ,
- NH,
- NM,
- OQ,
- OP,
- OC,
- PG,
- PQ,
- PT,
- PU,
- QT,
- Q2,
- R2,
- RG,
- RT,
- S2, // for backwards compatibility
- SA,
- SM,
- SQ, // for backwards compatibility
- TC,
- U2,
- UQ
+public class SnpFilter implements VariantContextFilter {
+ /**
+ * @return true if variantContext is a SNP
+ * @param variantContext the record to examine for being a SNP
+ */
+ @Override
+ public boolean test(final VariantContext variantContext) {
+ return variantContext.isSNP();
+ }
diff --git a/src/java/htsjdk/samtools/SAMTag.java b/src/java/htsjdk/variant/variantcontext/filter/VariantContextFilter.java
similarity index 60%
copy from src/java/htsjdk/samtools/SAMTag.java
copy to src/java/htsjdk/variant/variantcontext/filter/VariantContextFilter.java
index 7dac5a2..451dc63 100644
--- a/src/java/htsjdk/samtools/SAMTag.java
+++ b/src/java/htsjdk/variant/variantcontext/filter/VariantContextFilter.java
@@ -1,7 +1,7 @@
* The MIT License
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,59 +21,23 @@
-package htsjdk.samtools;
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.variant.variantcontext.VariantContext;
- * The standard tags for a SAM record that are defined in the SAM spec.
+ *
+ * API for filtering VariantContexts
+ *
+ * @author Yossi Farjoun
+ *
-public enum SAMTag {
- AM,
- AS,
- BC,
- BQ,
- CC,
- CM,
- CO,
- CP,
- CQ,
- CS,
- CT,
- E2,
- FI,
- FS,
- FZ,
- GC, // for backwards compatibility
- GS, // for backwards compatibility
- GQ, // for backwards compatibility
- LB,
- H0,
- H1,
- H2,
- HI,
- IH,
- MC,
- MF, // for backwards compatibility
- MD,
- MQ,
- NH,
- NM,
- OQ,
- OP,
- OC,
- PG,
- PQ,
- PT,
- PU,
- QT,
- Q2,
- R2,
- RG,
- RT,
- S2, // for backwards compatibility
- SA,
- SM,
- SQ, // for backwards compatibility
- TC,
- U2,
- UQ
+public interface VariantContextFilter {
+ /**
+ * Determines whether a VariantContext matches this filter
+ *
+ * @param record the VariantContext to evaluate
+ * @return true if the VariantContext matches the filter, otherwise false
+ */
+ boolean test(VariantContext record);
diff --git a/src/java/htsjdk/variant/variantcontext/writer/BCF2FieldEncoder.java b/src/java/htsjdk/variant/variantcontext/writer/BCF2FieldEncoder.java
index fab5095..7d1f0de 100644
--- a/src/java/htsjdk/variant/variantcontext/writer/BCF2FieldEncoder.java
+++ b/src/java/htsjdk/variant/variantcontext/writer/BCF2FieldEncoder.java
@@ -116,65 +116,56 @@ public abstract class BCF2FieldEncoder {
- * True if this field has a constant, fixed number of elements (such as 1 for an atomic integer)
- *
- * @return
+ * @return True if this field has a constant, fixed number of elements (such as 1 for an atomic integer)
public boolean hasConstantNumElements() {
return getCountType() == VCFHeaderLineCount.INTEGER;
- * True if the only way to determine how many elements this field contains is by
+ * @return True if the only way to determine how many elements this field contains is by
* inspecting the actual value directly, such as when the number of elements
* is a variable length list per site or per genotype.
- * @return
public boolean hasValueDeterminedNumElements() {
return getCountType() == VCFHeaderLineCount.UNBOUNDED;
- * True if this field has a non-fixed number of elements that depends only on the properties
+ * @return True if this field has a non-fixed number of elements that depends only on the properties
* of the current VariantContext, such as one value per Allele or per genotype configuration.
- *
- * @return
public boolean hasContextDeterminedNumElements() {
return ! hasConstantNumElements() && ! hasValueDeterminedNumElements();
- * Get the number of elements, assuming this field has a constant number of elements.
- * @return
+ * @return the number of elements, assuming this field has a constant number of elements.
public int numElements() {
return headerLine.getCount();
- * Get the number of elements by looking at the actual value provided
- * @return
+ * @return the number of elements by looking at the actual value provided
public int numElements(final Object value) {
return numElementsFromValue(value);
- * Get the number of elements, assuming this field has context-determined number of elements.
- * @return
+ * @return the number of elements, assuming this field has context-determined number of elements.
public int numElements(final VariantContext vc) {
return headerLine.getCount(vc);
- * A convenience access for the number of elements, returning
- * the number of encoded elements, either from the fixed number
- * it has, from the VC, or from the value itself.
+ * A convenience access for the number of elements.
* @param vc
* @param value
- * @return
+ * @return the number of encoded elements, either from the fixed number
+ * it has, from the VC, or from the value itself.
public final int numElements(final VariantContext vc, final Object value) {
if ( hasConstantNumElements() ) return numElements();
@@ -188,7 +179,7 @@ public abstract class BCF2FieldEncoder {
* Assumes the value is encoded as a List
* @param value
- * @return
+ * @return the number of elements we will encode for {@param value}.
protected int numElementsFromValue(final Object value) {
if ( value == null ) return 0;
@@ -205,14 +196,14 @@ public abstract class BCF2FieldEncoder {
* Is the BCF2 type of this field static, or does it have to be determine from
* the actual field value itself?
- * @return
+ * @return true if the field is static
public final boolean isStaticallyTyped() { return ! isDynamicallyTyped(); }
* Is the BCF2 type of this field static, or does it have to be determine from
* the actual field value itself?
- * @return
+ * @return true if the field is not static
public final boolean isDynamicallyTyped() { return staticType == null; }
@@ -220,7 +211,7 @@ public abstract class BCF2FieldEncoder {
* Get the BCF2 type for this field, either from the static type of the
* field itself or by inspecting the value itself.
- * @return
+ * @return the BCF2 type for this field
public final BCF2Type getType(final Object value) {
return isDynamicallyTyped() ? getDynamicType(value) : getStaticType();
diff --git a/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java b/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
index 5e5eb95..4e95888 100644
--- a/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
+++ b/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
@@ -125,7 +125,7 @@ public class VariantContextWriterBuilder {
private IndexCreator idxCreator = null;
private int bufferSize = Defaults.BUFFER_SIZE;
private boolean createMD5 = Defaults.CREATE_MD5;
- private EnumSet<Options> options = DEFAULT_OPTIONS.clone();
+ protected EnumSet<Options> options = DEFAULT_OPTIONS.clone();
* Default constructor. Adds <code>USE_ASYNC_IO</code> to the Options if it is present in Defaults.
@@ -338,12 +338,33 @@ public class VariantContextWriterBuilder {
+ * Add one option to the set of default <code>Options</code> that will be used as the initial set of options
+ * for all VariantContextWriterBuilders created after this call.
+ *
+ * @param option the option to set
+ */
+ public static void setDefaultOption(final Options option) {
+ VariantContextWriterBuilder.DEFAULT_OPTIONS.add(option);
+ }
+ /**
+ * Remove an option from the set of default <code>Options</code> that will be used as the initial set of options
+ * for all VariantContextWriterBuilders created after this call.
+ *
+ * @param option the option to unset
+ * @return this <code>VariantContextWriterBuilder</code>
+ */
+ public static void unsetDefaultOption(final Options option) {
+ VariantContextWriterBuilder.DEFAULT_OPTIONS.remove(option);
+ }
+ /**
* Remove all options from the set of <code>Options</code> for the <code>VariantContextWriterBuilder</code>.
* @return this VariantContextWriterBuilder
public VariantContextWriterBuilder clearOptions() {
- this.options = NO_OPTIONS;
+ this.options = NO_OPTIONS.clone();
return this;
diff --git a/src/java/htsjdk/variant/vcf/VCFRecordCodec.java b/src/java/htsjdk/variant/vcf/VCFRecordCodec.java
index cddfa22..8fe9b67 100644
--- a/src/java/htsjdk/variant/vcf/VCFRecordCodec.java
+++ b/src/java/htsjdk/variant/vcf/VCFRecordCodec.java
@@ -16,20 +16,20 @@ import java.io.PrintStream;
* with SortingCollection ONLY.
public class VCFRecordCodec implements SortingCollection.Codec<VariantContext> {
- final VCFCodec vcfDecoder = new VCFCodec();
- final VCFEncoder vcfEncoder;
+ private final VCFCodec vcfDecoder = new VCFCodec();
+ private final VCFEncoder vcfEncoder;
private PrintStream outputStream = null;
private BufferedReader inputReader = null;
public VCFRecordCodec(final VCFHeader header) {
- this.vcfEncoder = new VCFEncoder(header, false, false);
+ this(header, false);
+ }
+ public VCFRecordCodec(final VCFHeader header, final boolean allowMissingFieldsInHeader) {
+ this.vcfEncoder = new VCFEncoder(header, allowMissingFieldsInHeader, false);
// Explicitly set the version because it's not available in the header itself.
this.vcfDecoder.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
- }
+ }
public void setOutputStream(final OutputStream stream) {
@@ -58,7 +58,7 @@ public class VCFRecordCodec implements SortingCollection.Codec<VariantContext> {
public VCFRecordCodec clone() {
- return new VCFRecordCodec(this.vcfEncoder.getVCFHeader());
+ return new VCFRecordCodec(this.vcfEncoder.getVCFHeader(), this.vcfEncoder.getAllowMissingFieldsInHeader());
diff --git a/src/tests/java/htsjdk/samtools/BAMFileWriterTest.java b/src/tests/java/htsjdk/samtools/BAMFileWriterTest.java
index 43fe3a9..4504ddc 100644
--- a/src/tests/java/htsjdk/samtools/BAMFileWriterTest.java
+++ b/src/tests/java/htsjdk/samtools/BAMFileWriterTest.java
@@ -37,7 +37,7 @@ import java.io.File;
public class BAMFileWriterTest {
- private SAMRecordSetBuilder getSAMReader(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) {
+ private SAMRecordSetBuilder getRecordSetBuilder(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) {
final SAMRecordSetBuilder ret = new SAMRecordSetBuilder(sortForMe, sortOrder);
ret.addPair("readB", 20, 200, 300);
ret.addPair("readA", 20, 100, 150);
@@ -55,7 +55,7 @@ public class BAMFileWriterTest {
* @param presorted If true, samText is in the order specified by sortOrder
private void testHelper(final SAMRecordSetBuilder samRecordSetBuilder, final SAMFileHeader.SortOrder sortOrder, final boolean presorted) throws Exception {
- SamReader samReader = samRecordSetBuilder.getSamReader();
+ final SamReader samReader = samRecordSetBuilder.getSamReader();
final File bamFile = File.createTempFile("test.", BamFileIoUtils.BAM_FILE_EXTENSION);
@@ -68,43 +68,47 @@ public class BAMFileWriterTest {
- if (presorted) {
- // If SAM text input was presorted, then we can compare SAM object to BAM object
- final SamReader bamReader = SamReaderFactory.makeDefault().open(bamFile);
- samReader = samRecordSetBuilder.getSamReader();
- samReader.getFileHeader().setSortOrder(bamReader.getFileHeader().getSortOrder());
- Assert.assertEquals(bamReader.getFileHeader(), samReader.getFileHeader());
- it = samReader.iterator();
- final CloseableIterator<SAMRecord> bamIt = bamReader.iterator();
- while (it.hasNext()) {
- Assert.assertTrue(bamIt.hasNext());
- final SAMRecord samRecord = it.next();
- final SAMRecord bamRecord = bamIt.next();
- // SAMRecords don't have this set, so stuff it in there
- samRecord.setIndexingBin(bamRecord.getIndexingBin());
- // Force reference index attributes to be populated
- samRecord.getReferenceIndex();
- bamRecord.getReferenceIndex();
- samRecord.getMateReferenceIndex();
- bamRecord.getMateReferenceIndex();
- Assert.assertEquals(bamRecord, samRecord);
- }
- Assert.assertFalse(bamIt.hasNext());
+ if (presorted) { // If SAM text input was presorted, then we can compare SAM object to BAM object
+ verifyBAMFile(samRecordSetBuilder, bamFile);
+ }
+ }
+ private void verifyBAMFile(final SAMRecordSetBuilder samRecordSetBuilder, final File bamFile) {
+ final SamReader bamReader = SamReaderFactory.makeDefault().open(bamFile);
+ final SamReader samReader = samRecordSetBuilder.getSamReader();
+ samReader.getFileHeader().setSortOrder(bamReader.getFileHeader().getSortOrder());
+ Assert.assertEquals(bamReader.getFileHeader(), samReader.getFileHeader());
+ final CloseableIterator<SAMRecord> it = samReader.iterator();
+ final CloseableIterator<SAMRecord> bamIt = bamReader.iterator();
+ while (it.hasNext()) {
+ Assert.assertTrue(bamIt.hasNext());
+ final SAMRecord samRecord = it.next();
+ final SAMRecord bamRecord = bamIt.next();
+ // SAMRecords don't have this set, so stuff it in there
+ samRecord.setIndexingBin(bamRecord.getIndexingBin());
+ // Force reference index attributes to be populated
+ samRecord.getReferenceIndex();
+ bamRecord.getReferenceIndex();
+ samRecord.getMateReferenceIndex();
+ bamRecord.getMateReferenceIndex();
+ Assert.assertEquals(bamRecord, samRecord);
+ Assert.assertFalse(bamIt.hasNext());
@DataProvider(name = "test1")
public Object[][] createTestData() {
return new Object[][]{
- {"coordinate sorted", getSAMReader(false, SAMFileHeader.SortOrder.unsorted), SAMFileHeader.SortOrder.coordinate, false},
- {"query sorted", getSAMReader(false, SAMFileHeader.SortOrder.unsorted), SAMFileHeader.SortOrder.queryname, false},
- {"unsorted", getSAMReader(false, SAMFileHeader.SortOrder.unsorted), SAMFileHeader.SortOrder.unsorted, false},
- {"coordinate presorted", getSAMReader(true, SAMFileHeader.SortOrder.coordinate), SAMFileHeader.SortOrder.coordinate, true},
- {"query presorted", getSAMReader(true, SAMFileHeader.SortOrder.queryname), SAMFileHeader.SortOrder.queryname, true},
+ {"coordinate sorted", getRecordSetBuilder(false, SAMFileHeader.SortOrder.unsorted), SAMFileHeader.SortOrder.coordinate, false},
+ {"query sorted", getRecordSetBuilder(false, SAMFileHeader.SortOrder.unsorted), SAMFileHeader.SortOrder.queryname, false},
+ {"unsorted", getRecordSetBuilder(false, SAMFileHeader.SortOrder.unsorted), SAMFileHeader.SortOrder.unsorted, false},
+ {"coordinate presorted", getRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate), SAMFileHeader.SortOrder.coordinate, true},
+ {"query presorted", getRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname), SAMFileHeader.SortOrder.queryname, true},
@@ -114,10 +118,73 @@ public class BAMFileWriterTest {
testHelper(samRecordSetBuilder, order, presorted);
+ @Test(dataProvider = "test1")
+ public void testNullRecordHeaders(final String testName, final SAMRecordSetBuilder samRecordSetBuilder, final SAMFileHeader.SortOrder order, final boolean presorted) throws Exception {
+ // test that BAMFileWriter can write records that have a null header
+ final SAMFileHeader samHeader = samRecordSetBuilder.getHeader();
+ for (SAMRecord rec : samRecordSetBuilder.getRecords()) {
+ rec.setHeader(null);
+ }
+ // make sure the records can actually be written out
+ final File bamFile = File.createTempFile("test.", BamFileIoUtils.BAM_FILE_EXTENSION);
+ bamFile.deleteOnExit();
+ samHeader.setSortOrder(order);
+ final SAMFileWriter bamWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(samHeader, presorted, bamFile);
+ for (final SAMRecord rec : samRecordSetBuilder.getRecords()) {
+ bamWriter.addAlignment(rec);
+ }
+ bamWriter.close();
+ if (presorted) {
+ verifyBAMFile(samRecordSetBuilder, bamFile);
+ }
+ }
+ @Test
+ public void testNullRecordsMismatchedHeader() throws Exception {
+ final SAMRecordSetBuilder samRecordSetBuilder = getRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname);
+ for (final SAMRecord rec : samRecordSetBuilder.getRecords()) {
+ rec.setHeader(null);
+ }
+ // create a fake header to make sure the records can still be written using an invalid
+ // sequence dictionary and unresolvable references
+ final SAMFileHeader fakeHeader = new SAMFileHeader();
+ fakeHeader.setSortOrder(SAMFileHeader.SortOrder.queryname);
+ final File bamFile = File.createTempFile("test.", BamFileIoUtils.BAM_FILE_EXTENSION);
+ bamFile.deleteOnExit();
+ final SAMFileWriter bamWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(fakeHeader, false, bamFile);
+ for (SAMRecord rec : samRecordSetBuilder.getRecords()) {
+ bamWriter.addAlignment(rec);
+ }
+ bamWriter.close();
+ final SamReader bamReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(bamFile);
+ final SamReader samReader = samRecordSetBuilder.getSamReader();
+ samReader.getFileHeader().setSortOrder(bamReader.getFileHeader().getSortOrder());
+ final CloseableIterator<SAMRecord> it = samReader.iterator();
+ final CloseableIterator<SAMRecord> bamIt = bamReader.iterator();
+ while (it.hasNext()) {
+ Assert.assertTrue(bamIt.hasNext());
+ final SAMRecord samRecord = it.next();
+ final SAMRecord bamRecord = bamIt.next();
+ // test only reference names since we'll have lost reference indices due to the fake null header
+ Assert.assertEquals(bamRecord.getReferenceName(), samRecord.getReferenceName());
+ Assert.assertEquals(bamRecord.getAlignmentStart(), samRecord.getAlignmentStart());
+ }
+ Assert.assertFalse(bamIt.hasNext());
+ CloserUtil.close(samReader);
+ }
@Test(expectedExceptions = IllegalArgumentException.class)
public void testNegativePresorted() throws Exception {
- testHelper(getSAMReader(true, SAMFileHeader.SortOrder.coordinate), SAMFileHeader.SortOrder.queryname, true);
+ testHelper(getRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate), SAMFileHeader.SortOrder.queryname, true);
Assert.fail("Exception should be thrown");
diff --git a/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java b/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java
index bc6e752..ae23787 100644
--- a/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java
@@ -2,6 +2,7 @@ package htsjdk.samtools;
import htsjdk.samtools.cram.common.CramVersions;
import htsjdk.samtools.cram.ref.ReferenceSource;
+import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.Log;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
@@ -105,7 +106,7 @@ public class CRAMComplianceTest {
- CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), null, source, ValidationStringency.SILENT);
+ CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), (SeekableStream)null, source, ValidationStringency.SILENT);
SAMRecordIterator cramFileReaderIterator = cramFileReader.getIterator();
for (SAMRecord samRecord : samRecords) {
@@ -116,7 +117,7 @@ public class CRAMComplianceTest {
if (t.cramFile_21.exists()) {
- cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_21), null, source, ValidationStringency.SILENT);
+ cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_21), (SeekableStream)null, source, ValidationStringency.SILENT);
cramFileReaderIterator = cramFileReader.getIterator();
for (SAMRecord samRecord : samRecords) {
@@ -128,7 +129,7 @@ public class CRAMComplianceTest {
if (t.cramFile_30.exists()) {
- cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_30), null, source, ValidationStringency.SILENT);
+ cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_30), (SeekableStream)null, source, ValidationStringency.SILENT);
cramFileReaderIterator = cramFileReader.getIterator();
for (SAMRecord samRecord : samRecords) {
diff --git a/src/tests/java/htsjdk/samtools/CRAMEdgeCasesTest.java b/src/tests/java/htsjdk/samtools/CRAMEdgeCasesTest.java
index 1098ec4..4d3b0a7 100644
--- a/src/tests/java/htsjdk/samtools/CRAMEdgeCasesTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMEdgeCasesTest.java
@@ -1,7 +1,9 @@
package htsjdk.samtools;
+import htsjdk.samtools.cram.CRAMException;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.reference.InMemoryReferenceSequenceFile;
+import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.Log;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
@@ -9,6 +11,8 @@ import org.testng.annotations.Test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;
@@ -33,6 +37,20 @@ public class CRAMEdgeCasesTest {
testRecords(records, records.iterator().next().getReadBases());
+ // int test for CRAMException
+ // testing for a contig found in the reads but not in the reference
+ @Test(expectedExceptions = CRAMException.class)
+ public void testContigNotFoundInRef() throws IOException {
+ boolean sawException = false;
+ final File CRAMFile = new File("testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram");
+ final File refFile = new File("testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa");
+ final ReferenceSource refSource = new ReferenceSource(refFile);
+ final CRAMIterator iterator = new CRAMIterator(new FileInputStream(CRAMFile), refSource, ValidationStringency.STRICT);
+ while (iterator.hasNext()) {
+ iterator.next();
+ }
+ }
public void testBizilionTags() throws IOException {
final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
@@ -42,7 +60,9 @@ public class CRAMEdgeCasesTest {
char b1 = (char) ('A' + i / 26);
char b2 = (char) ('A' + i % 26);
String tag = new String(new char[]{b1, b2});
- if ("RG".equals(tag)) continue;
+ if ("RG".equals(tag)) {
+ continue;
+ }
record.setAttribute(tag, i);
@@ -73,7 +93,7 @@ public class CRAMEdgeCasesTest {
- CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), null, source, ValidationStringency.SILENT);
+ CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), (SeekableStream) null, source, ValidationStringency.SILENT);
final SAMRecordIterator iterator = cramFileReader.getIterator();
@@ -101,7 +121,7 @@ public class CRAMEdgeCasesTest {
- CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), null, source, ValidationStringency.SILENT);
+ CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), (SeekableStream) null, source, ValidationStringency.SILENT);
final SAMRecordIterator iterator = cramFileReader.getIterator();
SAMRecord s2 = iterator.next();
@@ -127,8 +147,11 @@ public class CRAMEdgeCasesTest {
- if (bases == SAMRecord.NULL_SEQUENCE) s.setCigarString("10M");
- else s.setCigarString(s.getReadLength() + "M");
+ if (bases == SAMRecord.NULL_SEQUENCE) {
+ s.setCigarString("10M");
+ } else {
+ s.setCigarString(s.getReadLength() + "M");
+ }
testSingleRecord(s, ref);
diff --git a/src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java b/src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java
index dd50ec3..b1e1f2d 100644
--- a/src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java
@@ -5,6 +5,7 @@ import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.reference.FakeReferenceSequenceFile;
import htsjdk.samtools.seekablestream.ByteArraySeekableStream;
+import htsjdk.samtools.seekablestream.SeekableBufferedStream;
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CoordMath;
@@ -43,6 +44,52 @@ public class CRAMFileIndexTest {
+ public void testConstructors () throws IOException {
+ CRAMFileReader reader = new CRAMFileReader(cramFile, indexFile, source, ValidationStringency.SILENT);
+ CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart("chrM", 1500);
+ Assert.assertTrue(iterator.hasNext());
+ SAMRecord record = iterator.next();
+ Assert.assertEquals(record.getReferenceName(), "chrM");
+ Assert.assertTrue(record.getAlignmentStart() >= 1500);
+ reader.close();
+ reader = new CRAMFileReader(new SeekableFileStream(cramFile), indexFile, source, ValidationStringency.SILENT);
+ iterator = reader.queryAlignmentStart("chrM", 1500);
+ Assert.assertTrue(iterator.hasNext());
+ record = iterator.next();
+ Assert.assertEquals(record.getReferenceName(), "chrM");
+ Assert.assertTrue(record.getAlignmentStart() >= 1500);
+ reader.close();
+ reader = new CRAMFileReader(new SeekableFileStream(cramFile), new SeekableFileStream(indexFile), source, ValidationStringency.SILENT);
+ iterator = reader.queryAlignmentStart("chrM", 1500);
+ Assert.assertTrue(iterator.hasNext());
+ record = iterator.next();
+ Assert.assertEquals(record.getReferenceName(), "chrM");
+ Assert.assertTrue(record.getAlignmentStart() >= 1500);
+ reader.close();
+ reader = new CRAMFileReader(new SeekableFileStream(cramFile), (File)null, source, ValidationStringency.SILENT);
+ try {
+ reader.queryAlignmentStart("chrM", 1500);
+ Assert.fail("Expecting query to fail when there is no index");
+ } catch (SAMException e) {
+ }
+ reader.close();
+ reader = new CRAMFileReader(new SeekableFileStream(cramFile), (SeekableFileStream)null, source, ValidationStringency.SILENT);
+ try {
+ reader.queryAlignmentStart("chrM", 1500);
+ Assert.fail("Expecting query to fail when there is no index");
+ } catch (SAMException e) {
+ }
+ reader.close();
+ }
+ @Test
public void test_chrM_1500_location() throws IOException {
CRAMFileReader reader = new CRAMFileReader(cramFile, indexFile, source);
@@ -85,6 +132,25 @@ public class CRAMFileIndexTest {
+ public void testNoStringencyConstructor() throws IOException {
+ final File CRAMFile = new File("testdata/htsjdk/samtools/cram/auxf#values.3.0.cram");
+ final File refFile = new File("testdata/htsjdk/samtools/cram/auxf.fa");
+ ReferenceSource refSource = new ReferenceSource(refFile);
+ File indexFile = null;
+ long start = 0;
+ long end = CRAMFile.length();
+ long[] boundaries = new long[] {start << 16, (end - 1) << 16};
+ final CRAMIterator iterator = new CRAMIterator(new SeekableFileStream(CRAMFile), refSource, boundaries);
+ long count = 0;
+ while (iterator.hasNext()) {
+ count++;
+ iterator.next();
+ }
+ Assert.assertEquals(count, 2);
+ }
+ @Test
public void testIteratorFromFileSpan_WholeFile() throws IOException {
CRAMFileReader reader = new CRAMFileReader(new ByteArraySeekableStream(cramBytes), new ByteArraySeekableStream(baiBytes), source, ValidationStringency.SILENT);
diff --git a/src/tests/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java b/src/tests/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java
index 1e1838b..1203121 100644
--- a/src/tests/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java
@@ -180,9 +180,8 @@ public class CRAMFileWriterWithIndexTest {
Collections.sort(list, new SAMRecordCoordinateComparator());
for (SAMRecord record : list)
- writer.writeAlignment(record);
+ writer.addAlignment(record);
- System.out.println();
diff --git a/src/tests/java/htsjdk/samtools/CigarTest.java b/src/tests/java/htsjdk/samtools/CigarTest.java
index 9e9858a..1d7d4c6 100644
--- a/src/tests/java/htsjdk/samtools/CigarTest.java
+++ b/src/tests/java/htsjdk/samtools/CigarTest.java
@@ -24,6 +24,7 @@
package htsjdk.samtools;
import org.testng.Assert;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.List;
@@ -33,49 +34,61 @@ import java.util.List;
public class CigarTest {
- @Test
- public void testPositive() {
- Assert.assertNull(TextCigarCodec.decode("").isValid(null, -1));
- Assert.assertNull(TextCigarCodec.decode("2M1P4M1P2D1P6D").isValid(null, -1));
- Assert.assertNull(TextCigarCodec.decode("10M5N1I12M").isValid(null, -1));
- Assert.assertNull(TextCigarCodec.decode("10M1I5N1I12M").isValid(null, -1));
- Assert.assertNull(TextCigarCodec.decode("9M1D5N1I12M").isValid(null, -1));
+ @DataProvider(name = "positiveTestsData")
+ public Object[][] testPositive() {
+ return new Object[][]{
+ {""},
+ {"2M1P4M1P2D1P6D"},
+ {"10M5N1I12M"},
+ {"10M1I5N1I12M"},
+ {"9M1D5N1I12M"},
- // I followed by D and vice versa is now allowed.
- Assert.assertNull(TextCigarCodec.decode("1M1I1D1M").isValid(null, -1));
- Assert.assertNull(TextCigarCodec.decode("1M1D1I1M").isValid(null, -1));
+ // I followed by D and vice versa is now allowed.
+ {"1M1I1D1M"},
+ {"1M1D1I1M"},
- // Soft-clip inside of hard-clip now allowed.
- Assert.assertNull(TextCigarCodec.decode("29M1S15H").isValid(null, -1));
+ // Soft-clip inside of hard-clip now allowed.
+ {"29M1S15H"},
+ };
- @Test
- public void testNegative() {
- // Cannot have two consecutive insertions
- List<SAMValidationError> errors = TextCigarCodec.decode("1M1I1I1M").isValid(null, -1);
- Assert.assertEquals(errors.size(), 1);
- Assert.assertEquals(errors.get(0).getType(), SAMValidationError.Type.ADJACENT_INDEL_IN_CIGAR);
+ @Test(dataProvider = "positiveTestsData")
+ public void testPositive(final String cigar) {
+ Assert.assertNull(TextCigarCodec.decode(cigar).isValid(null, -1));
+ }
- // Cannot have two consecutive deletions
- errors = TextCigarCodec.decode("1M1D1D1M").isValid(null, -1);
- Assert.assertEquals(errors.size(), 1);
- Assert.assertEquals(errors.get(0).getType(), SAMValidationError.Type.ADJACENT_INDEL_IN_CIGAR);
+ @DataProvider(name = "negativeTestsData")
+ public Object[][] negativeTestsData() {
- // Soft clip must be at end of read or inside of hard clip
- errors = TextCigarCodec.decode("1M1D1S1M").isValid(null, -1);
- Assert.assertEquals(errors.size(), 1);
- Assert.assertEquals(errors.get(0).getType(), SAMValidationError.Type.INVALID_CIGAR);
+ return new Object[][]{
+ // Cannot have two consecutive insertions (of the same type)
+ {"1M1D1D1M", SAMValidationError.Type.ADJACENT_INDEL_IN_CIGAR},
+ {"1M1I1I1M", SAMValidationError.Type.ADJACENT_INDEL_IN_CIGAR},
- // Soft clip must be at end of read or inside of hard clip
- errors = TextCigarCodec.decode("1M1D1S1M1H").isValid(null, -1);
- Assert.assertEquals(errors.size(), 1);
- Assert.assertEquals(errors.get(0).getType(), SAMValidationError.Type.INVALID_CIGAR);
+ // Soft clip must be at end of read or inside of hard clip
+ {"1M1D1S1M", SAMValidationError.Type.INVALID_CIGAR},
+ {"1M1D1S1M1H", SAMValidationError.Type.INVALID_CIGAR},
+ {"1M1D1S1S", SAMValidationError.Type.INVALID_CIGAR},
+ {"1M1D1S1S1H", SAMValidationError.Type.INVALID_CIGAR},
+ {"1H1S1S1M1D", SAMValidationError.Type.INVALID_CIGAR},
+ {"1S1S1M1D", SAMValidationError.Type.INVALID_CIGAR},
+ // Soft clip must be at end of read or inside of hard clip, but there must be something left
+ {"1S1S", SAMValidationError.Type.INVALID_CIGAR},
+ {"1H1S", SAMValidationError.Type.INVALID_CIGAR},
+ {"1S1H", SAMValidationError.Type.INVALID_CIGAR},
+ {"1H1H", SAMValidationError.Type.INVALID_CIGAR},
+ };
- // Zero length for an element not allowed.
- errors = TextCigarCodec.decode("100M0D10M1D10M").isValid(null, -1);
- Assert.assertEquals(errors.size(), 1);
- Assert.assertEquals(errors.get(0).getType(), SAMValidationError.Type.INVALID_CIGAR);
+ // Zero length for an element not allowed. TODO: not sure why this is commented out
+ {"100M0D10M1D10M", SAMValidationError.Type.INVALID_CIGAR}
+ @Test(dataProvider = "negativeTestsData")
+ public void testNegative(final String cigar, final SAMValidationError.Type type) {
+ final List<SAMValidationError> errors = TextCigarCodec.decode(cigar).isValid(null, -1);
+ Assert.assertEquals(errors.size(), 1, String.format("Got %d error, expected exactly one error.", errors.size()));
+ Assert.assertEquals(errors.get(0).getType(), type);
+ }
diff --git a/src/tests/java/htsjdk/samtools/CramFileWriterTest.java b/src/tests/java/htsjdk/samtools/CramFileWriterTest.java
index 0a42052..89e9a68 100644
--- a/src/tests/java/htsjdk/samtools/CramFileWriterTest.java
+++ b/src/tests/java/htsjdk/samtools/CramFileWriterTest.java
@@ -33,6 +33,8 @@ import org.testng.annotations.Test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -46,10 +48,20 @@ public class CramFileWriterTest {
@Test(description = "Test for lossy CRAM compression invariants.")
- public void lossyCramInvariantsTest() throws Exception {
+ public void lossyCramInvariantsTest() {
+ @Test(description = "Tests a writing records with null SAMFileHeaders")
+ public void writeRecordsWithNullHeader() throws Exception {
+ final List<SAMRecord> samRecs = createRecords(50);
+ for (SAMRecord rec : samRecs) {
+ rec.setHeader(null);
+ }
+ doTest(samRecs);
+ }
@Test(description = "Tests a unmapped record with sequence and quality fields")
public void unmappedWithSequenceAndQualityField() throws Exception {
@@ -79,17 +91,18 @@ public class CramFileWriterTest {
- private List<SAMRecord> createRecords(int count) throws Exception {
+ private List<SAMRecord> createRecords(int count) {
List<SAMRecord> list = new ArrayList<SAMRecord>(count);
final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
if (builder.getHeader().getReadGroups().isEmpty()) {
- throw new Exception("Read group expected in the header");
+ throw new IllegalStateException("Read group expected in the header");
int posInRef = 1;
- for (int i = 0; i < count / 2; i++)
+ for (int i = 0; i < count / 2; i++) {
builder.addPair(Integer.toString(i), 0, posInRef += 1,
posInRef += 3);
+ }
Collections.sort(list, new SAMRecordCoordinateComparator());
@@ -97,35 +110,38 @@ public class CramFileWriterTest {
return list;
- private void doTest(final List<SAMRecord> samRecords) {
+ private SAMFileHeader createSAMHeader(SAMFileHeader.SortOrder sortOrder) {
final SAMFileHeader header = new SAMFileHeader();
- header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
+ header.setSortOrder(sortOrder);
header.addSequence(new SAMSequenceRecord("chr1", 123));
SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord("1");
+ return header;
+ }
+ private ReferenceSource createReferenceSource() {
byte[] refBases = new byte[1024 * 1024];
Arrays.fill(refBases, (byte) 'A');
InMemoryReferenceSequenceFile rsf = new InMemoryReferenceSequenceFile();
rsf.add("chr1", refBases);
- ReferenceSource source = new ReferenceSource(rsf);
+ return new ReferenceSource(rsf);
+ }
- ByteArrayOutputStream os = new ByteArrayOutputStream();
- CRAMFileWriter writer = new CRAMFileWriter(os, source, header, null);
+ private void writeRecordsToCRAM(CRAMFileWriter writer, List<SAMRecord> samRecords) {
for (SAMRecord record : samRecords) {
- writer.writeAlignment(record);
+ writer.addAlignment(record);
- writer.finish();
+ }
+ private void validateRecords(final List<SAMRecord> expectedRecords, ByteArrayInputStream is, ReferenceSource referenceSource) {
+ CRAMFileReader cReader = new CRAMFileReader(null, is, referenceSource);
- CRAMFileReader cReader = new CRAMFileReader(null,
- new ByteArrayInputStream(os.toByteArray()),
- new ReferenceSource(rsf));
SAMRecordIterator iterator2 = cReader.getIterator();
int index = 0;
while (iterator2.hasNext()) {
- SAMRecord actualRecord= iterator2.next();
- SAMRecord expectedRecord = samRecords.get(index++);
+ SAMRecord actualRecord = iterator2.next();
+ SAMRecord expectedRecord = expectedRecords.get(index++);
Assert.assertEquals(actualRecord.getReadName(), expectedRecord.getReadName());
Assert.assertEquals(actualRecord.getFlags(), expectedRecord.getFlags());
@@ -141,4 +157,78 @@ public class CramFileWriterTest {
+ private void doTest(final List<SAMRecord> samRecords) {
+ final SAMFileHeader header = createSAMHeader(SAMFileHeader.SortOrder.coordinate);
+ final ReferenceSource refSource = createReferenceSource();
+ final ByteArrayOutputStream os = new ByteArrayOutputStream();
+ CRAMFileWriter writer = new CRAMFileWriter(os, refSource, header, null);
+ writeRecordsToCRAM(writer, samRecords);
+ validateRecords(samRecords, new ByteArrayInputStream(os.toByteArray()), refSource);
+ }
+ @Test(description = "Test CRAMWriter constructor with index stream")
+ public void testCRAMWriterWithIndex() {
+ final SAMFileHeader header = createSAMHeader(SAMFileHeader.SortOrder.coordinate);
+ final ReferenceSource refSource = createReferenceSource();
+ final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ final ByteArrayOutputStream indexStream = new ByteArrayOutputStream();
+ final List<SAMRecord> samRecords = createRecords(100);
+ CRAMFileWriter writer = new CRAMFileWriter(outStream, indexStream, refSource, header, null);
+ writeRecordsToCRAM(writer, samRecords);
+ validateRecords(samRecords, new ByteArrayInputStream(outStream.toByteArray()), refSource);
+ Assert.assertTrue(indexStream.size() != 0);
+ }
+ @Test(description = "Test CRAMWriter constructor with presorted==false")
+ public void testCRAMWriterNotPresorted() {
+ final SAMFileHeader header = createSAMHeader(SAMFileHeader.SortOrder.coordinate);
+ final ReferenceSource refSource = createReferenceSource();
+ final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ final ByteArrayOutputStream indexStream = new ByteArrayOutputStream();
+ CRAMFileWriter writer = new CRAMFileWriter(outStream, indexStream, false, refSource, header, null);
+ // force records to not be coordinate sorted to ensure we're relying on presorted=false
+ final List<SAMRecord> samRecords = createRecords(100);
+ Collections.sort(samRecords, new SAMRecordCoordinateComparator().reversed());
+ writeRecordsToCRAM(writer, samRecords);
+ // for validation, restore the sort order of the expected records so they match the order of the written records
+ Collections.sort(samRecords, new SAMRecordCoordinateComparator());
+ validateRecords(samRecords, new ByteArrayInputStream(outStream.toByteArray()), refSource);
+ Assert.assertTrue(indexStream.size() != 0);
+ }
+ @Test
+ public void test_roundtrip_tlen_preserved() throws IOException {
+ SamReader reader = SamReaderFactory.make().open(new File("testdata/htsjdk/samtools/cram_tlen_reads.sorted.sam"));
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ final ReferenceSource source = new ReferenceSource(new File("testdata/htsjdk/samtools/cram_tlen.fasta"));
+ CRAMFileWriter writer = new CRAMFileWriter(baos, source, reader.getFileHeader(), "test.cram");
+ SAMRecordIterator iterator = reader.iterator();
+ List<SAMRecord> records = new ArrayList<SAMRecord>();
+ while (iterator.hasNext()) {
+ final SAMRecord record = iterator.next();
+ writer.addAlignment(record);
+ records.add(record);
+ }
+ writer.close();
+ CRAMFileReader cramReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), (File) null, source, ValidationStringency.STRICT);
+ iterator = cramReader.getIterator();
+ int i = 0;
+ while (iterator.hasNext()) {
+ SAMRecord record1 = iterator.next();
+ SAMRecord record2 = records.get(i++);
+ Assert.assertEquals(record1.getInferredInsertSize(), record2.getInferredInsertSize(), record1.getReadName());
+ }
+ Assert.assertEquals(records.size(), i);
+ }
diff --git a/src/tests/java/htsjdk/samtools/DownsamplingIteratorTests.java b/src/tests/java/htsjdk/samtools/DownsamplingIteratorTests.java
new file mode 100644
index 0000000..d492f11
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/DownsamplingIteratorTests.java
@@ -0,0 +1,82 @@
+package htsjdk.samtools;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.EnumMap;
+import java.util.List;
+import java.util.Random;
+import htsjdk.samtools.DownsamplingIteratorFactory.Strategy;
+ * Tests for the downsampling iterator class.
+ * @author Tim Fennell
+ */
+public class DownsamplingIteratorTests {
+ final int NUM_TEMPLATES = 50000;
+ final EnumMap<Strategy, Double> ACCURACY = new EnumMap<Strategy,Double>(Strategy.class){{
+ put(Strategy.HighAccuracy, 0.001);
+ put(Strategy.Chained, 0.005);
+ put(Strategy.ConstantMemory, 0.01);
+ }};
+ @Test
+ public void testBasicFunction() {
+ final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
+ final Random r = new Random();
+ for (int i=0; i<NUM_TEMPLATES; ++i) {
+ builder.addPair("pair" + r.nextInt(), r.nextInt(24), r.nextInt(1000000), r.nextInt(1000000));
+ }
+ final Collection<SAMRecord> recs = builder.getRecords();
+ runTests("testBasicFunction", recs);
+ }
+ private void runTests(final String name, final Collection<SAMRecord> recs) {
+ for (final DownsamplingIteratorFactory.Strategy strategy : DownsamplingIteratorFactory.Strategy.values()) {
+ final double accuracy = ACCURACY.get(strategy);
+ for (final double p : new double[]{0, 0.01, 0.1, 0.5, 0.9, 1}) {
+ final DownsamplingIterator iterator = DownsamplingIteratorFactory.make(recs.iterator(), strategy, p, accuracy, 42);
+ final List<SAMRecord> out = new ArrayList<SAMRecord>();
+ while (iterator.hasNext()) out.add(iterator.next());
+ final String testcase = name + ": strategy=" + strategy.name() + ", p=" + p + ", accuracy=" + accuracy;
+ final double readFraction = iterator.getAcceptedFraction();
+ Assert.assertEquals(out.size(), iterator.getAcceptedCount(), "Mismatched sizes with " + testcase);
+ Assert.assertTrue(readFraction > p - accuracy && readFraction < p + accuracy, "Read fraction " + readFraction + " out of bounds in " + testcase);
+ }
+ }
+ }
+ @Test
+ public void testMixOfPairsAndFrags() {
+ final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
+ final Random r = new Random();
+ for (int i=0; i<NUM_TEMPLATES; ++i) {
+ builder.addFrag("frag" + r.nextInt(), r.nextInt(24), r.nextInt(1000000), false);
+ builder.addPair("pair" + r.nextInt(), r.nextInt(24), r.nextInt(1000000), r.nextInt(1000000));
+ }
+ final Collection<SAMRecord> recs = builder.getRecords();
+ runTests("testMixOfPairsAndFrags", recs);
+ }
+ @Test
+ public void testSecondaryAlignments() {
+ final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
+ final Random r = new Random();
+ for (int i=0; i<NUM_TEMPLATES; ++i) {
+ final int x = r.nextInt();
+ builder.addPair("pair" + x, r.nextInt(24), r.nextInt(1000000), r.nextInt(1000000));
+ builder.addPair("pair" + x, r.nextInt(24), r.nextInt(24), r.nextInt(1000000), r.nextInt(1000000), false, false, "50M", "50M", false, true, true, true, 20);
+ }
+ final Collection<SAMRecord> recs = builder.getRecords();
+ runTests("testSecondaryAlignments", recs);
+ }
diff --git a/src/tests/java/htsjdk/samtools/DuplicateSetIteratorTest.java b/src/tests/java/htsjdk/samtools/DuplicateSetIteratorTest.java
index c2a87a4..5952953 100644
--- a/src/tests/java/htsjdk/samtools/DuplicateSetIteratorTest.java
+++ b/src/tests/java/htsjdk/samtools/DuplicateSetIteratorTest.java
@@ -1,5 +1,6 @@
package htsjdk.samtools;
+import org.testng.Assert;
import org.testng.annotations.Test;
import java.util.HashMap;
@@ -56,7 +57,7 @@ public class DuplicateSetIteratorTest {
//we expect 15 duplicate sets one for the initial two reads and one for each of the additional 14 reads.
- assert (allSets.size() == 15);
- assert (allSets.get("READ0").size() == 2);
+ Assert.assertEquals(allSets.size(), 15, "Wrong number of duplicate sets.");
+ Assert.assertEquals(allSets.get("READ0").size(), 2, "Should be two reads in the READ0 duplicate set, but there are not.");
diff --git a/src/tests/java/htsjdk/samtools/MergingSamRecordIteratorTest.java b/src/tests/java/htsjdk/samtools/MergingSamRecordIteratorTest.java
index 6c17d91..885321b 100644
--- a/src/tests/java/htsjdk/samtools/MergingSamRecordIteratorTest.java
+++ b/src/tests/java/htsjdk/samtools/MergingSamRecordIteratorTest.java
@@ -268,4 +268,45 @@ public class MergingSamRecordIteratorTest {
+ @Test
+ public void testReferenceIndexMapping() throws Exception {
+ // Create two SamReaders with sequence dictionaries such that a merging iterator with merged
+ // headers will require remapping a record's reference index to the merged dictionary
+ final SAMRecordSetBuilder builder1 = new SAMRecordSetBuilder();
+ SAMSequenceRecord fakeSequenceRec = new SAMSequenceRecord("FAKE_CONTIG_A", 0);
+ builder1.getHeader().addSequence(fakeSequenceRec);
+ final SAMRecordSetBuilder builder2 = new SAMRecordSetBuilder();
+ fakeSequenceRec = new SAMSequenceRecord("FAKE_CONTIG_B", 0);
+ builder2.getHeader().addSequence(fakeSequenceRec);
+ // create a record with a reference index that will need to be remapped after merging
+ SAMRecord recRequiresMapping = new SAMRecord(builder2.getHeader());
+ recRequiresMapping.setReadName("fakeread");
+ recRequiresMapping.setReferenceName("FAKE_CONTIG_B");
+ builder2.addRecord(recRequiresMapping);
+ // cache the original reference index
+ int originalRefIndex = recRequiresMapping.getReferenceIndex();
+ Assert.assertTrue(25 == originalRefIndex);
+ // get a merging iterator with a merged header
+ final SamReader samReader1 = builder1.getSamReader();
+ final SamReader samReader2 = builder2.getSamReader();
+ final List<SamReader> readerList = new ArrayList<SamReader>();
+ readerList.add(samReader1);
+ readerList.add(samReader2);
+ final List<SAMFileHeader> headerList = new ArrayList<SAMFileHeader>();
+ headerList.add(samReader1.getFileHeader());
+ headerList.add(samReader2.getFileHeader());
+ final SamFileHeaderMerger samFileHeaderMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headerList, true);
+ final MergingSamRecordIterator iterator = new MergingSamRecordIterator(samFileHeaderMerger, readerList, false);
+ Assert.assertTrue(iterator.hasNext());
+ final SAMRecord rec = iterator.next();
+ Assert.assertTrue(26 == rec.getReferenceIndex());
+ samReader1.close();
+ samReader2.close();
+ }
diff --git a/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java b/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java
index 7ad9143..c8378a6 100644
--- a/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java
@@ -23,6 +23,8 @@
package htsjdk.samtools;
+import htsjdk.samtools.cram.CRAMException;
+import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
@@ -56,25 +58,36 @@ public class SAMFileReaderTest {
// tests for CRAM indexing
@Test(dataProvider = "SmallCRAMTest")
- public void CRAMIndexTest(final String inputFile) {
+ public void CRAMIndexTest(final String inputFile, final String referenceFile, QueryInterval queryInterval, String expectedReadName) {
final File input = new File(TEST_DATA_DIR, inputFile);
- final SamReader reader = SamReaderFactory.makeDefault().open(input);
+ final File reference = new File(TEST_DATA_DIR, referenceFile);
+ final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(reference).open(input);
+ final CloseableIterator<SAMRecord> iterator = reader.query(new QueryInterval[]{queryInterval}, false);
+ Assert.assertTrue(iterator.hasNext());
+ SAMRecord r1 = iterator.next();
+ Assert.assertEquals(r1.getReadName(), expectedReadName);
@DataProvider(name = "SmallCRAMTest")
public Object[][] CRAMIndexTestData() {
final Object[][] testFiles = new Object[][]{
- {"cram/test.cram"},
+ {"cram/test.cram", "cram/auxf.fa", new QueryInterval(0, 12, 13), "Jim"},
+ {"cram_with_bai_index.cram", "hg19mini.fasta", new QueryInterval(3, 700, 0), "k"},
+ {"cram_with_crai_index.cram", "hg19mini.fasta", new QueryInterval(2, 350, 0), "i"},
return testFiles;
@Test(dataProvider = "NoIndexCRAMTest")
- public void CRAMNoIndexTest(final String inputFile) {
+ public void CRAMNoIndexTest(final String inputFile, final String referenceFile) {
final File input = new File(TEST_DATA_DIR, inputFile);
- final SamReader reader = SamReaderFactory.makeDefault().open(input);
+ final File reference = new File(TEST_DATA_DIR, referenceFile);
+ final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(reference).open(input);
@@ -82,7 +95,7 @@ public class SAMFileReaderTest {
@DataProvider(name = "NoIndexCRAMTest")
public Object[][] CRAMNoIndexTestData() {
final Object[][] testFiles = new Object[][]{
- {"cram/test2.cram"},
+ {"cram/test2.cram", "cram/auxf.fa"},
return testFiles;
@@ -120,4 +133,48 @@ public class SAMFileReaderTest {
else if (inputFile.endsWith(".bam")) Assert.assertEquals(factory.bamRecordsCreated, i);
+ @DataProvider(name = "cramNegativeTestCases")
+ public Object[][] cramTestNegativeCases() {
+ final Object[][] scenarios = new Object[][]{
+ {"cram_with_bai_index.cram",},
+ {"cram_with_crai_index.cram"},
+ };
+ return scenarios;
+ }
+ @Test(dataProvider = "cramNegativeTestCases", expectedExceptions=CRAMException.class)
+ public void testReferenceRequiredForCRAM(final String inputFile) {
+ final File input = new File(TEST_DATA_DIR, inputFile);
+ final SamReader reader = SamReaderFactory.makeDefault().open(input);
+ for (final SAMRecord rec : reader) {
+ }
+ CloserUtil.close(reader);
+ }
+ @DataProvider(name = "cramPositiveTestCases")
+ public Object[][] cramTestPositiveCases() {
+ final Object[][] scenarios = new Object[][]{
+ {"cram_with_bai_index.cram", "hg19mini.fasta"},
+ {"cram_with_crai_index.cram", "hg19mini.fasta"},
+ };
+ return scenarios;
+ }
+ @Test(dataProvider = "cramPositiveTestCases")
+ public void testIterateCRAMWithIndex(final String inputFile, final String referenceFile) {
+ final File input = new File(TEST_DATA_DIR, inputFile);
+ final File reference = new File(TEST_DATA_DIR, referenceFile);
+ final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(reference).open(input);
+ for (final SAMRecord rec : reader) {
+ }
+ CloserUtil.close(reader);
+ }
+ @Test
+ public void samRecordFactoryNullHeaderTest() {
+ final SAMRecordFactory factory = new DefaultSAMRecordFactory();
+ final SAMRecord samRec = factory.createSAMRecord(null);
+ Assert.assertTrue(samRec.getHeader() == null);
+ }
diff --git a/src/tests/java/htsjdk/samtools/SAMFileWriterFactoryTest.java b/src/tests/java/htsjdk/samtools/SAMFileWriterFactoryTest.java
index ad940d8..7c6fa56 100644
--- a/src/tests/java/htsjdk/samtools/SAMFileWriterFactoryTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMFileWriterFactoryTest.java
@@ -23,8 +23,11 @@
package htsjdk.samtools;
+import htsjdk.samtools.cram.build.CramIO;
+import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.util.IOUtil;
import org.testng.Assert;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.ByteArrayOutputStream;
@@ -32,9 +35,9 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
+import java.io.IOException;
import java.io.OutputStream;
public class SAMFileWriterFactoryTest {
private static final File TEST_DATA_DIR = new File("testdata/htsjdk/samtools");
@@ -111,6 +114,35 @@ public class SAMFileWriterFactoryTest {
Assert.assertEquals(writtensam, originalsam);
+ @Test(description="Write SAM records with null SAMFileHeader")
+ public void samNullHeaderRoundTrip() throws Exception {
+ final File input = new File(TEST_DATA_DIR, "roundtrip.sam");
+ final SamReader reader = SamReaderFactory.makeDefault().open(input);
+ final File outputFile = File.createTempFile("nullheader-out", ".sam");
+ outputFile.delete();
+ outputFile.deleteOnExit();
+ FileOutputStream os = new FileOutputStream(outputFile);
+ final SAMFileWriterFactory factory = new SAMFileWriterFactory();
+ final SAMFileWriter writer = factory.makeSAMWriter(reader.getFileHeader(), false, os);
+ for (SAMRecord rec : reader) {
+ rec.setHeader(null);
+ writer.addAlignment(rec);
+ }
+ writer.close();
+ os.close();
+ InputStream is = new FileInputStream(input);
+ String originalsam = IOUtil.readFully(is);
+ is.close();
+ is = new FileInputStream(outputFile);
+ String writtensam = IOUtil.readFully(is);
+ is.close();
+ Assert.assertEquals(writtensam, originalsam);
+ }
private void createSmallBam(final File outputFile) {
final SAMFileWriterFactory factory = new SAMFileWriterFactory();
@@ -123,8 +155,8 @@ public class SAMFileWriterFactoryTest {
private void createSmallBamToOutputStream(final OutputStream outputStream,boolean binary) {
final SAMFileWriterFactory factory = new SAMFileWriterFactory();
@@ -141,10 +173,123 @@ public class SAMFileWriterFactoryTest {
- private void fillSmallBam(SAMFileWriter writer) {
+ private int fillSmallBam(SAMFileWriter writer) {
final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
- for (final SAMRecord rec: builder.getRecords()) writer.addAlignment(rec);
- }
+ int numRecs = builder.getRecords().size();
+ for (final SAMRecord rec: builder.getRecords()) {
+ writer.addAlignment(rec);
+ }
+ return numRecs;
+ }
+ private File prepareOutputFile(String extension) throws IOException {
+ final File outputFile = File.createTempFile("tmp.", extension);
+ outputFile.delete();
+ outputFile.deleteOnExit();
+ return outputFile;
+ }
+ // Create a writer factory that creates and index and md5 file and set the header to coord sorted
+ private SAMFileWriterFactory createWriterFactoryWithOptions(SAMFileHeader header) {
+ final SAMFileWriterFactory factory = new SAMFileWriterFactory();
+ factory.setCreateIndex(true);
+ factory.setCreateMd5File(true);
+ // index only created if coordinate sorted
+ header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
+ header.addSequence(new SAMSequenceRecord("chr1", 123));
+ header.addReadGroup(new SAMReadGroupRecord("1"));
+ return factory;
+ }
+ private void verifyWriterOutput(File outputFile, ReferenceSource refSource, int nRecs, boolean verifySupplementalFiles) {
+ if (verifySupplementalFiles) {
+ final File indexFile = SamFiles.findIndex(outputFile);
+ indexFile.deleteOnExit();
+ final File md5File = new File(outputFile.getParent(), outputFile.getName() + ".md5");
+ md5File.deleteOnExit();
+ Assert.assertTrue(indexFile.length() > 0);
+ Assert.assertTrue(md5File.length() > 0);
+ }
+ SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
+ if (refSource != null) {
+ factory.referenceSource(refSource);
+ }
+ SamReader reader = factory.open(outputFile);
+ SAMRecordIterator it = reader.iterator();
+ int count = 0;
+ for (; it.hasNext(); it.next()) {
+ count++;
+ }
+ Assert.assertTrue(count == nRecs);
+ }
+ @DataProvider(name="bamOrCramWriter")
+ public Object[][] bamOrCramWriter() {
+ return new Object[][] {
+ { BamFileIoUtils.BAM_FILE_EXTENSION, },
+ };
+ }
+ @Test(dataProvider="bamOrCramWriter")
+ public void testMakeWriter(String extension) throws Exception {
+ final File outputFile = prepareOutputFile(extension);
+ final SAMFileHeader header = new SAMFileHeader();
+ final SAMFileWriterFactory factory = createWriterFactoryWithOptions(header);
+ final File referenceFile = new File(TEST_DATA_DIR, "hg19mini.fasta");
+ final SAMFileWriter samWriter = factory.makeWriter(header, false, outputFile, referenceFile);
+ int nRecs = fillSmallBam(samWriter);
+ samWriter.close();
+ verifyWriterOutput(outputFile, new ReferenceSource(referenceFile), nRecs, true);
+ }
+ @Test
+ public void testMakeCRAMWriterWithOptions() throws Exception {
+ final File outputFile = prepareOutputFile(CramIO.CRAM_FILE_EXTENSION);
+ final SAMFileHeader header = new SAMFileHeader();
+ final SAMFileWriterFactory factory = createWriterFactoryWithOptions(header);
+ final File referenceFile = new File(TEST_DATA_DIR, "hg19mini.fasta");
+ final SAMFileWriter samWriter = factory.makeCRAMWriter(header, false, outputFile, referenceFile);
+ int nRecs = fillSmallBam(samWriter);
+ samWriter.close();
+ verifyWriterOutput(outputFile, new ReferenceSource(referenceFile), nRecs, true);
+ }
+ @Test
+ public void testMakeCRAMWriterIgnoresOptions() throws Exception {
+ final File outputFile = prepareOutputFile(CramIO.CRAM_FILE_EXTENSION);
+ final SAMFileHeader header = new SAMFileHeader();
+ final SAMFileWriterFactory factory = createWriterFactoryWithOptions(header);
+ final File referenceFile = new File(TEST_DATA_DIR, "hg19mini.fasta");
+ // Note: does not honor factory settings for CREATE_MD5 or CREATE_INDEX.
+ final SAMFileWriter samWriter = factory.makeCRAMWriter(header, new FileOutputStream(outputFile), referenceFile);
+ int nRecs = fillSmallBam(samWriter);
+ samWriter.close();
+ verifyWriterOutput(outputFile, new ReferenceSource(referenceFile), nRecs, false);
+ }
+ @Test
+ public void testMakeCRAMWriterPresortedDefault() throws Exception {
+ final File outputFile = prepareOutputFile(CramIO.CRAM_FILE_EXTENSION);
+ final SAMFileHeader header = new SAMFileHeader();
+ final SAMFileWriterFactory factory = createWriterFactoryWithOptions(header);
+ final File referenceFile = new File(TEST_DATA_DIR, "hg19mini.fasta");
+ // Defaults to preSorted==true
+ final SAMFileWriter samWriter = factory.makeCRAMWriter(header, outputFile, referenceFile);
+ int nRecs = fillSmallBam(samWriter);
+ samWriter.close();
+ verifyWriterOutput(outputFile, new ReferenceSource(referenceFile), nRecs, true);
+ }
diff --git a/src/tests/java/htsjdk/samtools/SAMIntegerTagTest.java b/src/tests/java/htsjdk/samtools/SAMIntegerTagTest.java
index 4003435..bc5cc8e 100644
--- a/src/tests/java/htsjdk/samtools/SAMIntegerTagTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMIntegerTagTest.java
@@ -23,13 +23,21 @@
package htsjdk.samtools;
+import htsjdk.samtools.cram.ref.ReferenceSource;
+import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.CloserUtil;
import org.testng.Assert;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
@@ -46,6 +54,10 @@ public class SAMIntegerTagTest {
private static final String UNSIGNED_INTEGER_TAG = "UI";
private static final String STRING_TAG = "ST";
+ private static final long TOO_LARGE_UNSIGNED_INT_VALUE = BinaryCodec.MAX_UINT + 1L;
public void testBAM() throws Exception {
final SAMRecord rec = writeAndReadSamRecord("bam");
@@ -68,22 +80,14 @@ public class SAMIntegerTagTest {
Assert.assertEquals(((Number) rec.getAttribute(INTEGER_TAG)).intValue(), 1);
- @Test(expectedExceptions = SAMException.class)
- public void testUnsignedIntegerBAM() throws Exception {
- SAMRecord rec = createSamRecord();
- final long val = 1l + Integer.MAX_VALUE;
- rec.setAttribute(UNSIGNED_INTEGER_TAG, val);
- Assert.fail("Exception should have been thrown.");
- }
- /**
- * Cannot store unsigned int in SAM text format.
- */
- @Test(expectedExceptions = SAMException.class)
+ @Test
public void testUnsignedIntegerSAM() throws Exception {
final SAMRecord rec = createSamRecord();
final long val = 1l + Integer.MAX_VALUE;
rec.setAttribute(UNSIGNED_INTEGER_TAG, val);
+ final Object roundTripValue = rec.getAttribute(UNSIGNED_INTEGER_TAG);
+ Assert.assertTrue(roundTripValue instanceof Long);
+ Assert.assertEquals(((Long)roundTripValue).longValue(), val);
@@ -174,29 +178,55 @@ public class SAMIntegerTagTest {
return builder.iterator().next();
- @Test(expectedExceptions = {SAMFormatException.class})
- public void testBadSamStrict() {
- final SamReader reader = SamReaderFactory.makeDefault().open(new File(TEST_DATA_DIR, "variousAttributes.sam"));
+ private static SamInputResource createSamForIntAttr(long value) {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ PrintStream ps = new PrintStream(baos);
+ ps.println("@HD\tVN:1.0");
+ ps.print("1\t4\t*\t0\t0\t*\t*\t0\t0\tA\t<\tUI:i:");
+ ps.println(value);
+ ps.close();
+ return new SamInputResource(new InputStreamInputResource(new ByteArrayInputStream(baos.toByteArray())));
+ }
+ @Test
+ public void testGoodSamStrict() throws IOException {
+ final SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.STRICT);
+ Assert.assertEquals(0, ((Number) factory.open(createSamForIntAttr(0)).iterator().next().getAttribute("UI")).intValue());
+ Assert.assertEquals(-1, ((Number) factory.open(createSamForIntAttr(-1)).iterator().next().getAttribute("UI")).intValue());
+ Assert.assertEquals(Integer.MIN_VALUE, ((Number) factory.open(createSamForIntAttr(Integer.MIN_VALUE)).iterator().next().getAttribute("UI")).intValue());
+ Assert.assertEquals(Integer.MAX_VALUE, ((Number) factory.open(createSamForIntAttr(Integer.MAX_VALUE)).iterator().next().getAttribute("UI")).intValue());
+ Assert.assertEquals(1L + (long) Integer.MAX_VALUE, ((Number) factory.open(createSamForIntAttr(1L + (long) Integer.MAX_VALUE)).iterator().next().getAttribute("UI")).longValue());
+ Assert.assertEquals(BinaryCodec.MAX_UINT, ((Number) factory.open(createSamForIntAttr(BinaryCodec.MAX_UINT)).iterator().next().getAttribute("UI")).longValue());
+ }
+ @Test(expectedExceptions = SAMException.class)
+ public void testBadSamStrict() throws IOException {
+ final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.STRICT).open(createSamForIntAttr(BinaryCodec.MAX_UINT + 1L));
- Assert.fail("Should not reach.");
- @Test(expectedExceptions = {RuntimeException.class})
- public void testBadBamStrict() {
- final SamReader reader = SamReaderFactory.makeDefault()
- .enable(SamReaderFactory.Option.EAGERLY_DECODE)
- .open(new File(TEST_DATA_DIR, "variousAttributes.bam"));
+ @Test
+ public void testBadSamSilent() throws IOException {
+ final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(createSamForIntAttr(BinaryCodec.MAX_UINT + 1L));
- Assert.fail("Should not reach.");
+ }
+ @DataProvider(name = "legalIntegerAttributesFiles")
+ public Object[][] getLegalIntegerAttributesFiles() {
+ return new Object[][] {
+ { new File(TEST_DATA_DIR, "variousAttributes.sam") },
+ { new File(TEST_DATA_DIR, "variousAttributes.bam") }
+ };
- @Test
- public void testBadBamLenient() {
+ @Test(dataProvider = "legalIntegerAttributesFiles")
+ public void testLegalIntegerAttributesFilesStrict( final File inputFile ) {
final SamReader reader = SamReaderFactory.makeDefault()
- .validationStringency(ValidationStringency.LENIENT)
- .open(new File(TEST_DATA_DIR, "variousAttributes.bam"));
+ .validationStringency(ValidationStringency.STRICT)
+ .open(inputFile);
final SAMRecord rec = reader.iterator().next();
final Map<String, Number> expectedTags = new HashMap<String, Number>();
@@ -209,8 +239,91 @@ public class SAMIntegerTagTest {
expectedTags.put("UI", 4294967295L);
for (final Map.Entry<String, Number> entry : expectedTags.entrySet()) {
final Object value = rec.getAttribute(entry.getKey());
- Assert.assertEquals(value, entry.getValue());
+ Assert.assertTrue(((Number) value).longValue() == entry.getValue().longValue());
+ @DataProvider(name = "valid_set")
+ public static Object[][] valid_set() {
+ List<Object[]> params = new ArrayList<Object[]>();
+ for (FORMAT format:FORMAT.values()) {
+ for (ValidationStringency stringency:ValidationStringency.values()) {
+ params.add(new Object[]{0, format, stringency});
+ params.add(new Object[]{1, format, stringency});
+ params.add(new Object[]{-1, format, stringency});
+ params.add(new Object[]{Integer.MIN_VALUE, format, stringency});
+ params.add(new Object[]{Integer.MAX_VALUE, format, stringency});
+ params.add(new Object[]{1L, format, stringency});
+ params.add(new Object[]{-1L, format, stringency});
+ params.add(new Object[]{(long)Integer.MAX_VALUE+1L, format, stringency});
+ params.add(new Object[]{BinaryCodec.MAX_UINT, format, stringency});
+ }
+ }
+ return params.toArray(new Object[3][params.size()]);
+ }
+ @DataProvider(name = "invalid_set")
+ public static Object[][] invalid_set() {
+ List<Object[]> params = new ArrayList<Object[]>();
+ for (FORMAT format:FORMAT.values()) {
+ for (ValidationStringency stringency:ValidationStringency.values()) {
+ params.add(new Object[]{(long)Integer.MIN_VALUE -1L, format, stringency});
+ params.add(new Object[]{TOO_LARGE_UNSIGNED_INT_VALUE, format, stringency});
+ }
+ }
+ return params.toArray(new Object[3][params.size()]);
+ }
+ @Test(dataProvider = "valid_set")
+ public void testValidIntegerAttributeRoundtrip(final long value, final FORMAT format, ValidationStringency validationStringency) throws IOException {
+ testRoundtripIntegerAttribute(value, format, validationStringency);
+ }
+ @Test(dataProvider = "invalid_set", expectedExceptions = RuntimeException.class)
+ public void testInvalidIntegerAttributeRoundtrip(final long value, final FORMAT format, ValidationStringency validationStringency) throws IOException {
+ testRoundtripIntegerAttribute(value, format, validationStringency);
+ }
+ private void testRoundtripIntegerAttribute(final Number value, final FORMAT format, ValidationStringency validationStringency) throws IOException {
+ final SAMFileHeader header = new SAMFileHeader();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ final SAMFileWriter w;
+ switch (format) {
+ case SAM:
+ w = new SAMFileWriterFactory().makeSAMWriter(header, false, baos);
+ break;
+ case BAM:
+ w = new SAMFileWriterFactory().makeBAMWriter(header, false, baos);
+ break;
+ case CRAM:
+ w = new SAMFileWriterFactory().makeCRAMWriter(header, baos, null);
+ break;
+ default:
+ throw new RuntimeException("Unknown format: " + format);
+ }
+ final SAMRecord record = new SAMRecord(header);
+ record.setAttribute("UI", value);
+ record.setReadName("1");
+ record.setReadUnmappedFlag(true);
+ record.setReadBases("A".getBytes());
+ record.setBaseQualityString("!");
+ Assert.assertEquals(value, record.getAttribute("UI"));
+ w.addAlignment(record);
+ w.close();
+ final SamReader reader = SamReaderFactory.make().validationStringency(validationStringency).referenceSource(new ReferenceSource()).
+ open(SamInputResource.of(new ByteArrayInputStream(baos.toByteArray())));
+ final SAMRecordIterator iterator = reader.iterator();
+ Assert.assertTrue(iterator.hasNext());
+ final SAMRecord record2 = iterator.next();
+ final Number returnedValue = (Number) record2.getAttribute("UI");
+ Assert.assertEquals(value.longValue(), returnedValue.longValue());
+ }
diff --git a/src/tests/java/htsjdk/samtools/SAMRecordDuplicateComparatorTest.java b/src/tests/java/htsjdk/samtools/SAMRecordDuplicateComparatorTest.java
index f61bd3a..cb50925 100644
--- a/src/tests/java/htsjdk/samtools/SAMRecordDuplicateComparatorTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMRecordDuplicateComparatorTest.java
@@ -27,6 +27,7 @@ import org.testng.Assert;
import org.testng.annotations.Test;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@@ -215,4 +216,18 @@ public class SAMRecordDuplicateComparatorTest {
assertEquals(Arrays.asList(-1,-1,-1), records, false);
+ @Test(expectedExceptions=IllegalArgumentException.class)
+ public void testNullHeaders() {
+ final SAMRecordSetBuilder records = getSAMRecordSetBuilder();
+ records.addPair("READ0", 1, 55, 55);
+ records.addPair("READ1", 2, 55, 55);
+ Collection<SAMRecord> recs = records.getRecords();
+ for (SAMRecord rec : recs) {
+ rec.setHeader(null);
+ }
+ assertEquals(Arrays.asList(-1, -1, -1), records, false);
+ }
diff --git a/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java b/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java
index 557c496..a3c3e68 100644
--- a/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java
@@ -24,12 +24,15 @@
package htsjdk.samtools;
+import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.TestUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.*;
+import java.util.Arrays;
+import java.util.List;
public class SAMRecordUnitTest {
@@ -52,4 +55,796 @@ public class SAMRecordUnitTest {
Assert.assertEquals(deserializedSAMRecord, initialSAMRecord, "Deserialized SAMRecord not equal to original SAMRecord");
+ @DataProvider
+ public Object [][] offsetAtReferenceData() {
+ return new Object[][]{
+ {"3S9M", 7, 10, false},
+ {"3S9M", 0, 0, false},
+ {"3S9M", -1, 0, false},
+ {"3S9M", 13, 0, false},
+ {"4M1D6M", 4, 4, false},
+ {"4M1D6M", 4, 4, true},
+ {"4M1D6M", 5, 0, false},
+ {"4M1D6M", 5, 4, true},
+ {"4M1I6M", 5, 6, false},
+ {"4M1I6M", 11, 0, false},
+ };
+ }
+ @Test(dataProvider = "offsetAtReferenceData")
+ public void testOffsetAtReference(String cigar, int posInReference, int expectedPosInRead, boolean returnLastBaseIfDeleted) {
+ SAMRecord sam = new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, cigar, null, 2);
+ Assert.assertEquals(SAMRecord.getReadPositionAtReferencePosition(sam, posInReference, returnLastBaseIfDeleted), expectedPosInRead);
+ }
+ @DataProvider
+ public Object [][] referenceAtReadData() {
+ return new Object[][]{
+ {"3S9M", 7, 10},
+ {"3S9M", 0, 0},
+ {"3S9M", 0, 13},
+ {"4M1D6M", 4, 4},
+ {"4M1D6M", 6, 5},
+ {"4M1I6M", 0, 5},
+ {"4M1I6M", 5, 6},
+ };
+ }
+ @Test(dataProvider = "referenceAtReadData")
+ public void testOffsetAtRead(String cigar, int expectedReferencePos, int posInRead) {
+ SAMRecord sam = new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, cigar, null, 2);
+ Assert.assertEquals(sam.getReferencePositionAtReadPosition(posInRead), expectedReferencePos);
+ }
+ @DataProvider(name = "deepCopyTestData")
+ public Object [][] deepCopyTestData() {
+ return new Object[][]{
+ { new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "3S9M", null, 2) },
+ { new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "4M1I6M", null, 2) }
+ };
+ }
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepCopyRef(final SAMRecord sam) {
+ testDeepCopy(sam);
+ }
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepCopyMutate(final SAMRecord sam) {
+ final byte[] initialBaseQualityCopy = Arrays.copyOf(sam.getBaseQualities(), sam.getBaseQualities().length);
+ final int initialStart = sam.getAlignmentStart();
+ final SAMRecord deepCopy = testDeepCopy(sam);
+ Assert.assertTrue(Arrays.equals(sam.getBaseQualities(), deepCopy.getBaseQualities()));
+ Assert.assertTrue(sam.getAlignmentStart() == deepCopy.getAlignmentStart());
+ // mutate copy and make sure original remains unchanged
+ final byte[] copyBaseQuals = deepCopy.getBaseQualities();
+ for (int i = 0; i < copyBaseQuals.length; i++) {
+ copyBaseQuals[i]++;
+ }
+ deepCopy.setBaseQualities(copyBaseQuals);
+ deepCopy.setAlignmentStart(initialStart + 1);
+ Assert.assertTrue(Arrays.equals(sam.getBaseQualities(), initialBaseQualityCopy));
+ Assert.assertTrue(sam.getAlignmentStart() == initialStart);
+ }
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepByteAttributes( final SAMRecord sam ) throws Exception {
+ // Note that "samRecord.deepCopy().equals(samRecord)" fails with attributes due to
+ // SAMBinaryTagAndValue.equals using reference equality on attribute values.
+ SAMRecord deepCopy = testDeepCopy(sam);
+ Assert.assertTrue(sam.equals(deepCopy));
+ final byte bytes[] = { -2, -1, 0, 1, 2 };
+ sam.setAttribute("BY", bytes);
+ deepCopy = sam.deepCopy();
+ // validate reference inequality and content equality
+ final byte samBytes[] = sam.getByteArrayAttribute("BY");
+ final byte copyBytes[] = deepCopy.getByteArrayAttribute("BY");
+ Assert.assertFalse(copyBytes == samBytes);
+ Assert.assertTrue(Arrays.equals(copyBytes, samBytes));
+ // validate mutation independence
+ final byte testByte = -1;
+ Assert.assertTrue(samBytes[2] != testByte); // ensure initial test condition
+ Assert.assertTrue(copyBytes[2] != testByte); // ensure initial test condition
+ samBytes[2] = testByte; // mutate original
+ Assert.assertTrue(samBytes[2] == testByte);
+ Assert.assertTrue(copyBytes[2] != testByte);
+ sam.setAttribute("BY", samBytes);
+ Assert.assertTrue(sam.getByteArrayAttribute("BY")[2] != deepCopy.getByteArrayAttribute("BY")[2]);
+ // now unsigned...
+ sam.setUnsignedArrayAttribute("BY", bytes);
+ deepCopy = sam.deepCopy();
+ final byte samUBytes[] = sam.getUnsignedByteArrayAttribute("BY");
+ final byte copyUBytes[] = deepCopy.getUnsignedByteArrayAttribute("BY");
+ Assert.assertFalse(copyUBytes == bytes);
+ Assert.assertTrue(Arrays.equals(copyUBytes, samUBytes));
+ // validate mutation independence
+ final byte uByte = 1;
+ Assert.assertTrue(samUBytes[2] != uByte); // ensure initial test condition
+ Assert.assertTrue(samUBytes[2] != uByte); // ensure initial test condition
+ samUBytes[2] = uByte; // mutate original
+ Assert.assertTrue(samUBytes[2] == uByte);
+ Assert.assertTrue(copyUBytes[2] != uByte);
+ sam.setUnsignedArrayAttribute("BY", samBytes);
+ Assert.assertTrue(sam.getUnsignedByteArrayAttribute("BY")[2] != deepCopy.getUnsignedByteArrayAttribute("BY")[2]);
+ }
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepShortAttributes( final SAMRecord sam ) throws Exception {
+ // Note that "samRecord.deepCopy().equals(samRecord)" fails with attributes due to
+ // SAMBinaryTagAndValue.equals using reference equality on attribute values.
+ SAMRecord deepCopy = testDeepCopy(sam);
+ Assert.assertTrue(sam.equals(deepCopy));
+ final short shorts[] = { -20, -10, 0, 10, 20 };
+ sam.setAttribute("SH", shorts);
+ deepCopy = sam.deepCopy();
+ // validate reference inequality, content equality
+ final short samShorts[] = sam.getSignedShortArrayAttribute("SH");
+ final short copyShorts[] = deepCopy.getSignedShortArrayAttribute("SH");
+ Assert.assertFalse(copyShorts == samShorts);
+ Assert.assertTrue(Arrays.equals(copyShorts, samShorts));
+ // validate mutation independence
+ final short testShort = -1;
+ Assert.assertTrue(samShorts[2] != testShort); // ensure initial test condition
+ Assert.assertTrue(samShorts[2] != testShort); // ensure initial test condition
+ samShorts[2] = testShort; // mutate original
+ Assert.assertTrue(samShorts[2] == testShort);
+ Assert.assertTrue(copyShorts[2] != testShort);
+ sam.setAttribute("SH", samShorts);
+ Assert.assertTrue(sam.getSignedShortArrayAttribute("SH")[2] != deepCopy.getSignedShortArrayAttribute("SH")[2]);
+ // now unsigned...
+ sam.setUnsignedArrayAttribute("SH", shorts);
+ deepCopy = sam.deepCopy();
+ final short samUShorts[] = sam.getUnsignedShortArrayAttribute("SH");
+ final short copyUShorts[] = deepCopy.getUnsignedShortArrayAttribute("SH");
+ Assert.assertFalse(copyUShorts == shorts);
+ Assert.assertTrue(Arrays.equals(copyUShorts, samUShorts));
+ // validate mutation independence
+ final byte uShort = 1;
+ Assert.assertTrue(samUShorts[2] != uShort); // ensure initial test condition
+ Assert.assertTrue(samUShorts[2] != uShort); // ensure initial test condition
+ samUShorts[2] = uShort; // mutate original
+ Assert.assertTrue(samUShorts[2] == uShort);
+ Assert.assertTrue(copyUShorts[2] != uShort);
+ sam.setUnsignedArrayAttribute("SH", samShorts);
+ Assert.assertTrue(sam.getUnsignedShortArrayAttribute("SH")[2] != deepCopy.getUnsignedShortArrayAttribute("SH")[2]);
+ }
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepIntAttributes( final SAMRecord sam ) throws Exception {
+ // Note that "samRecord.deepCopy().equals(samRecord)" fails with attributes due to
+ // SAMBinaryTagAndValue.equals using reference equality on attribute values.
+ SAMRecord deepCopy = testDeepCopy(sam);
+ Assert.assertTrue(sam.equals(deepCopy));
+ final int ints[] = { -200, -100, 0, 100, 200 };
+ sam.setAttribute("IN", ints);
+ deepCopy = sam.deepCopy();
+ // validate reference inequality and content equality
+ final int samInts[] = sam.getSignedIntArrayAttribute("IN");
+ final int copyInts[] = deepCopy.getSignedIntArrayAttribute("IN");
+ Assert.assertFalse(copyInts == ints);
+ Assert.assertTrue(Arrays.equals(copyInts, samInts));
+ // validate mutation independence
+ final short testInt = -1;
+ Assert.assertTrue(samInts[2] != testInt); // ensure initial test condition
+ Assert.assertTrue(samInts[2] != testInt); // ensure initial test condition
+ samInts[2] = testInt; // mutate original
+ Assert.assertTrue(samInts[2] == testInt);
+ Assert.assertTrue(copyInts[2] != testInt);
+ sam.setAttribute("IN", samInts);
+ Assert.assertTrue(sam.getSignedIntArrayAttribute("IN")[2] != deepCopy.getSignedIntArrayAttribute("IN")[2]);
+ // now unsigned...
+ sam.setUnsignedArrayAttribute("IN", ints);
+ deepCopy = sam.deepCopy();
+ final int samUInts[] = sam.getUnsignedIntArrayAttribute("IN");
+ final int copyUInts[] = deepCopy.getUnsignedIntArrayAttribute("IN");
+ Assert.assertFalse(copyUInts == ints);
+ Assert.assertTrue(Arrays.equals(copyUInts, samUInts));
+ // validate mutation independence
+ byte uInt = 1;
+ Assert.assertTrue(samUInts[2] != uInt); // ensure initial test condition
+ Assert.assertTrue(samUInts[2] != uInt); // ensure initial test condition
+ samInts[2] = uInt; // mutate original
+ Assert.assertTrue(samUInts[2] == uInt);
+ Assert.assertTrue(copyUInts[2] != uInt);
+ sam.setUnsignedArrayAttribute("IN", samInts);
+ Assert.assertTrue(sam.getUnsignedIntArrayAttribute("IN")[2] != deepCopy.getUnsignedIntArrayAttribute("IN")[2]);
+ }
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepFloatAttributes( final SAMRecord sam ) throws Exception {
+ // Note that "samRecord.deepCopy().equals(samRecord)" fails with attributes due to
+ // SAMBinaryTagAndValue.equals using reference equality on attribute values.
+ SAMRecord deepCopy = testDeepCopy(sam);
+ Assert.assertTrue(sam.equals(deepCopy));
+ final float floats[] = { -2.4f, -1.2f, 0, 2.3f, 4.6f };
+ sam.setAttribute("FL", floats);
+ deepCopy = sam.deepCopy();
+ // validate reference inequality and content equality
+ final float samFloats[] = sam.getFloatArrayAttribute("FL");
+ final float copyFloats[] = deepCopy.getFloatArrayAttribute("FL");
+ Assert.assertFalse(copyFloats == floats);
+ Assert.assertFalse(copyFloats == samFloats);
+ Assert.assertTrue(Arrays.equals(copyFloats, samFloats));
+ // validate mutation independence
+ final float testFloat = -1.0f;
+ Assert.assertTrue(samFloats[2] != testFloat); // ensure initial test condition
+ Assert.assertTrue(samFloats[2] != testFloat); // ensure initial test condition
+ samFloats[2] = testFloat; // mutate original
+ Assert.assertTrue(samFloats[2] == testFloat);
+ Assert.assertTrue(copyFloats[2] != testFloat);
+ sam.setAttribute("FL", samFloats);
+ Assert.assertTrue(sam.getFloatArrayAttribute("FL")[2] != deepCopy.getFloatArrayAttribute("FL")[2]);
+ }
+ private SAMRecord testDeepCopy(SAMRecord sam) {
+ final SAMRecord deepCopy = sam.deepCopy();
+ // force the indexing bins to be computed in order to satisfy equality test
+ sam.setIndexingBin(sam.computeIndexingBin());
+ deepCopy.setIndexingBin(deepCopy.computeIndexingBin());
+ Assert.assertTrue(sam.equals(deepCopy));
+ return deepCopy;
+ }
+ @Test
+ public void test_getUnsignedIntegerAttribute_valid() {
+ final String stringTag = "UI";
+ final short binaryTag = SAMTagUtil.getSingleton().makeBinaryTag(stringTag);
+ SAMFileHeader header = new SAMFileHeader();
+ SAMRecord record = new SAMRecord(header);
+ Assert.assertNull(record.getUnsignedIntegerAttribute(stringTag));
+ Assert.assertNull(record.getUnsignedIntegerAttribute(binaryTag));
+ record.setAttribute("UI", 0L);
+ Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(stringTag));
+ Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(binaryTag));
+ record.setAttribute("UI", BinaryCodec.MAX_UINT);
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(stringTag));
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(binaryTag));
+ final SAMBinaryTagAndValue tv_zero = new SAMBinaryTagAndUnsignedArrayValue(binaryTag, 0L);
+ record = new SAMRecord(header){
+ {
+ setAttributes(tv_zero);
+ }
+ };
+ Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(stringTag));
+ Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(binaryTag));
+ final SAMBinaryTagAndValue tv_max = new SAMBinaryTagAndUnsignedArrayValue(binaryTag, BinaryCodec.MAX_UINT);
+ record = new SAMRecord(header){
+ {
+ setAttributes(tv_max);
+ }
+ };
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(stringTag));
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(binaryTag));
+ }
+ /**
+ * This is an alternative to test_getUnsignedIntegerAttribute_valid().
+ * The purpose is to ensure that the hacky way of setting arbitrary tag values works ok.
+ * This is required for testing invalid (out of range) unsigned integer value.
+ */
+ @Test
+ public void test_getUnsignedIntegerAttribute_valid_alternative() {
+ final short tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ SAMFileHeader header = new SAMFileHeader();
+ SAMRecord record;
+ record = new SAMRecord(header) {
+ {
+ setAttributes(new SAMBinaryTagAndUnsignedArrayValue(tag, 0L));
+ }
+ };
+ Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(tag));
+ record = new SAMRecord(header) {
+ {
+ setAttributes(new SAMBinaryTagAndUnsignedArrayValue(tag, BinaryCodec.MAX_UINT));
+ }
+ };
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(tag));
+ // the following works because we bypass value checks implemented in SAMRecord:
+ record = new SAMRecord(header) {
+ {
+ setAttributes(new SAMBinaryTagAndUnsignedArrayValue(tag, BinaryCodec.MAX_UINT+1L));
+ }
+ };
+ // check that the invalid value is still there:
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT+1L), (Long)record.getBinaryAttributes().value);
+ }
+ @Test(expectedExceptions = SAMException.class)
+ public void test_getUnsignedIntegerAttribute_negative() {
+ short tag = 0;
+ SAMRecord record = null;
+ try {
+ tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ SAMFileHeader header = new SAMFileHeader();
+ final SAMBinaryTagAndValue tv = new SAMBinaryTagAndUnsignedArrayValue(tag, -1L);
+ record = new SAMRecord(header) {
+ {
+ setAttributes(tv);
+ }
+ };
+ } catch (Exception e) {
+ Assert.fail("Unexpected exception", e);
+ }
+ record.getUnsignedIntegerAttribute(tag);
+ }
+ @Test(expectedExceptions = SAMException.class)
+ public void test_getUnsignedIntegerAttribute_tooLarge() {
+ short tag = 0;
+ SAMRecord record = null;
+ try {
+ tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ SAMFileHeader header = new SAMFileHeader();
+ final SAMBinaryTagAndValue tv = new SAMBinaryTagAndUnsignedArrayValue(tag, BinaryCodec.MAX_UINT + 1);
+ record = new SAMRecord(header) {
+ {
+ setAttributes(tv);
+ }
+ };
+ } catch (Exception e) {
+ Assert.fail("Unexpected exception", e);
+ }
+ record.getUnsignedIntegerAttribute(tag);
+ }
+ @Test
+ public void test_isAllowedAttributeDataType() {
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Byte((byte) 0)));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Short((short) 0)));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Integer(0)));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue("a string"));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Character('C')));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Float(0.1F)));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new byte[]{0}));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new short[]{0}));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new int[]{0}));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new float[]{0.1F}));
+ // unsigned integers:
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Long(0)));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Long(BinaryCodec.MAX_UINT)));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Long(-1L)));
+ Assert.assertFalse(SAMRecord.isAllowedAttributeValue(new Long(BinaryCodec.MAX_UINT + 1L)));
+ Assert.assertFalse(SAMRecord.isAllowedAttributeValue(new Long(Integer.MIN_VALUE - 1L)));
+ }
+ @Test(expectedExceptions = SAMException.class)
+ public void test_setAttribute_unsigned_int_negative() {
+ short tag = 0;
+ SAMRecord record = null;
+ try {
+ tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ SAMFileHeader header = new SAMFileHeader();
+ record = new SAMRecord(header);
+ Assert.assertNull(record.getUnsignedIntegerAttribute(tag));
+ } catch (SAMException e) {
+ Assert.fail("Unexpected exception", e);
+ }
+ record.setAttribute(tag, (long)Integer.MIN_VALUE-1L);
+ }
+ @Test(expectedExceptions = SAMException.class)
+ public void test_setAttribute_unsigned_int_tooLarge() {
+ short tag = 0;
+ SAMRecord record = null;
+ try {
+ tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ SAMFileHeader header = new SAMFileHeader();
+ record = new SAMRecord(header);
+ Assert.assertNull(record.getUnsignedIntegerAttribute(tag));
+ } catch (SAMException e) {
+ Assert.fail("Unexpected exception", e);
+ }
+ record.setAttribute(tag, BinaryCodec.MAX_UINT + 1L);
+ }
+ @Test
+ public void test_setAttribute_null_removes_tag() {
+ final short tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ SAMFileHeader header = new SAMFileHeader();
+ SAMRecord record = new SAMRecord(header);
+ Assert.assertNull(record.getUnsignedIntegerAttribute(tag));
+ record.setAttribute(tag, BinaryCodec.MAX_UINT);
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(tag));
+ record.setAttribute(tag, null);
+ Assert.assertNull(record.getUnsignedIntegerAttribute(tag));
+ }
+ private SAMRecord createTestRecordHelper() {
+ return new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "3S9M", null, 2);
+ }
+ @Test
+ public void testReferenceName() {
+ SAMRecord sam = createTestRecordHelper();
+ sam.setReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
+ Assert.assertTrue(sam.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+ Assert.assertTrue(sam.getReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ // valid reference name
+ sam = createTestRecordHelper();
+ sam.setReferenceName("chr4");
+ Assert.assertTrue(sam.getReferenceName().equals("chr4"));
+ Assert.assertTrue(sam.getReferenceIndex().equals(3));
+ // invalid reference name sets name but leaves ref index invalid
+ sam = createTestRecordHelper();
+ sam.setReferenceName("unresolvableName");
+ Assert.assertTrue(sam.getReferenceName().equals("unresolvableName"));
+ Assert.assertTrue(sam.getReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ }
+ @Test
+ public void testReferenceIndex() {
+ SAMRecord sam = createTestRecordHelper();
+ sam.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ Assert.assertTrue(sam.getReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ Assert.assertTrue(sam.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+ // valid reference
+ sam = createTestRecordHelper();
+ sam.setReferenceIndex(3);
+ Assert.assertTrue(sam.getReferenceIndex().equals(3));
+ Assert.assertTrue(sam.getReferenceName().equals("chr4"));
+ }
+ @Test(expectedExceptions=IllegalArgumentException.class)
+ public void testInvalidReferenceIndex() {
+ // unresolvable reference
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setReferenceIndex(9999);
+ }
+ @Test
+ public void testMateReferenceName() {
+ SAMRecord sam = createTestRecordHelper();
+ sam.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
+ Assert.assertTrue(sam.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ // valid reference
+ sam = createTestRecordHelper();
+ sam.setMateReferenceName("chr4");
+ Assert.assertTrue(sam.getMateReferenceName().equals("chr4"));
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(3));
+ // unresolvable reference
+ sam = createTestRecordHelper();
+ sam.setMateReferenceName("unresolvableName");
+ Assert.assertTrue(sam.getMateReferenceName().equals("unresolvableName"));
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ }
+ @Test
+ public void testMateReferenceIndex() {
+ SAMRecord sam = createTestRecordHelper();
+ sam.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ Assert.assertTrue(sam.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+ // valid reference
+ sam = createTestRecordHelper();
+ sam.setMateReferenceIndex(3);
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(3));
+ Assert.assertTrue(sam.getMateReferenceName().equals("chr4"));
+ }
+ @Test(expectedExceptions=IllegalArgumentException.class)
+ public void testInvalidMateReferenceIndex() {
+ // unresolvable reference
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setMateReferenceIndex(9999);
+ }
+ @Test
+ public void testRecordValidation() {
+ final SAMRecord sam = createTestRecordHelper();
+ List<SAMValidationError> validationErrors = sam.isValid(false);
+ Assert.assertTrue(validationErrors == null);
+ }
+ @Test
+ public void testInvalidAlignmentStartValidation() {
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setAlignmentStart(0);
+ List<SAMValidationError> validationErrors = sam.isValid(false);
+ Assert.assertTrue(validationErrors != null && validationErrors.size() == 1);
+ }
+ // ----------------- NULL header tests ---------------------
+ @Test
+ public void testNullHeaderReferenceName() {
+ final SAMRecord sam = createTestRecordHelper();
+ final SAMFileHeader samHeader = sam.getHeader();
+ Assert.assertTrue(null != samHeader);
+ final String originalRefName = sam.getReferenceName();
+ // setting header to null retains the previously assigned ref name
+ sam.setHeader(null);
+ Assert.assertTrue(originalRefName.equals(sam.getReferenceName()));
+ // null header allows reference name to be set to NO_ALIGNMENT_REFERENCE_NAME
+ sam.setReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
+ Assert.assertTrue(sam.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+ Assert.assertTrue(sam.getReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ // null header allows reference name to be reset to a valid namw
+ sam.setReferenceName(originalRefName);
+ Assert.assertTrue(sam.getReferenceName().equals(originalRefName));
+ }
+ @Test
+ public void testNullHeaderReferenceIndex() {
+ SAMRecord sam = createTestRecordHelper();
+ final SAMFileHeader samHeader = sam.getHeader();
+ int originalRefIndex = sam.getReferenceIndex();
+ Assert.assertTrue(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX != originalRefIndex);
+ // setting header to null resets the reference index to null
+ sam.setHeader(null);
+ Assert.assertTrue(null == sam.mReferenceIndex);
+ // restoring the header to restores the reference index back to the original
+ sam.setHeader(samHeader);
+ Assert.assertTrue(sam.getReferenceIndex().equals(originalRefIndex));
+ // setting the header to null allows setting the reference index to NO_ALIGNMENT_REFERENCE_INDEX
+ sam.setHeader(null);
+ sam.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ Assert.assertTrue(sam.getReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ Assert.assertTrue(sam.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+ // force the internal SAMRecord reference index value to (null) initial state
+ sam = new SAMRecord(null);
+ Assert.assertTrue(null == sam.mReferenceIndex);
+ Assert.assertTrue(sam.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ // an unresolvable reference name doesn't throw
+ final String unresolvableRefName = "unresolvable";
+ sam.setReferenceName(unresolvableRefName);
+ // now force the SAMRecord to try to resolve the unresolvable name
+ sam.setHeader(samHeader);
+ Assert.assertTrue(null == sam.mReferenceIndex);
+ Assert.assertTrue(sam.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ }
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testNullHeaderSetReferenceIndex() {
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setHeader(null);
+ // setReferenceIndex with null header throws
+ sam.setReferenceIndex(3);
+ }
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testNullHeaderGetReferenceIndex() {
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setHeader(null);
+ // getReferenceIndex with null header throws
+ sam.getReferenceIndex();
+ }
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testNullHeaderForceIndexResolutionFailure() {
+ // force the internal SAMRecord reference index value to null initial state
+ final SAMRecord sam = new SAMRecord(null);
+ sam.setReferenceName("unresolvable");
+ sam.getReferenceIndex();
+ }
+ @Test
+ public void testNullHeaderMateReferenceName() {
+ final SAMRecord sam = createTestRecordHelper();
+ final SAMFileHeader samHeader = sam.getHeader();
+ Assert.assertTrue(null != samHeader);
+ final String originalMateRefName = sam.getMateReferenceName();
+ // setting header to null retains the previously assigned mate ref name
+ sam.setHeader(null);
+ Assert.assertTrue(originalMateRefName.equals(sam.getMateReferenceName()));
+ // null header allows mate reference name to be set to NO_ALIGNMENT_REFERENCE_NAME
+ sam.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
+ Assert.assertTrue(sam.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ // null header allows reference name to be reset to a valid namw
+ sam.setMateReferenceName(originalMateRefName);
+ Assert.assertTrue(sam.getMateReferenceName().equals(originalMateRefName));
+ }
+ @Test
+ public void testNullHeaderMateReferenceIndex() {
+ SAMRecord sam = createTestRecordHelper();
+ final SAMFileHeader samHeader = sam.getHeader();
+ sam.setMateReferenceName("chr1");
+ int originalMateRefIndex = sam.getMateReferenceIndex();
+ Assert.assertTrue(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX != originalMateRefIndex);
+ // setting header to null resets the mate reference index to null
+ sam.setHeader(null);
+ Assert.assertTrue(null == sam.mMateReferenceIndex);
+ // restoring the header to restores the reference index back to the original
+ sam.setHeader(samHeader);
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(originalMateRefIndex));
+ // setting the header to null allows setting the mate reference index to NO_ALIGNMENT_REFERENCE_INDEX
+ sam.setHeader(null);
+ sam.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ Assert.assertTrue(sam.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+ // force the internal SAMRecord mate reference index value to (null) initial state
+ sam = new SAMRecord(null);
+ Assert.assertTrue(null == sam.mMateReferenceIndex);
+ Assert.assertTrue(sam.getMateReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ // an unresolvable mate reference name doesn't throw
+ final String unresolvableRefName = "unresolvable";
+ sam.setMateReferenceName(unresolvableRefName);
+ // now force the SAMRecord to try to resolve the unresolvable mate reference name
+ sam.setHeader(samHeader);
+ Assert.assertTrue(null == sam.mMateReferenceIndex);
+ Assert.assertTrue(sam.getMateReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ }
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testNullHeaderSetMateReferenceIndex() {
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setHeader(null);
+ sam.setMateReferenceIndex(3);
+ }
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testNullHeaderGetMateReferenceIndex() {
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setMateReferenceName("chr1");
+ sam.setHeader(null);
+ // getMateReferenceIndex with null header throws
+ sam.getMateReferenceIndex();
+ }
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testNullHeaderForceMateIndexResolutionFailure() {
+ // force the internal SAMRecord reference index value to null initial state
+ final SAMRecord sam = new SAMRecord(null);
+ sam.setMateReferenceName("unresolvable");
+ sam.getMateReferenceIndex();
+ }
+ @Test
+ public void testNullHeaderGetReadGroup() {
+ final SAMRecord sam = createTestRecordHelper();
+ Assert.assertTrue(null != sam.getHeader());
+ Assert.assertTrue(null != sam.getReadGroup() && sam.getReadGroup().getId().equals("1"));
+ sam.setHeader(null);
+ Assert.assertNull(sam.getReadGroup());
+ }
+ @Test(dataProvider = "serializationTestData")
+ public void testNullHeaderSerialization(final File inputFile) throws Exception {
+ final SamReader reader = SamReaderFactory.makeDefault().open(inputFile);
+ final SAMRecord initialSAMRecord = reader.iterator().next();
+ reader.close();
+ initialSAMRecord.setHeader(null);
+ final SAMRecord deserializedSAMRecord = TestUtil.serializeAndDeserialize(initialSAMRecord);
+ Assert.assertEquals(deserializedSAMRecord, initialSAMRecord, "Deserialized SAMRecord not equal to original SAMRecord");
+ }
+ @Test
+ public void testNullHeaderRecordValidation() {
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setHeader(null);
+ List<SAMValidationError> validationErrors = sam.isValid(false);
+ Assert.assertTrue(validationErrors == null);
+ }
+ @Test
+ private SAMRecord testNullHeaderDeepCopy() {
+ SAMRecord sam = createTestRecordHelper();
+ sam.setHeader(null);
+ final SAMRecord deepCopy = sam.deepCopy();
+ // force the indexing bins to be computed in order to satisfy equality test
+ sam.setIndexingBin(sam.computeIndexingBin());
+ deepCopy.setIndexingBin(deepCopy.computeIndexingBin());
+ Assert.assertTrue(sam.equals(deepCopy));
+ return deepCopy;
+ }
+ private void testNullHeaderCigar(SAMRecord rec) {
+ Cigar origCigar = rec.getCigar();
+ Assert.assertNotNull(origCigar);
+ String originalCigarString = rec.getCigarString();
+ // set the cigar to null and then reset the cigar string in order to force getCigar to decode it
+ rec.setCigar(null);
+ Assert.assertNull(rec.getCigar());
+ rec.setCigarString(originalCigarString);
+ rec.setValidationStringency(ValidationStringency.STRICT);
+ rec.setHeader(null);
+ Assert.assertTrue(rec.getValidationStringency() == ValidationStringency.STRICT);
+ // force getCigar to decode the cigar string, validate that SAMRecord doesn't try to validate the cigar
+ Cigar cig = rec.getCigar();
+ Assert.assertNotNull(cig);
+ String cigString = TextCigarCodec.encode(cig);
+ Assert.assertEquals(cigString, originalCigarString);
+ }
+ @Test
+ private void testNullHeadGetCigarSAM() {
+ SAMRecord sam = createTestRecordHelper();
+ testNullHeaderCigar(sam);
+ }
+ @Test
+ private void testNullHeadGetCigarBAM() {
+ SAMRecord sam = createTestRecordHelper();
+ SAMRecordFactory factory = new DefaultSAMRecordFactory();
+ BAMRecord bamRec = factory.createBAMRecord(
+ sam.getHeader(),
+ sam.getReferenceIndex(),
+ sam.getAlignmentStart(),
+ (short) sam.getReadNameLength(),
+ (short) sam.getMappingQuality(),
+ 0,
+ sam.getCigarLength(),
+ sam.getFlags(),
+ sam.getReadLength(),
+ sam.getMateReferenceIndex(),
+ sam.getMateAlignmentStart(),
+ 0, null);
+ bamRec.setCigarString(sam.getCigarString());
+ testNullHeaderCigar(bamRec);
+ }
\ No newline at end of file
diff --git a/src/tests/java/htsjdk/samtools/SAMTextWriterTest.java b/src/tests/java/htsjdk/samtools/SAMTextWriterTest.java
index 28b0745..46ce5be 100644
--- a/src/tests/java/htsjdk/samtools/SAMTextWriterTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMTextWriterTest.java
@@ -44,7 +44,19 @@ public class SAMTextWriterTest {
public void testBasic() throws Exception {
+ doTest(getSAMReader(true, SAMFileHeader.SortOrder.coordinate));
+ }
+ @Test
+ public void testNullHeader() throws Exception {
final SAMRecordSetBuilder recordSetBuilder = getSAMReader(true, SAMFileHeader.SortOrder.coordinate);
+ for (final SAMRecord rec : recordSetBuilder.getRecords()) {
+ rec.setHeader(null);
+ }
+ doTest(recordSetBuilder);
+ }
+ private void doTest(final SAMRecordSetBuilder recordSetBuilder) throws Exception{
SamReader inputSAM = recordSetBuilder.getSamReader();
final File samFile = File.createTempFile("tmp.", ".sam");
diff --git a/src/tests/java/htsjdk/samtools/SAMUtilsTest.java b/src/tests/java/htsjdk/samtools/SAMUtilsTest.java
index 8c0096c..441d662 100644
--- a/src/tests/java/htsjdk/samtools/SAMUtilsTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMUtilsTest.java
@@ -26,6 +26,8 @@ package htsjdk.samtools;
import org.testng.Assert;
import org.testng.annotations.Test;
+import java.util.Arrays;
public class SAMUtilsTest {
public void testCompareMapqs() {
@@ -42,4 +44,108 @@ public class SAMUtilsTest {
Assert.assertTrue(SAMUtils.compareMapqs(1, 255) > 0);
Assert.assertTrue(SAMUtils.compareMapqs(2, 1) > 0);
+ @Test
+ public void testSimpleClippingOfRecord() {
+ // setup the record
+ final SAMFileHeader header = new SAMFileHeader();
+ header.addSequence(new SAMSequenceRecord("1", 1000));
+ final SAMRecord record = new SAMRecord(header);
+ record.setReadPairedFlag(true);
+ record.setCigar(TextCigarCodec.decode("10M"));
+ record.setReferenceIndex(0);
+ record.setAlignmentStart(1);
+ record.setMateAlignmentStart(6); // should overlap 5M
+ record.setMateReferenceIndex(0);
+ record.setReadBases("AAAAAAAAAA".getBytes());
+ final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record);
+ Assert.assertEquals(numToClip, 5);
+ SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK
+ Assert.assertTrue(record.getCigar().equals(TextCigarCodec.decode("5M5S")));
+ }
+ @Test
+ public void testClippingOfRecordWithSoftClipBasesAtTheEnd() {
+ /**
+ * Tests that if we need to clip a read with soft-clipping at the end, it does the right thing.
+ */
+ // setup the record
+ final SAMFileHeader header = new SAMFileHeader();
+ header.addSequence(new SAMSequenceRecord("1", 1000));
+ final SAMRecord record = new SAMRecord(header);
+ record.setReadPairedFlag(true);
+ record.setCigar(TextCigarCodec.decode("5M5S"));
+ record.setReferenceIndex(0);
+ record.setAlignmentStart(1);
+ record.setMateAlignmentStart(5); // should overlap 1M5S
+ record.setMateReferenceIndex(0);
+ record.setReadBases("AAAAAAAAAA".getBytes());
+ final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record);
+ Assert.assertEquals(numToClip, 1);
+ SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK
+ Assert.assertTrue(record.getCigar().equals(TextCigarCodec.decode("4M6S")));
+ }
+ @Test
+ public void testClippingOfRecordWithInsertion() {
+ /**
+ * Tests that if we need to clip a read with an insertion that overlaps
+ */
+ // setup the record
+ final SAMFileHeader header = new SAMFileHeader();
+ header.addSequence(new SAMSequenceRecord("1", 1000));
+ final SAMRecord record = new SAMRecord(header);
+ record.setReadPairedFlag(true);
+ record.setCigar(TextCigarCodec.decode("5M1I5M"));
+ record.setReferenceIndex(0);
+ record.setAlignmentStart(1);
+ record.setMateAlignmentStart(5); // should overlap the 1M1I5M
+ record.setMateReferenceIndex(0);
+ record.setReadBases("AAAAAAAAAAA".getBytes());
+ final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record);
+ Assert.assertEquals(numToClip, 7);
+ SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK
+ Assert.assertTrue(record.getCigar().equals(TextCigarCodec.decode("4M7S")));
+ }
+ // TODO: deletion
+ @Test
+ public void testClippingOfRecordWithDeletion() {
+ /**
+ * Tests that if we need to clip a read with an deletion that overlaps
+ */
+ // setup the record
+ final SAMFileHeader header = new SAMFileHeader();
+ header.addSequence(new SAMSequenceRecord("1", 1000));
+ final SAMRecord record = new SAMRecord(header);
+ record.setReadPairedFlag(true);
+ record.setCigar(TextCigarCodec.decode("5M1D5M"));
+ record.setReferenceIndex(0);
+ record.setAlignmentStart(1);
+ record.setMateAlignmentStart(5); // should overlap the 1M1D5M
+ record.setMateReferenceIndex(0);
+ record.setReadBases("AAAAAAAAAA".getBytes());
+ final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record);
+ Assert.assertEquals(numToClip, 6);
+ SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK
+ Assert.assertTrue(record.getCigar().equals(TextCigarCodec.decode("4M6S")));
+ }
diff --git a/src/tests/java/htsjdk/samtools/SamFilesTest.java b/src/tests/java/htsjdk/samtools/SamFilesTest.java
new file mode 100644
index 0000000..b37fc3a
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/SamFilesTest.java
@@ -0,0 +1,60 @@
+package htsjdk.samtools;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import java.io.File;
+import java.io.IOException;
+ * Test valid combinations of bam/cram vs bai/crai files.
+ * Created by vadim on 10/08/2015.
+ */
+public class SamFilesTest {
+ @DataProvider(name = "FindIndexParams")
+ public static Object[][] paramsFindIndexForSuffixes() {
+ return new Object[][]{
+ // no index available sanity checks:
+ {".tmp", null, null},
+ {".bam", null, null},
+ {".cram", null, null},
+ // legit cases for BAM files:
+ {".bam", ".bai", ".bai"},
+ {".bam", ".bam.bai", ".bam.bai"},
+ // legit cases for CRAM files:
+ {".cram", ".cram.bai", ".cram.bai"},
+ {".cram", ".cram.crai", ".cram.crai"},
+ // special prohibited cases:
+ {".bam", ".crai", null},
+ {".tmp", ".crai", null},
+ };
+ }
+ @Test(dataProvider = "FindIndexParams")
+ public void testFindIndexForSuffixes(final String dataFileSuffix, final String indexFileSuffix, final String expectIndexSuffix) throws IOException {
+ final File dataFile = File.createTempFile("test", dataFileSuffix);
+ dataFile.deleteOnExit();
+ Assert.assertNull(SamFiles.findIndex(dataFile));
+ File indexFile = null;
+ if (indexFileSuffix != null) {
+ indexFile = new File(dataFile.getAbsolutePath().replaceFirst("\\.\\S+$", indexFileSuffix));
+ indexFile.createNewFile();
+ indexFile.deleteOnExit();
+ }
+ final File foundIndexFile = SamFiles.findIndex(dataFile);
+ if (expectIndexSuffix == null) {
+ Assert.assertNull(foundIndexFile);
+ return;
+ }
+ Assert.assertNotNull(foundIndexFile);
+ Assert.assertTrue(foundIndexFile.getName().endsWith(expectIndexSuffix));
+ }
diff --git a/src/tests/java/htsjdk/samtools/SamIndexesTest.java b/src/tests/java/htsjdk/samtools/SamIndexesTest.java
new file mode 100644
index 0000000..7e0bc85
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/SamIndexesTest.java
@@ -0,0 +1,192 @@
+package htsjdk.samtools;
+import htsjdk.samtools.cram.CRAIEntry;
+import htsjdk.samtools.cram.CRAIIndex;
+import htsjdk.samtools.seekablestream.SeekableFileStream;
+import htsjdk.samtools.seekablestream.SeekableMemoryStream;
+import htsjdk.samtools.seekablestream.SeekableStream;
+import htsjdk.samtools.util.IOUtil;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.zip.GZIPOutputStream;
+public class SamIndexesTest {
+ @Test
+ public void testEmptyBai() throws IOException {
+ final File baiFile = File.createTempFile("test", ".bai");
+ baiFile.deleteOnExit();
+ final FileOutputStream fos = new FileOutputStream(baiFile);
+ fos.write(SamIndexes.BAI.magic);
+ fos.close();
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ baos.write(SamIndexes.BAI.magic);
+ baos.close();
+ final InputStream inputStream = SamIndexes.asBaiStreamOrNull(new ByteArrayInputStream(baos.toByteArray()), null);
+ for (final byte b : SamIndexes.BAI.magic) {
+ Assert.assertEquals(inputStream.read(), 0xFF & b);
+ }
+ }
+ @Test(expectedExceptions = NullPointerException.class)
+ public void testCraiRequiresDictionary() throws IOException {
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ final GZIPOutputStream gos = new GZIPOutputStream(baos);
+ gos.close();
+ SamIndexes.asBaiStreamOrNull(new ByteArrayInputStream(baos.toByteArray()), null);
+ }
+ @Test
+ public void testEmptyCraiReadAsBai() throws IOException {
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ final GZIPOutputStream gos = new GZIPOutputStream(baos);
+ gos.close();
+ final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
+ dictionary.addSequence(new SAMSequenceRecord("1", 100));
+ final InputStream inputStream = SamIndexes.asBaiStreamOrNull(new ByteArrayInputStream(baos.toByteArray()), dictionary);
+ for (final byte b : SamIndexes.BAI.magic) {
+ Assert.assertEquals(inputStream.read(), 0xFF & b);
+ }
+ }
+ @Test
+ public void testCraiInMemory() throws IOException {
+ final List<CRAIEntry> index = new ArrayList<CRAIEntry>();
+ final CRAIEntry entry = new CRAIEntry();
+ entry.sequenceId = 0;
+ entry.alignmentStart = 1;
+ entry.alignmentSpan = 2;
+ entry.sliceOffset = 3;
+ entry.sliceSize = 4;
+ entry.containerStartOffset = 5;
+ index.add(entry);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ final GZIPOutputStream gos = new GZIPOutputStream(baos);
+ CRAIIndex.writeIndex(gos, index);
+ gos.close();
+ final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
+ dictionary.addSequence(new SAMSequenceRecord("1", 100));
+ final InputStream baiStream = SamIndexes.asBaiStreamOrNull(new ByteArrayInputStream(baos.toByteArray()), dictionary);
+ Assert.assertNotNull(baiStream);
+ baos = new ByteArrayOutputStream();
+ IOUtil.copyStream(baiStream, baos);
+ final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(new SeekableMemoryStream(baos.toByteArray(), null), dictionary);
+ final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart);
+ Assert.assertNotNull(span);
+ final long[] coordinateArray = span.toCoordinateArray();
+ Assert.assertEquals(coordinateArray.length, 2);
+ Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset);
+ Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1);
+ }
+ @Test
+ public void testCraiFromFile() throws IOException {
+ final List<CRAIEntry> index = new ArrayList<CRAIEntry>();
+ final CRAIEntry entry = new CRAIEntry();
+ entry.sequenceId = 0;
+ entry.alignmentStart = 1;
+ entry.alignmentSpan = 2;
+ entry.sliceOffset = 3;
+ entry.sliceSize = 4;
+ entry.containerStartOffset = 5;
+ index.add(entry);
+ final File file = File.createTempFile("test", ".crai");
+ file.deleteOnExit();
+ final FileOutputStream fos = new FileOutputStream(file);
+ final GZIPOutputStream gos = new GZIPOutputStream(fos);
+ CRAIIndex.writeIndex(gos, index);
+ gos.close();
+ final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
+ dictionary.addSequence(new SAMSequenceRecord("1", 100));
+ final SeekableStream baiStream = SamIndexes.asBaiSeekableStreamOrNull(new SeekableFileStream(file), dictionary);
+ Assert.assertNotNull(baiStream);
+ final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(baiStream, dictionary);
+ final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart);
+ Assert.assertNotNull(span);
+ final long[] coordinateArray = span.toCoordinateArray();
+ Assert.assertEquals(coordinateArray.length, 2);
+ Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset);
+ Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1);
+ }
+ @Test(expectedExceptions = NullPointerException.class)
+ public void testOpenIndexFileAsBaiOrNull_NPE() throws IOException {
+ final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
+ dictionary.addSequence(new SAMSequenceRecord("1", 100));
+ Assert.assertNull(SamIndexes.openIndexFileAsBaiOrNull(null, dictionary));
+ }
+ @Test
+ public void testOpenIndexFileAsBaiOrNull_ReturnsNull() throws IOException {
+ final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
+ dictionary.addSequence(new SAMSequenceRecord("1", 100));
+ File file = File.createTempFile("test", ".notbai");
+ file.deleteOnExit();
+ Assert.assertNull(SamIndexes.openIndexFileAsBaiOrNull(file, dictionary));
+ file.delete();
+ file = File.createTempFile("test", ".notcrai");
+ file.deleteOnExit();
+ Assert.assertNull(SamIndexes.openIndexFileAsBaiOrNull(file, dictionary));
+ file.delete();
+ }
+ @Test
+ public void testOpenIndexUrlAsBaiOrNull() throws IOException {
+ final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
+ dictionary.addSequence(new SAMSequenceRecord("1", 100));
+ final List<CRAIEntry> index = new ArrayList<CRAIEntry>();
+ final CRAIEntry entry = new CRAIEntry();
+ entry.sequenceId = 0;
+ entry.alignmentStart = 1;
+ entry.alignmentSpan = 2;
+ entry.sliceOffset = 3;
+ entry.sliceSize = 4;
+ entry.containerStartOffset = 5;
+ index.add(entry);
+ final File file = File.createTempFile("test", ".crai");
+ file.deleteOnExit();
+ final FileOutputStream fos = new FileOutputStream(file);
+ final GZIPOutputStream gos = new GZIPOutputStream(fos);
+ CRAIIndex.writeIndex(gos, index);
+ gos.close();
+ final InputStream baiStream = SamIndexes.openIndexUrlAsBaiOrNull(file.toURI().toURL(), dictionary);
+ Assert.assertNotNull(baiStream);
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ IOUtil.copyStream(baiStream, baos);
+ final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(new SeekableMemoryStream(baos.toByteArray(), null), dictionary);
+ final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart);
+ Assert.assertNotNull(span);
+ final long[] coordinateArray = span.toCoordinateArray();
+ Assert.assertEquals(coordinateArray.length, 2);
+ Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset);
+ Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1);
+ }
diff --git a/src/tests/java/htsjdk/samtools/SamReaderFactoryTest.java b/src/tests/java/htsjdk/samtools/SamReaderFactoryTest.java
index 85d3c53..fc3d37b 100644
--- a/src/tests/java/htsjdk/samtools/SamReaderFactoryTest.java
+++ b/src/tests/java/htsjdk/samtools/SamReaderFactoryTest.java
@@ -110,6 +110,25 @@ public class SamReaderFactoryTest {
else if (inputFile.endsWith(".bam")) Assert.assertEquals(recordFactory.bamRecordsCreated, i);
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void samRecordFactoryNullHeaderBAMTest() {
+ final SAMRecordFactory recordFactory = new DefaultSAMRecordFactory();
+ recordFactory.createBAMRecord(
+ null, // null header
+ 0,
+ 0,
+ (short) 0,
+ (short) 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ null);
+ }
* Unit tests for asserting all permutations of data and index sources read the same records and header.
@@ -132,8 +151,14 @@ public class SamReaderFactoryTest {
public Object[][] composeAllPermutationsOfSamInputResource() {
final List<SamInputResource> sources = new ArrayList<SamInputResource>();
for (final InputResource.Type dataType : InputResource.Type.values()) {
+ if (dataType.equals(InputResource.Type.SRA_ACCESSION))
+ continue;
sources.add(new SamInputResource(composeInputResourceForType(dataType, false)));
for (final InputResource.Type indexType : InputResource.Type.values()) {
+ if (indexType.equals(InputResource.Type.SRA_ACCESSION))
+ continue;
sources.add(new SamInputResource(
composeInputResourceForType(dataType, false),
composeInputResourceForType(indexType, true)
diff --git a/src/tests/java/htsjdk/samtools/SamSpecIntTest.java b/src/tests/java/htsjdk/samtools/SamSpecIntTest.java
index 154b69d..9be6d6c 100644
--- a/src/tests/java/htsjdk/samtools/SamSpecIntTest.java
+++ b/src/tests/java/htsjdk/samtools/SamSpecIntTest.java
@@ -61,7 +61,7 @@ public class SamSpecIntTest {
- Assert.assertEquals(errorMessages.size(), 2);
+ Assert.assertEquals(errorMessages.size(), 0);
@@ -89,7 +89,7 @@ public class SamSpecIntTest {
- Assert.assertEquals(errorMessages.size(), 2);
+ Assert.assertEquals(errorMessages.size(), 0);
diff --git a/src/tests/java/htsjdk/samtools/cram/CRAIEntryTest.java b/src/tests/java/htsjdk/samtools/cram/CRAIEntryTest.java
new file mode 100644
index 0000000..c27f2f2
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/cram/CRAIEntryTest.java
@@ -0,0 +1,145 @@
+package htsjdk.samtools.cram;
+import htsjdk.samtools.cram.structure.Container;
+import htsjdk.samtools.cram.structure.Slice;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+ * Created by vadim on 25/08/2015.
+ */
+public class CRAIEntryTest {
+ @Test
+ public void testFromContainer() {
+ final Container container = new Container();
+ final Slice slice = new Slice();
+ slice.sequenceId = 1;
+ slice.alignmentStart = 2;
+ slice.alignmentSpan = 3;
+ slice.containerOffset = 4;
+ container.landmarks = new int[]{5};
+ container.slices = new Slice[]{slice};
+ final List<CRAIEntry> entries = CRAIEntry.fromContainer(container);
+ Assert.assertNotNull(entries);
+ Assert.assertEquals(entries.size(), 1);
+ final CRAIEntry entry = entries.get(0);
+ Assert.assertEquals(entry.sequenceId, slice.sequenceId);
+ Assert.assertEquals(entry.alignmentStart, slice.alignmentStart);
+ Assert.assertEquals(entry.alignmentSpan, slice.alignmentSpan);
+ Assert.assertEquals(entry.containerStartOffset, slice.containerOffset);
+ }
+ @Test
+ public void testFromCraiLine() {
+ int counter = 1;
+ final int sequenceId = counter++;
+ final int alignmentStart = counter++;
+ final int alignmentSpan = counter++;
+ final int containerOffset = Integer.MAX_VALUE + counter++;
+ final int sliceOffset = counter++;
+ final int sliceSise = counter++;
+ final String line = String.format("%d\t%d\t%d\t%d\t%d\t%d", sequenceId, alignmentStart, alignmentSpan, containerOffset, sliceOffset, sliceSise);
+ final CRAIEntry entry = CRAIEntry.fromCraiLine(line);
+ Assert.assertNotNull(entry);
+ Assert.assertEquals(entry.sequenceId, sequenceId);
+ Assert.assertEquals(entry.alignmentStart, alignmentStart);
+ Assert.assertEquals(entry.alignmentSpan, alignmentSpan);
+ Assert.assertEquals(entry.containerStartOffset, containerOffset);
+ }
+ @Test
+ public void testIntersetcsZeroSpan() {
+ Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 1), newEntry(1, 0)));
+ }
+ @Test
+ public void testIntersetcsSame() {
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1), newEntry(1, 1)));
+ }
+ @Test
+ public void testIntersetcsIncluded() {
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(1, 1)));
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(2, 1)));
+ // is symmetrical?
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1), newEntry(1, 2)));
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(2, 1), newEntry(1, 2)));
+ }
+ @Test
+ public void testIntersetcsOvertlaping() {
+ Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 2), newEntry(0, 1)));
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(0, 2)));
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(2, 1)));
+ Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 2), newEntry(3, 1)));
+ }
+ @Test
+ public void testIntersetcsAnotherSequence() {
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(10, 1, 2), newEntry(10, 2, 1)));
+ Assert.assertFalse(CRAIEntry.intersect(newEntry(10, 1, 2), newEntry(11, 2, 1)));
+ }
+ @Test
+ public void testCompareTo () {
+ final List<CRAIEntry> list = new ArrayList<CRAIEntry>(2);
+ CRAIEntry e1;
+ CRAIEntry e2;
+ e1 = new CRAIEntry();
+ e1.sequenceId = 100;
+ e2 = new CRAIEntry();
+ e2.sequenceId = 200;
+ list.add(e2);
+ list.add(e1);
+ Assert.assertTrue(list.get(1).sequenceId < list.get(0).sequenceId);
+ Collections.sort(list);
+ Assert.assertTrue(list.get(0).sequenceId < list.get(1).sequenceId);
+ list.clear();
+ e1 = new CRAIEntry();
+ e1.alignmentStart = 100;
+ e2 = new CRAIEntry();
+ e2.alignmentStart = 200;
+ list.add(e2);
+ list.add(e1);
+ Assert.assertTrue(list.get(1).alignmentStart < list.get(0).alignmentStart);
+ Collections.sort(list);
+ Assert.assertTrue(list.get(0).alignmentStart < list.get(1).alignmentStart);
+ list.clear();
+ e1 = new CRAIEntry();
+ e1.containerStartOffset = 100;
+ e2 = new CRAIEntry();
+ e2.containerStartOffset = 200;
+ list.add(e2);
+ list.add(e1);
+ Assert.assertTrue(list.get(1).containerStartOffset < list.get(0).containerStartOffset);
+ Collections.sort(list);
+ Assert.assertTrue(list.get(0).containerStartOffset < list.get(1).containerStartOffset);
+ }
+ private static CRAIEntry newEntry(final int start, final int span) {
+ return newEntry(1, start, span);
+ }
+ private static CRAIEntry newEntry(final int seqId, final int start, final int span) {
+ final CRAIEntry e1 = new CRAIEntry();
+ e1.sequenceId = seqId;
+ e1.alignmentStart = start;
+ e1.alignmentSpan = span;
+ return e1;
+ }
diff --git a/src/tests/java/htsjdk/samtools/cram/CRAIIndexTest.java b/src/tests/java/htsjdk/samtools/cram/CRAIIndexTest.java
new file mode 100644
index 0000000..8989963
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/cram/CRAIIndexTest.java
@@ -0,0 +1,133 @@
+package htsjdk.samtools.cram;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+ * Created by vadim on 25/08/2015.
+ */
+public class CRAIIndexTest {
+ @Test
+ public void testFind() throws IOException, CloneNotSupportedException {
+ final List<CRAIEntry> index = new ArrayList<CRAIEntry>();
+ final int sequenceId = 1;
+ CRAIEntry e = new CRAIEntry();
+ e.sequenceId = sequenceId;
+ e.alignmentStart = 1;
+ e.alignmentSpan = 1;
+ e.containerStartOffset = 1;
+ e.sliceOffset = 1;
+ e.sliceSize = 0;
+ index.add(e);
+ e = e.clone();
+ e.sequenceId = sequenceId;
+ e.alignmentStart = 2;
+ e.alignmentSpan = 1;
+ e.containerStartOffset = 2;
+ e.sliceOffset = 1;
+ e.sliceSize = 0;
+ index.add(e);
+ e = e.clone();
+ e.sequenceId = sequenceId;
+ e.alignmentStart = 3;
+ e.alignmentSpan = 1;
+ e.containerStartOffset = 3;
+ e.sliceOffset = 1;
+ e.sliceSize = 0;
+ index.add(e);
+ Assert.assertFalse(allFoundEntriesIntersectQueryInFind(index, sequenceId, 1, 0));
+ Assert.assertTrue(allFoundEntriesIntersectQueryInFind(index, sequenceId, 1, 1));
+ Assert.assertTrue(allFoundEntriesIntersectQueryInFind(index, sequenceId, 1, 2));
+ Assert.assertTrue(allFoundEntriesIntersectQueryInFind(index, sequenceId, 2, 1));
+ Assert.assertTrue(allFoundEntriesIntersectQueryInFind(index, sequenceId, 1, 3));
+ final int nonExistentSequenceId = 2;
+ Assert.assertFalse(allFoundEntriesIntersectQueryInFind(index, nonExistentSequenceId, 2, 1));
+ // a query starting beyond all entries:
+ Assert.assertFalse(allFoundEntriesIntersectQueryInFind(index, sequenceId, 4, 1));
+ }
+ private boolean allFoundEntriesIntersectQueryInFind(final List<CRAIEntry> index, final int sequenceId, final int start, final int span) {
+ int foundCount = 0;
+ for (final CRAIEntry found : CRAIIndex.find(index, sequenceId, start, span)) {
+ foundCount++;
+ Assert.assertEquals(found.sequenceId, sequenceId);
+ boolean intersects = false;
+ for (int pos = Math.min(found.alignmentStart, start); pos <= Math.max(found.alignmentStart + found.alignmentSpan, start + span); pos++) {
+ if (pos >= found.alignmentStart && pos >= start &&
+ pos <= found.alignmentStart + found.alignmentSpan && pos <= start + span) {
+ intersects = true;
+ break;
+ }
+ }
+ if (!intersects) {
+ return false;
+ }
+ }
+ return foundCount > 0;
+ }
+ @Test
+ public void testGetLeftmost() {
+ final List<CRAIEntry> index = new ArrayList<CRAIEntry>();
+ Assert.assertNull(CRAIIndex.getLeftmost(index));
+ final CRAIEntry e1 = new CRAIEntry();
+ e1.sequenceId = 1;
+ e1.alignmentStart = 2;
+ e1.alignmentSpan = 3;
+ e1.containerStartOffset = 4;
+ e1.sliceOffset = 5;
+ e1.sliceSize = 6;
+ index.add(e1);
+ // trivial case of single entry in index:
+ Assert.assertEquals(e1, CRAIIndex.getLeftmost(index));
+ final CRAIEntry e2 = new CRAIEntry();
+ e2.sequenceId = 1;
+ e2.alignmentStart = e1.alignmentStart + 1;
+ e2.alignmentSpan = 3;
+ e2.containerStartOffset = 4;
+ e2.sliceOffset = 5;
+ e2.sliceSize = 6;
+ index.add(e2);
+ Assert.assertEquals(e1, CRAIIndex.getLeftmost(index));
+ }
+ @Test
+ public void testFindLastAlignedEntry() {
+ final List<CRAIEntry> index = new ArrayList<CRAIEntry>();
+ Assert.assertEquals(-1, CRAIIndex.findLastAlignedEntry(index));
+ // Scan all allowed combinations of 10 mapped/unmapped entries and assert the found last aligned entry:
+ final int indexSize = 10;
+ for (int lastAligned = 0; lastAligned < indexSize; lastAligned++) {
+ index.clear();
+ for (int i = 0; i < indexSize; i++) {
+ final CRAIEntry e = new CRAIEntry();
+ e.sequenceId = (i <= lastAligned ? 0 : -1);
+ e.alignmentStart = i;
+ index.add(e);
+ }
+ // check expectations are correct before calling findLastAlignedEntry method:
+ Assert.assertTrue(index.get(lastAligned).sequenceId != -1);
+ if (lastAligned < index.size() - 1) {
+ Assert.assertTrue(index.get(lastAligned + 1).sequenceId == -1);
+ }
+ // assert the the found value matches the expectation:
+ Assert.assertEquals(CRAIIndex.findLastAlignedEntry(index), lastAligned);
+ }
+ }
diff --git a/src/tests/java/htsjdk/samtools/cram/build/CramIOTest.java b/src/tests/java/htsjdk/samtools/cram/build/CramIOTest.java
new file mode 100644
index 0000000..1035f24
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/cram/build/CramIOTest.java
@@ -0,0 +1,82 @@
+package htsjdk.samtools.cram.build;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.cram.common.CramVersions;
+import htsjdk.samtools.cram.structure.CramHeader;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+ * Created by vadim on 25/08/2015.
+ */
+public class CramIOTest {
+ @Test
+ public void testCheckHeaderAndEOF_v2() throws IOException {
+ final String id = "testid";
+ final CramHeader cramHeader = new CramHeader(CramVersions.CRAM_v2_1, id, new SAMFileHeader());
+ final File file = File.createTempFile("test", ".cram");
+ file.deleteOnExit();
+ final FileOutputStream fos = new FileOutputStream(file);
+ CramIO.writeCramHeader(cramHeader, fos);
+ CramIO.issueEOF(cramHeader.getVersion(), fos);
+ fos.close();
+ Assert.assertTrue(CramIO.checkHeaderAndEOF(file));
+ file.delete();
+ }
+ @Test
+ public void testCheckHeaderAndEOF_v3() throws IOException {
+ final String id = "testid";
+ final CramHeader cramHeader = new CramHeader(CramVersions.CRAM_v3, id, new SAMFileHeader());
+ final File file = File.createTempFile("test", ".cram");
+ file.deleteOnExit();
+ final FileOutputStream fos = new FileOutputStream(file);
+ CramIO.writeCramHeader(cramHeader, fos);
+ CramIO.issueEOF(cramHeader.getVersion(), fos);
+ fos.close();
+ Assert.assertTrue(CramIO.checkHeaderAndEOF(file));
+ file.delete();
+ }
+ @Test
+ public void testReplaceCramHeader() throws IOException {
+ final String id = "testid";
+ final CramHeader cramHeader = new CramHeader(CramVersions.CRAM_v3, id, new SAMFileHeader());
+ Assert.assertTrue(cramHeader.getSamFileHeader().getSequenceDictionary().isEmpty());
+ final File file = File.createTempFile("test", ".cram");
+ file.deleteOnExit();
+ final FileOutputStream fos = new FileOutputStream(file);
+ CramIO.writeCramHeader(cramHeader, fos);
+ CramIO.issueEOF(cramHeader.getVersion(), fos);
+ fos.close();
+ final long length = file.length();
+ final SAMFileHeader samFileHeader = new SAMFileHeader();
+ final SAMSequenceRecord sequenceRecord = new SAMSequenceRecord("1", 123);
+ samFileHeader.addSequence(sequenceRecord);
+ final String id2 = "testid2";
+ final CramHeader cramHeader2 = new CramHeader(CramVersions.CRAM_v3, id2, samFileHeader);
+ final boolean replaced = CramIO.replaceCramHeader(file, cramHeader2);
+ Assert.assertTrue(replaced);
+ Assert.assertEquals(file.length(), length);
+ Assert.assertTrue(CramIO.checkHeaderAndEOF(file));
+ final CramHeader cramHeader3 = CramIO.readCramHeader(new FileInputStream(file));
+ Assert.assertEquals(cramHeader3.getVersion(), CramVersions.CRAM_v3);
+ Assert.assertFalse(cramHeader3.getSamFileHeader().getSequenceDictionary().isEmpty());
+ Assert.assertNotNull(cramHeader3.getSamFileHeader().getSequenceDictionary().getSequence(0));
+ Assert.assertEquals(cramHeader3.getSamFileHeader().getSequence(sequenceRecord.getSequenceName()).getSequenceLength(), sequenceRecord.getSequenceLength());
+ file.delete();
+ }
diff --git a/src/tests/java/htsjdk/samtools/cram/structure/CramCompressionRecordTest.java b/src/tests/java/htsjdk/samtools/cram/structure/CramCompressionRecordTest.java
new file mode 100644
index 0000000..03360bd
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/cram/structure/CramCompressionRecordTest.java
@@ -0,0 +1,68 @@
+package htsjdk.samtools.cram.structure;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.cram.encoding.readfeatures.Deletion;
+import htsjdk.samtools.cram.encoding.readfeatures.InsertBase;
+import htsjdk.samtools.cram.encoding.readfeatures.Insertion;
+import htsjdk.samtools.cram.encoding.readfeatures.ReadFeature;
+import htsjdk.samtools.cram.encoding.readfeatures.SoftClip;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+import java.util.ArrayList;
+ * Created by vadim on 28/09/2015.
+ */
+public class CramCompressionRecordTest {
+ @Test
+ public void test_getAlignmentEnd() {
+ CramCompressionRecord r = new CramCompressionRecord();
+ r.alignmentStart = 1;
+ r.setSegmentUnmapped(true);
+ Assert.assertEquals(r.getAlignmentEnd(), SAMRecord.NO_ALIGNMENT_START);
+ r = new CramCompressionRecord();
+ int readLength = 100;
+ r.alignmentStart = 1;
+ r.readLength = readLength;
+ r.setSegmentUnmapped(false);
+ Assert.assertEquals(r.getAlignmentEnd(), r.readLength + r.alignmentStart - 1);
+ r = new CramCompressionRecord();
+ r.alignmentStart = 1;
+ r.readLength = readLength;
+ r.setSegmentUnmapped(false);
+ r.readFeatures = new ArrayList<ReadFeature>();
+ String softClip = "AAA";
+ r.readFeatures.add(new SoftClip(1, softClip.getBytes()));
+ Assert.assertEquals(r.getAlignmentEnd(), r.readLength + r.alignmentStart - 1 - softClip.length());
+ r = new CramCompressionRecord();
+ r.alignmentStart = 1;
+ r.readLength = readLength;
+ r.setSegmentUnmapped(false);
+ r.readFeatures = new ArrayList<ReadFeature>();
+ int deletionLength = 5;
+ r.readFeatures.add(new Deletion(1, deletionLength));
+ Assert.assertEquals(r.getAlignmentEnd(), r.readLength + r.alignmentStart - 1 + deletionLength);
+ r = new CramCompressionRecord();
+ r.alignmentStart = 1;
+ r.readLength = readLength;
+ r.setSegmentUnmapped(false);
+ r.readFeatures = new ArrayList<ReadFeature>();
+ String insertion = "CCCCCCCCCC";
+ r.readFeatures.add(new Insertion(1, insertion.getBytes()));
+ Assert.assertEquals(r.getAlignmentEnd(), r.readLength + r.alignmentStart - 1 - insertion.length());
+ r = new CramCompressionRecord();
+ r.alignmentStart = 1;
+ r.readLength = readLength;
+ r.setSegmentUnmapped(false);
+ r.readFeatures = new ArrayList<ReadFeature>();
+ r.readFeatures.add(new InsertBase(1, (byte) 'A'));
+ Assert.assertEquals(r.getAlignmentEnd(), r.readLength + r.alignmentStart - 1 - 1);
+ }
diff --git a/src/tests/java/htsjdk/samtools/cram/structure/ReadTagTest.java b/src/tests/java/htsjdk/samtools/cram/structure/ReadTagTest.java
index 362678f..3ed0b40 100644
--- a/src/tests/java/htsjdk/samtools/cram/structure/ReadTagTest.java
+++ b/src/tests/java/htsjdk/samtools/cram/structure/ReadTagTest.java
@@ -25,6 +25,7 @@ package htsjdk.samtools.cram.structure;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.ValidationStringency;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -53,25 +54,37 @@ public class ReadTagTest {
byte[] data = ReadTag.writeSingleValue((byte) 'i', intValue, false);
ByteBuffer byteBuffer = ByteBuffer.wrap(data);
- Object value = ReadTag.readSingleValue((byte) 'i', byteBuffer);
+ Object value = ReadTag.readSingleValue((byte) 'i', byteBuffer, ValidationStringency.DEFAULT_STRINGENCY);
Assert.assertEquals (((Integer) value).intValue(), intValue);
String sValue = "value";
data = ReadTag.writeSingleValue((byte) 'Z', sValue, false);
byteBuffer = ByteBuffer.wrap(data);
- value = ReadTag.readSingleValue((byte) 'Z', byteBuffer);
+ value = ReadTag.readSingleValue((byte) 'Z', byteBuffer, ValidationStringency.DEFAULT_STRINGENCY);
Assert.assertEquals(sValue, value);
byte[] baValue = "value".getBytes();
data = ReadTag.writeSingleValue((byte) 'B', baValue, false);
byteBuffer = ByteBuffer.wrap(data);
- value = ReadTag.readSingleValue((byte) 'B', byteBuffer);
+ value = ReadTag.readSingleValue((byte) 'B', byteBuffer, ValidationStringency.DEFAULT_STRINGENCY);
Assert.assertEquals((byte[]) value, baValue);
+ public void testUnsignedInt() {
+ long intValue = Integer.MAX_VALUE+1L;
+ byte[] data = ReadTag.writeSingleValue((byte) 'I', intValue, false);
+ ByteBuffer byteBuffer = ByteBuffer.wrap(data);
+ byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
+ Object value = ReadTag.readSingleValue((byte) 'I', byteBuffer, ValidationStringency.SILENT);
+ Assert.assertTrue(value instanceof Long);
+ long lValue = (Long)value;
+ Assert.assertEquals (lValue & 0xFFFFFFFF, intValue);
+ }
+ @Test
public void testParallelReadTag() throws Exception {
// NOTE: testng 5.5 (circa 2007) doesn't support parallel data providers, but modern versions do.
// For now, roll our own.
@@ -109,7 +122,7 @@ public class ReadTagTest {
final byte[] data = ReadTag.writeSingleValue(tagType, originalValue, false);
final ByteBuffer byteBuffer = ByteBuffer.wrap(data);
- final Object readValue = ReadTag.readSingleValue(tagType, byteBuffer);
+ final Object readValue = ReadTag.readSingleValue(tagType, byteBuffer, ValidationStringency.DEFAULT_STRINGENCY);
Assert.assertEquals(readValue, originalValue);
diff --git a/src/tests/java/htsjdk/samtools/filter/OverclippedReadFilterTest.java b/src/tests/java/htsjdk/samtools/filter/OverclippedReadFilterTest.java
new file mode 100644
index 0000000..bff8491
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/filter/OverclippedReadFilterTest.java
@@ -0,0 +1,83 @@
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ */
+package htsjdk.samtools.filter;
+import htsjdk.samtools.Cigar;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordSetBuilder;
+import org.testng.Assert;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+public class OverclippedReadFilterTest {
+ private final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
+ private final int unclippedBasesThreshold = 30;
+ private SAMRecord buildFrag(final String name, final String cigarString) {
+ // for this test, all we care about is the CIGAR
+ return builder.addFrag(name, 0, 1, false, false, cigarString, null, 30);
+ }
+ @Test(dataProvider = "data")
+ public void testOverclippedReadFilter(final String name, final String cigar, final boolean filterSingleEndClips, final boolean shouldFail) {
+ final OverclippedReadFilter filter = new OverclippedReadFilter(unclippedBasesThreshold, filterSingleEndClips);
+ final SAMRecord rec = buildFrag(name, cigar);
+ Assert.assertEquals(filter.filterOut(rec), shouldFail);
+ }
+ @DataProvider(name = "data")
+ private Object[][] testData() {
+ return new Object[][]{
+ {"foo", "1S10M1S", false, true},
+ {"foo", "1S10X1S", false, true},
+ {"foo", "1H1S10M1S1H", false, true},
+ {"foo", "1S40M1S", false, false},
+ {"foo", "1S40X1S", false, false},
+ {"foo", "1H10M1S", false, false},
+ {"foo", "1S10M1H", false, false},
+ {"foo", "10M1S", false, false},
+ {"foo", "1S10M", false, false},
+ {"foo", "10M1S", true, true},
+ {"foo", "1S10M", true, true},
+ {"foo", "1S10M10D10M1S", false, true},
+ {"foo", "1S1M40I1S", false, false},
+ {"foo", "1S10I1S", false, true},
+ {"foo", "1S40I1S", false, false},
+ {"foo", "1S40I1S", true, false},
+ {"foo", "25S40I25M", true, false},
+ {"foo", "25S25M", true, true},
+ {"foo", "25S25X", true, true},
+ {"foo", "25S25H", true, true},
+ {"foo", "25S25H", false, false},
+ {"foo", "25S25M25S", false, true},
+ {"foo", "25M25S", true, true},
+ {"foo", "25S25M", true, true},
+ {"foo", "25S35S", true, true},
+ {"foo", "25S35M25S", true, false},
+ {"foo", "35M25S", true, false},
+ {"foo", "25S35M", true, false}
+ };
+ }
diff --git a/src/tests/java/htsjdk/samtools/metrics/MetricsFileTest.java b/src/tests/java/htsjdk/samtools/metrics/MetricsFileTest.java
index e3c81b6..2393031 100644
--- a/src/tests/java/htsjdk/samtools/metrics/MetricsFileTest.java
+++ b/src/tests/java/htsjdk/samtools/metrics/MetricsFileTest.java
@@ -97,6 +97,8 @@ public class MetricsFileTest {
MetricsFile<FloatingPointMetric,Integer> file2 = writeThenReadBack(file);
Assert.assertEquals(file, file2);
@@ -178,6 +180,22 @@ public class MetricsFileTest {
Assert.assertEquals(file, file3);
+ @Test
+ public void areMetricsFilesEqualTest(){
+ final File TEST_DIR = new File("testdata/htsjdk/samtools/metrics/");
+ final File file1 = new File(TEST_DIR,"metricsOne.metrics");
+ final File file2 = new File(TEST_DIR,"metricsOneCopy.metrics");
+ final File fileModifiedHist = new File(TEST_DIR,"metricsOneModifiedHistogram.metrics");
+ final File fileModifiedMet = new File(TEST_DIR,"metricsOneModifiedMetrics.metrics");
+ Assert.assertTrue(MetricsFile.areMetricsEqual(file1, file2));
+ Assert.assertTrue(MetricsFile.areMetricsEqual(file1, fileModifiedHist));
+ Assert.assertFalse(MetricsFile.areMetricsAndHistogramsEqual(file1, fileModifiedHist));
+ Assert.assertFalse(MetricsFile.areMetricsEqual(file1, fileModifiedMet));
+ Assert.assertFalse(MetricsFile.areMetricsAndHistogramsEqual(file1, fileModifiedMet));
+ }
/** Helper method to persist metrics to file and read them back again. */
private <METRIC extends MetricBase> MetricsFile<METRIC, Integer> writeThenReadBack(MetricsFile<METRIC,Integer> in) throws IOException {
File f = File.createTempFile("test", ".metrics");
@@ -189,4 +207,7 @@ public class MetricsFileTest {
retval.read(new FileReader(f));
return retval;
diff --git a/src/tests/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java b/src/tests/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java
index c762c2f..511b1ab 100644
--- a/src/tests/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java
+++ b/src/tests/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java
@@ -42,13 +42,17 @@ public class FastaSequenceIndexTest {
public Object[][] provideHomoSapiens() throws FileNotFoundException {
final File sequenceIndexFile = new File(TEST_DATA_DIR,"Homo_sapiens_assembly18.fasta.fai");
- return new Object[][] { new Object[] { new FastaSequenceIndex(sequenceIndexFile) } };
+ return new Object[][] { new Object[]
+ { new FastaSequenceIndex(sequenceIndexFile) },
+ { new FastaSequenceIndex(sequenceIndexFile.toPath()) } };
public Object[][] provideSpecialCharacters() throws FileNotFoundException {
final File sequenceIndexFile = new File(TEST_DATA_DIR,"testing.fai");
- return new Object[][] { new Object[] { new FastaSequenceIndex(sequenceIndexFile) } };
+ return new Object[][] { new Object[]
+ { new FastaSequenceIndex(sequenceIndexFile) },
+ { new FastaSequenceIndex(sequenceIndexFile.toPath()) } };
diff --git a/src/tests/java/htsjdk/samtools/reference/IndexedFastaSequenceFileTest.java b/src/tests/java/htsjdk/samtools/reference/IndexedFastaSequenceFileTest.java
index 9970bbc..5c1a9ac 100644
--- a/src/tests/java/htsjdk/samtools/reference/IndexedFastaSequenceFileTest.java
+++ b/src/tests/java/htsjdk/samtools/reference/IndexedFastaSequenceFileTest.java
@@ -53,7 +53,9 @@ public class IndexedFastaSequenceFileTest{
public Object[][] provideSequenceFile() throws FileNotFoundException {
return new Object[][] { new Object[]
{ new IndexedFastaSequenceFile(SEQUENCE_FILE) },
- { new IndexedFastaSequenceFile(SEQUENCE_FILE_NODICT) }};
+ { new IndexedFastaSequenceFile(SEQUENCE_FILE_NODICT) },
+ { new IndexedFastaSequenceFile(SEQUENCE_FILE.toPath()) },
+ { new IndexedFastaSequenceFile(SEQUENCE_FILE_NODICT.toPath()) }};
@@ -62,7 +64,11 @@ public class IndexedFastaSequenceFileTest{
new Object[] { ReferenceSequenceFileFactory.getReferenceSequenceFile(SEQUENCE_FILE),
new IndexedFastaSequenceFile(SEQUENCE_FILE) },
new Object[] { ReferenceSequenceFileFactory.getReferenceSequenceFile(SEQUENCE_FILE, true),
- new IndexedFastaSequenceFile(SEQUENCE_FILE) },};
+ new IndexedFastaSequenceFile(SEQUENCE_FILE) },
+ new Object[] { ReferenceSequenceFileFactory.getReferenceSequenceFile(SEQUENCE_FILE.toPath()),
+ new IndexedFastaSequenceFile(SEQUENCE_FILE.toPath()) },
+ new Object[] { ReferenceSequenceFileFactory.getReferenceSequenceFile(SEQUENCE_FILE.toPath(), true),
+ new IndexedFastaSequenceFile(SEQUENCE_FILE.toPath()) },};
diff --git a/src/tests/java/htsjdk/samtools/sra/SRAIndexTest.java b/src/tests/java/htsjdk/samtools/sra/SRAIndexTest.java
new file mode 100644
index 0000000..9cf0c28
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/sra/SRAIndexTest.java
@@ -0,0 +1,150 @@
+* National Center for Biotechnology Information
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+* Please cite the author in any work or product based on this material.
+* ===========================================================================
+package htsjdk.samtools.sra;
+import htsjdk.samtools.BAMFileSpan;
+import htsjdk.samtools.Bin;
+import htsjdk.samtools.GenomicIndexUtil;
+import htsjdk.samtools.SRAFileReader;
+import htsjdk.samtools.SRAIndex;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+ * Unit tests for SRAIndex
+ *
+ * Created by andrii.nikitiuk on 10/28/15.
+ */
+public class SRAIndexTest {
+ private static final SRAAccession DEFAULT_ACCESSION = new SRAAccession("SRR1298981");
+ private static final int LAST_BIN_LEVEL = GenomicIndexUtil.LEVEL_STARTS.length - 1;
+ private static final int SRA_BIN_OFFSET = GenomicIndexUtil.LEVEL_STARTS[LAST_BIN_LEVEL];
+ @Test
+ public void testLevelSize() {
+ if (!SRAAccession.isSupported()) return;
+ SRAIndex index = getIndex(DEFAULT_ACCESSION);
+ Assert.assertEquals(index.getLevelSize(0), GenomicIndexUtil.LEVEL_STARTS[1] - GenomicIndexUtil.LEVEL_STARTS[0]);
+ Assert.assertEquals(index.getLevelSize(LAST_BIN_LEVEL), GenomicIndexUtil.MAX_BINS - GenomicIndexUtil.LEVEL_STARTS[LAST_BIN_LEVEL] - 1);
+ }
+ @Test
+ public void testLevelForBin() {
+ if (!SRAAccession.isSupported()) return;
+ SRAIndex index = getIndex(DEFAULT_ACCESSION);
+ Bin bin = new Bin(0, SRA_BIN_OFFSET);
+ Assert.assertEquals(index.getLevelForBin(bin), LAST_BIN_LEVEL);
+ }
+ @DataProvider(name = "testBinLocuses")
+ public Object[][] createDataForBinLocuses() {
+ return new Object[][] {
+ };
+ }
+ @Test(dataProvider = "testBinLocuses")
+ public void testBinLocuses(SRAAccession acc, int reference, int binIndex, int firstLocus, int lastLocus) {
+ if (!SRAAccession.isSupported()) return;
+ SRAIndex index = getIndex(acc);
+ Bin bin = new Bin(reference, SRA_BIN_OFFSET + binIndex);
+ Assert.assertEquals(index.getFirstLocusInBin(bin), firstLocus);
+ Assert.assertEquals(index.getLastLocusInBin(bin), lastLocus);
+ }
+ @DataProvider(name = "testBinOverlappings")
+ public Object[][] createDataForBinOverlappings() {
+ return new Object[][] {
+ {DEFAULT_ACCESSION, 0, 1, SRAIndex.SRA_BIN_SIZE, new HashSet<Integer>(Arrays.asList(0))},
+ {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE + 1, SRAIndex.SRA_BIN_SIZE * 2, new HashSet<Integer>(Arrays.asList(1))},
+ {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE + 1, SRAIndex.SRA_BIN_SIZE * 3, new HashSet<Integer>(Arrays.asList(1, 2))},
+ {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE * 2, SRAIndex.SRA_BIN_SIZE * 2 + 1, new HashSet<Integer>(Arrays.asList(1, 2))}
+ };
+ }
+ @Test(dataProvider = "testBinOverlappings")
+ public void testBinOverlappings(SRAAccession acc, int reference, int firstLocus, int lastLocus, Set<Integer> binNumbers) {
+ if (!SRAAccession.isSupported()) return;
+ SRAIndex index = getIndex(acc);
+ Iterator<Bin> binIterator = index.getBinsOverlapping(reference, firstLocus, lastLocus).iterator();
+ Set<Integer> binNumbersFromIndex = new HashSet<Integer>();
+ while (binIterator.hasNext()) {
+ Bin bin = binIterator.next();
+ binNumbersFromIndex.add(bin.getBinNumber() - SRA_BIN_OFFSET);
+ }
+ Assert.assertEquals(binNumbers, binNumbersFromIndex);
+ }
+ @DataProvider(name = "testSpanOverlappings")
+ public Object[][] createDataForSpanOverlappings() {
+ return new Object[][] {
+ {DEFAULT_ACCESSION, 0, 1, SRAIndex.SRA_BIN_SIZE, new long[] {0, SRAIndex.SRA_CHUNK_SIZE} },
+ {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE * 2, SRAIndex.SRA_BIN_SIZE * 2 + 1, new long[]{0, SRAIndex.SRA_CHUNK_SIZE} },
+ };
+ }
+ @Test(dataProvider = "testSpanOverlappings")
+ public void testSpanOverlappings(SRAAccession acc, int reference, int firstLocus, int lastLocus, long[] spanCoordinates) {
+ if (!SRAAccession.isSupported()) return;
+ SRAIndex index = getIndex(acc);
+ BAMFileSpan span = index.getSpanOverlapping(reference, firstLocus, lastLocus);
+ long[] coordinatesFromIndex = span.toCoordinateArray();
+ List<Long> coordinatesListFromIndex = new ArrayList<Long>();
+ for (long coordinate : coordinatesFromIndex) {
+ coordinatesListFromIndex.add(coordinate);
+ }
+ Assert.assertTrue(Arrays.equals(coordinatesFromIndex, spanCoordinates),
+ "Coordinates mismatch. Expected: " + Arrays.toString(spanCoordinates) +
+ " but was : " + Arrays.toString(coordinatesFromIndex));
+ }
+ private SRAIndex getIndex(SRAAccession acc) {
+ SRAFileReader reader = new SRAFileReader(acc);
+ return (SRAIndex) reader.getIndex();
+ }
diff --git a/src/tests/java/htsjdk/samtools/sra/SRALazyRecordTest.java b/src/tests/java/htsjdk/samtools/sra/SRALazyRecordTest.java
new file mode 100644
index 0000000..9b6dccb
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/sra/SRALazyRecordTest.java
@@ -0,0 +1,51 @@
+package htsjdk.samtools.sra;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SRAFileReader;
+import htsjdk.samtools.util.TestUtil;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+ * Tests for SRA extension of SAMRecord objects which load fields on demand
+ */
+public class SRALazyRecordTest {
+ private static final SRAAccession DEFAULT_ACCESSION = new SRAAccession("SRR1298981");
+ @DataProvider(name = "serializationTestData")
+ public Object[][] getSerializationTestData() {
+ return new Object[][] {
+ };
+ }
+ @Test(dataProvider = "serializationTestData")
+ public void testSerialization(SRAAccession accession) throws Exception {
+ SRAFileReader reader = new SRAFileReader(accession);
+ final SAMRecord initialSAMRecord = reader.getIterator().next();
+ reader.close();
+ final SAMRecord deserializedSAMRecord = TestUtil.serializeAndDeserialize(initialSAMRecord);
+ Assert.assertEquals(deserializedSAMRecord, initialSAMRecord, "Deserialized SAMRecord not equal to original SAMRecord");
+ }
+ @Test
+ public void testCloneAndEquals() throws Exception {
+ SRAFileReader reader = new SRAFileReader(DEFAULT_ACCESSION);
+ final SAMRecord record = reader.getIterator().next();
+ reader.close();
+ SAMRecord newRecord = (SAMRecord)record.clone();
+ Assert.assertFalse(record == newRecord);
+ Assert.assertNotSame(record, newRecord);
+ Assert.assertEquals(record, newRecord);
+ Assert.assertEquals(newRecord, record);
+ newRecord.setAlignmentStart(record.getAlignmentStart() + 100);
+ Assert.assertFalse(record.equals(newRecord));
+ Assert.assertFalse(newRecord.equals(record));
+ }
diff --git a/src/tests/java/htsjdk/samtools/sra/SRAQueryTest.java b/src/tests/java/htsjdk/samtools/sra/SRAQueryTest.java
new file mode 100644
index 0000000..b37c37a
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/sra/SRAQueryTest.java
@@ -0,0 +1,116 @@
+package htsjdk.samtools.sra;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordIterator;
+import htsjdk.samtools.SamInputResource;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.ValidationStringency;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import java.util.NoSuchElementException;
+public class SRAQueryTest {
+ @DataProvider(name = "testUnmappedCounts")
+ public Object[][] createDataForUnmappedCounts() {
+ return new Object[][] {
+ {"SRR2096940", 498}
+ };
+ }
+ @Test(dataProvider = "testUnmappedCounts")
+ public void testUnmappedCounts(String acc, int numberUnalignments) {
+ if (!SRAAccession.isSupported()) return;
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+ final SAMRecordIterator samRecordIterator = reader.queryUnmapped();
+ checkAlignedUnalignedCountsByIterator(samRecordIterator, 0, numberUnalignments);
+ }
+ @DataProvider(name = "testReferenceAlignedCounts")
+ public Object[][] createDataForReferenceAlignedCounts() {
+ return new Object[][] {
+ {"SRR2096940", "CM000681.1", 0, 10591},
+ {"SRR2096940", "CM000681.1", 55627015, 10591},
+ {"SRR2096940", "CM000681.1", 55627016, 0},
+ };
+ }
+ @Test(dataProvider = "testReferenceAlignedCounts")
+ public void testReferenceAlignedCounts(String acc, String reference, int refernceStart, int numberAlignments) {
+ if (!SRAAccession.isSupported()) return;
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+ final SAMRecordIterator samRecordIterator = reader.queryAlignmentStart(reference, refernceStart);
+ checkAlignedUnalignedCountsByIterator(samRecordIterator, numberAlignments, 0);
+ }
+ @DataProvider(name = "testQueryCounts")
+ public Object[][] createDataForQueryCounts() {
+ return new Object[][] {
+ {"SRR2096940", "CM000681.1", 0, 59128983, true, 10591, 0},
+ {"SRR2096940", "CM000681.1", 55627015, 59128983, true, 10591, 0},
+ {"SRR2096940", "CM000681.1", 55627016, 59128983, true, 0, 0},
+ {"SRR2096940", "CM000681.1", 55627016, 59128983, false, 10591, -1},
+ };
+ }
+ @Test(dataProvider = "testQueryCounts")
+ public void testQueryCounts(String acc, String reference, int refernceStart, int referenceEnd, boolean contained, int numberAlignments, int numberUnalignment) {
+ if (!SRAAccession.isSupported()) return;
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+ final SAMRecordIterator samRecordIterator = reader.query(reference, refernceStart, referenceEnd, contained);
+ checkAlignedUnalignedCountsByIterator(samRecordIterator, numberAlignments, numberUnalignment);
+ }
+ private void checkAlignedUnalignedCountsByIterator(SAMRecordIterator samRecordIterator,
+ int numberAlignments, int numberUnalignments) {
+ int countAlignments = 0, countUnalignments = 0;
+ while (true) {
+ boolean hasRecord = samRecordIterator.hasNext();
+ SAMRecord record = null;
+ try {
+ record = samRecordIterator.next();
+ Assert.assertTrue(hasRecord); // exception is not thrown if we came to this point
+ } catch (NoSuchElementException e) {
+ Assert.assertFalse(hasRecord);
+ }
+ Assert.assertEquals(hasRecord, record != null);
+ if (record == null) {
+ break;
+ }
+ if (record.getReadUnmappedFlag()) {
+ countUnalignments++;
+ } else {
+ countAlignments++;
+ }
+ }
+ if (numberAlignments != -1) {
+ Assert.assertEquals(numberAlignments, countAlignments);
+ }
+ if (numberUnalignments != -1) {
+ Assert.assertEquals(numberUnalignments, countUnalignments);
+ }
+ }
diff --git a/src/tests/java/htsjdk/samtools/sra/SRAReferenceTest.java b/src/tests/java/htsjdk/samtools/sra/SRAReferenceTest.java
new file mode 100644
index 0000000..1313b4d
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/sra/SRAReferenceTest.java
@@ -0,0 +1,25 @@
+package htsjdk.samtools.sra;
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.reference.ReferenceSequenceFile;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+public class SRAReferenceTest {
+ @DataProvider(name = "testReference")
+ public Object[][] createDataForReference() {
+ return new Object[][] {
+ };
+ }
+ @Test(dataProvider = "testReference")
+ public void testReference(String acc, String refContig, int refStart, int refStop, String refBases) {
+ if (!SRAAccession.isSupported()) return;
+ ReferenceSequenceFile refSeqFile = new SRAIndexedSequenceFile(new SRAAccession(acc));
+ ReferenceSequence refSeq = refSeqFile.getSubsequenceAt(refContig, refStart, refStop);
+ Assert.assertEquals(new String(refSeq.getBases()), refBases);
+ }
diff --git a/src/tests/java/htsjdk/samtools/sra/SRATest.java b/src/tests/java/htsjdk/samtools/sra/SRATest.java
new file mode 100644
index 0000000..86a5218
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/sra/SRATest.java
@@ -0,0 +1,464 @@
+* National Center for Biotechnology Information
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+* Please cite the author in any work or product based on this material.
+* ===========================================================================
+package htsjdk.samtools.sra;
+import htsjdk.samtools.*;
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.reference.ReferenceSequenceFile;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import java.util.*;
+ * Integration tests for SRA functionality
+ *
+ * Created by andrii.nikitiuk on 8/24/15.
+ */
+public class SRATest {
+ @DataProvider(name = "testCounts")
+ public Object[][] createDataForCounts() {
+ return new Object[][] {
+ {"SRR2096940", 10591, 498}
+ };
+ }
+ @Test(dataProvider = "testCounts")
+ public void testCounts(String acc, int numberAlignments, int numberUnalignments) {
+ if (!SRAAccession.isSupported()) return;
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+ final SAMRecordIterator samRecordIterator = reader.iterator();
+ checkAlignedUnalignedCountsByIterator(samRecordIterator, numberAlignments, numberUnalignments);
+ }
+ @DataProvider(name = "testCountsBySpan")
+ public Object[][] createDataForCountsBySpan() {
+ return new Object[][] {
+ {"SRR2096940", Arrays.asList(new Chunk(0, 59128983), new Chunk(59128983, 59141089)), 10591, 498},
+ {"SRR2096940", Arrays.asList(new Chunk(0, 29128983), new Chunk(29128983, 59141089)), 10591, 498},
+ {"SRR2096940", Arrays.asList(new Chunk(0, 59134983), new Chunk(59134983, 59141089)), 10591, 498},
+ {"SRR2096940", Arrays.asList(new Chunk(0, 59130000)), 10591, 0},
+ {"SRR2096940", Arrays.asList(new Chunk(0, 59140889)), 10591, 298}
+ };
+ }
+ @Test(dataProvider = "testCountsBySpan")
+ public void testCountsBySpan(String acc, List<Chunk> chunks, int numberAlignments, int numberUnalignments) {
+ if (!SRAAccession.isSupported()) return;
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+ final SAMRecordIterator samRecordIterator = ((SamReader.Indexing) reader).iterator(new BAMFileSpan(chunks));
+ checkAlignedUnalignedCountsByIterator(samRecordIterator, numberAlignments, numberUnalignments);
+ }
+ @DataProvider(name = "testGroups")
+ public Object[][] createDataForGroups() {
+ return new Object[][] {
+ {"SRR822962", new TreeSet<String>(Arrays.asList(
+ "GS54389-FS3-L08", "GS57511-FS3-L08", "GS54387-FS3-L02", "GS54387-FS3-L01",
+ "GS57510-FS3-L01", "GS57510-FS3-L03", "GS54389-FS3-L07", "GS54389-FS3-L05",
+ "GS54389-FS3-L06", "GS57510-FS3-L02", "GS57510-FS3-L04", "GS54387-FS3-L03",
+ "GS46253-FS3-L03"))
+ },
+ {"SRR2096940", new HashSet<String>(Arrays.asList("SRR2096940"))}
+ };
+ }
+ @Test(dataProvider = "testGroups")
+ public void testGroups(String acc, Set<String> groups) {
+ if (!SRAAccession.isSupported()) return;
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+ final SAMRecordIterator samRecordIterator = reader.iterator();
+ SAMFileHeader header = reader.getFileHeader();
+ Set<String> headerGroups = new TreeSet<String>();
+ for (SAMReadGroupRecord group : header.getReadGroups()) {
+ Assert.assertEquals(group.getReadGroupId(), group.getId());
+ headerGroups.add(group.getReadGroupId());
+ }
+ Assert.assertEquals(groups, headerGroups);
+ Set<String> foundGroups = new TreeSet<String>();
+ for (int i = 0; i < 10000; i++) {
+ if (!samRecordIterator.hasNext()) {
+ break;
+ }
+ SAMRecord record = samRecordIterator.next();
+ String groupName = (String)record.getAttribute("RG");
+ foundGroups.add(groupName);
+ }
+ // please note that some groups may be introduced after 10k records, which is not an error
+ Assert.assertEquals(groups, foundGroups);
+ }
+ @DataProvider(name = "testReferences")
+ public Object[][] createDataForReferences() {
+ return new Object[][] {
+ // primary alignment only
+ {"SRR1063272", 1,
+ Arrays.asList("supercont2.1", "supercont2.2", "supercont2.3", "supercont2.4",
+ "supercont2.5", "supercont2.6", "supercont2.7", "supercont2.8",
+ "supercont2.9", "supercont2.10", "supercont2.11", "supercont2.12",
+ "supercont2.13", "supercont2.14"),
+ Arrays.asList(2291499, 1621675, 1575141, 1084805,
+ 1814975, 1422463, 1399503, 1398693,
+ 1186808, 1059964, 1561994, 774062,
+ 756744, 926563)},
+ };
+ }
+ @Test(dataProvider = "testReferences")
+ public void testReferences(String acc, int numberFirstReferenceFound, List<String> references, List<Integer> refLengths) {
+ if (!SRAAccession.isSupported()) return;
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+ final SAMRecordIterator samRecordIterator = reader.iterator();
+ SAMFileHeader header = reader.getFileHeader();
+ Set<String> headerRefNames = new TreeSet<String>();
+ for (SAMSequenceRecord ref : header.getSequenceDictionary().getSequences()) {
+ String refName = ref.getSequenceName();
+ int refIndex = references.indexOf(refName);
+ Assert.assertTrue(refIndex != -1, "Unexpected reference: " + refName);
+ Assert.assertEquals(refLengths.get(refIndex), (Integer) ref.getSequenceLength(), "Reference length is incorrect");
+ headerRefNames.add(refName);
+ }
+ Assert.assertEquals(new TreeSet<String>(references), headerRefNames);
+ Set<String> foundRefNames = new TreeSet<String>();
+ for (int i = 0; i < 10000; i++) {
+ if (!samRecordIterator.hasNext()) {
+ break;
+ }
+ SAMRecord record = samRecordIterator.next();
+ if (record.getReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)) {
+ continue;
+ }
+ String refName = record.getReferenceName();
+ Assert.assertNotNull(refName);
+ foundRefNames.add(refName);
+ }
+ Assert.assertEquals(new TreeSet<String>(references.subList(0, numberFirstReferenceFound)), foundRefNames);
+ }
+ @DataProvider(name = "testRows")
+ public Object[][] createDataForRowsTest() {
+ return new Object[][] {
+ // primary alignment only
+ {"SRR1063272", 0, 99, "SRR1063272.R.1",
+ 86, "101M", "supercont2.1", 60, true, false},
+ // small SRA archive
+ {"SRR2096940", 1, 16, "SRR2096940.R.3",
+ 55627016, "167M", "CM000681.1", 42, false, false},
+ {"SRR2096940", 10591, 4, "SRR2096940.R.10592",
+ -1, null, null, -1, false, false},
+ // primary and secondary alignments
+ {"SRR833251", 81, 393, "SRR833251.R.51",
+ 1787186, "38M63S", "gi|169794206|ref|NC_010410.1|", 11, true, true},
+ // local SRA file
+ {"testdata/htsjdk/samtools/sra/test_archive.sra", 1, 99, "test_archive.R.2",
+ 2811570, "150M", "NC_007121.5", 60, true, false}
+ };
+ }
+ @Test(dataProvider = "testRows")
+ public void testRows(String acc, int recordIndex, int flags, String readName, String bases, String quals, int refStart, String cigar,
+ String refName, int mapQ, boolean hasMate, boolean isSecondaryAlignment) {
+ if (!SRAAccession.isSupported()) return;
+ SAMRecord record = getRecordByIndex(acc, recordIndex, false);
+ checkSAMRecord(record, flags, readName, bases, quals, refStart, cigar, refName, mapQ, hasMate, isSecondaryAlignment);
+ }
+ @Test(dataProvider = "testRows")
+ public void testRowsAfterIteratorDetach(String acc, int recordIndex, int flags, String readName, String bases, String quals,
+ int refStart, String cigar, String refName, int mapQ, boolean hasMate,
+ boolean isSecondaryAlignment) {
+ if (!SRAAccession.isSupported()) return;
+ SAMRecord record = getRecordByIndex(acc, recordIndex, true);
+ checkSAMRecord(record, flags, readName, bases, quals, refStart, cigar, refName, mapQ, hasMate, isSecondaryAlignment);
+ }
+ @Test(dataProvider = "testRows")
+ public void testRowsOverrideValues(String acc, int recordIndex, int flags, String readName, String bases, String quals,
+ int refStart, String cigar, String refName, int mapQ, boolean hasMate,
+ boolean isSecondaryAlignment) {
+ if (!SRAAccession.isSupported()) return;
+ SAMRecord record = getRecordByIndex(acc, recordIndex, true);
+ SAMFileHeader header = record.getHeader();
+ record.setFlags(0);
+ record.setReadUnmappedFlag(refStart == -1);
+ record.setReadBases("C".getBytes());
+ record.setBaseQualities(SAMUtils.fastqToPhred("A"));
+ if (refStart == -1) {
+ checkSAMRecord(record, 4, readName, "C", "A", refStart, "1M", refName, mapQ, false, false);
+ } else {
+ int sequenceIndex = header.getSequenceIndex(refName);
+ Assert.assertFalse(sequenceIndex == -1);
+ if (sequenceIndex == 0) {
+ if (header.getSequenceDictionary().getSequences().size() > 1) {
+ sequenceIndex++;
+ }
+ } else {
+ sequenceIndex--;
+ }
+ refName = header.getSequence(sequenceIndex).getSequenceName();
+ record.setAlignmentStart(refStart - 100);
+ record.setCigarString("1M");
+ record.setMappingQuality(mapQ - 1);
+ record.setReferenceIndex(sequenceIndex);
+ checkSAMRecord(record, 0, readName, "C", "A", refStart - 100, "1M", refName, mapQ - 1, false, false);
+ }
+ }
+ @Test(dataProvider = "testRows")
+ public void testRowsBySpan(String acc, int recordIndex, int flags, String readName, String bases, String quals,
+ int refStart, String cigar, String refName, int mapQ, boolean hasMate,
+ boolean isSecondaryAlignment) {
+ if (!SRAAccession.isSupported()) return;
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+ SAMFileHeader header = reader.getFileHeader();
+ Chunk chunk;
+ if (refStart != -1) {
+ long refOffset = 0;
+ int refIndex = header.getSequenceDictionary().getSequence(refName).getSequenceIndex();
+ for (SAMSequenceRecord sequenceRecord : header.getSequenceDictionary().getSequences()) {
+ if (sequenceRecord.getSequenceIndex() < refIndex) {
+ refOffset += sequenceRecord.getSequenceLength();
+ }
+ }
+ chunk = new Chunk(refOffset + refStart - 1, refOffset + refStart);
+ } else {
+ long totalRefLength = header.getSequenceDictionary().getReferenceLength();
+ long totalRecordRange = ((BAMFileSpan)reader.indexing().getFilePointerSpanningReads()).toCoordinateArray()[1];
+ chunk = new Chunk(totalRefLength, totalRecordRange);
+ }
+ final SAMRecordIterator samRecordIterator = ((SamReader.Indexing) reader).iterator(new BAMFileSpan(chunk));
+ SAMRecord record = null;
+ while (samRecordIterator.hasNext()) {
+ SAMRecord currentRecord = samRecordIterator.next();
+ if (currentRecord.getReadName().equals(readName)) {
+ record = currentRecord;
+ break;
+ }
+ }
+ checkSAMRecord(record, flags, readName, bases, quals, refStart, cigar, refName, mapQ, hasMate, isSecondaryAlignment);
+ }
+ @Test(dataProvider = "testRows")
+ public void testRowsByIndex(String acc, int recordIndex, int flags, String readName, String bases, String quals,
+ int refStart, String cigar, String refName, int mapQ, boolean hasMate,
+ boolean isSecondaryAlignment) {
+ if (!SRAAccession.isSupported()) return;
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+ Assert.assertTrue(reader.hasIndex());
+ Assert.assertTrue(reader.indexing().hasBrowseableIndex());
+ SAMFileHeader header = reader.getFileHeader();
+ BrowseableBAMIndex index = reader.indexing().getBrowseableIndex();
+ BAMFileSpan span;
+ if (refStart != -1) {
+ int refIndex = header.getSequenceDictionary().getSequence(refName).getSequenceIndex();
+ span = index.getSpanOverlapping(refIndex, refStart, refStart + 1);
+ } else {
+ long chunkStart = index.getStartOfLastLinearBin();
+ long totalRecordRange = ((BAMFileSpan) reader.indexing().getFilePointerSpanningReads()).toCoordinateArray()[1];
+ span = new BAMFileSpan(new Chunk(chunkStart, totalRecordRange));
+ }
+ final SAMRecordIterator samRecordIterator = ((SamReader.Indexing) reader).iterator(span);
+ SAMRecord record = null;
+ while (samRecordIterator.hasNext()) {
+ SAMRecord currentRecord = samRecordIterator.next();
+ if (refStart != -1 && currentRecord.getAlignmentStart() + currentRecord.getReadLength() < refStart) {
+ continue;
+ }
+ if (currentRecord.getReadName().equals(readName)) {
+ record = currentRecord;
+ break;
+ }
+ }
+ checkSAMRecord(record, flags, readName, bases, quals, refStart, cigar, refName, mapQ, hasMate, isSecondaryAlignment);
+ }
+ private SAMRecord getRecordByIndex(String acc, int recordIndex, boolean detach) {
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+ final SAMRecordIterator samRecordIterator = reader.iterator();
+ while (recordIndex != 0) {
+ Assert.assertTrue(samRecordIterator.hasNext(), "Record set is too small");
+ samRecordIterator.next();
+ recordIndex--;
+ }
+ Assert.assertTrue(samRecordIterator.hasNext(), "Record set is too small");
+ SAMRecord record = samRecordIterator.next();
+ if (detach) {
+ samRecordIterator.next();
+ }
+ return record;
+ }
+ private void checkSAMRecord(SAMRecord record, int flags, String readName, String bases, String quals,
+ int refStart, String cigar, String refName, int mapQ, boolean hasMate,
+ boolean isSecondaryAlignment) {
+ Assert.assertNotNull(record, "Record with read id: " + readName + " was not found by span created from index");
+ List<SAMValidationError> validationErrors = record.isValid();
+ Assert.assertNull(validationErrors, "SRA Lazy record is invalid. List of errors: " +
+ (validationErrors != null ? validationErrors.toString() : ""));
+ Assert.assertEquals(new String(record.getReadBases()), bases);
+ Assert.assertEquals(record.getBaseQualityString(), quals);
+ Assert.assertEquals(record.getReadPairedFlag(), hasMate);
+ Assert.assertEquals(record.getFlags(), flags);
+ Assert.assertEquals(record.getNotPrimaryAlignmentFlag(), isSecondaryAlignment);
+ if (refStart == -1) {
+ Assert.assertEquals(record.getReadUnmappedFlag(), true);
+ Assert.assertEquals(record.getAlignmentStart(), 0);
+ Assert.assertEquals(record.getCigarString(), "*");
+ Assert.assertEquals(record.getReferenceName(), "*");
+ Assert.assertEquals(record.getMappingQuality(), 0);
+ } else {
+ Assert.assertEquals(record.getReadUnmappedFlag(), false);
+ Assert.assertEquals(record.getAlignmentStart(), refStart);
+ Assert.assertEquals(record.getCigarString(), cigar);
+ Assert.assertEquals(record.getReferenceName(), refName);
+ Assert.assertEquals(record.getMappingQuality(), mapQ);
+ }
+ }
+ private void checkAlignedUnalignedCountsByIterator(SAMRecordIterator samRecordIterator,
+ int numberAlignments, int numberUnalignments) {
+ int countAlignments = 0, countUnalignments = 0;
+ while (true) {
+ boolean hasRecord = samRecordIterator.hasNext();
+ SAMRecord record = null;
+ try {
+ record = samRecordIterator.next();
+ Assert.assertTrue(hasRecord); // exception is not thrown if we came to this point
+ } catch (NoSuchElementException e) {
+ Assert.assertFalse(hasRecord);
+ }
+ Assert.assertEquals(hasRecord, record != null);
+ if (record == null) {
+ break;
+ }
+ if (record.getReadUnmappedFlag()) {
+ countUnalignments++;
+ } else {
+ countAlignments++;
+ }
+ }
+ Assert.assertEquals(numberAlignments, countAlignments);
+ Assert.assertEquals(numberUnalignments, countUnalignments);
+ }
diff --git a/src/tests/java/htsjdk/samtools/util/CodeUtilTest.java b/src/tests/java/htsjdk/samtools/util/CodeUtilTest.java
index ea3435e..e8b9957 100644
--- a/src/tests/java/htsjdk/samtools/util/CodeUtilTest.java
+++ b/src/tests/java/htsjdk/samtools/util/CodeUtilTest.java
@@ -10,6 +10,6 @@ public class CodeUtilTest {
final String notNull = "Not null!";
Assert.assertEquals(CodeUtil.getOrElse(notNull, null), notNull);
Assert.assertEquals(CodeUtil.getOrElse(null, notNull), notNull);
- Assert.assertEquals(CodeUtil.getOrElse(null, null), null);
+ Assert.assertEquals((Object) CodeUtil.getOrElse(null, null), (Object) null);
diff --git a/src/tests/java/htsjdk/samtools/util/DiskBackedQueueTest.java b/src/tests/java/htsjdk/samtools/util/DiskBackedQueueTest.java
index 7c8b8fe..88b05e2 100644
--- a/src/tests/java/htsjdk/samtools/util/DiskBackedQueueTest.java
+++ b/src/tests/java/htsjdk/samtools/util/DiskBackedQueueTest.java
@@ -25,14 +25,15 @@
package htsjdk.samtools.util;
import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.AfterTest;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Collections;
- * Created by bradt on 4/28/14.
- */
public class DiskBackedQueueTest extends SortingCollectionTest {
@DataProvider(name = "diskBackedQueueProvider")
public Object[][] createDBQTestData() {
@@ -49,6 +50,9 @@ public class DiskBackedQueueTest extends SortingCollectionTest {
+ @BeforeMethod void setup() { resetTmpDir(); }
+ @AfterMethod void tearDown() { resetTmpDir(); }
* Generate some strings, put into SortingCollection, confirm that the right number of
* Strings come out, and in the right order.
@@ -85,12 +89,12 @@ public class DiskBackedQueueTest extends SortingCollectionTest {
private DiskBackedQueue<String> makeDiskBackedQueue(final int maxRecordsInRam) {
- return DiskBackedQueue.newInstance(new StringCodec(), maxRecordsInRam, Collections.singletonList(tmpDir));
+ return DiskBackedQueue.newInstance(new StringCodec(), maxRecordsInRam, Collections.singletonList(tmpDir()));
public void testReadOnlyQueueJustBeforeReadingFromDisk() {
- DiskBackedQueue<String> queue = makeDiskBackedQueue(2);
+ final DiskBackedQueue<String> queue = makeDiskBackedQueue(2);
@@ -109,4 +113,20 @@ public class DiskBackedQueueTest extends SortingCollectionTest {
+ /** See: https://github.com/broadinstitute/picard/issues/327 */
+ @Test(expectedExceptions = IllegalStateException.class)
+ public void testPathologyIssue327() {
+ final DiskBackedQueue<String> queue = makeDiskBackedQueue(2);
+ // testing a particular order of adding to the queue, setting the result state, and emitting.
+ queue.add("0");
+ queue.add("1");
+ queue.add("2"); // spills to disk
+ Assert.assertEquals(queue.poll(), "0"); // gets from ram, so now there is space in ram, but a record on disk
+ queue.add("3"); // adds, but we assumed we added all records before removing them
+ Assert.assertEquals(queue.poll(), "1");
+ Assert.assertEquals(queue.poll(), "2");
+ Assert.assertEquals(queue.poll(), "3");
+ }
diff --git a/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java b/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java
index e47d866..2ce0c79 100644
--- a/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java
+++ b/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java
@@ -26,6 +26,7 @@ package htsjdk.samtools.util;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.SAMTextHeaderCodec;
import htsjdk.samtools.TextCigarCodec;
@@ -55,6 +56,27 @@ public class SequenceUtilTest {
SequenceUtil.assertSequenceDictionariesEqual(sd1, sd2);
+ @DataProvider
+ public Object[][] compatibleNonEqualLists(){
+ final String s = HEADER +
+ String.format("@SQ\tSN:phix174.seq\tLN:%d\tUR:%s\tAS:PhiX174\tM5:%s\n", 5386, "/seq/references/PhiX174/v0/PhiX174.fasta", "3332ed720ac7eaa9b3655c06f6b9e196")+
+ String.format("@SQ\tSN:phix175.seq\tLN:%d\tUR:%s\tAS:HiMom\tM5:%s\n", 5385, "/seq/references/PhiX174/v0/HiMom.fasta", "deadbeed");
+ return new Object[][]{ {makeSequenceDictionary(5386, "/seq/references/PhiX174/v0/PhiX174.fasta",
+ "3332ed720ac7eaa9b3655c06f6b9e196"),
+ new SAMTextHeaderCodec().decode(new StringLineReader(s), null).getSequenceDictionary()}};
+ }
+ @Test(dataProvider = "compatibleNonEqualLists")
+ public void testCompatible(SAMSequenceDictionary sd1, SAMSequenceDictionary sd2) {
+ SequenceUtil.assertSequenceDictionariesEqual(sd1, sd2, true);
+ }
+ @Test(dataProvider = "compatibleNonEqualLists",expectedExceptions = SequenceUtil.SequenceListsDifferException.class)
+ public void testinCompatible(SAMSequenceDictionary sd1, SAMSequenceDictionary sd2) {
+ SequenceUtil.assertSequenceDictionariesEqual(sd1, sd2, false);
+ }
@Test(expectedExceptions = SequenceUtil.SequenceListsDifferException.class)
public void testMismatch() {
final SAMSequenceDictionary sd1 = makeSequenceDictionary(5386, "/seq/references/PhiX174/v0/PhiX174.fasta",
diff --git a/src/tests/java/htsjdk/samtools/util/SortingCollectionTest.java b/src/tests/java/htsjdk/samtools/util/SortingCollectionTest.java
index 8770938..1ec928d 100644
--- a/src/tests/java/htsjdk/samtools/util/SortingCollectionTest.java
+++ b/src/tests/java/htsjdk/samtools/util/SortingCollectionTest.java
@@ -24,7 +24,9 @@
package htsjdk.samtools.util;
import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
import org.testng.annotations.AfterTest;
+import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@@ -41,31 +43,23 @@ import java.util.Random;
public class SortingCollectionTest {
// Create a separate directory for files so it is possible to confirm that the directory is emptied
- protected final File tmpDir = new File(System.getProperty("java.io.tmpdir") + "/" + System.getProperty("user.name"),
- "SortingCollectionTest");
- @BeforeTest void setup() {
- // Clear out any existing files if the directory exists
- if (tmpDir.exists()) {
- for (final File f : tmpDir.listFiles()) {
- f.delete();
- }
- }
- tmpDir.mkdirs();
+ protected File tmpDir() {
+ return new File(System.getProperty("java.io.tmpdir") + "/" + System.getProperty("user.name"), getClass().getSimpleName());
+ @BeforeMethod void setup() { resetTmpDir(); }
+ @AfterMethod void tearDown() { resetTmpDir(); }
+ /** Deletes and re-creates the temporary directory. */
+ void resetTmpDir() {
+ System.err.println("Resetting tmpdir");
+ IOUtil.deleteDirectoryTree(tmpDir());
+ if (!tmpDir().mkdirs()) throw new IllegalStateException("Could not create tmpdir: " + tmpDir().getAbsolutePath());
- @AfterTest void tearDown() {
- System.err.println("In SortingCollectionTest.tearDown. tmpDir: " + tmpDir);
- if (tmpDir.exists()) {
- for (final File f : tmpDir.listFiles()) {
- f.delete();
- }
- tmpDir.delete();
- }
protected boolean tmpDirIsEmpty() {
- System.err.println("In SortingCollectionTest.tmpDirIsEmpty. tmpDir: " + tmpDir);
- return tmpDir.listFiles().length == 0;
+ return tmpDir().listFiles().length == 0;
@DataProvider(name = "test1")
@@ -105,7 +99,7 @@ public class SortingCollectionTest {
assertIteratorEqualsList(strings, sortingCollection.iterator());
- Assert.assertEquals(tmpDir.list().length, 0);
+ Assert.assertEquals(tmpDir().list().length, 0);
private void assertIteratorEqualsList(final String[] strings, final Iterator<String> sortingCollection) {
@@ -118,8 +112,7 @@ public class SortingCollectionTest {
private SortingCollection<String> makeSortingCollection(final int maxRecordsInRam) {
- return SortingCollection.newInstance(String.class, new StringCodec(), new StringComparator(),
- maxRecordsInRam, tmpDir);
+ return SortingCollection.newInstance(String.class, new StringCodec(), new StringComparator(), maxRecordsInRam, tmpDir());
diff --git a/src/tests/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java b/src/tests/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
index 81fe49a..91804c4 100644
--- a/src/tests/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
+++ b/src/tests/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
@@ -93,6 +93,21 @@ public final class BCF2UtilsUnitTest extends VariantBaseTest {
+ /**
+ * Wrapper class for HeaderOrderTestProvider test cases to prevent TestNG from calling toString()
+ * on the VCFHeaders and spamming the log output.
+ */
+ private static class HeaderOrderTestCase {
+ public final VCFHeader inputHeader;
+ public final VCFHeader testHeader;
+ public final boolean expectedConsistent;
+ public HeaderOrderTestCase( final VCFHeader inputHeader, final VCFHeader testHeader, final boolean expectedConsistent ) {
+ this.inputHeader = inputHeader;
+ this.testHeader = testHeader;
+ this.expectedConsistent = expectedConsistent;
+ }
+ }
@DataProvider(name = "HeaderOrderTestProvider")
public Object[][] makeHeaderOrderTestProvider() {
@@ -132,7 +147,7 @@ public final class BCF2UtilsUnitTest extends VariantBaseTest {
final VCFHeader testHeader = new VCFHeader(new LinkedHashSet<VCFHeaderLine>(allLines));
final boolean expectedConsistent = expectedConsistent(testHeader, inputLineCounter);
- tests.add(new Object[]{inputHeader, testHeader, expectedConsistent});
+ tests.add(new Object[]{new HeaderOrderTestCase(inputHeader, testHeader, expectedConsistent)});
@@ -153,7 +168,7 @@ public final class BCF2UtilsUnitTest extends VariantBaseTest {
for ( final List<String> testSamplesPermutation : permutations ) {
final VCFHeader testHeaderWithSamples = new VCFHeader(inputHeader.getMetaDataInInputOrder(), testSamplesPermutation);
final boolean expectedConsistent = testSamples.equals(inSamples);
- tests.add(new Object[]{inputHeaderWithSamples, testHeaderWithSamples, expectedConsistent});
+ tests.add(new Object[]{new HeaderOrderTestCase(inputHeaderWithSamples, testHeaderWithSamples, expectedConsistent)});
@@ -182,9 +197,9 @@ public final class BCF2UtilsUnitTest extends VariantBaseTest {
// even when the header file is slightly different
@Test(dataProvider = "HeaderOrderTestProvider")
- public void testHeaderOrder(final VCFHeader inputHeader, final VCFHeader testHeader, final boolean expectedConsistent) {
- final boolean actualOrderConsistency = BCF2Utils.headerLinesAreOrderedConsistently(testHeader, inputHeader);
- Assert.assertEquals(actualOrderConsistency, expectedConsistent);
+ public void testHeaderOrder( final HeaderOrderTestCase testCase ) {
+ final boolean actualOrderConsistency = BCF2Utils.headerLinesAreOrderedConsistently(testCase.testHeader, testCase.inputHeader);
+ Assert.assertEquals(actualOrderConsistency, testCase.expectedConsistent);
diff --git a/src/java/htsjdk/samtools/SAMTag.java b/src/tests/java/htsjdk/variant/variantcontext/filter/AllFailFilter.java
similarity index 60%
copy from src/java/htsjdk/samtools/SAMTag.java
copy to src/tests/java/htsjdk/variant/variantcontext/filter/AllFailFilter.java
index 7dac5a2..d62e146 100644
--- a/src/java/htsjdk/samtools/SAMTag.java
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/AllFailFilter.java
@@ -1,7 +1,7 @@
* The MIT License
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,59 +21,21 @@
-package htsjdk.samtools;
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.variant.variantcontext.VariantContext;
- * The standard tags for a SAM record that are defined in the SAM spec.
+ * A trivial filter (always false) that can be used in testing
+ *
+ * @author Yossi Farjoun
-public enum SAMTag {
- AM,
- AS,
- BC,
- BQ,
- CC,
- CM,
- CO,
- CP,
- CQ,
- CS,
- CT,
- E2,
- FI,
- FS,
- FZ,
- GC, // for backwards compatibility
- GS, // for backwards compatibility
- GQ, // for backwards compatibility
- LB,
- H0,
- H1,
- H2,
- HI,
- IH,
- MC,
- MF, // for backwards compatibility
- MD,
- MQ,
- NH,
- NM,
- OQ,
- OP,
- OC,
- PG,
- PQ,
- PT,
- PU,
- QT,
- Q2,
- R2,
- RG,
- RT,
- S2, // for backwards compatibility
- SA,
- SM,
- SQ, // for backwards compatibility
- TC,
- U2,
- UQ
+public class AllFailFilter implements VariantContextFilter {
+ /* @return false so that all VCs are filtered out. */
+ @Override
+ public boolean test(final VariantContext record) {
+ return false;
+ }
diff --git a/src/java/htsjdk/samtools/SAMTag.java b/src/tests/java/htsjdk/variant/variantcontext/filter/AllPassFilter.java
similarity index 60%
copy from src/java/htsjdk/samtools/SAMTag.java
copy to src/tests/java/htsjdk/variant/variantcontext/filter/AllPassFilter.java
index 7dac5a2..b29aa51 100644
--- a/src/java/htsjdk/samtools/SAMTag.java
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/AllPassFilter.java
@@ -1,7 +1,7 @@
* The MIT License
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,59 +21,21 @@
-package htsjdk.samtools;
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.variant.variantcontext.VariantContext;
- * The standard tags for a SAM record that are defined in the SAM spec.
+ * A trivial filter (always true) that can be used in testing
+ *
+ * @author Yossi Farjoun
-public enum SAMTag {
- AM,
- AS,
- BC,
- BQ,
- CC,
- CM,
- CO,
- CP,
- CQ,
- CS,
- CT,
- E2,
- FI,
- FS,
- FZ,
- GC, // for backwards compatibility
- GS, // for backwards compatibility
- GQ, // for backwards compatibility
- LB,
- H0,
- H1,
- H2,
- HI,
- IH,
- MC,
- MF, // for backwards compatibility
- MD,
- MQ,
- NH,
- NM,
- OQ,
- OP,
- OC,
- PG,
- PQ,
- PT,
- PU,
- QT,
- Q2,
- R2,
- RG,
- RT,
- S2, // for backwards compatibility
- SA,
- SM,
- SQ, // for backwards compatibility
- TC,
- U2,
- UQ
+public class AllPassFilter implements VariantContextFilter {
+ /* @return true so that all VCs are kept. */
+ @Override
+ public boolean test(final VariantContext record) {
+ return true;
+ }
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/CompoundFilterTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/CompoundFilterTest.java
new file mode 100644
index 0000000..0a49853
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/CompoundFilterTest.java
@@ -0,0 +1,78 @@
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+ * Created by farjoun on 9/9/15.
+ */
+public class CompoundFilterTest {
+ static AllPassFilter pass = new AllPassFilter();
+ static AllFailFilter fail = new AllFailFilter();
+ static Allele refA = Allele.create("A", true);
+ static Allele G = Allele.create("G", false);
+ static VariantContext vc = new VariantContextBuilder("dummy", "chr1", 1, 1, Arrays.asList(refA, G)).make();
+ @DataProvider
+ Iterator<Object[]> testCompoundFilterProvider() {
+ final List<Object[]> filters = new ArrayList<Object[]>(10);
+ // requireAll = TRUE
+ { // all pass
+ final CompoundFilter compoundFilter = new CompoundFilter(true);
+ compoundFilter.add(pass);
+ compoundFilter.add(pass);
+ compoundFilter.add(pass);
+ filters.add(new Object[]{compoundFilter, true});
+ }
+ { // one fail
+ final CompoundFilter compoundFilter = new CompoundFilter(true);
+ compoundFilter.add(pass);
+ compoundFilter.add(fail);
+ compoundFilter.add(pass);
+ filters.add(new Object[]{compoundFilter, false});
+ }
+ { // empty
+ final CompoundFilter compoundFilter = new CompoundFilter(true);
+ filters.add(new Object[]{compoundFilter, true});
+ }
+ //requireAll = FALSE
+ { // all fail
+ final CompoundFilter compoundFilter = new CompoundFilter(false);
+ compoundFilter.add(fail);
+ compoundFilter.add(fail);
+ compoundFilter.add(fail);
+ filters.add(new Object[]{compoundFilter, false});
+ }
+ { // one fail
+ final CompoundFilter compoundFilter = new CompoundFilter(false);
+ compoundFilter.add(pass);
+ compoundFilter.add(fail);
+ compoundFilter.add(pass);
+ filters.add(new Object[]{compoundFilter, true});
+ }
+ { // empty
+ final CompoundFilter compoundFilter = new CompoundFilter(false);
+ filters.add(new Object[]{compoundFilter, true});
+ }
+ return filters.iterator();
+ }
+ @Test(dataProvider = "testCompoundFilterProvider")
+ public void testCompoundFilter(final VariantContextFilter filter, final boolean shouldPass) {
+ Assert.assertEquals(filter.test(vc), shouldPass, filter.toString());
+ }
\ No newline at end of file
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/FilteringIteratorTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/FilteringIteratorTest.java
new file mode 100644
index 0000000..0964309
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/FilteringIteratorTest.java
@@ -0,0 +1,88 @@
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ */
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFFileReader;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import java.io.File;
+ * Tests for testing the (VariantContext)FilteringIterator, and the HeterozygosityFilter
+ */
+public class FilteringIteratorTest {
+ final File testDir = new File("testdata/htsjdk/variant");
+ @DataProvider
+ public Object [][] filteringIteratorData() {
+ return new Object[][] {
+ {new HeterozygosityFilter(true, "NA00001"), 2},
+ {new HeterozygosityFilter(false, "NA00001"), 3},
+ {new HeterozygosityFilter(true, null), 2},
+ {new HeterozygosityFilter(false, null), 3},
+ {new AllPassFilter(), 5},
+ {new HeterozygosityFilter(true, "NA00002"), 4},
+ {new HeterozygosityFilter(false, "NA00002"), 1},
+ };
+ }
+ @Test(dataProvider = "filteringIteratorData")
+ public void testFilteringIterator(final VariantContextFilter filter, final int expectedCount) {
+ final File vcf = new File(testDir,"ex2.vcf");
+ final VCFFileReader vcfReader = new VCFFileReader(vcf, false);
+ final FilteringIterator filteringIterator = new FilteringIterator(vcfReader.iterator(), filter);
+ int count = 0;
+ for(final VariantContext vc : filteringIterator) {
+ count++;
+ }
+ Assert.assertEquals(count, expectedCount);
+ }
+ @DataProvider
+ public Object [][] badSampleData() {
+ return new Object[][] {
+ {"ex2.vcf", "DOES_NOT_EXIST"},
+ {"breakpoint.vcf", null},
+ };
+ }
+ @Test(dataProvider = "badSampleData", expectedExceptions = IllegalArgumentException.class)
+ public void testMissingSample(final String file, final String sample) {
+ final File vcf = new File(testDir, file);
+ final VCFFileReader vcfReader = new VCFFileReader(vcf, false);
+ final HeterozygosityFilter heterozygosityFilter = new HeterozygosityFilter(true, sample);
+ new FilteringIterator(vcfReader.iterator(), heterozygosityFilter).next();
+ }
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilterTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilterTest.java
new file mode 100644
index 0000000..809133f
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilterTest.java
@@ -0,0 +1,105 @@
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ */
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.GenotypeBuilder;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+public class GenotypeQualityFilterTest {
+ Allele refA = Allele.create("A", true);
+ Allele G = Allele.create("G", false);
+ @DataProvider
+ public Iterator<Object[]> genotypeProvider() {
+ final VariantContextBuilder vc_builder = new VariantContextBuilder("testCode", "chr1", 1, 1, Arrays.asList(refA, G));
+ final GenotypeBuilder gt_builder = new GenotypeBuilder("test").alleles(Arrays.asList(refA, G));
+ final List<Object[]> variants = new ArrayList<Object[]>(10);
+ //without gq
+ variants.add(new Object[]{vc_builder.genotypes(gt_builder.make()).make(), null, false});
+ variants.add(new Object[]{vc_builder.genotypes(gt_builder.make()).make(), "test", false});
+ //without sample
+ variants.add(new Object[]{vc_builder.genotypes(gt_builder.GQ( 1).make()).make(), null, false});
+ variants.add(new Object[]{vc_builder.genotypes(gt_builder.GQ(10).make()).make(), null, true});
+ variants.add(new Object[]{vc_builder.genotypes(gt_builder.GQ(20).make()).make(), null, true});
+ //with sample
+ variants.add(new Object[]{vc_builder.genotypes(gt_builder.GQ( 1).make()).make(), "test", false});
+ variants.add(new Object[]{vc_builder.genotypes(gt_builder.GQ(10).make()).make(), "test", true});
+ variants.add(new Object[]{vc_builder.genotypes(gt_builder.GQ(20).make()).make(), "test", true});
+ return variants.iterator();
+ }
+ @Test(dataProvider = "genotypeProvider")
+ public void testHetFilter(final VariantContext vc, final String sample, final boolean shouldPass) {
+ final GenotypeQualityFilter gqFilter = getFilter(sample);
+ Assert.assertEquals(gqFilter.test(vc), shouldPass, vc.toString());
+ }
+ @DataProvider(name = "badSamplesProvider")
+ public Iterator<Object[]> badSamplesProvider() {
+ final VariantContextBuilder vc_builder = new VariantContextBuilder("testCode", "chr1", 1, 1, Arrays.asList(refA, G));
+ final GenotypeBuilder gt_builder = new GenotypeBuilder();
+ final List<Object[]> hets = new ArrayList<Object[]>(10);
+ hets.add(new Object[]{vc_builder.make(), null});
+ hets.add(new Object[]{vc_builder.genotypes(Arrays.asList(gt_builder.name("test1").make(), gt_builder.name("test2").make())).make(), "notNull"});
+ hets.add(new Object[]{vc_builder.genotypes(Collections.singleton(gt_builder.name("This").make())).make(), "That"});
+ return hets.iterator();
+ }
+ @Test(dataProvider = "badSamplesProvider", expectedExceptions = IllegalArgumentException.class)
+ public void testbadSample(final VariantContext vc, final String sample) {
+ final GenotypeQualityFilter gqFilter = getFilter(sample);
+ //should fail
+ gqFilter.test(vc);
+ }
+ private GenotypeQualityFilter getFilter(String sample){
+ if (sample == null) {
+ return new GenotypeQualityFilter(10);
+ } else {
+ return new GenotypeQualityFilter(10, sample);
+ }
+ }
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilterTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilterTest.java
new file mode 100644
index 0000000..5ceed9f
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilterTest.java
@@ -0,0 +1,128 @@
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ */
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.GenotypeBuilder;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+public class HeterozygosityFilterTest {
+ Allele refA = Allele.create("A", true);
+ Allele G = Allele.create("G", false);
+ @DataProvider(name = "Hets")
+ public Iterator<Object[]> hetsProvider() {
+ final VariantContextBuilder vc_builder = new VariantContextBuilder("testCode", "chr1", 1, 1, Arrays.asList(refA, G));
+ final GenotypeBuilder gt_builder = new GenotypeBuilder("test");
+ final List<Object[]> hets = new ArrayList<Object[]>(10);
+ hets.add(new Object[]{vc_builder.genotypes(gt_builder.alleles(Arrays.asList(refA, G)).make()).make(), null, true});
+ hets.add(new Object[]{vc_builder.genotypes(gt_builder.alleles(Arrays.asList(refA, G)).make()).make(), "test", true});
+ //non-variant
+ hets.add(new Object[]{vc_builder.genotypes(gt_builder.alleles(Collections.singletonList(refA)).make()).make(), "test", false});
+ hets.add(new Object[]{vc_builder.genotypes(gt_builder.alleles(Collections.singletonList(refA)).make()).make(), null, false});
+ return hets.iterator();
+ }
+ @Test(dataProvider = "Hets")
+ public void testHetFilter(final VariantContext vc, final String sample, final boolean shouldPass) {
+ final HeterozygosityFilter hf = getFilter(shouldPass, sample);
+ Assert.assertTrue(hf.test(vc));
+ }
+ @DataProvider(name = "badSamplesProvider")
+ public Iterator<Object[]> badSamplesProvider() {
+ final VariantContextBuilder vc_builder = new VariantContextBuilder("testCode", "chr1", 1, 1, Arrays.asList(refA, G));
+ final GenotypeBuilder gt_builder = new GenotypeBuilder();
+ final List<Object[]> hets = new ArrayList<Object[]>(10);
+ hets.add(new Object[]{vc_builder.make(), null});
+ hets.add(new Object[]{vc_builder.genotypes(Arrays.asList(gt_builder.name("test1").make(), gt_builder.name("test2").make())).make(), "notNull"});
+ hets.add(new Object[]{vc_builder.genotypes(Collections.singleton(gt_builder.name("This").make())).make(), "That"});
+ return hets.iterator();
+ }
+ @Test(dataProvider = "badSamplesProvider", expectedExceptions = IllegalArgumentException.class)
+ public void testbadSample(final VariantContext vc, final String sample) {
+ final HeterozygosityFilter hf = getFilter(true, sample);
+ //should fail
+ hf.test(vc);
+ }
+ @DataProvider(name = "variantsProvider")
+ public Object[][] variantsProvider() {
+ final VariantContextBuilder vc_builder = new VariantContextBuilder("testCode", "chr1", 1, 1, Arrays.asList(refA, G));
+ final GenotypeBuilder gt_builder = new GenotypeBuilder("test");
+ final List<VariantContext> vcs = new ArrayList<VariantContext>(10);
+ //hets:
+ vcs.add(vc_builder.genotypes(gt_builder.alleles(Arrays.asList(refA, G)).make()).make());
+ vcs.add(vc_builder.loc("chr1", 10, 10).genotypes(gt_builder.alleles(Arrays.asList(refA, G)).make()).make());
+ //non-variant:
+ vcs.add(vc_builder.loc("chr1", 20, 20).genotypes(gt_builder.alleles(Collections.singletonList(refA)).make()).make());
+ vcs.add(vc_builder.loc("chr1", 30, 30).genotypes(gt_builder.alleles(Collections.singletonList(refA)).make()).make());
+ return new Object[][]{new Object[]{vcs.iterator(), new int[]{1, 10}}};
+ }
+ @Test(dataProvider = "variantsProvider")
+ public void testFilteringIterator(final Iterator<VariantContext> vcs, final int[] passingPositions) {
+ final Iterator<VariantContext> filteringIterator = new FilteringIterator(vcs, new HeterozygosityFilter(true, "test"));
+ int i = 0;
+ while (filteringIterator.hasNext()) {
+ final VariantContext vc = filteringIterator.next();
+ Assert.assertTrue(i < passingPositions.length);
+ Assert.assertEquals(vc.getStart(), passingPositions[i++]);
+ }
+ }
+ private HeterozygosityFilter getFilter(final boolean shouldPass, String sample) {
+ if (sample == null) {
+ return new HeterozygosityFilter(shouldPass);
+ } else {
+ return new HeterozygosityFilter(shouldPass, sample);
+ }
+ }
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/PassingVariantFilterTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/PassingVariantFilterTest.java
new file mode 100644
index 0000000..3cbb60c
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/PassingVariantFilterTest.java
@@ -0,0 +1,46 @@
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+ * Created by farjoun on 9/10/15.
+ */
+public class PassingVariantFilterTest {
+ Allele refA = Allele.create("A", true);
+ Allele G = Allele.create("G", false);
+ @DataProvider()
+ public Iterator<Object[]> variantProvider() {
+ final VariantContextBuilder vc_builder = new VariantContextBuilder("test", "chr1", 1, 1, Arrays.asList(refA, G));
+ final List<Object[]> variants = new ArrayList<Object[]>(10);
+ // unfiltered
+ variants.add(new Object[]{vc_builder.alleles(Arrays.asList(refA, G)).make(), true});
+ // passing
+ variants.add(new Object[]{vc_builder.alleles(Arrays.asList(refA, G)).passFilters().make(), true});
+ // failing
+ variants.add(new Object[]{vc_builder.alleles(Arrays.asList(refA, G)).filters(Collections.singleton("FILTER")).make(), false});
+ return variants.iterator();
+ }
+ @Test(dataProvider = "variantProvider")
+ public void testPassingVariantFilter(final VariantContext vc, final boolean shouldPass) {
+ final PassingVariantFilter passingVariantFilter = new PassingVariantFilter();
+ Assert.assertEquals(passingVariantFilter.test(vc), shouldPass, vc.toString());
+ }
\ No newline at end of file
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/SnpFilterTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/SnpFilterTest.java
new file mode 100644
index 0000000..74f1bb5
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/SnpFilterTest.java
@@ -0,0 +1,54 @@
+package htsjdk.variant.variantcontext.filter;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+ * Created by farjoun on 9/9/15.
+ */
+public class SnpFilterTest {
+ Allele refA = Allele.create("A", true);
+ Allele refAG = Allele.create("AG", true);
+ Allele G = Allele.create("G", false);
+ Allele T = Allele.create("T", false);
+ Allele AG = Allele.create("AG", false);
+ Allele AT = Allele.create("AT", false);
+ Allele star = Allele.create("<*>", false);
+ @DataProvider()
+ public Iterator<Object[]> variantProvider() {
+ final VariantContextBuilder vc_builder = new VariantContextBuilder("testCode", "chr1", 1, 1, Collections.<Allele>emptyList());
+ final List<Object[]> variants = new ArrayList<Object[]>(10);
+ variants.add(new Object[]{vc_builder.alleles(Arrays.asList(refA, G)) .make(), true}); // SNP
+ variants.add(new Object[]{vc_builder.alleles(Arrays.asList(refA, G, T)) .make(), true}); // SNP
+ variants.add(new Object[]{vc_builder.alleles(Arrays.asList(refA, AG)) .make(), false}); // INDEL
+ variants.add(new Object[]{vc_builder.alleles(Arrays.asList(refA, G, AG)) .make(), false}); // MIXED
+ variants.add(new Object[]{vc_builder.alleles(Arrays.asList(refA, star)) .make(), false}); // SYMBOLIC
+ variants.add(new Object[]{vc_builder.stop(2).alleles(Arrays.asList(refAG, T)) .make(), false}); // INDEL
+ variants.add(new Object[]{vc_builder.stop(2).alleles(Arrays.asList(refAG, AT)).make(), false}); // MNP
+ return variants.iterator();
+ }
+ @Test(dataProvider = "variantProvider")
+ public void testSnpFilter(final VariantContext vc, final boolean shouldPass) {
+ final SnpFilter snpFilter = new SnpFilter();
+ Assert.assertEquals(snpFilter.test(vc), shouldPass, vc.toString());
+ }
diff --git a/src/tests/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java b/src/tests/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java
index 5a8705e..9b8f6e8 100644
--- a/src/tests/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java
+++ b/src/tests/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java
@@ -349,4 +349,14 @@ public class VariantContextWriterBuilderUnitTest extends VariantBaseTest {
+ @Test
+ public void testClearOptions() {
+ // Verify that clearOptions doesn't have a side effect of carrying previously set options
+ // forward to subsequent builders
+ VariantContextWriterBuilder vcwb = new VariantContextWriterBuilder();
+ vcwb.clearOptions().setOption(Options.INDEX_ON_THE_FLY);
+ final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().clearOptions();
+ Assert.assertTrue(builder.options.isEmpty());
+ }
\ No newline at end of file
diff --git a/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java b/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
index 0bf5f74..1a53cd6 100644
--- a/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
+++ b/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
@@ -34,7 +34,6 @@ import htsjdk.tribble.readers.LineReaderUtil;
import htsjdk.variant.VariantBaseTest;
import htsjdk.variant.variantcontext.VariantContext;
import org.testng.Assert;
-import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.*;
@@ -127,20 +126,17 @@ public class VCFHeaderUnitTest extends VariantBaseTest {
final VCFHeader header = (VCFHeader) codec.readHeader(vcfIterator).getHeaderValue();
- @DataProvider(name = "HiSeqVCFHeaderDataProvider")
- public Object[][] getHiSeqVCFHeaderData() {
+ private VCFHeader getHiSeqVCFHeader() {
final File vcf = new File("testdata/htsjdk/variant/HiSeq.10000.vcf");
final VCFFileReader reader = new VCFFileReader(vcf, false);
final VCFHeader header = reader.getFileHeader();
- return new Object[][] {
- { header }
- };
+ return header;
- @Test(dataProvider = "HiSeqVCFHeaderDataProvider")
- public void testVCFHeaderAddInfoLine( final VCFHeader header ) {
+ @Test
+ public void testVCFHeaderAddInfoLine() {
+ final VCFHeader header = getHiSeqVCFHeader();
final VCFInfoHeaderLine infoLine = new VCFInfoHeaderLine("TestInfoLine", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "test info line");
@@ -154,8 +150,9 @@ public class VCFHeaderUnitTest extends VariantBaseTest {
Assert.assertFalse(header.getOtherHeaderLines().contains(infoLine), "TestInfoLine present in other header lines");
- @Test(dataProvider = "HiSeqVCFHeaderDataProvider")
- public void testVCFHeaderAddFormatLine( final VCFHeader header ) {
+ @Test
+ public void testVCFHeaderAddFormatLine() {
+ final VCFHeader header = getHiSeqVCFHeader();
final VCFFormatHeaderLine formatLine = new VCFFormatHeaderLine("TestFormatLine", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "test format line");
@@ -169,8 +166,9 @@ public class VCFHeaderUnitTest extends VariantBaseTest {
Assert.assertFalse(header.getOtherHeaderLines().contains(formatLine), "TestFormatLine present in other header lines");
- @Test(dataProvider = "HiSeqVCFHeaderDataProvider")
- public void testVCFHeaderAddFilterLine( final VCFHeader header ) {
+ @Test
+ public void testVCFHeaderAddFilterLine() {
+ final VCFHeader header = getHiSeqVCFHeader();
final VCFFilterHeaderLine filterLine = new VCFFilterHeaderLine("TestFilterLine");
@@ -184,8 +182,9 @@ public class VCFHeaderUnitTest extends VariantBaseTest {
Assert.assertFalse(header.getOtherHeaderLines().contains(filterLine), "TestFilterLine present in other header lines");
- @Test(dataProvider = "HiSeqVCFHeaderDataProvider")
- public void testVCFHeaderAddContigLine( final VCFHeader header ) {
+ @Test
+ public void testVCFHeaderAddContigLine() {
+ final VCFHeader header = getHiSeqVCFHeader();
final VCFContigHeaderLine contigLine = new VCFContigHeaderLine("<ID=chr1,length=1234567890,assembly=FAKE,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\">", VCFHeaderVersion.VCF4_0, "chr1", 0);
@@ -198,8 +197,9 @@ public class VCFHeaderUnitTest extends VariantBaseTest {
Assert.assertFalse(header.getOtherHeaderLines().contains(contigLine), "Test contig line present in other header lines");
- @Test(dataProvider = "HiSeqVCFHeaderDataProvider")
- public void testVCFHeaderAddOtherLine( final VCFHeader header ) {
+ @Test
+ public void testVCFHeaderAddOtherLine() {
+ final VCFHeader header = getHiSeqVCFHeader();
final VCFHeaderLine otherLine = new VCFHeaderLine("TestOtherLine", "val");
diff --git a/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram
new file mode 100644
index 0000000..59f11d2
Binary files /dev/null and b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram differ
diff --git a/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram.bai b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram.bai
new file mode 100644
index 0000000..fcb31fc
Binary files /dev/null and b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram.bai differ
diff --git a/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.dict b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.dict
new file mode 100644
index 0000000..7f41717
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.dict
@@ -0,0 +1,2 @@
+ at HD VN:1.4 SO:unsorted
+ at SQ SN:Shelly LN:20 M5:7ddd8a4b4f2c1dec43476a738b1a9b72 UR:file:/Users/edwardk/Documents/htsjdk/testdata/htsjdk/samtools/cram/auxf.fa
diff --git a/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa
new file mode 100644
index 0000000..63e0c92
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa
@@ -0,0 +1,2 @@
diff --git a/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa.fai b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa.fai
new file mode 100644
index 0000000..3deea7f
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa.fai
@@ -0,0 +1 @@
+Shelly 20 8 20 21
diff --git a/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fasta b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fasta
new file mode 100644
index 0000000..11d25dd
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fasta
@@ -0,0 +1,2 @@
diff --git a/testdata/htsjdk/samtools/cram_tlen.fasta b/testdata/htsjdk/samtools/cram_tlen.fasta
new file mode 100644
index 0000000..01b8f8a
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram_tlen.fasta
@@ -0,0 +1,41 @@
\ No newline at end of file
diff --git a/testdata/htsjdk/samtools/cram_tlen.fasta.fai b/testdata/htsjdk/samtools/cram_tlen.fasta.fai
new file mode 100644
index 0000000..4f7bd29
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram_tlen.fasta.fai
@@ -0,0 +1,8 @@
+chr1 101 6 50 51
+chr2 101 116 50 51
+chr3 101 226 50 51
+chr4 101 336 50 51
+chr5 101 446 50 51
+chr6 101 556 50 51
+chr7 454 666 50 51
+chr8 202 1136 50 51
diff --git a/testdata/htsjdk/samtools/cram_tlen_reads.sorted.sam b/testdata/htsjdk/samtools/cram_tlen_reads.sorted.sam
new file mode 100644
index 0000000..0d1947e
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram_tlen_reads.sorted.sam
@@ -0,0 +1,19 @@
+ at HD VN:1.5 SO:coordinate
+ at SQ SN:chr1 LN:101
+ at SQ SN:chr2 LN:101
+ at SQ SN:chr3 LN:101
+ at SQ SN:chr4 LN:101
+ at SQ SN:chr5 LN:101
+ at SQ SN:chr6 LN:101
+ at SQ SN:chr7 LN:454
+ at SQ SN:chr8 LN:202
+ at PG ID:1 VN:2.0 PN:Hey!
+both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
+both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
+read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
+both_reads_align_clip_adapter 147 chr7 16 255 101M = 21 -96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
+both_reads_align_clip_adapter 99 chr7 21 255 101M = 16 96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
+both_reads_align_clip_marked 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
+read_2_too_many_gaps 163 chr7 302 255 10M1D10M5I76M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
+both_reads_present_only_first_aligns 165 * 0 0 * chr7 1 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
diff --git a/testdata/htsjdk/samtools/cram_with_bai_index.cram b/testdata/htsjdk/samtools/cram_with_bai_index.cram
new file mode 100644
index 0000000..5609d5e
Binary files /dev/null and b/testdata/htsjdk/samtools/cram_with_bai_index.cram differ
diff --git a/testdata/htsjdk/samtools/cram_with_bai_index.cram.bai b/testdata/htsjdk/samtools/cram_with_bai_index.cram.bai
new file mode 100644
index 0000000..db53e08
Binary files /dev/null and b/testdata/htsjdk/samtools/cram_with_bai_index.cram.bai differ
diff --git a/testdata/htsjdk/samtools/cram_with_crai_index.cram b/testdata/htsjdk/samtools/cram_with_crai_index.cram
new file mode 100644
index 0000000..5609d5e
Binary files /dev/null and b/testdata/htsjdk/samtools/cram_with_crai_index.cram differ
diff --git a/testdata/htsjdk/samtools/cram_with_crai_index.cram.crai b/testdata/htsjdk/samtools/cram_with_crai_index.cram.crai
new file mode 100644
index 0000000..309f06f
Binary files /dev/null and b/testdata/htsjdk/samtools/cram_with_crai_index.cram.crai differ
diff --git a/testdata/htsjdk/samtools/hg19mini.fasta b/testdata/htsjdk/samtools/hg19mini.fasta
new file mode 100644
index 0000000..038dd84
--- /dev/null
+++ b/testdata/htsjdk/samtools/hg19mini.fasta
@@ -0,0 +1,804 @@
+>1 dna:chromosome chromosome:GRCh37:1:1:16000:1
+>2 dna:chromosome chromosome:GRCh37:2:1:16000:1
+>3 dna:chromosome chromosome:GRCh37:3:1:16000:1
+>4 dna:chromosome chromosome:GRCh37:4:1:16000:1
diff --git a/testdata/htsjdk/samtools/hg19mini.fasta.fai b/testdata/htsjdk/samtools/hg19mini.fasta.fai
new file mode 100644
index 0000000..2a20cf3
--- /dev/null
+++ b/testdata/htsjdk/samtools/hg19mini.fasta.fai
@@ -0,0 +1,4 @@
+1 16000 48 80 81
+2 16000 16296 80 81
+3 16000 32544 80 81
+4 16000 48792 80 81
diff --git a/testdata/htsjdk/samtools/metrics/metricsOne.metrics b/testdata/htsjdk/samtools/metrics/metricsOne.metrics
new file mode 100644
index 0000000..46c724e
--- /dev/null
+++ b/testdata/htsjdk/samtools/metrics/metricsOne.metrics
@@ -0,0 +1,13 @@
+## htsjdk.samtools.metrics.StringHeader
+# picard.illumina.MarkIlluminaAdapters INPUT=testdata/picard/illumina/MarkIlluminaAdaptersTest/unevenReads.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven5946421709712534555.sam METRICS=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven4591996041776878558.metrics MIN_MATCH_BASES_SE=12 MIN_MATCH_BASES_PE=6 MAX_ERROR_RATE_SE=0.1 MAX_ERROR_RATE_PE=0.1 ADAPTERS=[INDEXED, DUAL_INDEXED, PAIRED_END] ADAPTER_TRUNCATION_LENGTH=30 PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPT [...]
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Mon Aug 24 13:31:51 EDT 2015
+## METRICS CLASS htsjdk.samtools.metrics.MetricsFileTest$TestMetric
+Hello World 2008-12-31 123 9223372036854775807 456.789001 0.713487 Two N A 123 919834781 9223372034707292160 0.55694 0.229233 Y B
+## HISTOGRAM java.lang.Integer
+clipped_bases read_count
+6 1
+7 1
diff --git a/testdata/htsjdk/samtools/metrics/metricsOneCopy.metrics b/testdata/htsjdk/samtools/metrics/metricsOneCopy.metrics
new file mode 100644
index 0000000..46c724e
--- /dev/null
+++ b/testdata/htsjdk/samtools/metrics/metricsOneCopy.metrics
@@ -0,0 +1,13 @@
+## htsjdk.samtools.metrics.StringHeader
+# picard.illumina.MarkIlluminaAdapters INPUT=testdata/picard/illumina/MarkIlluminaAdaptersTest/unevenReads.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven5946421709712534555.sam METRICS=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven4591996041776878558.metrics MIN_MATCH_BASES_SE=12 MIN_MATCH_BASES_PE=6 MAX_ERROR_RATE_SE=0.1 MAX_ERROR_RATE_PE=0.1 ADAPTERS=[INDEXED, DUAL_INDEXED, PAIRED_END] ADAPTER_TRUNCATION_LENGTH=30 PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPT [...]
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Mon Aug 24 13:31:51 EDT 2015
+## METRICS CLASS htsjdk.samtools.metrics.MetricsFileTest$TestMetric
+Hello World 2008-12-31 123 9223372036854775807 456.789001 0.713487 Two N A 123 919834781 9223372034707292160 0.55694 0.229233 Y B
+## HISTOGRAM java.lang.Integer
+clipped_bases read_count
+6 1
+7 1
diff --git a/testdata/htsjdk/samtools/metrics/metricsOneModifiedHistogram.metrics b/testdata/htsjdk/samtools/metrics/metricsOneModifiedHistogram.metrics
new file mode 100644
index 0000000..3e6f088
--- /dev/null
+++ b/testdata/htsjdk/samtools/metrics/metricsOneModifiedHistogram.metrics
@@ -0,0 +1,14 @@
+## htsjdk.samtools.metrics.StringHeader
+# picard.illumina.MarkIlluminaAdapters INPUT=testdata/picard/illumina/MarkIlluminaAdaptersTest/unevenReads.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven5946421709712534555.sam METRICS=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven4591996041776878558.metrics MIN_MATCH_BASES_SE=12 MIN_MATCH_BASES_PE=6 MAX_ERROR_RATE_SE=0.1 MAX_ERROR_RATE_PE=0.1 ADAPTERS=[INDEXED, DUAL_INDEXED, PAIRED_END] ADAPTER_TRUNCATION_LENGTH=30 PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPT [...]
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Mon Aug 24 13:31:51 EDT 2015
+## METRICS CLASS htsjdk.samtools.metrics.MetricsFileTest$TestMetric
+Hello World 2008-12-31 123 9223372036854775807 456.789001 0.713487 Two N A 123 919834781 9223372034707292160 0.55694 0.229233 Y B
+## HISTOGRAM java.lang.Integer
+clipped_bases read_count
+6 1
+7 1
+8 1
diff --git a/testdata/htsjdk/samtools/metrics/metricsOneModifiedMetrics.metrics b/testdata/htsjdk/samtools/metrics/metricsOneModifiedMetrics.metrics
new file mode 100644
index 0000000..a4d23d8
--- /dev/null
+++ b/testdata/htsjdk/samtools/metrics/metricsOneModifiedMetrics.metrics
@@ -0,0 +1,13 @@
+## htsjdk.samtools.metrics.StringHeader
+# picard.illumina.MarkIlluminaAdapters INPUT=testdata/picard/illumina/MarkIlluminaAdaptersTest/unevenReads.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven5946421709712534555.sam METRICS=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven4591996041776878558.metrics MIN_MATCH_BASES_SE=12 MIN_MATCH_BASES_PE=6 MAX_ERROR_RATE_SE=0.1 MAX_ERROR_RATE_PE=0.1 ADAPTERS=[INDEXED, DUAL_INDEXED, PAIRED_END] ADAPTER_TRUNCATION_LENGTH=30 PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPT [...]
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Mon Aug 24 13:31:51 EDT 2015
+## METRICS CLASS htsjdk.samtools.metrics.MetricsFileTest$TestMetric
+Hello World 2008-12-31 122 9223372036854775807 456.789001 0.713487 Two N A 123 919834781 9223372034707292160 0.55694 0.229233 Y B
+## HISTOGRAM java.lang.Integer
+clipped_bases read_count
+6 1
+7 1
diff --git a/testdata/htsjdk/samtools/sra/test_archive.sra b/testdata/htsjdk/samtools/sra/test_archive.sra
new file mode 100644
index 0000000..a9b6e70
Binary files /dev/null and b/testdata/htsjdk/samtools/sra/test_archive.sra differ
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/htsjdk.git
More information about the debian-med-commit
mailing list